Initial community commit

2024-09-24 14:54:57 +02:00
parent 537bcbc862
commit 20d28e80a5
16810 changed files with 4640254 additions and 2 deletions
@@ -0,0 +1,312 @@
+/****************************************************************************
+*
+*   Module Title :     fdct.c
+*
+*   Description  :     Fast 8x8 DCT C-Implementation.
+*
+****************************************************************************/
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "dct.h"
+
+/****************************************************************************
+*  Macros
+****************************************************************************/
+#define SIGNBITDUPPED(X) ( (signed )((X & 0x80000000)) >> 31 )
+#define DOROUND(X) X = ( (SIGNBITDUPPED(X) & (0xffff)) + X ); 
+
+/****************************************************************************
+*  Module statics
+****************************************************************************/
+static INT32 xC1S7 = 64277;
+static INT32 xC2S6 = 60547;
+static INT32 xC3S5 = 54491;
+static INT32 xC4S4 = 46341;
+static INT32 xC5S3 = 36410;
+static INT32 xC6S2 = 25080;
+static INT32 xC7S1 = 12785;
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : fdct_short_C_orig
+ *
+ *  INPUTS        : INT16 *InputData  : 16-bit input data.
+ *
+ *  OUTPUTS       : INT16 *OutputData : 16-bit transform coefficients.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Performs an 8x8 2-D fast DCT.
+ *
+ *                  The algorithm used is derived from the flowgraph for
+ *                  the Vetterli and Ligtenberg fast 1-D dct given in the
+ *                  JPEG reference book by Pennebaker and Mitchell.
+ *
+ *  SPECIAL NOTES : None. 
+ *
+ ****************************************************************************/
+void fdct_short_C_orig ( INT16 *InputData, INT16 *OutputData )
+{
+	int    loop;
+	INT32  is07, is12, is34, is56;
+	INT32  is0734, is1256;
+	INT32  id07, id12, id34, id56; 
+	INT32  irot_input_x, irot_input_y;
+	INT32  icommon_product1;            // Re-used product  (c4s4 * (s12 - s56)). 
+	INT32  icommon_product2;            // Re-used product  (c4s4 * (d12 + d56)).
+	INT32  temp1, temp2;	            // intermediate variable for computation
+	INT32  InterData[64];
+
+    INT32 *ip = InterData;
+	INT16 *op = OutputData;
+	
+    for ( loop=0; loop<8; loop++ )
+	{
+		// Pre calculate some common sums and differences.
+		is07 = InputData[0] + InputData[7];
+		is12 = InputData[1] + InputData[2];
+		is34 = InputData[3] + InputData[4];
+		is56 = InputData[5] + InputData[6];
+
+		id07 = InputData[0] - InputData[7];
+		id12 = InputData[1] - InputData[2];
+		id34 = InputData[3] - InputData[4];
+		id56 = InputData[5] - InputData[6];
+	
+		is0734 = is07 + is34;
+		is1256 = is12 + is56;
+		
+		// Pre-Calculate some common product terms.
+		icommon_product1 = xC4S4*(is12 - is56); 
+		DOROUND ( icommon_product1 )
+		icommon_product1 >>= 16;
+		
+		icommon_product2 = xC4S4*(id12 + id56);
+		DOROUND ( icommon_product2 )
+		icommon_product2 >>= 16;
+
+		ip[0] = (xC4S4*(is0734 + is1256));
+		DOROUND ( ip[0] );
+		ip[0] >>= 16;
+
+		ip[4] = (xC4S4*(is0734 - is1256));
+		DOROUND ( ip[4] );
+		ip[4] >>= 16;
+
+		// Define inputs to rotation for outputs 2 and 6 
+		irot_input_x = id12 - id56;
+		irot_input_y = is07 - is34;
+
+		// Apply rotation for outputs 2 and 6. 
+		temp1 = xC6S2*irot_input_x;
+		DOROUND ( temp1 );
+		temp1 >>= 16;
+		temp2 = xC2S6*irot_input_y;
+		DOROUND ( temp2 );
+		temp2 >>= 16;
+		ip[2] = temp1 + temp2;
+
+		temp1 = xC6S2*irot_input_y;
+		DOROUND ( temp1 );
+		temp1 >>= 16;
+		temp2 = xC2S6*irot_input_x;
+		DOROUND ( temp2 );
+		temp2 >>= 16;
+		ip[6] = temp1 -temp2;
+
+		// Define inputs to rotation for outputs 1 and 7 
+		irot_input_x = icommon_product1 + id07;
+		irot_input_y = -( id34 + icommon_product2 );
+
+		// Apply rotation for outputs 1 and 7. 
+		temp1 = xC1S7*irot_input_x;
+		DOROUND ( temp1 );
+		temp1 >>= 16;
+		temp2 = xC7S1*irot_input_y;
+		DOROUND ( temp2 );
+		temp2 >>= 16;
+		ip[1] = temp1 - temp2;
+
+		temp1 = xC7S1*irot_input_x;
+		DOROUND ( temp1 );
+		temp1 >>= 16;
+		temp2 = xC1S7*irot_input_y;
+		DOROUND ( temp2 );
+		temp2 >>= 16;
+		ip[7] = temp1 + temp2;
+		
+		// Define inputs to rotation for outputs 3 and 5 
+		irot_input_x = id07 - icommon_product1;
+		irot_input_y = id34 - icommon_product2;
+
+		// Apply rotation for outputs 3 and 5. 
+		temp1 = xC3S5 * irot_input_x;
+		DOROUND ( temp1 );
+		temp1 >>= 16;
+		temp2 = xC5S3*irot_input_y;
+		DOROUND ( temp2 );
+		temp2 >>= 16;
+		ip[3] = temp1 - temp2;
+
+		temp1 = xC5S3*irot_input_x;
+		DOROUND ( temp1 );
+		temp1 >>= 16;
+		temp2 = xC3S5*irot_input_y;
+		DOROUND ( temp2 );
+		temp2 >>= 16;
+		ip[5] = temp1 + temp2;
+		
+		// Increment data pointer for next row. 
+		InputData += 8;
+		ip += 8;		// advance pointer to next row 
+	}
+
+	//	Performed DCT on rows, now transform the columns	
+	ip = InterData;
+	for ( loop=0; loop<8; loop++ )
+	{
+		// Pre calculate some common sums and differences. 
+		is07 = ip[0 * 8] + ip[7 * 8];
+		is12 = ip[1 * 8] + ip[2 * 8];
+		is34 = ip[3 * 8] + ip[4 * 8];
+		is56 = ip[5 * 8] + ip[6 * 8];
+
+		id07 = ip[0 * 8] - ip[7 * 8];
+		id12 = ip[1 * 8] - ip[2 * 8];
+		id34 = ip[3 * 8] - ip[4 * 8];
+		id56 = ip[5 * 8] - ip[6 * 8];
+	
+		is0734 = is07 + is34;
+		is1256 = is12 + is56;
+		
+		// Pre-Calculate some common product terms.
+		icommon_product1 = xC4S4*(is12 - is56); 
+		icommon_product2 = xC4S4*(id12 + id56);
+		DOROUND ( icommon_product1 )
+		DOROUND ( icommon_product2 )
+		icommon_product1 >>= 16;
+		icommon_product2 >>= 16;
+
+		temp1 = xC4S4*(is0734 + is1256);
+		temp2 = xC4S4*(is0734 - is1256);
+		DOROUND ( temp1 );
+		DOROUND ( temp2 );
+		temp1 >>= 16;
+		temp2 >>= 16;
+		op[0*8] = (INT16)temp1;
+		op[4*8] = (INT16)temp2;
+
+		// Define inputs to rotation for outputs 2 and 6 
+		irot_input_x = id12 - id56;
+		irot_input_y = is07 - is34;
+
+		// Apply rotation for outputs 2 and 6. 
+		temp1 = xC6S2*irot_input_x;
+		DOROUND ( temp1 );
+		temp1 >>= 16;
+		temp2 = xC2S6*irot_input_y;
+		DOROUND ( temp2 );
+		temp2 >>= 16;
+		op[2*8] = (INT16)(temp1 + temp2);
+
+		temp1 = xC6S2*irot_input_y;
+		DOROUND ( temp1 );
+		temp1 >>= 16;
+		temp2 = xC2S6*irot_input_x;
+		DOROUND ( temp2 );
+		temp2 >>= 16;
+		op[6*8] = (INT16)(temp1 -temp2);
+
+		// Define inputs to rotation for outputs 1 and 7 
+		irot_input_x = icommon_product1 + id07;
+		irot_input_y = -( id34 + icommon_product2 );
+
+		// Apply rotation for outputs 1 and 7. 
+		temp1 = xC1S7*irot_input_x;
+		DOROUND ( temp1 );
+		temp1 >>= 16;
+		temp2 = xC7S1*irot_input_y;
+		DOROUND ( temp2 );
+		temp2 >>= 16;
+		op[1*8] = (INT16) (temp1 - temp2);
+
+		temp1 = xC7S1*irot_input_x;
+		DOROUND ( temp1 );
+		temp1 >>= 16;
+		temp2 = xC1S7*irot_input_y;
+		DOROUND ( temp2 );
+		temp2 >>= 16;
+		op[7*8] = (INT16)(temp1 + temp2);
+
+		// Define inputs to rotation for outputs 3 and 5 
+		irot_input_x = id07 - icommon_product1;
+		irot_input_y = id34 - icommon_product2;
+
+		// Apply rotation for outputs 3 and 5. 
+		temp1 = xC3S5*irot_input_x;
+		DOROUND ( temp1 );
+		temp1 >>= 16;
+		temp2 = xC5S3*irot_input_y;
+		DOROUND ( temp2 );
+		temp2 >>= 16;
+		op[3*8] = (INT16)(temp1 - temp2);
+
+		temp1 = xC5S3*irot_input_x;
+		DOROUND ( temp1 );
+		temp1 >>= 16;
+		temp2 = xC3S5*irot_input_y;
+		DOROUND ( temp2 );
+		temp2 >>= 16;
+		op[5*8] = (INT16) (temp1 + temp2);
+
+		// Increment data pointer for next column. 
+		ip ++;
+		op ++;
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : fdct_short_C
+ *
+ *  INPUTS        : INT16 *InputData  : 16-bit input data.
+ *
+ *  OUTPUTS       : INT16 *OutputData : 16-bit transform coefficients.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Performs an 8x8 2-D fast DCT.
+ *
+ *                  The function to up the precision of FDCT by number of bits 
+ *                  defined by FDCT_PRECISION_BITS.
+ *
+ *  SPECIAL NOTES : None. 
+ *
+ ****************************************************************************/
+void fdct_short_C ( INT16 *DCTDataBuffer, INT16 *DCT_codes )
+{
+
+    INT32 i;
+
+	// Increase precision on input to fdct
+	for ( i = 0; i < 64; i++ )
+		DCTDataBuffer[i] = DCTDataBuffer[i] << FDCT_PRECISION_BITS;
+
+	// Transform the error signal using the forward DCT to get set of transform coefficients
+	fdct_short_C_orig ( DCTDataBuffer, DCT_codes );
+
+	// Strip off the extra bits from the DCT output.
+	// This should ultimately be merged into the quantize process but there are also
+	// implications for DC prediction that would then need to be sorted
+	for ( i = 0; i < 64; i++ )
+	{	
+		// signed shift modified so behaves like "/" (truncates towards 0 for + and -)
+		if ( DCT_codes[i]  >= 0 )
+			DCT_codes[i] = (DCT_codes[i]) >> FDCT_PRECISION_BITS;
+		else
+			DCT_codes[i] = (DCT_codes[i] + FDCT_PRECISION_NEG_ADJ) >> FDCT_PRECISION_BITS;
+	}
+
+}
@@ -0,0 +1,921 @@
+/****************************************************************************
+*
+*   Module Title :     idctpart.c
+*
+*   Description  :     IDCT with multiple versions based on # of non 0 coeffs
+*
+****************************************************************************/
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+
+#include "dct.h"
+#include "string.h"
+
+/****************************************************************************
+*  Macros
+****************************************************************************/
+#define int32 int
+#define int16 short
+#define IdctAdjustBeforeShift 8
+
+#define xC1S7 64277
+#define xC2S6 60547
+#define xC3S5 54491
+#define xC4S4 46341
+#define xC5S3 36410
+#define xC6S2 25080
+#define xC7S1 12785
+
+/****************************************************************************
+*  Module statics
+****************************************************************************/
+static const UINT32 dequant_index[64] = 
+{	
+    0,  1,  8,  16,  9,  2,  3, 10,
+	17, 24, 32, 25, 18, 11,  4,  5,
+    12, 19, 26, 33, 40, 48, 41, 34,
+    27, 20, 13,  6,  7, 14, 21, 28,
+    35, 42, 49, 56, 57, 50, 43, 36, 
+    29, 22, 15, 23, 30, 37, 44, 51,
+    58, 59, 52, 45, 38, 31, 39, 46,
+    53, 60, 61, 54, 47, 55, 62, 63
+};
+
+#if 0   // AWG CODE NO LONGER USED IN CODEBASE.
+/*	Cos and Sin constant multipliers used during DCT and IDCT */
+const double C1S7 = (double)0.9807852804032;
+const double C2S6 = (double)0.9238795325113;
+const double C3S5 = (double)0.8314696123025;
+const double C4S4 = (double)0.7071067811865;
+const double C5S3 = (double)0.5555702330196;
+const double C6S2 = (double)0.3826834323651;
+const double C7S1 = (double)0.1950903220161;
+
+/****************************************************************************
+*  Exports
+****************************************************************************/
+
+// DCT lookup tables
+INT32 * C4S4_TablePtr;
+INT32 C4S4_Table[(COEFF_MAX * 4) + 1];
+
+INT32 * C6S2_TablePtr;
+INT32 C6S2_Table[(COEFF_MAX * 2) + 1];
+
+INT32 * C2S6_TablePtr;
+INT32 C2S6_Table[(COEFF_MAX * 2) + 1];
+
+INT32 * C1S7_TablePtr;
+INT32 C1S7_Table[(COEFF_MAX * 2) + 1];
+
+INT32 * C7S1_TablePtr;
+INT32 C7S1_Table[(COEFF_MAX * 2) + 1];
+
+INT32 * C3S5_TablePtr;
+INT32 C3S5_Table[(COEFF_MAX * 2) + 1];
+
+INT32 * C5S3_TablePtr;
+INT32 C5S3_Table[(COEFF_MAX * 2) + 1];
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     InitDctTables
+ *
+ *  INPUTS        :     None.
+ *
+ *  OUTPUTS       :     None.
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     Initialises lookup tables used in IDCT.
+ *
+ *  SPECIAL NOTES :     NO LONGER USED IN CODEBASE. 
+ *
+ ****************************************************************************/
+void InitDctTables ( void )
+{
+    INT32 i;
+
+    C4S4_TablePtr = &C4S4_Table[COEFF_MAX*2];
+    for( i = -(2 * COEFF_MAX); i < (2 * COEFF_MAX); i++ )
+    {
+        if ( i < 0 )
+            C4S4_TablePtr[i] = (INT32)((i * C4S4) - 0.5);
+        else
+            C4S4_TablePtr[i] = (INT32)((i * C4S4) + 0.5);
+    }
+
+    C6S2_TablePtr = &C6S2_Table[COEFF_MAX];
+    for( i = -COEFF_MAX ; i < COEFF_MAX; i++ )
+    {
+        if ( i < 0 )
+            C6S2_TablePtr[i] = (INT32)((i * C6S2) - 0.5);
+        else
+            C6S2_TablePtr[i] = (INT32)((i * C6S2) + 0.5);
+    }
+
+    C2S6_TablePtr = &C2S6_Table[COEFF_MAX];
+    for( i = -COEFF_MAX ; i < COEFF_MAX; i++ )
+    {
+        if ( i < 0 )
+            C2S6_TablePtr[i] = (INT32)((i * C2S6) - 0.5);
+        else
+            C2S6_TablePtr[i] = (INT32)((i * C2S6) + 0.5);
+    }
+
+    C1S7_TablePtr = &C1S7_Table[COEFF_MAX];
+    for( i = -COEFF_MAX ; i < COEFF_MAX; i++ )
+    {
+        if ( i < 0 )
+            C1S7_TablePtr[i] = (INT32)((i * C1S7) - 0.5);
+        else
+            C1S7_TablePtr[i] = (INT32)((i * C1S7) + 0.5);
+    }
+
+    C7S1_TablePtr = &C7S1_Table[COEFF_MAX];
+    for( i = -COEFF_MAX ; i < COEFF_MAX; i++ )
+    {
+        if ( i < 0 )
+            C7S1_TablePtr[i] = (INT32)((i * C7S1) - 0.5);
+        else
+            C7S1_TablePtr[i] = (INT32)((i * C7S1) + 0.5);
+    }
+
+    C3S5_TablePtr = &C3S5_Table[COEFF_MAX];
+    for( i = -COEFF_MAX ; i < COEFF_MAX; i++ )
+    {
+        if ( i < 0 )
+            C3S5_TablePtr[i] = (INT32)((i * C3S5) - 0.5);
+        else
+            C3S5_TablePtr[i] = (INT32)((i * C3S5) + 0.5);
+    }
+
+    C5S3_TablePtr = &C5S3_Table[COEFF_MAX];
+    for( i = -COEFF_MAX ; i < COEFF_MAX; i++ )
+    {
+        if ( i < 0 )
+            C5S3_TablePtr[i] = (INT32)((i * C5S3) - 0.5);
+        else
+            C5S3_TablePtr[i] = (INT32)((i * C5S3) + 0.5);
+    }
+}
+#endif
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : dequant_slow
+ *
+ *  INPUTS        : INT16 *dequant_coeffs : Pointer to dequantization step sizes.
+ *                  INT16 *quantized_list : Pointer to quantized DCT coeffs
+ *                                          (in zig-zag order).
+ *
+ *  OUTPUTS       : INT32 *DCT_block      : Pointer to 8x8 de-quantized block
+ *                                          (in 2-D raster order).
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : De-quantizes an 8x8 block of quantized DCT coeffs.
+ *
+ *  SPECIAL NOTES : Uses dequant_index to invert zig-zag ordering. 
+ *
+ ****************************************************************************/
+void dequant_slow ( INT16 *dequant_coeffs, INT16 *quantized_list, INT32 *DCT_block )
+{
+    // Loop fully expanded for maximum speed
+    DCT_block[dequant_index[0]]  = quantized_list[0]  * dequant_coeffs[0];
+    DCT_block[dequant_index[1]]  = quantized_list[1]  * dequant_coeffs[1];
+    DCT_block[dequant_index[2]]  = quantized_list[2]  * dequant_coeffs[2];
+    DCT_block[dequant_index[3]]  = quantized_list[3]  * dequant_coeffs[3];
+    DCT_block[dequant_index[4]]  = quantized_list[4]  * dequant_coeffs[4];
+    DCT_block[dequant_index[5]]  = quantized_list[5]  * dequant_coeffs[5];
+    DCT_block[dequant_index[6]]  = quantized_list[6]  * dequant_coeffs[6];
+    DCT_block[dequant_index[7]]  = quantized_list[7]  * dequant_coeffs[7];
+    DCT_block[dequant_index[8]]  = quantized_list[8]  * dequant_coeffs[8];
+    DCT_block[dequant_index[9]]  = quantized_list[9]  * dequant_coeffs[9];
+    DCT_block[dequant_index[10]] = quantized_list[10] * dequant_coeffs[10];
+    DCT_block[dequant_index[11]] = quantized_list[11] * dequant_coeffs[11];
+    DCT_block[dequant_index[12]] = quantized_list[12] * dequant_coeffs[12];
+    DCT_block[dequant_index[13]] = quantized_list[13] * dequant_coeffs[13];
+    DCT_block[dequant_index[14]] = quantized_list[14] * dequant_coeffs[14];
+    DCT_block[dequant_index[15]] = quantized_list[15] * dequant_coeffs[15];
+    DCT_block[dequant_index[16]] = quantized_list[16] * dequant_coeffs[16];
+    DCT_block[dequant_index[17]] = quantized_list[17] * dequant_coeffs[17];
+    DCT_block[dequant_index[18]] = quantized_list[18] * dequant_coeffs[18];
+    DCT_block[dequant_index[19]] = quantized_list[19] * dequant_coeffs[19];
+    DCT_block[dequant_index[20]] = quantized_list[20] * dequant_coeffs[20];
+    DCT_block[dequant_index[21]] = quantized_list[21] * dequant_coeffs[21];
+    DCT_block[dequant_index[22]] = quantized_list[22] * dequant_coeffs[22];
+    DCT_block[dequant_index[23]] = quantized_list[23] * dequant_coeffs[23];
+    DCT_block[dequant_index[24]] = quantized_list[24] * dequant_coeffs[24];
+    DCT_block[dequant_index[25]] = quantized_list[25] * dequant_coeffs[25];
+    DCT_block[dequant_index[26]] = quantized_list[26] * dequant_coeffs[26];
+    DCT_block[dequant_index[27]] = quantized_list[27] * dequant_coeffs[27];
+    DCT_block[dequant_index[28]] = quantized_list[28] * dequant_coeffs[28];
+    DCT_block[dequant_index[29]] = quantized_list[29] * dequant_coeffs[29];
+    DCT_block[dequant_index[30]] = quantized_list[30] * dequant_coeffs[30];
+    DCT_block[dequant_index[31]] = quantized_list[31] * dequant_coeffs[31];
+    DCT_block[dequant_index[32]] = quantized_list[32] * dequant_coeffs[32];
+    DCT_block[dequant_index[33]] = quantized_list[33] * dequant_coeffs[33];
+    DCT_block[dequant_index[34]] = quantized_list[34] * dequant_coeffs[34];
+    DCT_block[dequant_index[35]] = quantized_list[35] * dequant_coeffs[35];
+    DCT_block[dequant_index[36]] = quantized_list[36] * dequant_coeffs[36];
+    DCT_block[dequant_index[37]] = quantized_list[37] * dequant_coeffs[37];
+    DCT_block[dequant_index[38]] = quantized_list[38] * dequant_coeffs[38];
+    DCT_block[dequant_index[39]] = quantized_list[39] * dequant_coeffs[39];
+    DCT_block[dequant_index[40]] = quantized_list[40] * dequant_coeffs[40];
+    DCT_block[dequant_index[41]] = quantized_list[41] * dequant_coeffs[41];
+    DCT_block[dequant_index[42]] = quantized_list[42] * dequant_coeffs[42];
+    DCT_block[dequant_index[43]] = quantized_list[43] * dequant_coeffs[43];
+    DCT_block[dequant_index[44]] = quantized_list[44] * dequant_coeffs[44];
+    DCT_block[dequant_index[45]] = quantized_list[45] * dequant_coeffs[45];
+    DCT_block[dequant_index[46]] = quantized_list[46] * dequant_coeffs[46];
+    DCT_block[dequant_index[47]] = quantized_list[47] * dequant_coeffs[47];
+    DCT_block[dequant_index[48]] = quantized_list[48] * dequant_coeffs[48];
+    DCT_block[dequant_index[49]] = quantized_list[49] * dequant_coeffs[49];
+    DCT_block[dequant_index[50]] = quantized_list[50] * dequant_coeffs[50];
+    DCT_block[dequant_index[51]] = quantized_list[51] * dequant_coeffs[51];
+    DCT_block[dequant_index[52]] = quantized_list[52] * dequant_coeffs[52];
+    DCT_block[dequant_index[53]] = quantized_list[53] * dequant_coeffs[53];
+    DCT_block[dequant_index[54]] = quantized_list[54] * dequant_coeffs[54];
+    DCT_block[dequant_index[55]] = quantized_list[55] * dequant_coeffs[55];
+    DCT_block[dequant_index[56]] = quantized_list[56] * dequant_coeffs[56];
+    DCT_block[dequant_index[57]] = quantized_list[57] * dequant_coeffs[57];
+    DCT_block[dequant_index[58]] = quantized_list[58] * dequant_coeffs[58];
+    DCT_block[dequant_index[59]] = quantized_list[59] * dequant_coeffs[59];
+    DCT_block[dequant_index[60]] = quantized_list[60] * dequant_coeffs[60];
+    DCT_block[dequant_index[61]] = quantized_list[61] * dequant_coeffs[61];
+    DCT_block[dequant_index[62]] = quantized_list[62] * dequant_coeffs[62];
+    DCT_block[dequant_index[63]] = quantized_list[63] * dequant_coeffs[63];
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : IDctSlow
+ *
+ *  INPUTS        : int16 *InputData   : Pointer to 8x8 quantized DCT coefficients.
+ *                  int16 *QuantMatrix : Pointer to 8x8 quantization matrix.
+ *
+ *  OUTPUTS       : int16 *OutputData  : Pointer to 8x8 block to hold output.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Inverse quantizes and inverse DCT's input 8x8 block
+ *                  to reproduce prediction error.
+ *
+ *  SPECIAL NOTES : None. 
+ *
+ ****************************************************************************/
+void IDctSlow ( int16 *InputData, int16 *QuantMatrix, int16 *OutputData )
+{
+	int   loop;
+	int32 t1, t2;
+    int32 IntermediateData[64];
+	int32 _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H;
+	int32 _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
+	
+    int32 *ip = IntermediateData;
+	int16 *op = OutputData;
+	
+	// dequantize the input 
+	dequant_slow ( QuantMatrix, InputData, IntermediateData );
+
+	// Inverse DCT on the rows now
+	for ( loop=0; loop<8; loop++ )
+	{
+		// Check for non-zero values
+		if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] )
+		{
+			t1 = (int32)(xC1S7 * ip[1]);
+            t2 = (int32)(xC7S1 * ip[7]);
+            t1 >>= 16;
+            t2 >>= 16;
+			_A = t1 + t2;
+
+			t1 = (int32)(xC7S1 * ip[1]);
+			t2 = (int32)(xC1S7 * ip[7]);
+            t1 >>= 16;
+            t2 >>= 16;
+			_B = t1 - t2;
+
+			t1 = (int32)(xC3S5 * ip[3]);
+			t2 = (int32)(xC5S3 * ip[5]);
+            t1 >>= 16;
+            t2 >>= 16;
+			_C = t1 + t2;
+
+			t1 = (int32)(xC3S5 * ip[5]);
+			t2 = (int32)(xC5S3 * ip[3]);
+            t1 >>= 16;
+            t2 >>= 16;
+			_D = t1 - t2;
+
+			t1 = (int32)(xC4S4 * (_A - _C));
+            t1 >>= 16;
+			_Ad = t1;
+
+			t1 = (int32)(xC4S4 * (_B - _D));
+            t1 >>= 16;
+			_Bd = t1;
+			
+			_Cd = _A + _C;
+			_Dd = _B + _D;
+
+			t1 = (int32)(xC4S4 * (ip[0] + ip[4]));
+            t1 >>= 16;
+			_E = t1;
+
+			t1 = (int32)(xC4S4 * (ip[0] - ip[4]));
+            t1 >>= 16;
+			_F = t1;
+			
+			t1 = (int32)(xC2S6 * ip[2]);
+			t2 = (int32)(xC6S2 * ip[6]);
+            t1 >>= 16;
+            t2 >>= 16;
+			_G = t1 + t2;
+
+			t1 = (int32)(xC6S2 * ip[2]);
+			t2 = (int32)(xC2S6 * ip[6]);
+            t1 >>= 16;
+            t2 >>= 16;
+			_H = t1 - t2;
+
+			_Ed = _E - _G;
+			_Gd = _E + _G;
+
+			_Add = _F + _Ad;
+			_Bdd = _Bd - _H;
+			
+			_Fd = _F - _Ad;
+			_Hd = _Bd + _H;
+	
+			// Final sequence of operations over-write original inputs.
+			ip[0] = (int16)((_Gd + _Cd )  >> 0);
+			ip[7] = (int16)((_Gd - _Cd )  >> 0);
+
+			ip[1] = (int16)((_Add + _Hd ) >> 0);
+			ip[2] = (int16)((_Add - _Hd ) >> 0);
+
+			ip[3] = (int16)((_Ed + _Dd )  >> 0);
+			ip[4] = (int16)((_Ed - _Dd )  >> 0);
+
+			ip[5] = (int16)((_Fd + _Bdd ) >> 0);
+			ip[6] = (int16)((_Fd - _Bdd ) >> 0);
+		}
+
+		ip += 8;			/* next row */
+	}
+
+	ip = IntermediateData;
+
+	for ( loop=0; loop<8; loop++ )
+	{
+		// Check for non-zero values (bitwise | faster than logical ||)
+		if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
+			 ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] )
+		{
+
+			t1 = (int32)(xC1S7 * ip[1*8]);
+            t2 = (int32)(xC7S1 * ip[7*8]);
+            t1 >>= 16;
+            t2 >>= 16;
+			_A = t1 + t2;
+
+			t1 = (int32)(xC7S1 * ip[1*8]);
+			t2 = (int32)(xC1S7 * ip[7*8]);
+            t1 >>= 16;
+            t2 >>= 16;
+			_B = t1 - t2;
+
+			t1 = (int32)(xC3S5 * ip[3*8]);
+			t2 = (int32)(xC5S3 * ip[5*8]);
+            t1 >>= 16;
+            t2 >>= 16;
+			_C = t1 + t2;
+
+			t1 = (int32)(xC3S5 * ip[5*8]);
+			t2 = (int32)(xC5S3 * ip[3*8]);
+            t1 >>= 16;
+            t2 >>= 16;
+			_D = t1 - t2;
+
+			t1 = (int32)(xC4S4 * (_A - _C));
+            t1 >>= 16;
+			_Ad = t1;
+
+			t1 = (int32)(xC4S4 * (_B - _D));
+            t1 >>= 16;
+			_Bd = t1;
+
+			_Cd = _A + _C;
+			_Dd = _B + _D;
+
+			t1 = (int32)(xC4S4 * (ip[0*8] + ip[4*8]));
+            t1 >>= 16;
+			_E = t1;
+
+			t1 = (int32)(xC4S4 * (ip[0*8] - ip[4*8]));
+            t1 >>= 16;
+			_F = t1;
+			
+			t1 = (int32)(xC2S6 * ip[2*8]);
+			t2 = (int32)(xC6S2 * ip[6*8]);
+            t1 >>= 16;
+            t2 >>= 16;
+			_G = t1 + t2;
+
+			t1 = (int32)(xC6S2 * ip[2*8]);
+			t2 = (int32)(xC2S6 * ip[6*8]);
+            t1 >>= 16;
+            t2 >>= 16;
+			_H = t1 - t2;
+			
+			_Ed = _E - _G;
+			_Gd = _E + _G;
+
+			_Add = _F + _Ad;
+			_Bdd = _Bd - _H;
+			
+			_Fd = _F - _Ad;
+			_Hd = _Bd + _H;
+	
+			_Gd += IdctAdjustBeforeShift;
+			_Add += IdctAdjustBeforeShift;
+			_Ed += IdctAdjustBeforeShift;
+			_Fd += IdctAdjustBeforeShift;
+
+			// Final sequence of operations over-write original inputs.
+			op[0*8] = (int16)((_Gd + _Cd )  >> 4);
+			op[7*8] = (int16)((_Gd - _Cd )  >> 4);
+
+			op[1*8] = (int16)((_Add + _Hd ) >> 4);
+			op[2*8] = (int16)((_Add - _Hd ) >> 4);
+
+			op[3*8] = (int16)((_Ed + _Dd )  >> 4);
+			op[4*8] = (int16)((_Ed - _Dd )  >> 4);
+
+			op[5*8] = (int16)((_Fd + _Bdd ) >> 4);
+			op[6*8] = (int16)((_Fd - _Bdd ) >> 4);
+		}
+		else
+		{
+			op[0*8] = 0;
+			op[7*8] = 0;
+			op[1*8] = 0;
+			op[2*8] = 0;
+			op[3*8] = 0;
+			op[4*8] = 0;
+			op[5*8] = 0;
+			op[6*8] = 0;
+		}
+
+		ip++;			// next column
+        op++;
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : dequant_slow10
+ *
+ *  INPUTS        : INT16 *dequant_coeffs : Pointer to dequantization step sizes.
+ *                  INT16 *quantized_list : Pointer to quantized DCT coeffs
+ *                                          (in zig-zag order).
+ *
+ *  OUTPUTS       : INT32 *DCT_block      : Pointer to 8x8 de-quantized block
+ *                                          (in 2-D raster order).
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : De-quantizes an 8x8 block of quantized DCT coeffs that
+ *                  only has non-zero coefficients in the first 10, i.e.
+ *                  only DC & AC1-9 are non-zero, AC10-63 __MUST_BE_ zero.
+ *
+ *  SPECIAL NOTES : Uses dequant_index to invert zig-zag ordering. 
+ *
+ ****************************************************************************/
+void dequant_slow10 ( INT16 *dequant_coeffs, INT16 *quantized_list, INT32 *DCT_block )
+{
+	memset(DCT_block,0, 128);
+
+	// Loop fully expanded for maximum speed
+    DCT_block[dequant_index[0]]  = quantized_list[0]  * dequant_coeffs[0];
+    DCT_block[dequant_index[1]]  = quantized_list[1]  * dequant_coeffs[1];
+    DCT_block[dequant_index[2]]  = quantized_list[2]  * dequant_coeffs[2];
+    DCT_block[dequant_index[3]]  = quantized_list[3]  * dequant_coeffs[3];
+    DCT_block[dequant_index[4]]  = quantized_list[4]  * dequant_coeffs[4];
+    DCT_block[dequant_index[5]]  = quantized_list[5]  * dequant_coeffs[5];
+    DCT_block[dequant_index[6]]  = quantized_list[6]  * dequant_coeffs[6];
+    DCT_block[dequant_index[7]]  = quantized_list[7]  * dequant_coeffs[7];
+    DCT_block[dequant_index[8]]  = quantized_list[8]  * dequant_coeffs[8];
+    DCT_block[dequant_index[9]]  = quantized_list[9]  * dequant_coeffs[9];
+    DCT_block[dequant_index[10]] = quantized_list[10] * dequant_coeffs[10];
+
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : IDctSlow10
+ *
+ *  INPUTS        : int16 *InputData   : Pointer to 8x8 quantized DCT coefficients.
+ *                  int16 *QuantMatrix : Pointer to 8x8 quantization matrix.
+ *
+ *  OUTPUTS       : int16 *OutputData  : Pointer to 8x8 block to hold output.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Inverse quantizes and inverse DCT's input 8x8 block
+ *                  with non-zero coeffs only in DC & the first 9 AC coeffs.
+ *                  i.e. non-zeros ONLY in the following 10 positions:
+ *                  
+ *                          x  x  x  x  0  0  0  0
+ *                          x  x  x  0  0  0  0  0
+ *                          x  x  0  0  0  0  0  0
+ *                          x  0  0  0  0  0  0  0
+ *                          0  0  0  0  0  0  0  0
+ *                          0  0  0  0  0  0  0  0
+ *                          0  0  0  0  0  0  0  0
+ *                          0  0  0  0  0  0  0  0
+ *
+ *  SPECIAL NOTES : Output data is in raster, not zig-zag, order.
+ *
+ ****************************************************************************/
+void IDct10 ( int16 *InputData, int16 *QuantMatrix, int16 *OutputData )
+{
+	int   loop;
+	int32 t1, t2;
+	int32 IntermediateData[64];
+	int32 _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H;
+	int32 _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
+
+    int32 *ip = IntermediateData;
+	int16 *op = OutputData;
+	
+	// dequantize the input 
+	dequant_slow10 ( QuantMatrix, InputData, IntermediateData );
+
+	// Inverse DCT on the rows now
+	for ( loop=0; loop<4; loop++ )
+	{
+		// Check for non-zero values
+		if ( ip[0] | ip[1] | ip[2] | ip[3] )
+		{
+			t1 = (int32)(xC1S7 * ip[1]);
+            t1 >>= 16;
+			_A = t1; 
+
+			t1 = (int32)(xC7S1 * ip[1]);
+            t1 >>= 16;
+			_B = t1 ;
+
+			t1 = (int32)(xC3S5 * ip[3]);
+            t1 >>= 16;
+			_C = t1; 
+
+			t2 = (int32)(xC5S3 * ip[3]);
+            t2 >>= 16;
+			_D = -t2; 
+
+			t1 = (int32)(xC4S4 * (_A - _C));
+            t1 >>= 16;
+			_Ad = t1;
+
+			t1 = (int32)(xC4S4 * (_B - _D));
+            t1 >>= 16;
+			_Bd = t1;
+			
+			_Cd = _A + _C;
+			_Dd = _B + _D;
+
+			t1 = (int32)(xC4S4 * ip[0] );
+            t1 >>= 16;
+			_E = t1;
+
+			_F = t1;
+			
+			t1 = (int32)(xC2S6 * ip[2]);
+            t1 >>= 16;
+			_G = t1; 
+
+			t1 = (int32)(xC6S2 * ip[2]);
+            t1 >>= 16;
+			_H = t1 ;
+			
+			_Ed = _E - _G;
+			_Gd = _E + _G;
+
+			_Add = _F + _Ad;
+			_Bdd = _Bd - _H;
+			
+			_Fd = _F - _Ad;
+			_Hd = _Bd + _H;
+	
+			// Final sequence of operations over-write original inputs.
+			ip[0] = (int16)((_Gd + _Cd )   >> 0);
+			ip[7] = (int16)((_Gd - _Cd )   >> 0);
+
+			ip[1] = (int16)((_Add + _Hd )  >> 0);
+			ip[2] = (int16)((_Add - _Hd )  >> 0);
+
+			ip[3] = (int16)((_Ed + _Dd )   >> 0);
+			ip[4] = (int16)((_Ed - _Dd )   >> 0);
+
+			ip[5] = (int16)((_Fd + _Bdd )  >> 0);
+			ip[6] = (int16)((_Fd - _Bdd )  >> 0);
+		}
+
+		ip += 8;			/* next row */
+	}
+
+	ip = IntermediateData;
+
+	for ( loop=0; loop<8; loop++ )
+	{	
+		// Check for non-zero values (bitwise or faster than ||)
+		if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] )
+		{
+			t1 = (int32)(xC1S7 * ip[1*8]);
+            t1 >>= 16;
+			_A = t1 ;
+
+			t1 = (int32)(xC7S1 * ip[1*8]);
+            t1 >>= 16;
+			_B = t1 ;
+
+			t1 = (int32)(xC3S5 * ip[3*8]);
+            t1 >>= 16;
+			_C = t1 ;
+
+			t2 = (int32)(xC5S3 * ip[3*8]);
+            t2 >>= 16;
+			_D = - t2;
+
+			t1 = (int32)(xC4S4 * (_A - _C));
+            t1 >>= 16;
+			_Ad = t1;
+
+			t1 = (int32)(xC4S4 * (_B - _D));
+            t1 >>= 16;
+			_Bd = t1;
+
+			_Cd = _A + _C;
+			_Dd = _B + _D;
+
+			t1 = (int32)(xC4S4 * ip[0*8]);
+            t1 >>= 16;
+			_E = t1;
+			_F = t1;
+			
+			t1 = (int32)(xC2S6 * ip[2*8]);
+            t1 >>= 16;
+			_G = t1;
+
+			t1 = (int32)(xC6S2 * ip[2*8]);
+            t1 >>= 16;
+			_H = t1;
+			
+			_Ed = _E - _G;
+			_Gd = _E + _G;
+
+			_Add = _F + _Ad;
+			_Bdd = _Bd - _H;
+			
+			_Fd = _F - _Ad;
+			_Hd = _Bd + _H;
+	
+			_Gd += IdctAdjustBeforeShift;
+			_Add += IdctAdjustBeforeShift;
+			_Ed += IdctAdjustBeforeShift;
+			_Fd += IdctAdjustBeforeShift;
+
+			// Final sequence of operations over-write original inputs.
+			op[0*8] = (int16)((_Gd + _Cd )  >> 4);
+			op[7*8] = (int16)((_Gd - _Cd )  >> 4);
+
+			op[1*8] = (int16)((_Add + _Hd ) >> 4);
+			op[2*8] = (int16)((_Add - _Hd ) >> 4);
+
+			op[3*8] = (int16)((_Ed + _Dd )  >> 4);
+			op[4*8] = (int16)((_Ed - _Dd )  >> 4);
+
+			op[5*8] = (int16)((_Fd + _Bdd ) >> 4);
+			op[6*8] = (int16)((_Fd - _Bdd ) >> 4);
+		}
+		else
+		{
+			op[0*8] = 0;
+			op[7*8] = 0;
+			op[1*8] = 0;
+			op[2*8] = 0;
+			op[3*8] = 0;
+			op[4*8] = 0;
+			op[5*8] = 0;
+			op[6*8] = 0;
+		}
+
+		ip++;	// next column
+        op++;
+	}
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : IDct1
+ *
+ *  INPUTS        : int16 *InputData   : Pointer to 8x8 quantized DCT coefficients.
+ *                  int16 *QuantMatrix : Pointer to 8x8 quantization matrix.
+ *
+ *  OUTPUTS       : int16 *OutputData  : Pointer to 8x8 block to hold output.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Inverse DCT's input 8x8 block with only one non-zero
+ *                  coeff in the DC position:
+ *                  
+ *                          x   0   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *
+ *  SPECIAL NOTES : Output data is in raster, not zig-zag, order.
+ *
+ ****************************************************************************/
+void IDct1 ( int16 *InputData, int16 *QuantMatrix, INT16 *OutputData )
+{
+    INT32 loop;
+	INT16 OutD;
+	
+	OutD = (INT16)((INT32)(InputData[0]*QuantMatrix[0]+15)>>5);
+
+	for ( loop=0; loop<64; loop++ )
+		OutputData[loop] = OutD;
+}
+
+
+#if 0
+/****************************************************************************
+ * 
+ *  ROUTINE       : IDct4
+ *
+ *  INPUTS        : int16 *InputData   : Pointer to 8x8 DCT coefficients.
+ *
+ *  OUTPUTS       : int16 *OutputData  : Pointer to 8x8 block to hold output.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Inverse DCT's input 8x8 block with at most four non-zero
+ *                  coeffs in the following positions:
+ *                  
+ *                          x   x   0  0  0  0  0  0
+ *                          x   x   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *                          0   0   0  0  0  0  0  0
+ *
+ *  SPECIAL NOTES : CURRENTLY NOT USED IN CODEBASE.
+ *
+ ****************************************************************************/
+void IDct4 ( int16 *InputData, int16 *OutputData )
+{
+	int32 t1;
+	int loop;
+	int32 _Add, _Fd;
+	int32 _A, _B, _Ad, _Bd, _Cd, _Dd, _E;
+
+    int16 *ip = InputData;
+	int16 *op = OutputData;
+
+	// Unzigzag the coefficents
+	ip[8] = ip[2];
+	ip[9] = ip[4];
+	ip[2] = 0;
+	ip[5] = 0;
+
+	// Inverse DCT on the rows now
+	for ( loop = 0; loop < 2; loop++)
+	{
+		// Check for non-zero values
+		if ( ip[0] | ip[1] )
+		{
+			t1 = (int32)(xC1S7 * ip[1]);
+            t1 >>= 16;
+			_A = t1; 
+
+			t1 = (int32)(xC7S1 * ip[1]);
+            t1 >>= 16;
+			_B = t1 ;
+
+			t1 = (int32)(xC4S4 * _A );
+            t1 >>= 16;
+			_Ad = t1;
+
+			t1 = (int32)(xC4S4 * _B );
+            t1 >>= 16;
+			_Bd = t1;
+
+			_Cd = _A ;
+			_Dd = _B ;
+
+			t1 = (int32)(xC4S4 * ip[0] );
+            t1 >>= 16;
+			_E = t1;
+
+			_Add = _E + _Ad;
+			
+			_Fd = _E - _Ad;
+	
+			// Final sequence of operations over-write original inputs.
+			ip[0] = (int16)((_E + _Cd )   >> 0);
+			ip[7] = (int16)((_E - _Cd )   >> 0);
+
+			ip[1] = (int16)((_Add + _Bd ) >> 0);
+			ip[2] = (int16)((_Add - _Bd ) >> 0);
+
+			ip[3] = (int16)((_E + _Dd )   >> 0);
+			ip[4] = (int16)((_E - _Dd )   >> 0);
+
+			ip[5] = (int16)((_Fd + _Bd )  >> 0);
+			ip[6] = (int16)((_Fd - _Bd )  >> 0);
+		}
+
+		ip += 8;			/* next row */
+	}
+
+	ip = InputData;
+
+	for ( loop=0; loop<8; loop++ )
+	{	
+		// Check for non-zero values (bitwise or faster than ||)
+		if ( ip[0 * 8] | ip[1 * 8] )
+		{
+
+			t1 = (int32)(xC1S7 * ip[1*8]);
+            t1 >>= 16;
+			_A = t1 ;
+
+			t1 = (int32)(xC7S1 * ip[1*8]);
+            t1 >>= 16;
+			_B = t1 ;
+
+			t1 = (int32)(xC4S4 * _A );
+            t1 >>= 16;
+			_Ad = t1;
+
+			t1 = (int32)(xC4S4 * _B );
+            t1 >>= 16;
+			_Bd = t1;
+			
+			_Cd = _A ;
+			_Dd = _B ;
+
+			t1 = (int32)(xC4S4 * ip[0*8]);
+            t1 >>= 16;
+			_E = t1;
+
+			_Add = _E + _Ad;
+			
+			_Fd = _E - _Ad;
+	
+			_Add += IdctAdjustBeforeShift;
+			_E   += IdctAdjustBeforeShift;
+			_Fd  += IdctAdjustBeforeShift;
+
+			// Final sequence of operations over-write original inputs.
+			op[0*8] = (int16)((_E + _Cd )   >> 4);
+			op[7*8] = (int16)((_E - _Cd )   >> 4);
+
+			op[1*8] = (int16)((_Add + _Bd ) >> 4);
+			op[2*8] = (int16)((_Add - _Bd ) >> 4);
+
+			op[3*8] = (int16)((_E + _Dd )   >> 4);
+			op[4*8] = (int16)((_E - _Dd )   >> 4);
+
+			op[5*8] = (int16)((_Fd + _Bd )   >> 4);
+			op[6*8] = (int16)((_Fd - _Bd )   >> 4);
+		}
+		else
+		{
+			op[0*8] = 0;
+			op[7*8] = 0;
+			op[1*8] = 0;
+			op[2*8] = 0;
+			op[3*8] = 0;
+			op[4*8] = 0;
+			op[5*8] = 0;
+			op[6*8] = 0;
+		}
+
+		ip++;	// next column
+        op++;
+	}
+}
+#endif
@@ -0,0 +1,243 @@
+/****************************************************************************
+*
+*   Module Title :     Reconstruct.c
+*
+*   Description  :     Block reconstruction functions.
+*
+****************************************************************************/
+#define STRICT              // Strict type checking 
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "reconstruct.h"
+#include "codec_common.h"
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : SatUnsigned8
+ *
+ *  INPUTS        : INT16 *DataBlock      : Pointer to 8x8 input block.
+ *                  UINT32 ResultLineStep : Stride of output block.
+ *                  UINT32 DataLineStep   : Stride of input block.
+ *
+ *  OUTPUTS       : UINT8 *ResultPtr      : Pointer to 8x8 output block.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Saturates the input data to 8 bits unsigned and stores
+ *                  in the output buffer.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void SatUnsigned8 ( UINT8 *ResultPtr, INT16 *DataBlock, UINT32 ResultLineStep, UINT32 DataLineStep )
+{
+    INT32 i;
+       
+     // Partly expanded loop
+    for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+    {
+        ResultPtr[0] = (char) LIMIT(DataBlock[0]);
+        ResultPtr[1] = (char) LIMIT(DataBlock[1]);
+        ResultPtr[2] = (char) LIMIT(DataBlock[2]);
+        ResultPtr[3] = (char) LIMIT(DataBlock[3]);
+        ResultPtr[4] = (char) LIMIT(DataBlock[4]);
+        ResultPtr[5] = (char) LIMIT(DataBlock[5]);
+        ResultPtr[6] = (char) LIMIT(DataBlock[6]);
+        ResultPtr[7] = (char) LIMIT(DataBlock[7]);
+
+        DataBlock += DataLineStep;
+        ResultPtr += ResultLineStep;
+    }
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : ScalarReconIntra
+ *
+ *  INPUTS        : INT16 *TmpDataBuffer : Pointer to 8x8 temporary buffer for internal use.
+ *                  UINT16 *ChangePtr    : Pointer to 8x8 intra prediction block.
+ *                  UINT32 LineStep      : Stride of reconstruction block.
+ *
+ *  OUTPUTS       : UINT8 *ReconPtr      : Pointer to 8x8 block to hold reconstructed block.
+ *
+ *  RETURNS       : None
+ *
+ *  FUNCTION      : Reconstructs an intra block.
+ *
+ *  SPECIAL NOTES : None. 
+ *
+ ****************************************************************************/
+void ScalarReconIntra ( INT16 *TmpDataBuffer, UINT8 *ReconPtr, UINT16 *ChangePtr, UINT32 LineStep )
+{
+    UINT32 i;
+	INT16 *TmpDataPtr = TmpDataBuffer;
+
+    for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+   	{	
+        TmpDataPtr[0] = (INT16) ( ChangePtr[0] + 128 );
+        TmpDataPtr[1] = (INT16) ( ChangePtr[1] + 128 );
+        TmpDataPtr[2] = (INT16) ( ChangePtr[2] + 128 );
+        TmpDataPtr[3] = (INT16) ( ChangePtr[3] + 128 );
+        TmpDataPtr[4] = (INT16) ( ChangePtr[4] + 128 );
+        TmpDataPtr[5] = (INT16) ( ChangePtr[5] + 128 );
+        TmpDataPtr[6] = (INT16) ( ChangePtr[6] + 128 );
+        TmpDataPtr[7] = (INT16) ( ChangePtr[7] + 128 );
+
+        TmpDataPtr += BLOCK_HEIGHT_WIDTH;
+        ChangePtr  += BLOCK_HEIGHT_WIDTH;
+    }
+
+    // Saturate the output to unsigned 8 bit values in recon buffer
+    SatUnsigned8 ( ReconPtr, TmpDataBuffer, LineStep, BLOCK_HEIGHT_WIDTH );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : ScalarReconInter
+ *
+ *  INPUTS        : INT16 *TmpDataBuffer : Pointer to 8x8 temporary buffer for internal use.
+ *                  UINT8 *RefPtr        : Pointer to 8x8 reference block.
+ *                  INT16 *ChangePtr     : Pointer to 8x8 inter prediction error block.
+ *                  UINT32 LineStep      : Stride of reference and output blocks.
+ *
+ *  OUTPUTS       : UINT8 *ReconPtr      : Pointer to 8x8 block to hold reconstructed block.
+ *
+ *  RETURNS       : None
+ *
+ *  FUNCTION      : Reconstructs an inter-coded block by adding a prediction
+ *                  error to a reference block in the previous frame 
+ *                  reconstruction buffer.
+ *
+ *  SPECIAL NOTES : None. 
+ *
+ ****************************************************************************/
+void ScalarReconInter ( INT16 *TmpDataBuffer, UINT8 *ReconPtr, UINT8 *RefPtr, INT16 *ChangePtr, UINT32 LineStep )
+{
+    UINT32 i;
+	INT16 *TmpDataPtr = TmpDataBuffer;
+
+    for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+   	{	
+		// Form each row
+   	    TmpDataPtr[0] = (INT16)(RefPtr[0] + ChangePtr[0]);
+   	    TmpDataPtr[1] = (INT16)(RefPtr[1] + ChangePtr[1]);
+   	    TmpDataPtr[2] = (INT16)(RefPtr[2] + ChangePtr[2]);
+   	    TmpDataPtr[3] = (INT16)(RefPtr[3] + ChangePtr[3]);
+   	    TmpDataPtr[4] = (INT16)(RefPtr[4] + ChangePtr[4]);
+   	    TmpDataPtr[5] = (INT16)(RefPtr[5] + ChangePtr[5]);
+   	    TmpDataPtr[6] = (INT16)(RefPtr[6] + ChangePtr[6]);
+   	    TmpDataPtr[7] = (INT16)(RefPtr[7] + ChangePtr[7]);
+
+        // Next row of Block
+		ChangePtr  += BLOCK_HEIGHT_WIDTH;
+        TmpDataPtr += BLOCK_HEIGHT_WIDTH;
+        RefPtr     += LineStep; 
+    }
+
+    // Saturate the output to unsigned 8 bit values in recon buffer
+    SatUnsigned8 ( ReconPtr, TmpDataBuffer, LineStep, BLOCK_HEIGHT_WIDTH );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : ScalarReconInterHalfPixel2
+ *
+ *  INPUTS        : INT16 *TmpDataBuffer : Pointer to 8x8 temporary buffer for internal use.
+ *                  UINT8 *RefPtr1       : Pointer to first 8x8 reference block.
+ *                  UINT8 *RefPtr2       : Pointer to second 8x8 reference block.
+ *                  INT16 *ChangePtr     : Pointer to 8x8 inter prediction error block.
+ *                  UINT32 LineStep      : Stride of reference blocks.
+ *
+ *  OUTPUTS       : UINT8 *ReconPtr      : Pointer to 8x8 block to hold reconstructed block.
+ *
+ *  RETURNS       : None
+ *
+ *  FUNCTION      : Reconstructs an inter-coded block by adding a prediction
+ *                  error to a reference block computed by averaging the two
+ *                  specified reference blocks. The two reference blocks are
+ *                  those that bracket the 1/2-pixel accuracy motion vector.
+ *
+ *  SPECIAL NOTES : None. 
+ *
+ ****************************************************************************/
+void ScalarReconInterHalfPixel2 
+(
+    INT16 *TmpDataBuffer,
+    UINT8 *ReconPtr, 
+    UINT8 *RefPtr1,
+    UINT8 *RefPtr2, 
+    INT16 *ChangePtr,
+    UINT32 LineStep 
+)
+{
+    UINT32  i;
+	INT16 *TmpDataPtr = TmpDataBuffer;
+
+    for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+   	{	
+		// Form each row
+        TmpDataPtr[0] = (INT16)( (((INT32)RefPtr1[0] + (INT32)RefPtr2[0]) >> 1) + ChangePtr[0] );
+   	    TmpDataPtr[1] = (INT16)( (((INT32)RefPtr1[1] + (INT32)RefPtr2[1]) >> 1) + ChangePtr[1] );
+   	    TmpDataPtr[2] = (INT16)( (((INT32)RefPtr1[2] + (INT32)RefPtr2[2]) >> 1) + ChangePtr[2] );
+   	    TmpDataPtr[3] = (INT16)( (((INT32)RefPtr1[3] + (INT32)RefPtr2[3]) >> 1) + ChangePtr[3] );
+   	    TmpDataPtr[4] = (INT16)( (((INT32)RefPtr1[4] + (INT32)RefPtr2[4]) >> 1) + ChangePtr[4] );
+   	    TmpDataPtr[5] = (INT16)( (((INT32)RefPtr1[5] + (INT32)RefPtr2[5]) >> 1) + ChangePtr[5] );
+   	    TmpDataPtr[6] = (INT16)( (((INT32)RefPtr1[6] + (INT32)RefPtr2[6]) >> 1) + ChangePtr[6] );
+   	    TmpDataPtr[7] = (INT16)( (((INT32)RefPtr1[7] + (INT32)RefPtr2[7]) >> 1) + ChangePtr[7] );
+
+        // Next row of Block
+		ChangePtr  += BLOCK_HEIGHT_WIDTH;
+        TmpDataPtr += BLOCK_HEIGHT_WIDTH;
+        RefPtr1    += LineStep; 
+        RefPtr2    += LineStep; 
+    }
+
+    // Saturate the output to unsigned 8 bit values in recon buffer
+    SatUnsigned8( ReconPtr, TmpDataBuffer, LineStep, BLOCK_HEIGHT_WIDTH );
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       : ReconBlock_C
+ *  
+ *  INPUTS        : INT16 *SrcBlock    : Pointer to 8x8 prediction error.
+ *					INT16 *ReconRefPtr : Pointer to 8x8 block prediction.
+ *                  UINT32 LineStep    : Stride of output block.
+ *
+ *  OUTPUTS       : UINT8 *DestBlock   : Pointer to 8x8 reconstructed block.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Reconstrut a block by adding the prediction error
+ *                  block to the source block and clipping values.
+ *
+ *  SPECIAL NOTES : None.
+ *
+ ****************************************************************************/
+void ReconBlock_C ( INT16 *SrcBlock, INT16 *ReconRefPtr, UINT8 *DestBlock, UINT32 LineStep )
+{
+    UINT32 i;
+    INT16 *SrcBlockPtr = SrcBlock;
+
+    // For each block row
+    for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
+    {
+        SrcBlock[0] = (INT16)(SrcBlock[0] + ReconRefPtr[0]);
+        SrcBlock[1] = (INT16)(SrcBlock[1] + ReconRefPtr[1]);
+        SrcBlock[2] = (INT16)(SrcBlock[2] + ReconRefPtr[2]);
+        SrcBlock[3] = (INT16)(SrcBlock[3] + ReconRefPtr[3]);
+        SrcBlock[4] = (INT16)(SrcBlock[4] + ReconRefPtr[4]);
+        SrcBlock[5] = (INT16)(SrcBlock[5] + ReconRefPtr[5]);
+        SrcBlock[6] = (INT16)(SrcBlock[6] + ReconRefPtr[6]);
+        SrcBlock[7] = (INT16)(SrcBlock[7] + ReconRefPtr[7]);
+        
+        // Next row...
+        SrcBlock    += BLOCK_HEIGHT_WIDTH;
+        ReconRefPtr += BLOCK_HEIGHT_WIDTH;
+    }
+
+    // Saturate the output to unsigned 8 bit values in recon buffer
+    SatUnsigned8( DestBlock, SrcBlockPtr, LineStep, BLOCK_HEIGHT_WIDTH );
+}
@@ -0,0 +1,100 @@
+/****************************************************************************
+*
+*   Module Title :     SystemDependant.c
+*
+*   Description  :     Miscellaneous system dependant functions.
+*
+****************************************************************************/
+
+/****************************************************************************
+*  Header Files
+****************************************************************************/
+#include "codec_common.h"
+#include "vputil_if.h"
+
+/****************************************************************************
+*  Exports
+****************************************************************************/
+// Scalar (no mmx) reconstruction functions
+extern void ClearSysState_C ( void );
+extern void IDctSlow ( INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
+extern void IDct10 ( INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
+extern void IDct1 ( INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
+extern void ScalarReconIntra ( INT16 *TmpDataBuffer, UINT8 *ReconPtr, UINT16 *ChangePtr, UINT32 LineStep );
+extern void ScalarReconInter ( INT16 *TmpDataBuffer, UINT8 *ReconPtr, UINT8 *RefPtr, INT16 *ChangePtr, UINT32 LineStep );
+extern void ScalarReconInterHalfPixel2 ( INT16 *TmpDataBuffer, UINT8 *ReconPtr,UINT8 *RefPtr1, UINT8 *RefPtr2, INT16 *ChangePtr, UINT32 LineStep );
+extern void ReconBlock_C(INT16 *SrcBlock,INT16 *ReconRefPtr, UINT8 *DestBlock, UINT32 LineStep );
+extern void SubtractBlock_C ( UINT8 *SrcBlock, INT16 *DestPtr, UINT32 LineStep );
+extern void UnpackBlock_C ( UINT8 *ReconPtr, INT16 *ReconRefPtr, UINT32 ReconPixelsPerLine );
+extern void AverageBlock_C ( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 ReconPixelsPerLine );
+extern void CopyBlock_C ( unsigned char *src, unsigned char *dest, unsigned int srcstride );
+extern void Copy12x12_C ( const unsigned char *src, unsigned char *dest, unsigned int srcstride, unsigned int deststride );
+extern void fdct_short_C ( INT16 *InputData, INT16 *OutputData );
+extern void FilterBlockBil_8_C( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT8 *ReconRefPtr, UINT32 ReconPixelsPerLine, INT32 ModX, INT32 ModY );
+extern void FilterBlock_C( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY, BOOL UseBicubic, UINT8 BicubicAlpha );
+extern void GetProcessorFlags ( INT32 *MmxEnabled, INT32 *XmmEnabled, INT32 *WmtEnabled );
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     fillidctconstants
+ *
+ *  INPUTS        :     None
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     void
+ *
+ *  FUNCTION      :     STUB FUNCTION.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void fillidctconstants ( void )
+{
+}
+
+/****************************************************************************
+ * 
+ *  ROUTINE       :     MachineSpecificConfig
+ *
+ *  INPUTS        :     None
+ *
+ *  OUTPUTS       :     None
+ *
+ *  RETURNS       :     None
+ *
+ *  FUNCTION      :     Checks for machine specifc features such as MMX support 
+ *                      sets approipriate flags and function pointers.
+ *
+ *  SPECIAL NOTES :     None. 
+ *
+ ****************************************************************************/
+void UtilMachineSpecificConfig ( void )
+{
+		int i;
+		for(i=0;i<=64;i++)
+		{
+			if(i<=1)idctc[i]=IDct1;
+			else if(i<=10)idctc[i]=IDct10;
+			else idctc[i]=IDctSlow;
+		}
+		fdct_short=fdct_short_C ;
+		for(i=0;i<=64;i++)
+		{
+			if(i<=1)idct[i]=IDct1;
+			else if(i<=10)idct[i]=IDct10;
+			else idct[i]=IDctSlow;
+		}
+		ClearSysState = ClearSysState_C;
+		ReconIntra = ScalarReconIntra;
+		ReconInter = ScalarReconInter;
+		ReconInterHalfPixel2 = ScalarReconInterHalfPixel2;
+		AverageBlock = AverageBlock_C;
+		UnpackBlock = UnpackBlock_C;
+		ReconBlock = ReconBlock_C;
+		SubtractBlock = SubtractBlock_C;
+		CopyBlock = CopyBlock_C;
+        Copy12x12 = Copy12x12_C;
+        FilterBlockBil_8 = FilterBlockBil_8_C;
+        FilterBlock=FilterBlock_C;
+}