Initial community commit

This commit is contained in:
Jef
2024-09-24 14:54:57 +02:00
parent 537bcbc862
commit 20d28e80a5
16810 changed files with 4640254 additions and 2 deletions

View File

@@ -0,0 +1,312 @@
/****************************************************************************
*
* Module Title : fdct.c
*
* Description : Fast 8x8 DCT C-Implementation.
*
****************************************************************************/
/****************************************************************************
* Header Files
****************************************************************************/
#include "dct.h"
/****************************************************************************
* Macros
****************************************************************************/
#define SIGNBITDUPPED(X) ( (signed )((X & 0x80000000)) >> 31 )
#define DOROUND(X) X = ( (SIGNBITDUPPED(X) & (0xffff)) + X );
/****************************************************************************
* Module statics
****************************************************************************/
static INT32 xC1S7 = 64277;
static INT32 xC2S6 = 60547;
static INT32 xC3S5 = 54491;
static INT32 xC4S4 = 46341;
static INT32 xC5S3 = 36410;
static INT32 xC6S2 = 25080;
static INT32 xC7S1 = 12785;
/****************************************************************************
*
* ROUTINE : fdct_short_C_orig
*
* INPUTS : INT16 *InputData : 16-bit input data.
*
* OUTPUTS : INT16 *OutputData : 16-bit transform coefficients.
*
* RETURNS : void
*
* FUNCTION : Performs an 8x8 2-D fast DCT.
*
* The algorithm used is derived from the flowgraph for
* the Vetterli and Ligtenberg fast 1-D dct given in the
* JPEG reference book by Pennebaker and Mitchell.
*
* SPECIAL NOTES : None.
*
****************************************************************************/
void fdct_short_C_orig ( INT16 *InputData, INT16 *OutputData )
{
int loop;
INT32 is07, is12, is34, is56;
INT32 is0734, is1256;
INT32 id07, id12, id34, id56;
INT32 irot_input_x, irot_input_y;
INT32 icommon_product1; // Re-used product (c4s4 * (s12 - s56)).
INT32 icommon_product2; // Re-used product (c4s4 * (d12 + d56)).
INT32 temp1, temp2; // intermediate variable for computation
INT32 InterData[64];
INT32 *ip = InterData;
INT16 *op = OutputData;
for ( loop=0; loop<8; loop++ )
{
// Pre calculate some common sums and differences.
is07 = InputData[0] + InputData[7];
is12 = InputData[1] + InputData[2];
is34 = InputData[3] + InputData[4];
is56 = InputData[5] + InputData[6];
id07 = InputData[0] - InputData[7];
id12 = InputData[1] - InputData[2];
id34 = InputData[3] - InputData[4];
id56 = InputData[5] - InputData[6];
is0734 = is07 + is34;
is1256 = is12 + is56;
// Pre-Calculate some common product terms.
icommon_product1 = xC4S4*(is12 - is56);
DOROUND ( icommon_product1 )
icommon_product1 >>= 16;
icommon_product2 = xC4S4*(id12 + id56);
DOROUND ( icommon_product2 )
icommon_product2 >>= 16;
ip[0] = (xC4S4*(is0734 + is1256));
DOROUND ( ip[0] );
ip[0] >>= 16;
ip[4] = (xC4S4*(is0734 - is1256));
DOROUND ( ip[4] );
ip[4] >>= 16;
// Define inputs to rotation for outputs 2 and 6
irot_input_x = id12 - id56;
irot_input_y = is07 - is34;
// Apply rotation for outputs 2 and 6.
temp1 = xC6S2*irot_input_x;
DOROUND ( temp1 );
temp1 >>= 16;
temp2 = xC2S6*irot_input_y;
DOROUND ( temp2 );
temp2 >>= 16;
ip[2] = temp1 + temp2;
temp1 = xC6S2*irot_input_y;
DOROUND ( temp1 );
temp1 >>= 16;
temp2 = xC2S6*irot_input_x;
DOROUND ( temp2 );
temp2 >>= 16;
ip[6] = temp1 -temp2;
// Define inputs to rotation for outputs 1 and 7
irot_input_x = icommon_product1 + id07;
irot_input_y = -( id34 + icommon_product2 );
// Apply rotation for outputs 1 and 7.
temp1 = xC1S7*irot_input_x;
DOROUND ( temp1 );
temp1 >>= 16;
temp2 = xC7S1*irot_input_y;
DOROUND ( temp2 );
temp2 >>= 16;
ip[1] = temp1 - temp2;
temp1 = xC7S1*irot_input_x;
DOROUND ( temp1 );
temp1 >>= 16;
temp2 = xC1S7*irot_input_y;
DOROUND ( temp2 );
temp2 >>= 16;
ip[7] = temp1 + temp2;
// Define inputs to rotation for outputs 3 and 5
irot_input_x = id07 - icommon_product1;
irot_input_y = id34 - icommon_product2;
// Apply rotation for outputs 3 and 5.
temp1 = xC3S5 * irot_input_x;
DOROUND ( temp1 );
temp1 >>= 16;
temp2 = xC5S3*irot_input_y;
DOROUND ( temp2 );
temp2 >>= 16;
ip[3] = temp1 - temp2;
temp1 = xC5S3*irot_input_x;
DOROUND ( temp1 );
temp1 >>= 16;
temp2 = xC3S5*irot_input_y;
DOROUND ( temp2 );
temp2 >>= 16;
ip[5] = temp1 + temp2;
// Increment data pointer for next row.
InputData += 8;
ip += 8; // advance pointer to next row
}
// Performed DCT on rows, now transform the columns
ip = InterData;
for ( loop=0; loop<8; loop++ )
{
// Pre calculate some common sums and differences.
is07 = ip[0 * 8] + ip[7 * 8];
is12 = ip[1 * 8] + ip[2 * 8];
is34 = ip[3 * 8] + ip[4 * 8];
is56 = ip[5 * 8] + ip[6 * 8];
id07 = ip[0 * 8] - ip[7 * 8];
id12 = ip[1 * 8] - ip[2 * 8];
id34 = ip[3 * 8] - ip[4 * 8];
id56 = ip[5 * 8] - ip[6 * 8];
is0734 = is07 + is34;
is1256 = is12 + is56;
// Pre-Calculate some common product terms.
icommon_product1 = xC4S4*(is12 - is56);
icommon_product2 = xC4S4*(id12 + id56);
DOROUND ( icommon_product1 )
DOROUND ( icommon_product2 )
icommon_product1 >>= 16;
icommon_product2 >>= 16;
temp1 = xC4S4*(is0734 + is1256);
temp2 = xC4S4*(is0734 - is1256);
DOROUND ( temp1 );
DOROUND ( temp2 );
temp1 >>= 16;
temp2 >>= 16;
op[0*8] = (INT16)temp1;
op[4*8] = (INT16)temp2;
// Define inputs to rotation for outputs 2 and 6
irot_input_x = id12 - id56;
irot_input_y = is07 - is34;
// Apply rotation for outputs 2 and 6.
temp1 = xC6S2*irot_input_x;
DOROUND ( temp1 );
temp1 >>= 16;
temp2 = xC2S6*irot_input_y;
DOROUND ( temp2 );
temp2 >>= 16;
op[2*8] = (INT16)(temp1 + temp2);
temp1 = xC6S2*irot_input_y;
DOROUND ( temp1 );
temp1 >>= 16;
temp2 = xC2S6*irot_input_x;
DOROUND ( temp2 );
temp2 >>= 16;
op[6*8] = (INT16)(temp1 -temp2);
// Define inputs to rotation for outputs 1 and 7
irot_input_x = icommon_product1 + id07;
irot_input_y = -( id34 + icommon_product2 );
// Apply rotation for outputs 1 and 7.
temp1 = xC1S7*irot_input_x;
DOROUND ( temp1 );
temp1 >>= 16;
temp2 = xC7S1*irot_input_y;
DOROUND ( temp2 );
temp2 >>= 16;
op[1*8] = (INT16) (temp1 - temp2);
temp1 = xC7S1*irot_input_x;
DOROUND ( temp1 );
temp1 >>= 16;
temp2 = xC1S7*irot_input_y;
DOROUND ( temp2 );
temp2 >>= 16;
op[7*8] = (INT16)(temp1 + temp2);
// Define inputs to rotation for outputs 3 and 5
irot_input_x = id07 - icommon_product1;
irot_input_y = id34 - icommon_product2;
// Apply rotation for outputs 3 and 5.
temp1 = xC3S5*irot_input_x;
DOROUND ( temp1 );
temp1 >>= 16;
temp2 = xC5S3*irot_input_y;
DOROUND ( temp2 );
temp2 >>= 16;
op[3*8] = (INT16)(temp1 - temp2);
temp1 = xC5S3*irot_input_x;
DOROUND ( temp1 );
temp1 >>= 16;
temp2 = xC3S5*irot_input_y;
DOROUND ( temp2 );
temp2 >>= 16;
op[5*8] = (INT16) (temp1 + temp2);
// Increment data pointer for next column.
ip ++;
op ++;
}
}
/****************************************************************************
*
* ROUTINE : fdct_short_C
*
* INPUTS : INT16 *InputData : 16-bit input data.
*
* OUTPUTS : INT16 *OutputData : 16-bit transform coefficients.
*
* RETURNS : void
*
* FUNCTION : Performs an 8x8 2-D fast DCT.
*
* The function to up the precision of FDCT by number of bits
* defined by FDCT_PRECISION_BITS.
*
* SPECIAL NOTES : None.
*
****************************************************************************/
void fdct_short_C ( INT16 *DCTDataBuffer, INT16 *DCT_codes )
{
INT32 i;
// Increase precision on input to fdct
for ( i = 0; i < 64; i++ )
DCTDataBuffer[i] = DCTDataBuffer[i] << FDCT_PRECISION_BITS;
// Transform the error signal using the forward DCT to get set of transform coefficients
fdct_short_C_orig ( DCTDataBuffer, DCT_codes );
// Strip off the extra bits from the DCT output.
// This should ultimately be merged into the quantize process but there are also
// implications for DC prediction that would then need to be sorted
for ( i = 0; i < 64; i++ )
{
// signed shift modified so behaves like "/" (truncates towards 0 for + and -)
if ( DCT_codes[i] >= 0 )
DCT_codes[i] = (DCT_codes[i]) >> FDCT_PRECISION_BITS;
else
DCT_codes[i] = (DCT_codes[i] + FDCT_PRECISION_NEG_ADJ) >> FDCT_PRECISION_BITS;
}
}

View File

@@ -0,0 +1,921 @@
/****************************************************************************
*
* Module Title : idctpart.c
*
* Description : IDCT with multiple versions based on # of non 0 coeffs
*
****************************************************************************/
/****************************************************************************
* Header Files
****************************************************************************/
#include "dct.h"
#include "string.h"
/****************************************************************************
* Macros
****************************************************************************/
#define int32 int
#define int16 short
#define IdctAdjustBeforeShift 8
#define xC1S7 64277
#define xC2S6 60547
#define xC3S5 54491
#define xC4S4 46341
#define xC5S3 36410
#define xC6S2 25080
#define xC7S1 12785
/****************************************************************************
* Module statics
****************************************************************************/
static const UINT32 dequant_index[64] =
{
0, 1, 8, 16, 9, 2, 3, 10,
17, 24, 32, 25, 18, 11, 4, 5,
12, 19, 26, 33, 40, 48, 41, 34,
27, 20, 13, 6, 7, 14, 21, 28,
35, 42, 49, 56, 57, 50, 43, 36,
29, 22, 15, 23, 30, 37, 44, 51,
58, 59, 52, 45, 38, 31, 39, 46,
53, 60, 61, 54, 47, 55, 62, 63
};
#if 0 // AWG CODE NO LONGER USED IN CODEBASE.
/* Cos and Sin constant multipliers used during DCT and IDCT */
const double C1S7 = (double)0.9807852804032;
const double C2S6 = (double)0.9238795325113;
const double C3S5 = (double)0.8314696123025;
const double C4S4 = (double)0.7071067811865;
const double C5S3 = (double)0.5555702330196;
const double C6S2 = (double)0.3826834323651;
const double C7S1 = (double)0.1950903220161;
/****************************************************************************
* Exports
****************************************************************************/
// DCT lookup tables
INT32 * C4S4_TablePtr;
INT32 C4S4_Table[(COEFF_MAX * 4) + 1];
INT32 * C6S2_TablePtr;
INT32 C6S2_Table[(COEFF_MAX * 2) + 1];
INT32 * C2S6_TablePtr;
INT32 C2S6_Table[(COEFF_MAX * 2) + 1];
INT32 * C1S7_TablePtr;
INT32 C1S7_Table[(COEFF_MAX * 2) + 1];
INT32 * C7S1_TablePtr;
INT32 C7S1_Table[(COEFF_MAX * 2) + 1];
INT32 * C3S5_TablePtr;
INT32 C3S5_Table[(COEFF_MAX * 2) + 1];
INT32 * C5S3_TablePtr;
INT32 C5S3_Table[(COEFF_MAX * 2) + 1];
/****************************************************************************
*
* ROUTINE : InitDctTables
*
* INPUTS : None.
*
* OUTPUTS : None.
*
* RETURNS : void
*
* FUNCTION : Initialises lookup tables used in IDCT.
*
* SPECIAL NOTES : NO LONGER USED IN CODEBASE.
*
****************************************************************************/
void InitDctTables ( void )
{
INT32 i;
C4S4_TablePtr = &C4S4_Table[COEFF_MAX*2];
for( i = -(2 * COEFF_MAX); i < (2 * COEFF_MAX); i++ )
{
if ( i < 0 )
C4S4_TablePtr[i] = (INT32)((i * C4S4) - 0.5);
else
C4S4_TablePtr[i] = (INT32)((i * C4S4) + 0.5);
}
C6S2_TablePtr = &C6S2_Table[COEFF_MAX];
for( i = -COEFF_MAX ; i < COEFF_MAX; i++ )
{
if ( i < 0 )
C6S2_TablePtr[i] = (INT32)((i * C6S2) - 0.5);
else
C6S2_TablePtr[i] = (INT32)((i * C6S2) + 0.5);
}
C2S6_TablePtr = &C2S6_Table[COEFF_MAX];
for( i = -COEFF_MAX ; i < COEFF_MAX; i++ )
{
if ( i < 0 )
C2S6_TablePtr[i] = (INT32)((i * C2S6) - 0.5);
else
C2S6_TablePtr[i] = (INT32)((i * C2S6) + 0.5);
}
C1S7_TablePtr = &C1S7_Table[COEFF_MAX];
for( i = -COEFF_MAX ; i < COEFF_MAX; i++ )
{
if ( i < 0 )
C1S7_TablePtr[i] = (INT32)((i * C1S7) - 0.5);
else
C1S7_TablePtr[i] = (INT32)((i * C1S7) + 0.5);
}
C7S1_TablePtr = &C7S1_Table[COEFF_MAX];
for( i = -COEFF_MAX ; i < COEFF_MAX; i++ )
{
if ( i < 0 )
C7S1_TablePtr[i] = (INT32)((i * C7S1) - 0.5);
else
C7S1_TablePtr[i] = (INT32)((i * C7S1) + 0.5);
}
C3S5_TablePtr = &C3S5_Table[COEFF_MAX];
for( i = -COEFF_MAX ; i < COEFF_MAX; i++ )
{
if ( i < 0 )
C3S5_TablePtr[i] = (INT32)((i * C3S5) - 0.5);
else
C3S5_TablePtr[i] = (INT32)((i * C3S5) + 0.5);
}
C5S3_TablePtr = &C5S3_Table[COEFF_MAX];
for( i = -COEFF_MAX ; i < COEFF_MAX; i++ )
{
if ( i < 0 )
C5S3_TablePtr[i] = (INT32)((i * C5S3) - 0.5);
else
C5S3_TablePtr[i] = (INT32)((i * C5S3) + 0.5);
}
}
#endif
/****************************************************************************
*
* ROUTINE : dequant_slow
*
* INPUTS : INT16 *dequant_coeffs : Pointer to dequantization step sizes.
* INT16 *quantized_list : Pointer to quantized DCT coeffs
* (in zig-zag order).
*
* OUTPUTS : INT32 *DCT_block : Pointer to 8x8 de-quantized block
* (in 2-D raster order).
*
* RETURNS : void
*
* FUNCTION : De-quantizes an 8x8 block of quantized DCT coeffs.
*
* SPECIAL NOTES : Uses dequant_index to invert zig-zag ordering.
*
****************************************************************************/
void dequant_slow ( INT16 *dequant_coeffs, INT16 *quantized_list, INT32 *DCT_block )
{
// Loop fully expanded for maximum speed
DCT_block[dequant_index[0]] = quantized_list[0] * dequant_coeffs[0];
DCT_block[dequant_index[1]] = quantized_list[1] * dequant_coeffs[1];
DCT_block[dequant_index[2]] = quantized_list[2] * dequant_coeffs[2];
DCT_block[dequant_index[3]] = quantized_list[3] * dequant_coeffs[3];
DCT_block[dequant_index[4]] = quantized_list[4] * dequant_coeffs[4];
DCT_block[dequant_index[5]] = quantized_list[5] * dequant_coeffs[5];
DCT_block[dequant_index[6]] = quantized_list[6] * dequant_coeffs[6];
DCT_block[dequant_index[7]] = quantized_list[7] * dequant_coeffs[7];
DCT_block[dequant_index[8]] = quantized_list[8] * dequant_coeffs[8];
DCT_block[dequant_index[9]] = quantized_list[9] * dequant_coeffs[9];
DCT_block[dequant_index[10]] = quantized_list[10] * dequant_coeffs[10];
DCT_block[dequant_index[11]] = quantized_list[11] * dequant_coeffs[11];
DCT_block[dequant_index[12]] = quantized_list[12] * dequant_coeffs[12];
DCT_block[dequant_index[13]] = quantized_list[13] * dequant_coeffs[13];
DCT_block[dequant_index[14]] = quantized_list[14] * dequant_coeffs[14];
DCT_block[dequant_index[15]] = quantized_list[15] * dequant_coeffs[15];
DCT_block[dequant_index[16]] = quantized_list[16] * dequant_coeffs[16];
DCT_block[dequant_index[17]] = quantized_list[17] * dequant_coeffs[17];
DCT_block[dequant_index[18]] = quantized_list[18] * dequant_coeffs[18];
DCT_block[dequant_index[19]] = quantized_list[19] * dequant_coeffs[19];
DCT_block[dequant_index[20]] = quantized_list[20] * dequant_coeffs[20];
DCT_block[dequant_index[21]] = quantized_list[21] * dequant_coeffs[21];
DCT_block[dequant_index[22]] = quantized_list[22] * dequant_coeffs[22];
DCT_block[dequant_index[23]] = quantized_list[23] * dequant_coeffs[23];
DCT_block[dequant_index[24]] = quantized_list[24] * dequant_coeffs[24];
DCT_block[dequant_index[25]] = quantized_list[25] * dequant_coeffs[25];
DCT_block[dequant_index[26]] = quantized_list[26] * dequant_coeffs[26];
DCT_block[dequant_index[27]] = quantized_list[27] * dequant_coeffs[27];
DCT_block[dequant_index[28]] = quantized_list[28] * dequant_coeffs[28];
DCT_block[dequant_index[29]] = quantized_list[29] * dequant_coeffs[29];
DCT_block[dequant_index[30]] = quantized_list[30] * dequant_coeffs[30];
DCT_block[dequant_index[31]] = quantized_list[31] * dequant_coeffs[31];
DCT_block[dequant_index[32]] = quantized_list[32] * dequant_coeffs[32];
DCT_block[dequant_index[33]] = quantized_list[33] * dequant_coeffs[33];
DCT_block[dequant_index[34]] = quantized_list[34] * dequant_coeffs[34];
DCT_block[dequant_index[35]] = quantized_list[35] * dequant_coeffs[35];
DCT_block[dequant_index[36]] = quantized_list[36] * dequant_coeffs[36];
DCT_block[dequant_index[37]] = quantized_list[37] * dequant_coeffs[37];
DCT_block[dequant_index[38]] = quantized_list[38] * dequant_coeffs[38];
DCT_block[dequant_index[39]] = quantized_list[39] * dequant_coeffs[39];
DCT_block[dequant_index[40]] = quantized_list[40] * dequant_coeffs[40];
DCT_block[dequant_index[41]] = quantized_list[41] * dequant_coeffs[41];
DCT_block[dequant_index[42]] = quantized_list[42] * dequant_coeffs[42];
DCT_block[dequant_index[43]] = quantized_list[43] * dequant_coeffs[43];
DCT_block[dequant_index[44]] = quantized_list[44] * dequant_coeffs[44];
DCT_block[dequant_index[45]] = quantized_list[45] * dequant_coeffs[45];
DCT_block[dequant_index[46]] = quantized_list[46] * dequant_coeffs[46];
DCT_block[dequant_index[47]] = quantized_list[47] * dequant_coeffs[47];
DCT_block[dequant_index[48]] = quantized_list[48] * dequant_coeffs[48];
DCT_block[dequant_index[49]] = quantized_list[49] * dequant_coeffs[49];
DCT_block[dequant_index[50]] = quantized_list[50] * dequant_coeffs[50];
DCT_block[dequant_index[51]] = quantized_list[51] * dequant_coeffs[51];
DCT_block[dequant_index[52]] = quantized_list[52] * dequant_coeffs[52];
DCT_block[dequant_index[53]] = quantized_list[53] * dequant_coeffs[53];
DCT_block[dequant_index[54]] = quantized_list[54] * dequant_coeffs[54];
DCT_block[dequant_index[55]] = quantized_list[55] * dequant_coeffs[55];
DCT_block[dequant_index[56]] = quantized_list[56] * dequant_coeffs[56];
DCT_block[dequant_index[57]] = quantized_list[57] * dequant_coeffs[57];
DCT_block[dequant_index[58]] = quantized_list[58] * dequant_coeffs[58];
DCT_block[dequant_index[59]] = quantized_list[59] * dequant_coeffs[59];
DCT_block[dequant_index[60]] = quantized_list[60] * dequant_coeffs[60];
DCT_block[dequant_index[61]] = quantized_list[61] * dequant_coeffs[61];
DCT_block[dequant_index[62]] = quantized_list[62] * dequant_coeffs[62];
DCT_block[dequant_index[63]] = quantized_list[63] * dequant_coeffs[63];
}
/****************************************************************************
*
* ROUTINE : IDctSlow
*
* INPUTS : int16 *InputData : Pointer to 8x8 quantized DCT coefficients.
* int16 *QuantMatrix : Pointer to 8x8 quantization matrix.
*
* OUTPUTS : int16 *OutputData : Pointer to 8x8 block to hold output.
*
* RETURNS : void
*
* FUNCTION : Inverse quantizes and inverse DCT's input 8x8 block
* to reproduce prediction error.
*
* SPECIAL NOTES : None.
*
****************************************************************************/
void IDctSlow ( int16 *InputData, int16 *QuantMatrix, int16 *OutputData )
{
int loop;
int32 t1, t2;
int32 IntermediateData[64];
int32 _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H;
int32 _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
int32 *ip = IntermediateData;
int16 *op = OutputData;
// dequantize the input
dequant_slow ( QuantMatrix, InputData, IntermediateData );
// Inverse DCT on the rows now
for ( loop=0; loop<8; loop++ )
{
// Check for non-zero values
if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] )
{
t1 = (int32)(xC1S7 * ip[1]);
t2 = (int32)(xC7S1 * ip[7]);
t1 >>= 16;
t2 >>= 16;
_A = t1 + t2;
t1 = (int32)(xC7S1 * ip[1]);
t2 = (int32)(xC1S7 * ip[7]);
t1 >>= 16;
t2 >>= 16;
_B = t1 - t2;
t1 = (int32)(xC3S5 * ip[3]);
t2 = (int32)(xC5S3 * ip[5]);
t1 >>= 16;
t2 >>= 16;
_C = t1 + t2;
t1 = (int32)(xC3S5 * ip[5]);
t2 = (int32)(xC5S3 * ip[3]);
t1 >>= 16;
t2 >>= 16;
_D = t1 - t2;
t1 = (int32)(xC4S4 * (_A - _C));
t1 >>= 16;
_Ad = t1;
t1 = (int32)(xC4S4 * (_B - _D));
t1 >>= 16;
_Bd = t1;
_Cd = _A + _C;
_Dd = _B + _D;
t1 = (int32)(xC4S4 * (ip[0] + ip[4]));
t1 >>= 16;
_E = t1;
t1 = (int32)(xC4S4 * (ip[0] - ip[4]));
t1 >>= 16;
_F = t1;
t1 = (int32)(xC2S6 * ip[2]);
t2 = (int32)(xC6S2 * ip[6]);
t1 >>= 16;
t2 >>= 16;
_G = t1 + t2;
t1 = (int32)(xC6S2 * ip[2]);
t2 = (int32)(xC2S6 * ip[6]);
t1 >>= 16;
t2 >>= 16;
_H = t1 - t2;
_Ed = _E - _G;
_Gd = _E + _G;
_Add = _F + _Ad;
_Bdd = _Bd - _H;
_Fd = _F - _Ad;
_Hd = _Bd + _H;
// Final sequence of operations over-write original inputs.
ip[0] = (int16)((_Gd + _Cd ) >> 0);
ip[7] = (int16)((_Gd - _Cd ) >> 0);
ip[1] = (int16)((_Add + _Hd ) >> 0);
ip[2] = (int16)((_Add - _Hd ) >> 0);
ip[3] = (int16)((_Ed + _Dd ) >> 0);
ip[4] = (int16)((_Ed - _Dd ) >> 0);
ip[5] = (int16)((_Fd + _Bdd ) >> 0);
ip[6] = (int16)((_Fd - _Bdd ) >> 0);
}
ip += 8; /* next row */
}
ip = IntermediateData;
for ( loop=0; loop<8; loop++ )
{
// Check for non-zero values (bitwise | faster than logical ||)
if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] )
{
t1 = (int32)(xC1S7 * ip[1*8]);
t2 = (int32)(xC7S1 * ip[7*8]);
t1 >>= 16;
t2 >>= 16;
_A = t1 + t2;
t1 = (int32)(xC7S1 * ip[1*8]);
t2 = (int32)(xC1S7 * ip[7*8]);
t1 >>= 16;
t2 >>= 16;
_B = t1 - t2;
t1 = (int32)(xC3S5 * ip[3*8]);
t2 = (int32)(xC5S3 * ip[5*8]);
t1 >>= 16;
t2 >>= 16;
_C = t1 + t2;
t1 = (int32)(xC3S5 * ip[5*8]);
t2 = (int32)(xC5S3 * ip[3*8]);
t1 >>= 16;
t2 >>= 16;
_D = t1 - t2;
t1 = (int32)(xC4S4 * (_A - _C));
t1 >>= 16;
_Ad = t1;
t1 = (int32)(xC4S4 * (_B - _D));
t1 >>= 16;
_Bd = t1;
_Cd = _A + _C;
_Dd = _B + _D;
t1 = (int32)(xC4S4 * (ip[0*8] + ip[4*8]));
t1 >>= 16;
_E = t1;
t1 = (int32)(xC4S4 * (ip[0*8] - ip[4*8]));
t1 >>= 16;
_F = t1;
t1 = (int32)(xC2S6 * ip[2*8]);
t2 = (int32)(xC6S2 * ip[6*8]);
t1 >>= 16;
t2 >>= 16;
_G = t1 + t2;
t1 = (int32)(xC6S2 * ip[2*8]);
t2 = (int32)(xC2S6 * ip[6*8]);
t1 >>= 16;
t2 >>= 16;
_H = t1 - t2;
_Ed = _E - _G;
_Gd = _E + _G;
_Add = _F + _Ad;
_Bdd = _Bd - _H;
_Fd = _F - _Ad;
_Hd = _Bd + _H;
_Gd += IdctAdjustBeforeShift;
_Add += IdctAdjustBeforeShift;
_Ed += IdctAdjustBeforeShift;
_Fd += IdctAdjustBeforeShift;
// Final sequence of operations over-write original inputs.
op[0*8] = (int16)((_Gd + _Cd ) >> 4);
op[7*8] = (int16)((_Gd - _Cd ) >> 4);
op[1*8] = (int16)((_Add + _Hd ) >> 4);
op[2*8] = (int16)((_Add - _Hd ) >> 4);
op[3*8] = (int16)((_Ed + _Dd ) >> 4);
op[4*8] = (int16)((_Ed - _Dd ) >> 4);
op[5*8] = (int16)((_Fd + _Bdd ) >> 4);
op[6*8] = (int16)((_Fd - _Bdd ) >> 4);
}
else
{
op[0*8] = 0;
op[7*8] = 0;
op[1*8] = 0;
op[2*8] = 0;
op[3*8] = 0;
op[4*8] = 0;
op[5*8] = 0;
op[6*8] = 0;
}
ip++; // next column
op++;
}
}
/****************************************************************************
*
* ROUTINE : dequant_slow10
*
* INPUTS : INT16 *dequant_coeffs : Pointer to dequantization step sizes.
* INT16 *quantized_list : Pointer to quantized DCT coeffs
* (in zig-zag order).
*
* OUTPUTS : INT32 *DCT_block : Pointer to 8x8 de-quantized block
* (in 2-D raster order).
*
* RETURNS : void
*
* FUNCTION : De-quantizes an 8x8 block of quantized DCT coeffs that
* only has non-zero coefficients in the first 10, i.e.
* only DC & AC1-9 are non-zero, AC10-63 __MUST_BE_ zero.
*
* SPECIAL NOTES : Uses dequant_index to invert zig-zag ordering.
*
****************************************************************************/
void dequant_slow10 ( INT16 *dequant_coeffs, INT16 *quantized_list, INT32 *DCT_block )
{
memset(DCT_block,0, 128);
// Loop fully expanded for maximum speed
DCT_block[dequant_index[0]] = quantized_list[0] * dequant_coeffs[0];
DCT_block[dequant_index[1]] = quantized_list[1] * dequant_coeffs[1];
DCT_block[dequant_index[2]] = quantized_list[2] * dequant_coeffs[2];
DCT_block[dequant_index[3]] = quantized_list[3] * dequant_coeffs[3];
DCT_block[dequant_index[4]] = quantized_list[4] * dequant_coeffs[4];
DCT_block[dequant_index[5]] = quantized_list[5] * dequant_coeffs[5];
DCT_block[dequant_index[6]] = quantized_list[6] * dequant_coeffs[6];
DCT_block[dequant_index[7]] = quantized_list[7] * dequant_coeffs[7];
DCT_block[dequant_index[8]] = quantized_list[8] * dequant_coeffs[8];
DCT_block[dequant_index[9]] = quantized_list[9] * dequant_coeffs[9];
DCT_block[dequant_index[10]] = quantized_list[10] * dequant_coeffs[10];
}
/****************************************************************************
*
* ROUTINE : IDctSlow10
*
* INPUTS : int16 *InputData : Pointer to 8x8 quantized DCT coefficients.
* int16 *QuantMatrix : Pointer to 8x8 quantization matrix.
*
* OUTPUTS : int16 *OutputData : Pointer to 8x8 block to hold output.
*
* RETURNS : void
*
* FUNCTION : Inverse quantizes and inverse DCT's input 8x8 block
* with non-zero coeffs only in DC & the first 9 AC coeffs.
* i.e. non-zeros ONLY in the following 10 positions:
*
* x x x x 0 0 0 0
* x x x 0 0 0 0 0
* x x 0 0 0 0 0 0
* x 0 0 0 0 0 0 0
* 0 0 0 0 0 0 0 0
* 0 0 0 0 0 0 0 0
* 0 0 0 0 0 0 0 0
* 0 0 0 0 0 0 0 0
*
* SPECIAL NOTES : Output data is in raster, not zig-zag, order.
*
****************************************************************************/
void IDct10 ( int16 *InputData, int16 *QuantMatrix, int16 *OutputData )
{
int loop;
int32 t1, t2;
int32 IntermediateData[64];
int32 _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H;
int32 _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
int32 *ip = IntermediateData;
int16 *op = OutputData;
// dequantize the input
dequant_slow10 ( QuantMatrix, InputData, IntermediateData );
// Inverse DCT on the rows now
for ( loop=0; loop<4; loop++ )
{
// Check for non-zero values
if ( ip[0] | ip[1] | ip[2] | ip[3] )
{
t1 = (int32)(xC1S7 * ip[1]);
t1 >>= 16;
_A = t1;
t1 = (int32)(xC7S1 * ip[1]);
t1 >>= 16;
_B = t1 ;
t1 = (int32)(xC3S5 * ip[3]);
t1 >>= 16;
_C = t1;
t2 = (int32)(xC5S3 * ip[3]);
t2 >>= 16;
_D = -t2;
t1 = (int32)(xC4S4 * (_A - _C));
t1 >>= 16;
_Ad = t1;
t1 = (int32)(xC4S4 * (_B - _D));
t1 >>= 16;
_Bd = t1;
_Cd = _A + _C;
_Dd = _B + _D;
t1 = (int32)(xC4S4 * ip[0] );
t1 >>= 16;
_E = t1;
_F = t1;
t1 = (int32)(xC2S6 * ip[2]);
t1 >>= 16;
_G = t1;
t1 = (int32)(xC6S2 * ip[2]);
t1 >>= 16;
_H = t1 ;
_Ed = _E - _G;
_Gd = _E + _G;
_Add = _F + _Ad;
_Bdd = _Bd - _H;
_Fd = _F - _Ad;
_Hd = _Bd + _H;
// Final sequence of operations over-write original inputs.
ip[0] = (int16)((_Gd + _Cd ) >> 0);
ip[7] = (int16)((_Gd - _Cd ) >> 0);
ip[1] = (int16)((_Add + _Hd ) >> 0);
ip[2] = (int16)((_Add - _Hd ) >> 0);
ip[3] = (int16)((_Ed + _Dd ) >> 0);
ip[4] = (int16)((_Ed - _Dd ) >> 0);
ip[5] = (int16)((_Fd + _Bdd ) >> 0);
ip[6] = (int16)((_Fd - _Bdd ) >> 0);
}
ip += 8; /* next row */
}
ip = IntermediateData;
for ( loop=0; loop<8; loop++ )
{
// Check for non-zero values (bitwise or faster than ||)
if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] )
{
t1 = (int32)(xC1S7 * ip[1*8]);
t1 >>= 16;
_A = t1 ;
t1 = (int32)(xC7S1 * ip[1*8]);
t1 >>= 16;
_B = t1 ;
t1 = (int32)(xC3S5 * ip[3*8]);
t1 >>= 16;
_C = t1 ;
t2 = (int32)(xC5S3 * ip[3*8]);
t2 >>= 16;
_D = - t2;
t1 = (int32)(xC4S4 * (_A - _C));
t1 >>= 16;
_Ad = t1;
t1 = (int32)(xC4S4 * (_B - _D));
t1 >>= 16;
_Bd = t1;
_Cd = _A + _C;
_Dd = _B + _D;
t1 = (int32)(xC4S4 * ip[0*8]);
t1 >>= 16;
_E = t1;
_F = t1;
t1 = (int32)(xC2S6 * ip[2*8]);
t1 >>= 16;
_G = t1;
t1 = (int32)(xC6S2 * ip[2*8]);
t1 >>= 16;
_H = t1;
_Ed = _E - _G;
_Gd = _E + _G;
_Add = _F + _Ad;
_Bdd = _Bd - _H;
_Fd = _F - _Ad;
_Hd = _Bd + _H;
_Gd += IdctAdjustBeforeShift;
_Add += IdctAdjustBeforeShift;
_Ed += IdctAdjustBeforeShift;
_Fd += IdctAdjustBeforeShift;
// Final sequence of operations over-write original inputs.
op[0*8] = (int16)((_Gd + _Cd ) >> 4);
op[7*8] = (int16)((_Gd - _Cd ) >> 4);
op[1*8] = (int16)((_Add + _Hd ) >> 4);
op[2*8] = (int16)((_Add - _Hd ) >> 4);
op[3*8] = (int16)((_Ed + _Dd ) >> 4);
op[4*8] = (int16)((_Ed - _Dd ) >> 4);
op[5*8] = (int16)((_Fd + _Bdd ) >> 4);
op[6*8] = (int16)((_Fd - _Bdd ) >> 4);
}
else
{
op[0*8] = 0;
op[7*8] = 0;
op[1*8] = 0;
op[2*8] = 0;
op[3*8] = 0;
op[4*8] = 0;
op[5*8] = 0;
op[6*8] = 0;
}
ip++; // next column
op++;
}
}
/****************************************************************************
*
* ROUTINE : IDct1
*
* INPUTS : int16 *InputData : Pointer to 8x8 quantized DCT coefficients.
* int16 *QuantMatrix : Pointer to 8x8 quantization matrix.
*
* OUTPUTS : int16 *OutputData : Pointer to 8x8 block to hold output.
*
* RETURNS : void
*
* FUNCTION : Inverse DCT's input 8x8 block with only one non-zero
* coeff in the DC position:
*
* x 0 0 0 0 0 0 0
* 0 0 0 0 0 0 0 0
* 0 0 0 0 0 0 0 0
* 0 0 0 0 0 0 0 0
* 0 0 0 0 0 0 0 0
* 0 0 0 0 0 0 0 0
* 0 0 0 0 0 0 0 0
* 0 0 0 0 0 0 0 0
*
* SPECIAL NOTES : Output data is in raster, not zig-zag, order.
*
****************************************************************************/
void IDct1 ( int16 *InputData, int16 *QuantMatrix, INT16 *OutputData )
{
INT32 loop;
INT16 OutD;
OutD = (INT16)((INT32)(InputData[0]*QuantMatrix[0]+15)>>5);
for ( loop=0; loop<64; loop++ )
OutputData[loop] = OutD;
}
#if 0
/****************************************************************************
*
* ROUTINE : IDct4
*
* INPUTS : int16 *InputData : Pointer to 8x8 DCT coefficients.
*
* OUTPUTS : int16 *OutputData : Pointer to 8x8 block to hold output.
*
* RETURNS : void
*
* FUNCTION : Inverse DCT's input 8x8 block with at most four non-zero
* coeffs in the following positions:
*
* x x 0 0 0 0 0 0
* x x 0 0 0 0 0 0
* 0 0 0 0 0 0 0 0
* 0 0 0 0 0 0 0 0
* 0 0 0 0 0 0 0 0
* 0 0 0 0 0 0 0 0
* 0 0 0 0 0 0 0 0
* 0 0 0 0 0 0 0 0
*
* SPECIAL NOTES : CURRENTLY NOT USED IN CODEBASE.
*
****************************************************************************/
void IDct4 ( int16 *InputData, int16 *OutputData )
{
int32 t1;
int loop;
int32 _Add, _Fd;
int32 _A, _B, _Ad, _Bd, _Cd, _Dd, _E;
int16 *ip = InputData;
int16 *op = OutputData;
// Unzigzag the coefficents
ip[8] = ip[2];
ip[9] = ip[4];
ip[2] = 0;
ip[5] = 0;
// Inverse DCT on the rows now
for ( loop = 0; loop < 2; loop++)
{
// Check for non-zero values
if ( ip[0] | ip[1] )
{
t1 = (int32)(xC1S7 * ip[1]);
t1 >>= 16;
_A = t1;
t1 = (int32)(xC7S1 * ip[1]);
t1 >>= 16;
_B = t1 ;
t1 = (int32)(xC4S4 * _A );
t1 >>= 16;
_Ad = t1;
t1 = (int32)(xC4S4 * _B );
t1 >>= 16;
_Bd = t1;
_Cd = _A ;
_Dd = _B ;
t1 = (int32)(xC4S4 * ip[0] );
t1 >>= 16;
_E = t1;
_Add = _E + _Ad;
_Fd = _E - _Ad;
// Final sequence of operations over-write original inputs.
ip[0] = (int16)((_E + _Cd ) >> 0);
ip[7] = (int16)((_E - _Cd ) >> 0);
ip[1] = (int16)((_Add + _Bd ) >> 0);
ip[2] = (int16)((_Add - _Bd ) >> 0);
ip[3] = (int16)((_E + _Dd ) >> 0);
ip[4] = (int16)((_E - _Dd ) >> 0);
ip[5] = (int16)((_Fd + _Bd ) >> 0);
ip[6] = (int16)((_Fd - _Bd ) >> 0);
}
ip += 8; /* next row */
}
ip = InputData;
for ( loop=0; loop<8; loop++ )
{
// Check for non-zero values (bitwise or faster than ||)
if ( ip[0 * 8] | ip[1 * 8] )
{
t1 = (int32)(xC1S7 * ip[1*8]);
t1 >>= 16;
_A = t1 ;
t1 = (int32)(xC7S1 * ip[1*8]);
t1 >>= 16;
_B = t1 ;
t1 = (int32)(xC4S4 * _A );
t1 >>= 16;
_Ad = t1;
t1 = (int32)(xC4S4 * _B );
t1 >>= 16;
_Bd = t1;
_Cd = _A ;
_Dd = _B ;
t1 = (int32)(xC4S4 * ip[0*8]);
t1 >>= 16;
_E = t1;
_Add = _E + _Ad;
_Fd = _E - _Ad;
_Add += IdctAdjustBeforeShift;
_E += IdctAdjustBeforeShift;
_Fd += IdctAdjustBeforeShift;
// Final sequence of operations over-write original inputs.
op[0*8] = (int16)((_E + _Cd ) >> 4);
op[7*8] = (int16)((_E - _Cd ) >> 4);
op[1*8] = (int16)((_Add + _Bd ) >> 4);
op[2*8] = (int16)((_Add - _Bd ) >> 4);
op[3*8] = (int16)((_E + _Dd ) >> 4);
op[4*8] = (int16)((_E - _Dd ) >> 4);
op[5*8] = (int16)((_Fd + _Bd ) >> 4);
op[6*8] = (int16)((_Fd - _Bd ) >> 4);
}
else
{
op[0*8] = 0;
op[7*8] = 0;
op[1*8] = 0;
op[2*8] = 0;
op[3*8] = 0;
op[4*8] = 0;
op[5*8] = 0;
op[6*8] = 0;
}
ip++; // next column
op++;
}
}
#endif

View File

@@ -0,0 +1,243 @@
/****************************************************************************
*
* Module Title : Reconstruct.c
*
* Description : Block reconstruction functions.
*
****************************************************************************/
#define STRICT // Strict type checking
/****************************************************************************
* Header Files
****************************************************************************/
#include "reconstruct.h"
#include "codec_common.h"
/****************************************************************************
*
* ROUTINE : SatUnsigned8
*
* INPUTS : INT16 *DataBlock : Pointer to 8x8 input block.
* UINT32 ResultLineStep : Stride of output block.
* UINT32 DataLineStep : Stride of input block.
*
* OUTPUTS : UINT8 *ResultPtr : Pointer to 8x8 output block.
*
* RETURNS : void
*
* FUNCTION : Saturates the input data to 8 bits unsigned and stores
* in the output buffer.
*
* SPECIAL NOTES : None.
*
****************************************************************************/
void SatUnsigned8 ( UINT8 *ResultPtr, INT16 *DataBlock, UINT32 ResultLineStep, UINT32 DataLineStep )
{
INT32 i;
// Partly expanded loop
for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
{
ResultPtr[0] = (char) LIMIT(DataBlock[0]);
ResultPtr[1] = (char) LIMIT(DataBlock[1]);
ResultPtr[2] = (char) LIMIT(DataBlock[2]);
ResultPtr[3] = (char) LIMIT(DataBlock[3]);
ResultPtr[4] = (char) LIMIT(DataBlock[4]);
ResultPtr[5] = (char) LIMIT(DataBlock[5]);
ResultPtr[6] = (char) LIMIT(DataBlock[6]);
ResultPtr[7] = (char) LIMIT(DataBlock[7]);
DataBlock += DataLineStep;
ResultPtr += ResultLineStep;
}
}
/****************************************************************************
*
* ROUTINE : ScalarReconIntra
*
* INPUTS : INT16 *TmpDataBuffer : Pointer to 8x8 temporary buffer for internal use.
* UINT16 *ChangePtr : Pointer to 8x8 intra prediction block.
* UINT32 LineStep : Stride of reconstruction block.
*
* OUTPUTS : UINT8 *ReconPtr : Pointer to 8x8 block to hold reconstructed block.
*
* RETURNS : None
*
* FUNCTION : Reconstructs an intra block.
*
* SPECIAL NOTES : None.
*
****************************************************************************/
void ScalarReconIntra ( INT16 *TmpDataBuffer, UINT8 *ReconPtr, UINT16 *ChangePtr, UINT32 LineStep )
{
UINT32 i;
INT16 *TmpDataPtr = TmpDataBuffer;
for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
{
TmpDataPtr[0] = (INT16) ( ChangePtr[0] + 128 );
TmpDataPtr[1] = (INT16) ( ChangePtr[1] + 128 );
TmpDataPtr[2] = (INT16) ( ChangePtr[2] + 128 );
TmpDataPtr[3] = (INT16) ( ChangePtr[3] + 128 );
TmpDataPtr[4] = (INT16) ( ChangePtr[4] + 128 );
TmpDataPtr[5] = (INT16) ( ChangePtr[5] + 128 );
TmpDataPtr[6] = (INT16) ( ChangePtr[6] + 128 );
TmpDataPtr[7] = (INT16) ( ChangePtr[7] + 128 );
TmpDataPtr += BLOCK_HEIGHT_WIDTH;
ChangePtr += BLOCK_HEIGHT_WIDTH;
}
// Saturate the output to unsigned 8 bit values in recon buffer
SatUnsigned8 ( ReconPtr, TmpDataBuffer, LineStep, BLOCK_HEIGHT_WIDTH );
}
/****************************************************************************
*
* ROUTINE : ScalarReconInter
*
* INPUTS : INT16 *TmpDataBuffer : Pointer to 8x8 temporary buffer for internal use.
* UINT8 *RefPtr : Pointer to 8x8 reference block.
* INT16 *ChangePtr : Pointer to 8x8 inter prediction error block.
* UINT32 LineStep : Stride of reference and output blocks.
*
* OUTPUTS : UINT8 *ReconPtr : Pointer to 8x8 block to hold reconstructed block.
*
* RETURNS : None
*
* FUNCTION : Reconstructs an inter-coded block by adding a prediction
* error to a reference block in the previous frame
* reconstruction buffer.
*
* SPECIAL NOTES : None.
*
****************************************************************************/
void ScalarReconInter ( INT16 *TmpDataBuffer, UINT8 *ReconPtr, UINT8 *RefPtr, INT16 *ChangePtr, UINT32 LineStep )
{
UINT32 i;
INT16 *TmpDataPtr = TmpDataBuffer;
for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
{
// Form each row
TmpDataPtr[0] = (INT16)(RefPtr[0] + ChangePtr[0]);
TmpDataPtr[1] = (INT16)(RefPtr[1] + ChangePtr[1]);
TmpDataPtr[2] = (INT16)(RefPtr[2] + ChangePtr[2]);
TmpDataPtr[3] = (INT16)(RefPtr[3] + ChangePtr[3]);
TmpDataPtr[4] = (INT16)(RefPtr[4] + ChangePtr[4]);
TmpDataPtr[5] = (INT16)(RefPtr[5] + ChangePtr[5]);
TmpDataPtr[6] = (INT16)(RefPtr[6] + ChangePtr[6]);
TmpDataPtr[7] = (INT16)(RefPtr[7] + ChangePtr[7]);
// Next row of Block
ChangePtr += BLOCK_HEIGHT_WIDTH;
TmpDataPtr += BLOCK_HEIGHT_WIDTH;
RefPtr += LineStep;
}
// Saturate the output to unsigned 8 bit values in recon buffer
SatUnsigned8 ( ReconPtr, TmpDataBuffer, LineStep, BLOCK_HEIGHT_WIDTH );
}
/****************************************************************************
*
* ROUTINE : ScalarReconInterHalfPixel2
*
* INPUTS : INT16 *TmpDataBuffer : Pointer to 8x8 temporary buffer for internal use.
* UINT8 *RefPtr1 : Pointer to first 8x8 reference block.
* UINT8 *RefPtr2 : Pointer to second 8x8 reference block.
* INT16 *ChangePtr : Pointer to 8x8 inter prediction error block.
* UINT32 LineStep : Stride of reference blocks.
*
* OUTPUTS : UINT8 *ReconPtr : Pointer to 8x8 block to hold reconstructed block.
*
* RETURNS : None
*
* FUNCTION : Reconstructs an inter-coded block by adding a prediction
* error to a reference block computed by averaging the two
* specified reference blocks. The two reference blocks are
* those that bracket the 1/2-pixel accuracy motion vector.
*
* SPECIAL NOTES : None.
*
****************************************************************************/
void ScalarReconInterHalfPixel2
(
INT16 *TmpDataBuffer,
UINT8 *ReconPtr,
UINT8 *RefPtr1,
UINT8 *RefPtr2,
INT16 *ChangePtr,
UINT32 LineStep
)
{
UINT32 i;
INT16 *TmpDataPtr = TmpDataBuffer;
for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
{
// Form each row
TmpDataPtr[0] = (INT16)( (((INT32)RefPtr1[0] + (INT32)RefPtr2[0]) >> 1) + ChangePtr[0] );
TmpDataPtr[1] = (INT16)( (((INT32)RefPtr1[1] + (INT32)RefPtr2[1]) >> 1) + ChangePtr[1] );
TmpDataPtr[2] = (INT16)( (((INT32)RefPtr1[2] + (INT32)RefPtr2[2]) >> 1) + ChangePtr[2] );
TmpDataPtr[3] = (INT16)( (((INT32)RefPtr1[3] + (INT32)RefPtr2[3]) >> 1) + ChangePtr[3] );
TmpDataPtr[4] = (INT16)( (((INT32)RefPtr1[4] + (INT32)RefPtr2[4]) >> 1) + ChangePtr[4] );
TmpDataPtr[5] = (INT16)( (((INT32)RefPtr1[5] + (INT32)RefPtr2[5]) >> 1) + ChangePtr[5] );
TmpDataPtr[6] = (INT16)( (((INT32)RefPtr1[6] + (INT32)RefPtr2[6]) >> 1) + ChangePtr[6] );
TmpDataPtr[7] = (INT16)( (((INT32)RefPtr1[7] + (INT32)RefPtr2[7]) >> 1) + ChangePtr[7] );
// Next row of Block
ChangePtr += BLOCK_HEIGHT_WIDTH;
TmpDataPtr += BLOCK_HEIGHT_WIDTH;
RefPtr1 += LineStep;
RefPtr2 += LineStep;
}
// Saturate the output to unsigned 8 bit values in recon buffer
SatUnsigned8( ReconPtr, TmpDataBuffer, LineStep, BLOCK_HEIGHT_WIDTH );
}
/****************************************************************************
*
* ROUTINE : ReconBlock_C
*
* INPUTS : INT16 *SrcBlock : Pointer to 8x8 prediction error.
* INT16 *ReconRefPtr : Pointer to 8x8 block prediction.
* UINT32 LineStep : Stride of output block.
*
* OUTPUTS : UINT8 *DestBlock : Pointer to 8x8 reconstructed block.
*
* RETURNS : void
*
* FUNCTION : Reconstrut a block by adding the prediction error
* block to the source block and clipping values.
*
* SPECIAL NOTES : None.
*
****************************************************************************/
void ReconBlock_C ( INT16 *SrcBlock, INT16 *ReconRefPtr, UINT8 *DestBlock, UINT32 LineStep )
{
UINT32 i;
INT16 *SrcBlockPtr = SrcBlock;
// For each block row
for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
{
SrcBlock[0] = (INT16)(SrcBlock[0] + ReconRefPtr[0]);
SrcBlock[1] = (INT16)(SrcBlock[1] + ReconRefPtr[1]);
SrcBlock[2] = (INT16)(SrcBlock[2] + ReconRefPtr[2]);
SrcBlock[3] = (INT16)(SrcBlock[3] + ReconRefPtr[3]);
SrcBlock[4] = (INT16)(SrcBlock[4] + ReconRefPtr[4]);
SrcBlock[5] = (INT16)(SrcBlock[5] + ReconRefPtr[5]);
SrcBlock[6] = (INT16)(SrcBlock[6] + ReconRefPtr[6]);
SrcBlock[7] = (INT16)(SrcBlock[7] + ReconRefPtr[7]);
// Next row...
SrcBlock += BLOCK_HEIGHT_WIDTH;
ReconRefPtr += BLOCK_HEIGHT_WIDTH;
}
// Saturate the output to unsigned 8 bit values in recon buffer
SatUnsigned8( DestBlock, SrcBlockPtr, LineStep, BLOCK_HEIGHT_WIDTH );
}

View File

@@ -0,0 +1,100 @@
/****************************************************************************
*
* Module Title : SystemDependant.c
*
* Description : Miscellaneous system dependant functions.
*
****************************************************************************/
/****************************************************************************
* Header Files
****************************************************************************/
#include "codec_common.h"
#include "vputil_if.h"
/****************************************************************************
* Exports
****************************************************************************/
// Scalar (no mmx) reconstruction functions
extern void ClearSysState_C ( void );
extern void IDctSlow ( INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
extern void IDct10 ( INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
extern void IDct1 ( INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
extern void ScalarReconIntra ( INT16 *TmpDataBuffer, UINT8 *ReconPtr, UINT16 *ChangePtr, UINT32 LineStep );
extern void ScalarReconInter ( INT16 *TmpDataBuffer, UINT8 *ReconPtr, UINT8 *RefPtr, INT16 *ChangePtr, UINT32 LineStep );
extern void ScalarReconInterHalfPixel2 ( INT16 *TmpDataBuffer, UINT8 *ReconPtr,UINT8 *RefPtr1, UINT8 *RefPtr2, INT16 *ChangePtr, UINT32 LineStep );
extern void ReconBlock_C(INT16 *SrcBlock,INT16 *ReconRefPtr, UINT8 *DestBlock, UINT32 LineStep );
extern void SubtractBlock_C ( UINT8 *SrcBlock, INT16 *DestPtr, UINT32 LineStep );
extern void UnpackBlock_C ( UINT8 *ReconPtr, INT16 *ReconRefPtr, UINT32 ReconPixelsPerLine );
extern void AverageBlock_C ( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 ReconPixelsPerLine );
extern void CopyBlock_C ( unsigned char *src, unsigned char *dest, unsigned int srcstride );
extern void Copy12x12_C ( const unsigned char *src, unsigned char *dest, unsigned int srcstride, unsigned int deststride );
extern void fdct_short_C ( INT16 *InputData, INT16 *OutputData );
extern void FilterBlockBil_8_C( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT8 *ReconRefPtr, UINT32 ReconPixelsPerLine, INT32 ModX, INT32 ModY );
extern void FilterBlock_C( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY, BOOL UseBicubic, UINT8 BicubicAlpha );
extern void GetProcessorFlags ( INT32 *MmxEnabled, INT32 *XmmEnabled, INT32 *WmtEnabled );
/****************************************************************************
*
* ROUTINE : fillidctconstants
*
* INPUTS : None
*
* OUTPUTS : None
*
* RETURNS : void
*
* FUNCTION : STUB FUNCTION.
*
* SPECIAL NOTES : None.
*
****************************************************************************/
void fillidctconstants ( void )
{
}
/****************************************************************************
*
* ROUTINE : MachineSpecificConfig
*
* INPUTS : None
*
* OUTPUTS : None
*
* RETURNS : None
*
* FUNCTION : Checks for machine specifc features such as MMX support
* sets approipriate flags and function pointers.
*
* SPECIAL NOTES : None.
*
****************************************************************************/
void UtilMachineSpecificConfig ( void )
{
int i;
for(i=0;i<=64;i++)
{
if(i<=1)idctc[i]=IDct1;
else if(i<=10)idctc[i]=IDct10;
else idctc[i]=IDctSlow;
}
fdct_short=fdct_short_C ;
for(i=0;i<=64;i++)
{
if(i<=1)idct[i]=IDct1;
else if(i<=10)idct[i]=IDct10;
else idct[i]=IDctSlow;
}
ClearSysState = ClearSysState_C;
ReconIntra = ScalarReconIntra;
ReconInter = ScalarReconInter;
ReconInterHalfPixel2 = ScalarReconInterHalfPixel2;
AverageBlock = AverageBlock_C;
UnpackBlock = UnpackBlock_C;
ReconBlock = ReconBlock_C;
SubtractBlock = SubtractBlock_C;
CopyBlock = CopyBlock_C;
Copy12x12 = Copy12x12_C;
FilterBlockBil_8 = FilterBlockBil_8_C;
FilterBlock=FilterBlock_C;
}

File diff suppressed because it is too large Load Diff