Initial community commit

This commit is contained in:
Jef
2024-09-24 14:54:57 +02:00
parent 537bcbc862
commit 20d28e80a5
16810 changed files with 4640254 additions and 2 deletions
@@ -0,0 +1,257 @@
/****************************************************************************
*
* Module Title : PreProcOptFunctions.c
*
* Description : MMX or otherwise processor specific
* optimised versions of pre-processor functions
*
* AUTHOR : Paul Wilkins
*
*****************************************************************************
* Revision History
*
* 1.04 YWX 30-Nov-00 Added support for WMT cpu
* 1.03 PGW 24 Jul 00 Added Column SAD function.
* 1.02 YX 06/04/00 Optimized get row sad for xmm
* 1.01 PGW 12/07/99 Changes to reduce unnecessary dependencies.
* 1.00 PGW 14/06/99 Configuration baseline
*
*****************************************************************************
*/
/****************************************************************************
* Header Files
*****************************************************************************
*/
#define STRICT /* Strict type checking. */
#include "preproc.h"
#include "cpuidlib.h"
#pragma warning( disable : 4799 ) // Disable no emms instruction warning!
/****************************************************************************
* Module constants.
*****************************************************************************
*/
/****************************************************************************
* Imports.
*****************************************************************************
*/
/****************************************************************************
* Exported Global Variables
*****************************************************************************
*/
/****************************************************************************
* Exported Functions
*****************************************************************************
*/
/****************************************************************************
* Module Statics
*****************************************************************************
*/
/****************************************************************************
* Forward References
*****************************************************************************
*/
// Row SAD using inline MMX; the definition appears later in this file.
UINT32 MmxRowSAD( UINT8 * Src1, UINT8 * Src2 );
// Row SAD variant that is not defined in this file; it is only referenced here
// so that MachineSpecificConfig() can install it as ppi->RowSAD.
extern UINT32 XmmRowSAD( UINT8 * Src1, UINT8 * Src2 );
/****************************************************************************
 *
 *  ROUTINE       :     MachineSpecificConfig
 *
 *  INPUTS        :     PP_INSTANCE *ppi
 *                          Pre-processor instance to configure.
 *
 *  OUTPUTS       :     ppi->MmxEnabled, ppi->XmmEnabled
 *                          Set from the CPU type reported by findCPUId().
 *                      ppi->RowSAD, ppi->ColSAD
 *                          Function pointers chosen to match those flags.
 *
 *  RETURNS       :     None
 *
 *  FUNCTION      :     Checks for machine specific features such as MMX
 *                      support, then sets the appropriate flags and
 *                      function pointers.
 *
 *  SPECIAL NOTES :     A CPU type that is not listed explicitly is treated
 *                      as having neither MMX nor XMM, so the scalar 'C'
 *                      routines are always a valid fallback and the flags
 *                      are never left unset.
 *
 *  ERRORS        :     None.
 *
 ****************************************************************************/
#define MMX_ENABLED 1
void MachineSpecificConfig(PP_INSTANCE *ppi)
{
    PROCTYPE CPUType = findCPUId();

    switch ( CPUType )
    {
    case PII    :
    case AMDK63D:
    case AMDK6  :
    case PMMX   :
        ppi->MmxEnabled = TRUE;
        ppi->XmmEnabled = FALSE;
        break;

    case XMM    :
    case WMT    :
        ppi->MmxEnabled = TRUE;
        ppi->XmmEnabled = TRUE;
        break;

    case X86    :
    case PPRO   :
    case C6X86  :
    case C6X86MX:
    case AMDK5  :
    case MACG3  :
    case MAC68K :
    default     :
        // Known non-MMX parts and anything unrecognised: use plain 'C'.
        ppi->MmxEnabled = FALSE;
        ppi->XmmEnabled = FALSE;
        break;
    }

    // To test, force the cpu type here
    //ppi->MmxEnabled = FALSE;
    //ppi->XmmEnabled = FALSE;

    // Pick the fastest row SAD the processor supports, falling back to the
    // original 'C' version.
    if ( ppi->XmmEnabled )
    {
        ppi->RowSAD = XmmRowSAD;
    }
    else if ( ppi->MmxEnabled )
    {
        ppi->RowSAD = MmxRowSAD;
    }
    else
    {
        ppi->RowSAD = ScalarRowSAD;
    }

    // Only the scalar column SAD is installed, whatever the CPU.
    ppi->ColSAD = ScalarColSAD;
}
/****************************************************************************
 *
 *  ROUTINE       :     MmxRowSAD
 *
 *  INPUTS        :     UINT8 * NewDataPtr
 *                          Eight bytes of new data.
 *                      UINT8 * RefDataPtr
 *                          Eight bytes of reference data.
 *
 *  OUTPUTS       :     None.
 *
 *  RETURNS       :     The larger of two sums of absolute differences: one
 *                      over bytes 0-3 and one over bytes 4-7.
 *
 *  FUNCTION      :     Forms |New - Ref| for eight bytes with MMX, sums each
 *                      group of four and returns the higher sum.
 *
 *  SPECIAL NOTES :     No EMMS is executed here; ClearMmxState() in this
 *                      file is the routine that issues it.
 *
 *  ERRORS        :     None.
 *
 ****************************************************************************/
UINT32 MmxRowSAD( UINT8 * NewDataPtr, UINT8 * RefDataPtr )
{
    UINT32 HalfSad[2];      // [0] = SAD of bytes 0-3, [1] = SAD of bytes 4-7

    __asm
    {
        mov         eax, dword ptr [NewDataPtr]     // eax -> new data
        mov         ebx, dword ptr [RefDataPtr]     // ebx -> reference data

        pxor        mm6, mm6                        // mm6 = 0, zero source for the low-half unpacks
        pxor        mm7, mm7                        // mm7 = 0, zero source for the high-half unpacks

        // Eight absolute differences: saturating A-B and B-A, one of which
        // is zero in every byte lane, so OR-ing them yields |A-B|.
        movq        mm0, [eax]
        movq        mm1, [ebx]
        movq        mm2, mm0
        psubusb     mm0, mm1                        // max(A-B, 0)
        psubusb     mm1, mm2                        // max(B-A, 0)
        por         mm0, mm1                        // |A-B| per byte
        movq        mm1, mm0

        // Widen: mm0 takes bytes 0-3 as words, mm1 takes bytes 4-7 as words.
        punpcklbw   mm0, mm6
        punpckhbw   mm1, mm7

        // Words -> dwords, adding word pairs (w0+w2, w1+w3) in each half.
        movq        mm2, mm0
        movq        mm3, mm1
        punpcklwd   mm0, mm6
        punpcklwd   mm1, mm7
        punpckhwd   mm2, mm6
        punpckhwd   mm3, mm7
        paddd       mm0, mm2
        paddd       mm1, mm3

        // Add the two dwords of each half; the total lands in the low dword.
        movq        mm2, mm0
        movq        mm3, mm1
        punpckhdq   mm0, mm6
        punpckhdq   mm1, mm7
        punpckldq   mm2, mm6
        punpckldq   mm3, mm7
        paddd       mm0, mm2
        paddd       mm1, mm3

        // mm0 = { low-half SAD, high-half SAD }; store both at once.
        punpckldq   mm0, mm1
        movq        qword ptr [HalfSad], mm0
    }

    return ( HalfSad[0] > HalfSad[1] ) ? HalfSad[0] : HalfSad[1];
}
/****************************************************************************
 *
 *  ROUTINE       :     ClearMmxState
 *
 *  INPUTS        :     PP_INSTANCE *ppi
 *                          Instance whose MmxEnabled flag is consulted.
 *
 *  OUTPUTS       :     None.
 *
 *  RETURNS       :     None.
 *
 *  FUNCTION      :     Executes EMMS to clear down the MMX state, but only
 *                      when ppi->MmxEnabled is set.
 *
 *  SPECIAL NOTES :     None.
 *
 *  ERRORS        :     None.
 *
 ****************************************************************************/
void ClearMmxState(PP_INSTANCE *ppi)
{
    // Without MMX enabled there is nothing to clear.
    if ( !ppi->MmxEnabled )
        return;

    // Clear the MMX state.
    __asm emms
}
@@ -0,0 +1,43 @@
//{{NO_DEPENDENCIES}}
// Microsoft Developer Studio generated include file.
// Used by PreprocParams.rc
//
// NOTE(review): three pairs of symbols below share a value:
//   IDD_VCAP_PARAMS        / IDD_PREPROC_PARAMS   (101)
//   IDC_SRF_TEMPORAL_CHECK / IDC_SC_TRADE_OFF_SB  (1005)
//   IDC_SRF_MEDIAN_CHECK   / IDC_RSAD_HIGH_SB     (1020)
// Presumably the paired controls never appear in the same dialog; confirm
// against PreprocParams.rc before relying on either name.
//
#define IDD_VCAP_PARAMS 101
#define IDD_PREPROC_PARAMS 101
#define IDC_VCAP_P_TRESH_SB 1000
#define IDC_VCAP_NOISE_SUP_SB 1001
#define IDC_VCAP_TRIG_SB 1002
#define IDC_SRF_CHECK 1004
#define IDC_SRF_TEMPORAL_CHECK 1005
#define IDC_SC_TRADE_OFF_SB 1005
#define IDC_RSAD_LOW_SB 1006
#define IDC_VCAP_PUV_TRESH_SB 1007
#define IDC_SGC_TRESH_SB 1008
#define IDC_SGC_TRIGGER_SB 1009
#define IDC_SGC_UV_TRESH_SB 1010
#define IDC_VCAP_BAR_THRESH_SB 1011
#define IDC_VCAP_P_TRESH_ED 1012
#define IDC_VCAP_PUV_TRESH_ED 1013
#define IDC_VCAP_NOISE_SUP_ED 1014
#define IDC_VCAP_TRIG_ED 1015
#define IDC_VCAP_BAR_THRESH_ED 1016
#define IDC_SGC_TRESH_ED 1017
#define IDC_SGC_UV_TRESH_ED 1018
#define IDC_SGC_TRIGGER_ED 1019
#define IDC_SRF_MEDIAN_CHECK 1020
#define IDC_RSAD_HIGH_SB 1020
#define IDC_PAK_ENABLED_CHECK 1023
#define IDC_SC_TRADE_OFF_ED 1024
#define IDC_RSAD_LOW_ED 1025
#define IDC_RSAD_HIGH_ED 1026
// Next default values for new objects
//
// _APS_NEXT_CONTROL_VALUE sits one past the highest control ID above (1026)
// so that newly added controls cannot reuse 1023-1026.
#ifdef APSTUDIO_INVOKED
#ifndef APSTUDIO_READONLY_SYMBOLS
#define _APS_NEXT_RESOURCE_VALUE 103
#define _APS_NEXT_COMMAND_VALUE 40001
#define _APS_NEXT_CONTROL_VALUE 1027
#define _APS_NEXT_SYMED_VALUE 101
#endif
#endif
@@ -0,0 +1,765 @@
/****************************************************************************
*
* Module Title : RowDiffScan.c
*
* Description : Pre-processor row difference Scan
*
* AUTHOR : Paul Wilkins
*
*****************************************************************************
* Revision History
*
* 1.00 JBB 22 AUG 00 Configuration baseline
*
*****************************************************************************
*/
/****************************************************************************
* Header Files
*****************************************************************************
*/
#define STRICT /* Strict type checking. */
#include "type_aliases.h"
#include "preproc.h"
/****************************************************************************
*  Module constants.
*****************************************************************************
*/
#define ROW_SCAN_PIXELS         8   /* Pixels examined per fragment on one row */
#define ROW_SCAN_CODED_TRIGGER  7   /* Changed-pixel count that promotes a fragment to BLOCK_CODED */
#define ROW_SCAN_CHLOCALS_DUMMY 8   /* ChLocals fill value for fragments already marked as coded */

/****************************************************************************
 *
 *  ROUTINE       :     RowDiffTestPixel (file-local helper)
 *
 *  INPUTS        :     PP_INSTANCE *ppi
 *                          Supplies LevelThresh / NegLevelThresh,
 *                          SrfThresh / NegSrfThresh and
 *                          HighChange / NegHighChange.
 *                      UINT8 * PixPtr1, PixPtr2
 *                          The two source pixels; only dereferenced (via
 *                          ApplyPakLowPass) when UsePak is set.
 *                      INT16   Diff
 *                          Raw difference *PixPtr1 - *PixPtr2.
 *                      BOOL    UsePak
 *                          TRUE to re-test suspect differences through the
 *                          PAK low pass kernel.
 *
 *  OUTPUTS       :     INT8  * SgcPtr
 *                          Incremented for a positive level change,
 *                          decremented for a negative one.
 *                      UINT8 * BitPtr
 *                          Set to 1 when the pixel is judged changed.
 *
 *  RETURNS       :     1 if the pixel was marked as changed, else 0.
 *
 *  FUNCTION      :     Tests one pixel difference against the level and SRF
 *                      thresholds and records the results.
 *
 ****************************************************************************/
static INT32 RowDiffTestPixel( PP_INSTANCE *ppi, UINT8 * PixPtr1, UINT8 * PixPtr2,
                               INT16 Diff, INT8 * SgcPtr, UINT8 * BitPtr,
                               BOOL UsePak )
{
    if ( Diff >= ppi->LevelThresh )
    {
        SgcPtr[0]++;

        // If the level change is still suspect then apply PAK kernel.
        if ( UsePak && (Diff > ppi->SrfThresh) && (Diff <= ppi->HighChange) )
            Diff = (INT16)( (int)ApplyPakLowPass( ppi, PixPtr1 ) -
                            (int)ApplyPakLowPass( ppi, PixPtr2 ) );

        if ( Diff > ppi->SrfThresh )
        {
            *BitPtr = 1;
            return 1;
        }
    }
    else if ( Diff <= ppi->NegLevelThresh )
    {
        SgcPtr[0]--;

        // If the level change is still suspect then apply PAK kernel.
        if ( UsePak && (Diff < ppi->NegSrfThresh) && (Diff >= ppi->NegHighChange) )
            Diff = (INT16)( (int)ApplyPakLowPass( ppi, PixPtr1 ) -
                            (int)ApplyPakLowPass( ppi, PixPtr2 ) );

        if ( Diff < ppi->NegSrfThresh )
        {
            *BitPtr = 1;
            return 1;
        }
    }

    return 0;
}

/****************************************************************************
 *
 *  ROUTINE       :     RowDiffScan
 *
 *  INPUTS        :     PP_INSTANCE *ppi
 *                          Supplies PAKEnabled, PlaneWidth, HFragPixels and
 *                          the thresholds used by RowDiffTestPixel().
 *                      UINT8 * YuvPtr1, YuvPtr2
 *                          Pointers into current and previous frame.
 *                      BOOL    EdgeRow
 *                          Is this row an edge row.
 *
 *  OUTPUTS       :     INT16 * YUVDiffsPtr
 *                          Difference map (raw differences, written before
 *                          any PAK filtering).
 *                      UINT8 * bits_map_ptr
 *                          Pixels changed map.
 *                      INT8  * SgcPtr
 *                          Level change score, one entry per fragment.
 *                      INT8  * DispFragPtr
 *                          Block update map; read for every fragment and
 *                          set to BLOCK_CODED when enough pixels changed.
 *                      UINT8 * FDiffPixels
 *                          Per-fragment changed pixel count; this row's
 *                          count is added to each entry.
 *                      INT32 * RowDiffsPtr
 *                          Total sig changes for row (accumulated).
 *                      UINT8 * ChLocalsPtr
 *                          Changed locals data structure.
 *
 *  RETURNS       :     None.
 *
 *  FUNCTION      :     Initial pixel differences scan.
 *
 *  SPECIAL NOTES :     Eight pixels are processed per fragment while the
 *                      pointers advance by ppi->HFragPixels, so this
 *                      assumes HFragPixels is 8 - TODO confirm.
 *                      The PAK kernel cannot be used at an edge row or when
 *                      PAK is disabled.
 *                      An unfinished inline-MMX draft that used to precede
 *                      the 'C' code of the non-PAK path could not be
 *                      assembled and has been removed; the 'C' code is
 *                      the complete implementation.
 *
 *  ERRORS        :     None.
 *
 ****************************************************************************/
void RowDiffScan( PP_INSTANCE *ppi, UINT8 * YuvPtr1, UINT8 * YuvPtr2,
                  INT16 * YUVDiffsPtr, UINT8 * bits_map_ptr,
                  INT8 * SgcPtr, INT8 * DispFragPtr,
                  UINT8 * FDiffPixels, INT32 * RowDiffsPtr,
                  UINT8 * ChLocalsPtr, BOOL EdgeRow )
{
    INT32 i;
    INT32 k;
    INT32 FragChangedPixels;
    INT16 Diff;

    // Cannot use kernel if at edge or if PAK disabled
    BOOL  UsePak = ( ppi->PAKEnabled && !EdgeRow );

    for ( i = 0; i < ppi->PlaneWidth; i += ppi->HFragPixels )
    {
        // Reset count of pixels changed for the current fragment.
        FragChangedPixels = 0;

        // Test for break out conditions to save time.
        if ( *DispFragPtr == CANDIDATE_BLOCK )
        {
            for ( k = 0; k < ROW_SCAN_PIXELS; k++ )
            {
                // Calculate the difference value and copy to YUVDiffsPtr
                Diff = (INT16)( ((INT16)YuvPtr1[k]) - ((INT16)YuvPtr2[k]) );
                YUVDiffsPtr[k] = Diff;

                // Test against the Level and SRF thresholds and record the results
                FragChangedPixels += RowDiffTestPixel( ppi, &YuvPtr1[k], &YuvPtr2[k],
                                                       Diff, SgcPtr, &bits_map_ptr[k],
                                                       UsePak );

                // Clear down entry in changed locals array
                ChLocalsPtr[k] = 0;
            }
        }
        else if ( *DispFragPtr > BLOCK_NOT_CODED )
        {
            // For EBO coded blocks mark all pixels as changed.
            for ( k = 0; k < ROW_SCAN_PIXELS; k++ )
            {
                bits_map_ptr[k] = 1;
                ChLocalsPtr[k]  = ROW_SCAN_CHLOCALS_DUMMY;
            }
        }
        else
        {
            for ( k = 0; k < ROW_SCAN_PIXELS; k++ )
                ChLocalsPtr[k] = 0;
        }

        *RowDiffsPtr += FragChangedPixels;
        *FDiffPixels += (UINT8)FragChangedPixels;

        YuvPtr1      += ppi->HFragPixels;
        YuvPtr2      += ppi->HFragPixels;
        bits_map_ptr += ppi->HFragPixels;
        ChLocalsPtr  += ppi->HFragPixels;
        YUVDiffsPtr  += ppi->HFragPixels;
        SgcPtr ++;
        FDiffPixels ++;

        // If we have a lot of changed pixels for this fragment on this row then
        // the fragment is almost sure to be picked (e.g. through the line search) so we
        // can mark it as selected and then ignore it.
        if ( FragChangedPixels >= ROW_SCAN_CODED_TRIGGER )
        {
            *DispFragPtr = BLOCK_CODED;
        }

        DispFragPtr++;
    }
}
@@ -0,0 +1,88 @@
;------------------------------------------------
; Stack layout as seen through ESP once the entry code has pushed EBX.
; Field offsets: +0 saved EBX, +4 return address, +8 NewDataPtr, +12 RefDataPtr.
XmmRowSADParams STRUC
dd ? ;saved EBX (the one register pushed on entry)
dd ? ;return address
NewDataPtr dd ?
RefDataPtr dd ?
XmmRowSADParams ENDS
;------------------------------------------------
; NOTE(review): iaxmm.inc is not visible here; presumably it supplies the
; psadbw / pmaxsw encodings used below - confirm.
INCLUDE iaxmm.inc
.586
.387
.MODEL flat, SYSCALL, os_dos
.MMX
; macros
.DATA
; NOTE(review): no ENDS for TORQ_CX_DATA is visible before .CODE, and the
; segment holds no data in this file - confirm the assembler accepts this.
TORQ_CX_DATA SEGMENT PAGE PUBLIC USE32 'DATA'
ALIGN 32
.CODE
NAME XmmRowSAD
; The same entry point is exported under two spellings.
PUBLIC XmmRowSAD_
PUBLIC _XmmRowSAD
;------------------------------------------------
; local vars (none; LOCAL_SPACE is not referenced below)
LOCAL_SPACE EQU 0
;------------------------------------------------
;UINT32 XmmRowSAD( UINT8 * NewDataPtr, UINT8 * RefDataPtr)
;
; In:    both pointers are read from the stack (see XmmRowSADParams).
; Out:   EAX = larger of SAD(bytes 0-3) and SAD(bytes 4-7).
; Uses:  EAX, MM0-MM4; EBX is saved and restored.
; Notes: returns with a plain RET, leaving the arguments on the stack.
;        No EMMS is executed in this routine.
;
XmmRowSAD_:
_XmmRowSAD:
push ebx
mov eax,(XmmRowSADParams PTR [esp]).NewDataPtr ; Load base addresses
mov ebx,(XmmRowSADParams PTR [esp]).RefDataPtr
;
; Register use from here on:
; ESP = Stack Pointer          MM0 = new data, bytes 0-3 / result
; EBX = RefDataPtr             MM1 = zero, for unpacking
; EAX = NewDataPtr             MM2 = new data, bytes 4-7
;                              MM3 = ref data, bytes 0-3
;                              MM4 = ref data, bytes 4-7
; ECX, EDX, ESI, EDI, EBP and MM5-MM7 are not touched.
;
movq mm0, QWORD PTR [eax] ; copy eight bytes from NewDataPtr to mm0
movq mm3, QWORD PTR [ebx] ; copy eight bytes from RefDataPtr to mm3
pxor mm1, mm1 ; clear mm1 for unpacking
movq mm2, mm0 ; make a copy
movq mm4, mm3 ; make a copy
punpcklbw mm0, mm1 ; unpack the lower four bytes (zero bytes interleaved)
punpcklbw mm3, mm1 ; unpack the lower four bytes (zero bytes interleaved)
psadbw mm0, mm3 ; SAD of the lower four bytes; the interleaved zeros add nothing
punpckhbw mm2, mm1 ; unpack the higher four bytes
punpckhbw mm4, mm1 ; unpack the higher four bytes
psadbw mm2, mm4 ; SAD of the higher four bytes
pop ebx ; restore the saved EBX
pmaxsw mm0, mm2 ; keep the larger of the two SADs (signed word max)
movd eax, mm0 ; return value = low 32 bits of mm0
ret
;************************************************
END
; NOTE(review): second END directive - looks redundant; confirm and remove.
END