Added CPUID fetch assembly functions (x86 and x64)

This commit is contained in:
Marko Viitanen 2013-04-10 16:55:31 +03:00
parent 560917a532
commit 01c7f267d7
11 changed files with 161 additions and 15 deletions

View file

@ -48,6 +48,7 @@
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings"> <ImportGroup Label="ExtensionSettings">
<Import Project="..\..\..\..\..\yasm\vsyasm.props" />
</ImportGroup> </ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
@ -99,7 +100,7 @@
</PrecompiledHeader> </PrecompiledHeader>
<WarningLevel>Level3</WarningLevel> <WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization> <Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>WIN32;X64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<CompileAs>CompileAsC</CompileAs> <CompileAs>CompileAsC</CompileAs>
</ClCompile> </ClCompile>
<Link> <Link>
@ -145,6 +146,7 @@
<InlineFunctionExpansion>Default</InlineFunctionExpansion> <InlineFunctionExpansion>Default</InlineFunctionExpansion>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed> <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile> </ClCompile>
<Link> <Link>
<SubSystem>Console</SubSystem> <SubSystem>Console</SubSystem>
@ -179,8 +181,15 @@
<ClInclude Include="..\..\src\nal.h" /> <ClInclude Include="..\..\src\nal.h" />
<ClInclude Include="..\..\src\picture.h" /> <ClInclude Include="..\..\src\picture.h" />
<ClInclude Include="..\..\src\transform.h" /> <ClInclude Include="..\..\src\transform.h" />
<ClInclude Include="..\..\src\x64\test64.h" />
<ClInclude Include="..\..\src\x86\test.h" />
</ItemGroup>
<ItemGroup>
<YASM Include="..\..\src\x64\test64.asm" />
<YASM Include="..\..\src\x86\test.asm" />
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
<Import Project="..\..\..\..\..\yasm\vsyasm.targets" />
</ImportGroup> </ImportGroup>
</Project> </Project>

View file

@ -13,6 +13,18 @@
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier> <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions> <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter> </Filter>
<Filter Include="Source Files\x86">
<UniqueIdentifier>{9e551421-7564-43be-a2b6-1ffb16d744a4}</UniqueIdentifier>
</Filter>
<Filter Include="Header Files\x86">
<UniqueIdentifier>{d1533c70-cb9a-419d-9a38-5161894ec359}</UniqueIdentifier>
</Filter>
<Filter Include="Header Files\x64">
<UniqueIdentifier>{cbe787b6-15a0-4c72-b658-6f444735fd73}</UniqueIdentifier>
</Filter>
<Filter Include="Source Files\x64">
<UniqueIdentifier>{39fb72d7-bfcd-4505-9a00-7ed6bf67d9ad}</UniqueIdentifier>
</Filter>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClCompile Include="..\..\src\encmain.c"> <ClCompile Include="..\..\src\encmain.c">
@ -83,5 +95,19 @@
<ClInclude Include="..\..\src\filter.h"> <ClInclude Include="..\..\src\filter.h">
<Filter>Header Files</Filter> <Filter>Header Files</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="..\..\src\x86\test.h">
<Filter>Header Files\x86</Filter>
</ClInclude>
<!-- The x64 header belongs under the x64 header filter declared earlier in this file. -->
<ClInclude Include="..\..\src\x64\test64.h">
<Filter>Header Files\x64</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<YASM Include="..\..\src\x86\test.asm">
<Filter>Source Files\x86</Filter>
</YASM>
<YASM Include="..\..\src\x64\test64.asm">
<Filter>Source Files\x64</Filter>
</YASM>
</ItemGroup> </ItemGroup>
</Project> </Project>

View file

@ -45,6 +45,12 @@
#include "picture.h" #include "picture.h"
#include "transform.h" #include "transform.h"
/* Assembly optimizations */
#ifndef X64
#include "x86/test.h"
#else
#include "x64/test64.h"
#endif
/*! /*!
\brief Program main function. \brief Program main function.
@ -54,6 +60,8 @@
*/ */
int main(int argc, char* argv[]) int main(int argc, char* argv[])
{ {
int ecx = 0,edx =0;
enum { BIT_SSSE3 = 9, BIT_SSE41 = 19, BIT_SSE42 = 20, BIT_MMX = 24, BIT_SSE = 25, BIT_SSE2 = 26};
uint32_t curFrame = 0; uint32_t curFrame = 0;
config *cfg = NULL; /* Global configuration */ config *cfg = NULL; /* Global configuration */
FILE *input = NULL; FILE *input = NULL;
@ -64,6 +72,25 @@
#endif #endif
encoder_control* encoder = (encoder_control*)malloc(sizeof(encoder_control));; encoder_control* encoder = (encoder_control*)malloc(sizeof(encoder_control));;
/* CPU id */
printf("Checking for CPU features...\r\n");
#ifndef X64
cpuId(&ecx,&edx);
#else
cpuId64(&ecx,&edx);
#endif
//printf("CPUID ECX: %X EDX: %X\r\n", ecx, edx);
printf("CPU features enabled: ");
if(edx & (1<<BIT_MMX)) printf("MMX ");
if(edx & (1<<BIT_SSE)) printf("SSE ");
if(edx & (1<<BIT_SSE2)) printf("SSE2 ");
if(ecx & (1<<BIT_SSSE3)) printf("SSSE3 ");
if(ecx & (1<<BIT_SSE41)) printf("SSE4.1 ");
if(ecx & (1<<BIT_SSE42)) printf("SSE4.2 ");
printf("\r\n");
/* Handle configuration */ /* Handle configuration */
cfg = config_alloc(); cfg = config_alloc();

View file

@ -640,7 +640,7 @@ void encode_slice_data(encoder_control* encoder)
void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, uint8_t depth) void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, uint8_t depth)
{ {
uint8_t split_flag = (depth<3)?1:0; /* ToDo: get from CU data */ uint8_t split_flag = (depth<1)?1:0; /* ToDo: get from CU data */
uint8_t split_model = 0; uint8_t split_model = 0;
/* Check for slice border */ /* Check for slice border */
@ -707,7 +707,7 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
if(cur_CU->type == CU_INTRA) if(cur_CU->type == CU_INTRA)
{ {
uint8_t intraPredMode = 1; uint8_t intraPredMode = 1;
uint8_t intraPredModeChroma =36; /* 36 = Chroma derived from luma */ uint8_t intraPredModeChroma = 1; /* 36 = Chroma derived from luma */
int8_t intraPreds[3] = {-1, -1, -1}; int8_t intraPreds[3] = {-1, -1, -1};
int8_t mpmPred = -1; int8_t mpmPred = -1;
int i; int i;
@ -747,8 +747,14 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
intra_DCPredFiltering(recShift,(LCU_WIDTH>>(depth))*2+8,pred,width,LCU_WIDTH>>depth,LCU_WIDTH>>depth); intra_DCPredFiltering(recShift,(LCU_WIDTH>>(depth))*2+8,pred,width,LCU_WIDTH>>depth,LCU_WIDTH>>depth);
} }
/* ToDo: separate chroma prediction(?) */ /* ToDo: separate chroma prediction(?) */
/* intraPredModeChroma = 1; */ /* intraPredModeChroma = 1; */
if(intraPredModeChroma != 36 && intraPredModeChroma == intraPredMode)
{
intraPredModeChroma = 36;
}
intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth+1))*2+8, rec, (LCU_WIDTH>>(depth+1))*2+8, 1); intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth+1))*2+8, rec, (LCU_WIDTH>>(depth+1))*2+8, 1);
intra_recon(recShiftU,(LCU_WIDTH>>(depth+1))*2+8,xCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),yCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),width>>1,predU,width>>1,intraPredModeChroma!=36?intraPredModeChroma:intraPredMode,1); intra_recon(recShiftU,(LCU_WIDTH>>(depth+1))*2+8,xCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),yCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),width>>1,predU,width>>1,intraPredModeChroma!=36?intraPredModeChroma:intraPredMode,1);
intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth+1))*2+8, rec, (LCU_WIDTH>>(depth+1))*2+8, 2); intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth+1))*2+8, rec, (LCU_WIDTH>>(depth+1))*2+8, 2);

View file

@ -290,7 +290,7 @@ int16_t intra_prediction(uint8_t* orig,int32_t origstride,int16_t* rec,int32_t r
if(MIN(abs(i-26),abs(i-10)) <= threshold) if(MIN(abs(i-26),abs(i-10)) <= threshold)
{ {
intra_getAngularPred(rec,recstride,pred, width,width,width,i, xpos?1:0, ypos?1:0, filter); intra_getAngularPred(rec,recstride,pred, width,width,width,i, xpos?1:0, ypos?1:0, filter);
//CHECK_FOR_BEST(i); CHECK_FOR_BEST(i);
} }
} }

View file

@ -34,7 +34,7 @@ enum { CU_NOTSET = 0,CU_PCM, CU_SKIP, CU_SPLIT, CU_INTRA, CU_INTER };
#define SET_SPLITDATA(CU,flag) { (CU)->split=(flag); } #define SET_SPLITDATA(CU,flag) { (CU)->split=(flag); }
/*! /*!
\brief Struct for CU info \brief Struct for CU intra info
*/ */
typedef struct typedef struct
{ {
@ -42,6 +42,16 @@ typedef struct
uint32_t cost; uint32_t cost;
} CU_info_intra; } CU_info_intra;
/*!
\brief Struct for CU inter info
*/
typedef struct
{
uint8_t mode; /* NOTE(review): presumably the selected inter prediction mode - confirm against users of this struct */
uint32_t cost; /* NOTE(review): presumably the cost of that mode, mirroring CU_info_intra.cost - confirm */
int16_t mv[2]; /* NOTE(review): presumably a motion vector stored as two components; component order not visible here - confirm */
} CU_info_inter;
/*! /*!
\brief Struct for CU info \brief Struct for CU info
@ -49,7 +59,9 @@ typedef struct
typedef struct typedef struct
{ {
uint8_t type; uint8_t type;
int8_t coded;
CU_info_intra intra; CU_info_intra intra;
CU_info_inter inter;
uint8_t split; uint8_t split;
} CU_info; } CU_info;

View file

@ -667,13 +667,13 @@ void transform2d(int16_t *block,int16_t *coeff, int8_t blockSize, int32_t uiMode
int16_t tmp[LCU_WIDTH*LCU_WIDTH]; int16_t tmp[LCU_WIDTH*LCU_WIDTH];
if(blockSize== 4) if(blockSize== 4)
{ {/*
if (uiMode != 65535) if (uiMode != 65535)
{ {
fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
} }
else else*/
{ {
partialButterfly4(block, tmp, shift_1st, blockSize); partialButterfly4(block, tmp, shift_1st, blockSize);
partialButterfly4(tmp, coeff, shift_2nd, blockSize); partialButterfly4(tmp, coeff, shift_2nd, blockSize);
@ -718,13 +718,13 @@ void itransform2d(int16_t *block,int16_t *coeff, int8_t blockSize, int32_t uiMod
int16_t tmp[LCU_WIDTH*LCU_WIDTH]; int16_t tmp[LCU_WIDTH*LCU_WIDTH];
if( blockSize == 4) if( blockSize == 4)
{ {/*
if (uiMode != 65535) if (uiMode != 65535)
{ {
fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
} }
else else*/
{ {
partialButterflyInverse4(coeff,tmp,shift_1st,blockSize); partialButterflyInverse4(coeff,tmp,shift_1st,blockSize);
partialButterflyInverse4(tmp,block,shift_2nd,blockSize); partialButterflyInverse4(tmp,block,shift_2nd,blockSize);

15
src/x64/test64.asm Normal file
View file

@ -0,0 +1,15 @@
; Function to get CPUID for identifying CPU capabilities
;
; void cpuId64(int* ecx, int* edx)
;   rcx = pointer that receives the ECX value returned by CPUID (EAX=1)
;   rdx = pointer that receives the EDX value returned by CPUID (EAX=1)
;
; Arguments arrive in rcx/rdx, i.e. the Microsoft x64 calling convention.
; Under that convention rbx is callee-saved, and cpuid overwrites
; eax, ebx, ecx and edx, so rbx must be preserved across the instruction.
bits 64
section .code
global cpuId64
cpuId64:
    mov r10, rbx            ; park callee-saved rbx in volatile r10 (no stack use, stays a leaf function)
    mov r8, rcx             ; pointer to ecx-output (rcx is about to be overwritten by cpuid)
    mov r9, rdx             ; pointer to edx-output (rdx is about to be overwritten by cpuid)
    mov eax,1               ; CPUID function 1
    cpuid
    mov dword [r8], ecx     ; store ECX result through the first pointer
    mov dword [r9], edx     ; store EDX result through the second pointer
    mov rbx, r10            ; restore the caller's rbx
    mov eax,0
    ret

19
src/x64/test64.h Normal file
View file

@ -0,0 +1,19 @@
/**
* Part of HEVC Encoder
* By Marko Viitanen ( fador at iki.fi ), Tampere University of Technology, Department of Computer Systems.
*/
/*! \file test64.h
\brief Declaration of the x64 assembly CPUID helper
\author Marko Viitanen
\date 2013-04
*/
#ifndef _TEST64_H_
#define _TEST64_H_
/*!
\brief Implemented in test64.asm: executes CPUID with EAX=1 and stores the results.
\param ecx receives the ECX register value returned by CPUID
\param edx receives the EDX register value returned by CPUID
*/
void __cdecl cpuId64(int* ecx, int *edx );
#endif

13
src/x86/test.asm Normal file
View file

@ -0,0 +1,13 @@
; Function to get CPUID for identifying CPU capabilities
;
; void __cdecl cpuId(int* ecx, int* edx)
;   first stack argument  = pointer that receives the ECX value returned by CPUID (EAX=1)
;   second stack argument = pointer that receives the EDX value returned by CPUID (EAX=1)
;
; cpuid overwrites eax, ebx, ecx and edx. ebx is callee-saved under cdecl,
; so it is pushed on entry and popped before returning.
bits 32
global _cpuId
_cpuId:
    push ebx                    ; preserve callee-saved ebx (clobbered by cpuid)
    mov eax,1                   ; CPUID function 1
    cpuid
    mov eax, dword [esp+8]      ; first argument (offset +4 for the pushed ebx)
    mov dword [eax], ecx        ; store ECX result
    mov eax, dword [esp+12]     ; second argument (offset +4 for the pushed ebx)
    mov dword [eax], edx        ; store EDX result
    mov eax,0
    pop ebx                     ; restore the caller's ebx
    ret

19
src/x86/test.h Normal file
View file

@ -0,0 +1,19 @@
/**
* Part of HEVC Encoder
* By Marko Viitanen ( fador at iki.fi ), Tampere University of Technology, Department of Computer Systems.
*/
/*! \file test.h
\brief Declaration of the x86 assembly CPUID helper
\author Marko Viitanen
\date 2013-04
*/
#ifndef _TEST_H_
#define _TEST_H_
/*!
\brief Implemented in test.asm (exported as _cpuId): executes CPUID with EAX=1 and stores the results.
\param ecx receives the ECX register value returned by CPUID
\param edx receives the EDX register value returned by CPUID
*/
void __cdecl cpuId(int* ecx, int *edx );
#endif