Added CPUID fetch assembly functions (x86 and x64)

This commit is contained in:
Marko Viitanen 2013-04-10 16:55:31 +03:00
parent 560917a532
commit 01c7f267d7
11 changed files with 161 additions and 15 deletions

View file

@ -48,6 +48,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="..\..\..\..\..\yasm\vsyasm.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
@ -99,7 +100,7 @@
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>WIN32;X64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<CompileAs>CompileAsC</CompileAs>
</ClCompile>
<Link>
@ -145,6 +146,7 @@
<InlineFunctionExpansion>Default</InlineFunctionExpansion>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@ -179,8 +181,15 @@
<ClInclude Include="..\..\src\nal.h" />
<ClInclude Include="..\..\src\picture.h" />
<ClInclude Include="..\..\src\transform.h" />
<ClInclude Include="..\..\src\x64\test64.h" />
<ClInclude Include="..\..\src\x86\test.h" />
</ItemGroup>
<ItemGroup>
<YASM Include="..\..\src\x64\test64.asm" />
<YASM Include="..\..\src\x86\test.asm" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="..\..\..\..\..\yasm\vsyasm.targets" />
</ImportGroup>
</Project>

View file

@ -13,6 +13,18 @@
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
<Filter Include="Source Files\x86">
<UniqueIdentifier>{9e551421-7564-43be-a2b6-1ffb16d744a4}</UniqueIdentifier>
</Filter>
<Filter Include="Header Files\x86">
<UniqueIdentifier>{d1533c70-cb9a-419d-9a38-5161894ec359}</UniqueIdentifier>
</Filter>
<Filter Include="Header Files\x64">
<UniqueIdentifier>{cbe787b6-15a0-4c72-b658-6f444735fd73}</UniqueIdentifier>
</Filter>
<Filter Include="Source Files\x64">
<UniqueIdentifier>{39fb72d7-bfcd-4505-9a00-7ed6bf67d9ad}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\src\encmain.c">
@ -83,5 +95,19 @@
<ClInclude Include="..\..\src\filter.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\..\src\x86\test.h">
<Filter>Header Files\x86</Filter>
</ClInclude>
<!-- The x64 CPUID helper header is grouped with the other x64 items. -->
<ClInclude Include="..\..\src\x64\test64.h">
<Filter>Header Files\x64</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<YASM Include="..\..\src\x86\test.asm">
<Filter>Source Files\x86</Filter>
</YASM>
<YASM Include="..\..\src\x64\test64.asm">
<Filter>Source Files\x64</Filter>
</YASM>
</ItemGroup>
</Project>

View file

@ -45,6 +45,12 @@
#include "picture.h"
#include "transform.h"
/* Assembly optimizations */
#ifndef X64
#include "x86/test.h"
#else
#include "x64/test64.h"
#endif
/*!
\brief Program main function.
@ -53,7 +59,9 @@
\return Program exit state
*/
int main(int argc, char* argv[])
{
{
int ecx = 0,edx =0;
enum { BIT_SSSE3 = 9, BIT_SSE41 = 19, BIT_SSE42 = 20, BIT_MMX = 24, BIT_SSE = 25, BIT_SSE2 = 26};
uint32_t curFrame = 0;
config *cfg = NULL; /* Global configuration */
FILE *input = NULL;
@ -63,6 +71,25 @@
FILE *recout = fopen("encrec.yuv","wb");
#endif
encoder_control* encoder = (encoder_control*)malloc(sizeof(encoder_control));;
/* CPU id */
printf("Checking for CPU features...\r\n");
#ifndef X64
cpuId(&ecx,&edx);
#else
cpuId64(&ecx,&edx);
#endif
//printf("CPUID ECX: %X EDX: %X\r\n", ecx, edx);
printf("CPU features enabled: ");
if(edx & (1<<BIT_MMX)) printf("MMX ");
if(edx & (1<<BIT_SSE)) printf("SSE ");
if(edx & (1<<BIT_SSE2)) printf("SSE2 ");
if(ecx & (1<<BIT_SSSE3)) printf("SSSE3 ");
if(ecx & (1<<BIT_SSE41)) printf("SSE4.1 ");
if(ecx & (1<<BIT_SSE42)) printf("SSE4.2 ");
printf("\r\n");
/* Handle configuration */
cfg = config_alloc();

View file

@ -128,7 +128,7 @@ void initSigLastScan(uint32_t* pBuffD, uint32_t* pBuffH, uint32_t* pBuffV, int32
{
for(blkY=0; blkY < numBlkSide; blkY++)
{
uint32_t offset = blkY * 4 * iWidth + blkX * 4;
uint32_t offset = blkY * 4 * iWidth + blkX * 4;
for(x=0; x < 4; x++)
{
for(y=0; y < 4; y++)
@ -640,7 +640,7 @@ void encode_slice_data(encoder_control* encoder)
void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, uint8_t depth)
{
uint8_t split_flag = (depth<3)?1:0; /* ToDo: get from CU data */
uint8_t split_flag = (depth<1)?1:0; /* ToDo: get from CU data */
uint8_t split_model = 0;
/* Check for slice border */
@ -707,7 +707,7 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
if(cur_CU->type == CU_INTRA)
{
uint8_t intraPredMode = 1;
uint8_t intraPredModeChroma =36; /* 36 = Chroma derived from luma */
uint8_t intraPredModeChroma = 1; /* 36 = Chroma derived from luma */
int8_t intraPreds[3] = {-1, -1, -1};
int8_t mpmPred = -1;
int i;
@ -746,9 +746,15 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
{
intra_DCPredFiltering(recShift,(LCU_WIDTH>>(depth))*2+8,pred,width,LCU_WIDTH>>depth,LCU_WIDTH>>depth);
}
/* ToDo: separate chroma prediction(?) */
/* intraPredModeChroma = 1; */
if(intraPredModeChroma != 36 && intraPredModeChroma == intraPredMode)
{
intraPredModeChroma = 36;
}
intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth+1))*2+8, rec, (LCU_WIDTH>>(depth+1))*2+8, 1);
intra_recon(recShiftU,(LCU_WIDTH>>(depth+1))*2+8,xCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),yCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),width>>1,predU,width>>1,intraPredModeChroma!=36?intraPredModeChroma:intraPredMode,1);
intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth+1))*2+8, rec, (LCU_WIDTH>>(depth+1))*2+8, 2);

View file

@ -290,7 +290,7 @@ int16_t intra_prediction(uint8_t* orig,int32_t origstride,int16_t* rec,int32_t r
if(MIN(abs(i-26),abs(i-10)) <= threshold)
{
intra_getAngularPred(rec,recstride,pred, width,width,width,i, xpos?1:0, ypos?1:0, filter);
//CHECK_FOR_BEST(i);
CHECK_FOR_BEST(i);
}
}

View file

@ -34,7 +34,7 @@ enum { CU_NOTSET = 0,CU_PCM, CU_SKIP, CU_SPLIT, CU_INTRA, CU_INTER };
#define SET_SPLITDATA(CU,flag) { (CU)->split=(flag); }
/*!
\brief Struct for CU info
\brief Struct for CU intra info
*/
typedef struct
{
@ -42,6 +42,16 @@ typedef struct
uint32_t cost;
} CU_info_intra;
/*!
\brief Struct for CU inter info

Per-CU data used when the CU is inter coded. It is embedded by value in
CU_info as the "inter" member.
*/
typedef struct
{
uint8_t mode; /*!< NOTE(review): presumably the selected inter prediction mode - confirm against users */
uint32_t cost; /*!< NOTE(review): presumably the cost of the selected mode, mirroring CU_info_intra::cost - confirm */
int16_t mv[2]; /*!< Two-component vector; NOTE(review): presumably motion vector as {x, y} - confirm component order */
} CU_info_inter;
/*!
\brief Struct for CU info
@ -49,7 +59,9 @@ typedef struct
typedef struct
{
uint8_t type; /*!< CU type, compared against the CU_* enum values (CU_NOTSET, CU_PCM, CU_SKIP, CU_SPLIT, CU_INTRA, CU_INTER) */
int8_t coded; /*!< NOTE(review): presumably a flag telling whether this CU has been coded - confirm against users */
CU_info_intra intra; /*!< Intra-specific info */
CU_info_inter inter; /*!< Inter-specific info */
uint8_t split; /*!< Split flag; written through the SET_SPLITDATA() macro */
} CU_info;

View file

@ -249,10 +249,10 @@ void scalinglist_processDec( int32_t *coeff, int32_t *dequantcoeff, int32_t invQ
void scalinglist_set(int32_t *coeff, uint32_t listId, uint32_t sizeId, uint32_t qp)
{
uint32_t width = g_scalingListSizeX[sizeId];
uint32_t width = g_scalingListSizeX[sizeId];
uint32_t height = g_scalingListSizeX[sizeId];
uint32_t ratio = g_scalingListSizeX[sizeId]/MIN(8,g_scalingListSizeX[sizeId]);
int32_t *quantcoeff = g_quant_coeff[sizeId][listId][qp];
uint32_t ratio = g_scalingListSizeX[sizeId]/MIN(8,g_scalingListSizeX[sizeId]);
int32_t *quantcoeff = g_quant_coeff[sizeId][listId][qp];
int32_t *dequantcoeff = g_de_quant_coeff[sizeId][listId][qp];
scalinglist_processEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width,ratio,MIN(8,g_scalingListSizeX[sizeId]),/*SCALING_LIST_DC*/16, 0);
@ -667,13 +667,13 @@ void transform2d(int16_t *block,int16_t *coeff, int8_t blockSize, int32_t uiMode
int16_t tmp[LCU_WIDTH*LCU_WIDTH];
if(blockSize== 4)
{
{/*
if (uiMode != 65535)
{
fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
}
else
else*/
{
partialButterfly4(block, tmp, shift_1st, blockSize);
partialButterfly4(tmp, coeff, shift_2nd, blockSize);
@ -718,13 +718,13 @@ void itransform2d(int16_t *block,int16_t *coeff, int8_t blockSize, int32_t uiMod
int16_t tmp[LCU_WIDTH*LCU_WIDTH];
if( blockSize == 4)
{
{/*
if (uiMode != 65535)
{
fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
}
else
else*/
{
partialButterflyInverse4(coeff,tmp,shift_1st,blockSize);
partialButterflyInverse4(tmp,block,shift_2nd,blockSize);
@ -762,7 +762,7 @@ void quant(encoder_control* encoder, int16_t* pSrc, int16_t* pDes, int32_t iWidt
//uint32_t scanIdx = SCAN_DIAG;
scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ];
scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ];
{
int32_t deltaU[LCU_WIDTH*LCU_WIDTH] ;
int32_t iQpBase = encoder->QP;

15
src/x64/test64.asm Normal file
View file

@ -0,0 +1,15 @@
; Function to get CPUID for identifying CPU capabilities
;
; C prototype:  void cpuId64(int *ecx, int *edx);
;
; Arguments are taken from RCX and RDX (Microsoft x64 calling convention):
;   RCX = pointer that receives the ECX result of CPUID leaf 1
;   RDX = pointer that receives the EDX result of CPUID leaf 1
;
; CPUID overwrites EAX, EBX, ECX and EDX. RBX is a callee-saved register,
; so it is parked in the volatile register R10 around the instruction.
; Using a scratch register instead of push/pop keeps this a leaf function
; that never touches the stack.
bits 64
section .code
global cpuId64

cpuId64:
    mov r8, rcx             ; pointer to ecx-output (rcx is overwritten by cpuid)
    mov r9, rdx             ; pointer to edx-output (rdx is overwritten by cpuid)
    mov r10, rbx            ; preserve callee-saved rbx
    mov eax, 1              ; CPUID leaf 1
    cpuid
    mov dword [r8], ecx     ; store ECX result
    mov dword [r9], edx     ; store EDX result
    mov rbx, r10            ; restore callee-saved rbx
    mov eax, 0
    ret

19
src/x64/test64.h Normal file
View file

@ -0,0 +1,19 @@
/**
 * Part of HEVC Encoder
 * By Marko Viitanen ( fador at iki.fi ), Tampere University of Technology, Department of Computer Systems.
 */

/*! \file test64.h
    \brief Declaration of the x64 assembly CPUID helper
    \author Marko Viitanen
    \date 2013-04
*/
#ifndef X64_TEST64_H_
#define X64_TEST64_H_

/*!
    \brief Execute CPUID leaf 1 and store the returned ECX and EDX registers.
    \param ecx Output: receives the ECX register value returned by CPUID
    \param edx Output: receives the EDX register value returned by CPUID

    Implemented in assembly (test64.asm). Both pointers are written
    unconditionally, so they must be valid.
*/
void __cdecl cpuId64(int *ecx, int *edx);

#endif

13
src/x86/test.asm Normal file
View file

@ -0,0 +1,13 @@
; Function to get CPUID for identifying CPU capabilities
;
; C prototype:  void __cdecl cpuId(int *ecx, int *edx);
;
; Both arguments are read from the stack:
;   first argument  = pointer that receives the ECX result of CPUID leaf 1
;   second argument = pointer that receives the EDX result of CPUID leaf 1
;
; CPUID overwrites EAX, EBX, ECX and EDX. EBX is callee-saved under cdecl,
; so it is pushed on entry and popped before returning. The push moves ESP
; by 4, which is why the arguments are read from [esp+8] and [esp+12].
bits 32
global _cpuId

_cpuId:
    push ebx                    ; preserve callee-saved ebx
    mov eax, 1                  ; CPUID leaf 1
    cpuid
    mov eax, dword [esp+8]      ; first argument: pointer to ecx-output
    mov dword [eax], ecx
    mov eax, dword [esp+12]     ; second argument: pointer to edx-output
    mov dword [eax], edx
    mov eax, 0
    pop ebx                     ; restore callee-saved ebx
    ret

19
src/x86/test.h Normal file
View file

@ -0,0 +1,19 @@
/**
 * Part of HEVC Encoder
 * By Marko Viitanen ( fador at iki.fi ), Tampere University of Technology, Department of Computer Systems.
 */

/*! \file test.h
    \brief Declaration of the x86 (32-bit) assembly CPUID helper
    \author Marko Viitanen
    \date 2013-04
*/
#ifndef X86_TEST_H_
#define X86_TEST_H_

/*!
    \brief Execute CPUID leaf 1 and store the returned ECX and EDX registers.
    \param ecx Output: receives the ECX register value returned by CPUID
    \param edx Output: receives the EDX register value returned by CPUID

    Implemented in assembly (test.asm, exported as _cpuId). Both pointers
    are written unconditionally, so they must be valid.
*/
void __cdecl cpuId(int *ecx, int *edx);

#endif