From 01c7f267d7c1a4112a03642a23a513fdfc8d1e93 Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Wed, 10 Apr 2013 16:55:31 +0300 Subject: [PATCH] Added CPUID fetch assembly functions (x86 and x64) --- build/VS2010/HEVC_encoder.vcxproj | 11 ++++++++- build/VS2010/HEVC_encoder.vcxproj.filters | 26 ++++++++++++++++++++ src/encmain.c | 29 ++++++++++++++++++++++- src/encoder.c | 12 +++++++--- src/intra.c | 2 +- src/picture.h | 14 ++++++++++- src/transform.c | 16 ++++++------- src/x64/test64.asm | 15 ++++++++++++ src/x64/test64.h | 19 +++++++++++++++ src/x86/test.asm | 13 ++++++++++ src/x86/test.h | 19 +++++++++++++++ 11 files changed, 161 insertions(+), 15 deletions(-) create mode 100644 src/x64/test64.asm create mode 100644 src/x64/test64.h create mode 100644 src/x86/test.asm create mode 100644 src/x86/test.h diff --git a/build/VS2010/HEVC_encoder.vcxproj b/build/VS2010/HEVC_encoder.vcxproj index a3d7e2db..81a70754 100644 --- a/build/VS2010/HEVC_encoder.vcxproj +++ b/build/VS2010/HEVC_encoder.vcxproj @@ -48,6 +48,7 @@ + @@ -99,7 +100,7 @@ Level3 Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + WIN32;X64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) CompileAsC @@ -145,6 +146,7 @@ Default Speed /MP %(AdditionalOptions) + Fast Console @@ -179,8 +181,15 @@ + + + + + + + \ No newline at end of file diff --git a/build/VS2010/HEVC_encoder.vcxproj.filters b/build/VS2010/HEVC_encoder.vcxproj.filters index 8fea39dc..7432deb2 100644 --- a/build/VS2010/HEVC_encoder.vcxproj.filters +++ b/build/VS2010/HEVC_encoder.vcxproj.filters @@ -13,6 +13,18 @@ {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + {9e551421-7564-43be-a2b6-1ffb16d744a4} + + + {d1533c70-cb9a-419d-9a38-5161894ec359} + + + {cbe787b6-15a0-4c72-b658-6f444735fd73} + + + {39fb72d7-bfcd-4505-9a00-7ed6bf67d9ad} + @@ -83,5 +95,19 @@ Header Files + + Header Files\x86 + + + Header Files\x86 + + + + + Source Files\x86 + + + Source Files\x64 
+ \ No newline at end of file diff --git a/src/encmain.c b/src/encmain.c index d0d7d3b4..d932b9a5 100644 --- a/src/encmain.c +++ b/src/encmain.c @@ -45,6 +45,12 @@ #include "picture.h" #include "transform.h" + /* Assembly optimizations */ +#ifndef X64 + #include "x86/test.h" +#else + #include "x64/test64.h" +#endif /*! \brief Program main function. @@ -53,7 +59,9 @@ \return Program exit state */ int main(int argc, char* argv[]) - { + { + int ecx = 0,edx =0; + enum { BIT_SSSE3 = 9, BIT_SSE41 = 19, BIT_SSE42 = 20, BIT_MMX = 24, BIT_SSE = 25, BIT_SSE2 = 26}; uint32_t curFrame = 0; config *cfg = NULL; /* Global configuration */ FILE *input = NULL; @@ -63,6 +71,25 @@ FILE *recout = fopen("encrec.yuv","wb"); #endif encoder_control* encoder = (encoder_control*)malloc(sizeof(encoder_control));; + + /* CPU id */ + + printf("Checking for CPU features...\r\n"); + #ifndef X64 + cpuId(&ecx,&edx); + #else + cpuId64(&ecx,&edx); + #endif + //printf("CPUID ECX: %X EDX: %X\r\n", ecx, edx); + printf("CPU features enabled: "); + if(edx & (1<type == CU_INTRA) { uint8_t intraPredMode = 1; - uint8_t intraPredModeChroma =36; /* 36 = Chroma derived from luma */ + uint8_t intraPredModeChroma = 1; /* 36 = Chroma derived from luma */ int8_t intraPreds[3] = {-1, -1, -1}; int8_t mpmPred = -1; int i; @@ -746,9 +746,15 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui { intra_DCPredFiltering(recShift,(LCU_WIDTH>>(depth))*2+8,pred,width,LCU_WIDTH>>depth,LCU_WIDTH>>depth); } + /* ToDo: separate chroma prediction(?) 
*/ /* intraPredModeChroma = 1; */ + + if(intraPredModeChroma != 36 && intraPredModeChroma == intraPredMode) + { + intraPredModeChroma = 36; + } intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth+1))*2+8, rec, (LCU_WIDTH>>(depth+1))*2+8, 1); intra_recon(recShiftU,(LCU_WIDTH>>(depth+1))*2+8,xCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),yCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),width>>1,predU,width>>1,intraPredModeChroma!=36?intraPredModeChroma:intraPredMode,1); intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth+1))*2+8, rec, (LCU_WIDTH>>(depth+1))*2+8, 2); diff --git a/src/intra.c b/src/intra.c index b115e784..f3a216ba 100644 --- a/src/intra.c +++ b/src/intra.c @@ -290,7 +290,7 @@ int16_t intra_prediction(uint8_t* orig,int32_t origstride,int16_t* rec,int32_t r if(MIN(abs(i-26),abs(i-10)) <= threshold) { intra_getAngularPred(rec,recstride,pred, width,width,width,i, xpos?1:0, ypos?1:0, filter); - //CHECK_FOR_BEST(i); + CHECK_FOR_BEST(i); } } diff --git a/src/picture.h b/src/picture.h index 0d9eaa2b..abead2d4 100644 --- a/src/picture.h +++ b/src/picture.h @@ -34,7 +34,7 @@ enum { CU_NOTSET = 0,CU_PCM, CU_SKIP, CU_SPLIT, CU_INTRA, CU_INTER }; #define SET_SPLITDATA(CU,flag) { (CU)->split=(flag); } /*! - \brief Struct for CU info + \brief Struct for CU intra info */ typedef struct { @@ -42,6 +42,16 @@ typedef struct uint32_t cost; } CU_info_intra; +/*! + \brief Struct for CU inter info +*/ +typedef struct +{ + uint8_t mode; + uint32_t cost; + int16_t mv[2]; +} CU_info_inter; + /*! 
\brief Struct for CU info @@ -49,7 +59,9 @@ typedef struct typedef struct { uint8_t type; + int8_t coded; CU_info_intra intra; + CU_info_inter inter; uint8_t split; } CU_info; diff --git a/src/transform.c b/src/transform.c index dd243bd0..474d60d7 100644 --- a/src/transform.c +++ b/src/transform.c @@ -249,10 +249,10 @@ void scalinglist_processDec( int32_t *coeff, int32_t *dequantcoeff, int32_t invQ void scalinglist_set(int32_t *coeff, uint32_t listId, uint32_t sizeId, uint32_t qp) { - uint32_t width = g_scalingListSizeX[sizeId]; + uint32_t width = g_scalingListSizeX[sizeId]; uint32_t height = g_scalingListSizeX[sizeId]; - uint32_t ratio = g_scalingListSizeX[sizeId]/MIN(8,g_scalingListSizeX[sizeId]); - int32_t *quantcoeff = g_quant_coeff[sizeId][listId][qp]; + uint32_t ratio = g_scalingListSizeX[sizeId]/MIN(8,g_scalingListSizeX[sizeId]); + int32_t *quantcoeff = g_quant_coeff[sizeId][listId][qp]; int32_t *dequantcoeff = g_de_quant_coeff[sizeId][listId][qp]; scalinglist_processEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width,ratio,MIN(8,g_scalingListSizeX[sizeId]),/*SCALING_LIST_DC*/16, 0); @@ -667,13 +667,13 @@ void transform2d(int16_t *block,int16_t *coeff, int8_t blockSize, int32_t uiMode int16_t tmp[LCU_WIDTH*LCU_WIDTH]; if(blockSize== 4) - { + {/* if (uiMode != 65535) { fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output } - else + else*/ { partialButterfly4(block, tmp, shift_1st, blockSize); partialButterfly4(tmp, coeff, shift_2nd, blockSize); @@ -718,13 +718,13 @@ void itransform2d(int16_t *block,int16_t *coeff, int8_t blockSize, int32_t uiMod int16_t tmp[LCU_WIDTH*LCU_WIDTH]; if( blockSize == 4) - { + {/* if (uiMode != 65535) { fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output } - 
else + else*/ { partialButterflyInverse4(coeff,tmp,shift_1st,blockSize); partialButterflyInverse4(tmp,block,shift_2nd,blockSize); @@ -762,7 +762,7 @@ void quant(encoder_control* encoder, int16_t* pSrc, int16_t* pDes, int32_t iWidt //uint32_t scanIdx = SCAN_DIAG; - scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ]; + scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ]; { int32_t deltaU[LCU_WIDTH*LCU_WIDTH] ; int32_t iQpBase = encoder->QP; diff --git a/src/x64/test64.asm b/src/x64/test64.asm new file mode 100644 index 00000000..30963055 --- /dev/null +++ b/src/x64/test64.asm @@ -0,0 +1,15 @@ +; Function to get CPUID for identifying CPU capabilities +bits 64 +section .code +global cpuId64 + +cpuId64: + mov r8, rcx ; pointer to ecx-output + mov r9, rdx ; pointer to edx-output + + mov eax,1 + cpuid + mov dword [r8], ecx + mov dword [r9], edx + mov eax,0 + ret \ No newline at end of file diff --git a/src/x64/test64.h b/src/x64/test64.h new file mode 100644 index 00000000..21fe9a71 --- /dev/null +++ b/src/x64/test64.h @@ -0,0 +1,19 @@ +/** + * Part of HEVC Encoder + * By Marko Viitanen ( fador at iki.fi ), Tampere University of Technology, Department of Computer Systems. + */ + +/*! 
\file test64.h + \brief Declares cpuId64, the x64 CPUID query routine + \author Marko Viitanen + \date 2013-04 + +*/ + +#ifndef _TEST64_H_ +#define _TEST64_H_ + +void __cdecl cpuId64(int* ecx, int *edx ); + + +#endif \ No newline at end of file diff --git a/src/x86/test.asm b/src/x86/test.asm new file mode 100644 index 00000000..e9626f5b --- /dev/null +++ b/src/x86/test.asm @@ -0,0 +1,13 @@ +; Function to get CPUID for identifying CPU capabilities +bits 32 +global _cpuId + +_cpuId: + mov eax,1 + cpuid + mov eax, dword [esp+4] + mov dword [eax], ecx + mov eax, dword [esp+8] + mov dword [eax], edx + mov eax,0 + ret \ No newline at end of file diff --git a/src/x86/test.h b/src/x86/test.h new file mode 100644 index 00000000..3448e699 --- /dev/null +++ b/src/x86/test.h @@ -0,0 +1,19 @@ +/** + * Part of HEVC Encoder + * By Marko Viitanen ( fador at iki.fi ), Tampere University of Technology, Department of Computer Systems. + */ + +/*! \file test.h + \brief Declares cpuId, the x86 CPUID query routine + \author Marko Viitanen + \date 2013-04 + +*/ + +#ifndef _TEST_H_ +#define _TEST_H_ + +void __cdecl cpuId(int* ecx, int *edx ); + + +#endif \ No newline at end of file