mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 19:24:06 +00:00
Added CPUID fetch assembly functions (x86 and x64)
This commit is contained in:
parent
560917a532
commit
01c7f267d7
|
@ -48,6 +48,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
|
<Import Project="..\..\..\..\..\yasm\vsyasm.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||||
|
@ -99,7 +100,7 @@
|
||||||
</PrecompiledHeader>
|
</PrecompiledHeader>
|
||||||
<WarningLevel>Level3</WarningLevel>
|
<WarningLevel>Level3</WarningLevel>
|
||||||
<Optimization>Disabled</Optimization>
|
<Optimization>Disabled</Optimization>
|
||||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
<PreprocessorDefinitions>WIN32;X64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
<CompileAs>CompileAsC</CompileAs>
|
<CompileAs>CompileAsC</CompileAs>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<Link>
|
<Link>
|
||||||
|
@ -145,6 +146,7 @@
|
||||||
<InlineFunctionExpansion>Default</InlineFunctionExpansion>
|
<InlineFunctionExpansion>Default</InlineFunctionExpansion>
|
||||||
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
||||||
<AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<FloatingPointModel>Fast</FloatingPointModel>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<Link>
|
<Link>
|
||||||
<SubSystem>Console</SubSystem>
|
<SubSystem>Console</SubSystem>
|
||||||
|
@ -179,8 +181,15 @@
|
||||||
<ClInclude Include="..\..\src\nal.h" />
|
<ClInclude Include="..\..\src\nal.h" />
|
||||||
<ClInclude Include="..\..\src\picture.h" />
|
<ClInclude Include="..\..\src\picture.h" />
|
||||||
<ClInclude Include="..\..\src\transform.h" />
|
<ClInclude Include="..\..\src\transform.h" />
|
||||||
|
<ClInclude Include="..\..\src\x64\test64.h" />
|
||||||
|
<ClInclude Include="..\..\src\x86\test.h" />
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<YASM Include="..\..\src\x64\test64.asm" />
|
||||||
|
<YASM Include="..\..\src\x86\test.asm" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
|
<Import Project="..\..\..\..\..\yasm\vsyasm.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
|
@ -13,6 +13,18 @@
|
||||||
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
|
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
|
||||||
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
|
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
|
||||||
</Filter>
|
</Filter>
|
||||||
|
<Filter Include="Source Files\x86">
|
||||||
|
<UniqueIdentifier>{9e551421-7564-43be-a2b6-1ffb16d744a4}</UniqueIdentifier>
|
||||||
|
</Filter>
|
||||||
|
<Filter Include="Header Files\x86">
|
||||||
|
<UniqueIdentifier>{d1533c70-cb9a-419d-9a38-5161894ec359}</UniqueIdentifier>
|
||||||
|
</Filter>
|
||||||
|
<Filter Include="Header Files\x64">
|
||||||
|
<UniqueIdentifier>{cbe787b6-15a0-4c72-b658-6f444735fd73}</UniqueIdentifier>
|
||||||
|
</Filter>
|
||||||
|
<Filter Include="Source Files\x64">
|
||||||
|
<UniqueIdentifier>{39fb72d7-bfcd-4505-9a00-7ed6bf67d9ad}</UniqueIdentifier>
|
||||||
|
</Filter>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClCompile Include="..\..\src\encmain.c">
|
<ClCompile Include="..\..\src\encmain.c">
|
||||||
|
@ -83,5 +95,19 @@
|
||||||
<ClInclude Include="..\..\src\filter.h">
|
<ClInclude Include="..\..\src\filter.h">
|
||||||
<Filter>Header Files</Filter>
|
<Filter>Header Files</Filter>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
|
<ClInclude Include="..\..\src\x86\test.h">
|
||||||
|
<Filter>Header Files\x86</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="..\..\src\x64\test64.h">
|
||||||
|
<Filter>Header Files\x64</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<YASM Include="..\..\src\x86\test.asm">
|
||||||
|
<Filter>Source Files\x86</Filter>
|
||||||
|
</YASM>
|
||||||
|
<YASM Include="..\..\src\x64\test64.asm">
|
||||||
|
<Filter>Source Files\x64</Filter>
|
||||||
|
</YASM>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
</Project>
|
</Project>
|
|
@ -45,6 +45,12 @@
|
||||||
#include "picture.h"
|
#include "picture.h"
|
||||||
#include "transform.h"
|
#include "transform.h"
|
||||||
|
|
||||||
|
/* Assembly optimizations */
|
||||||
|
#ifndef X64
|
||||||
|
#include "x86/test.h"
|
||||||
|
#else
|
||||||
|
#include "x64/test64.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
\brief Program main function.
|
\brief Program main function.
|
||||||
|
@ -54,6 +60,8 @@
|
||||||
*/
|
*/
|
||||||
int main(int argc, char* argv[])
|
int main(int argc, char* argv[])
|
||||||
{
|
{
|
||||||
|
int ecx = 0,edx =0;
|
||||||
|
/* Bit indices of the feature flags returned by CPUID leaf 1.
   BIT_SSSE3, BIT_SSE41 and BIT_SSE42 are tested against ECX;
   BIT_MMX, BIT_SSE and BIT_SSE2 are tested against EDX.
   MMX is EDX bit 23 -- bit 24 is FXSR (FXSAVE/FXRSTOR support), not MMX. */
enum { BIT_SSSE3 = 9, BIT_SSE41 = 19, BIT_SSE42 = 20, BIT_MMX = 23, BIT_SSE = 25, BIT_SSE2 = 26};
|
||||||
uint32_t curFrame = 0;
|
uint32_t curFrame = 0;
|
||||||
config *cfg = NULL; /* Global configuration */
|
config *cfg = NULL; /* Global configuration */
|
||||||
FILE *input = NULL;
|
FILE *input = NULL;
|
||||||
|
@ -64,6 +72,25 @@
|
||||||
#endif
|
#endif
|
||||||
encoder_control* encoder = (encoder_control*)malloc(sizeof(encoder_control));;
|
encoder_control* encoder = (encoder_control*)malloc(sizeof(encoder_control));;
|
||||||
|
|
||||||
|
/* CPU id */
|
||||||
|
|
||||||
|
printf("Checking for CPU features...\r\n");
|
||||||
|
#ifndef X64
|
||||||
|
cpuId(&ecx,&edx);
|
||||||
|
#else
|
||||||
|
cpuId64(&ecx,&edx);
|
||||||
|
#endif
|
||||||
|
//printf("CPUID ECX: %X EDX: %X\r\n", ecx, edx);
|
||||||
|
printf("CPU features enabled: ");
|
||||||
|
if(edx & (1<<BIT_MMX)) printf("MMX ");
|
||||||
|
if(edx & (1<<BIT_SSE)) printf("SSE ");
|
||||||
|
if(edx & (1<<BIT_SSE2)) printf("SSE2 ");
|
||||||
|
if(ecx & (1<<BIT_SSSE3)) printf("SSSE3 ");
|
||||||
|
if(ecx & (1<<BIT_SSE41)) printf("SSE4.1 ");
|
||||||
|
if(ecx & (1<<BIT_SSE42)) printf("SSE4.2 ");
|
||||||
|
printf("\r\n");
|
||||||
|
|
||||||
|
|
||||||
/* Handle configuration */
|
/* Handle configuration */
|
||||||
cfg = config_alloc();
|
cfg = config_alloc();
|
||||||
|
|
||||||
|
|
|
@ -640,7 +640,7 @@ void encode_slice_data(encoder_control* encoder)
|
||||||
|
|
||||||
void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, uint8_t depth)
|
void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, uint8_t depth)
|
||||||
{
|
{
|
||||||
uint8_t split_flag = (depth<3)?1:0; /* ToDo: get from CU data */
|
uint8_t split_flag = (depth<1)?1:0; /* ToDo: get from CU data */
|
||||||
uint8_t split_model = 0;
|
uint8_t split_model = 0;
|
||||||
|
|
||||||
/* Check for slice border */
|
/* Check for slice border */
|
||||||
|
@ -707,7 +707,7 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
|
||||||
if(cur_CU->type == CU_INTRA)
|
if(cur_CU->type == CU_INTRA)
|
||||||
{
|
{
|
||||||
uint8_t intraPredMode = 1;
|
uint8_t intraPredMode = 1;
|
||||||
uint8_t intraPredModeChroma =36; /* 36 = Chroma derived from luma */
|
uint8_t intraPredModeChroma = 1; /* 36 = Chroma derived from luma */
|
||||||
int8_t intraPreds[3] = {-1, -1, -1};
|
int8_t intraPreds[3] = {-1, -1, -1};
|
||||||
int8_t mpmPred = -1;
|
int8_t mpmPred = -1;
|
||||||
int i;
|
int i;
|
||||||
|
@ -747,8 +747,14 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
|
||||||
intra_DCPredFiltering(recShift,(LCU_WIDTH>>(depth))*2+8,pred,width,LCU_WIDTH>>depth,LCU_WIDTH>>depth);
|
intra_DCPredFiltering(recShift,(LCU_WIDTH>>(depth))*2+8,pred,width,LCU_WIDTH>>depth,LCU_WIDTH>>depth);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* ToDo: separate chroma prediction(?) */
|
/* ToDo: separate chroma prediction(?) */
|
||||||
/* intraPredModeChroma = 1; */
|
/* intraPredModeChroma = 1; */
|
||||||
|
|
||||||
|
if(intraPredModeChroma != 36 && intraPredModeChroma == intraPredMode)
|
||||||
|
{
|
||||||
|
intraPredModeChroma = 36;
|
||||||
|
}
|
||||||
intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth+1))*2+8, rec, (LCU_WIDTH>>(depth+1))*2+8, 1);
|
intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth+1))*2+8, rec, (LCU_WIDTH>>(depth+1))*2+8, 1);
|
||||||
intra_recon(recShiftU,(LCU_WIDTH>>(depth+1))*2+8,xCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),yCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),width>>1,predU,width>>1,intraPredModeChroma!=36?intraPredModeChroma:intraPredMode,1);
|
intra_recon(recShiftU,(LCU_WIDTH>>(depth+1))*2+8,xCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),yCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),width>>1,predU,width>>1,intraPredModeChroma!=36?intraPredModeChroma:intraPredMode,1);
|
||||||
intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth+1))*2+8, rec, (LCU_WIDTH>>(depth+1))*2+8, 2);
|
intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth+1))*2+8, rec, (LCU_WIDTH>>(depth+1))*2+8, 2);
|
||||||
|
|
|
@ -290,7 +290,7 @@ int16_t intra_prediction(uint8_t* orig,int32_t origstride,int16_t* rec,int32_t r
|
||||||
if(MIN(abs(i-26),abs(i-10)) <= threshold)
|
if(MIN(abs(i-26),abs(i-10)) <= threshold)
|
||||||
{
|
{
|
||||||
intra_getAngularPred(rec,recstride,pred, width,width,width,i, xpos?1:0, ypos?1:0, filter);
|
intra_getAngularPred(rec,recstride,pred, width,width,width,i, xpos?1:0, ypos?1:0, filter);
|
||||||
//CHECK_FOR_BEST(i);
|
CHECK_FOR_BEST(i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -34,7 +34,7 @@ enum { CU_NOTSET = 0,CU_PCM, CU_SKIP, CU_SPLIT, CU_INTRA, CU_INTER };
|
||||||
#define SET_SPLITDATA(CU,flag) { (CU)->split=(flag); }
|
#define SET_SPLITDATA(CU,flag) { (CU)->split=(flag); }
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
\brief Struct for CU info
|
\brief Struct for CU intra info
|
||||||
*/
|
*/
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
|
@ -42,6 +42,16 @@ typedef struct
|
||||||
uint32_t cost;
|
uint32_t cost;
|
||||||
} CU_info_intra;
|
} CU_info_intra;
|
||||||
|
|
||||||
|
/*!
|
||||||
|
\brief Struct for CU inter info
|
||||||
|
*/
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
uint8_t mode;
|
||||||
|
uint32_t cost;
|
||||||
|
int16_t mv[2];
|
||||||
|
} CU_info_inter;
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
\brief Struct for CU info
|
\brief Struct for CU info
|
||||||
|
@ -49,7 +59,9 @@ typedef struct
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
uint8_t type;
|
uint8_t type;
|
||||||
|
int8_t coded;
|
||||||
CU_info_intra intra;
|
CU_info_intra intra;
|
||||||
|
CU_info_inter inter;
|
||||||
uint8_t split;
|
uint8_t split;
|
||||||
} CU_info;
|
} CU_info;
|
||||||
|
|
||||||
|
|
|
@ -667,13 +667,13 @@ void transform2d(int16_t *block,int16_t *coeff, int8_t blockSize, int32_t uiMode
|
||||||
int16_t tmp[LCU_WIDTH*LCU_WIDTH];
|
int16_t tmp[LCU_WIDTH*LCU_WIDTH];
|
||||||
|
|
||||||
if(blockSize== 4)
|
if(blockSize== 4)
|
||||||
{
|
{/*
|
||||||
if (uiMode != 65535)
|
if (uiMode != 65535)
|
||||||
{
|
{
|
||||||
fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
|
fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
|
||||||
fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
|
fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
|
||||||
}
|
}
|
||||||
else
|
else*/
|
||||||
{
|
{
|
||||||
partialButterfly4(block, tmp, shift_1st, blockSize);
|
partialButterfly4(block, tmp, shift_1st, blockSize);
|
||||||
partialButterfly4(tmp, coeff, shift_2nd, blockSize);
|
partialButterfly4(tmp, coeff, shift_2nd, blockSize);
|
||||||
|
@ -718,13 +718,13 @@ void itransform2d(int16_t *block,int16_t *coeff, int8_t blockSize, int32_t uiMod
|
||||||
int16_t tmp[LCU_WIDTH*LCU_WIDTH];
|
int16_t tmp[LCU_WIDTH*LCU_WIDTH];
|
||||||
|
|
||||||
if( blockSize == 4)
|
if( blockSize == 4)
|
||||||
{
|
{/*
|
||||||
if (uiMode != 65535)
|
if (uiMode != 65535)
|
||||||
{
|
{
|
||||||
fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
|
fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
|
||||||
fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
|
fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
|
||||||
}
|
}
|
||||||
else
|
else*/
|
||||||
{
|
{
|
||||||
partialButterflyInverse4(coeff,tmp,shift_1st,blockSize);
|
partialButterflyInverse4(coeff,tmp,shift_1st,blockSize);
|
||||||
partialButterflyInverse4(tmp,block,shift_2nd,blockSize);
|
partialButterflyInverse4(tmp,block,shift_2nd,blockSize);
|
||||||
|
|
15
src/x64/test64.asm
Normal file
15
src/x64/test64.asm
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
; Function to get CPUID for identifying CPU capabilities
|
||||||
|
bits 64
|
||||||
|
section .code
|
||||||
|
global cpuId64
|
||||||
|
|
||||||
|
; void cpuId64(int *ecx, int *edx)
; Executes CPUID leaf 1 and stores the resulting ECX and EDX feature words
; through the two pointer arguments, which arrive in rcx and rdx.
; Returns 0 in eax.
cpuId64:
    mov r8, rcx             ; pointer to ecx-output (cpuid overwrites ecx)
    mov r9, rdx             ; pointer to edx-output (cpuid overwrites edx)
    mov r10, rbx            ; cpuid also overwrites ebx, which is callee-saved;
                            ; park it in a volatile register so no stack is needed

    mov eax, 1              ; leaf 1: processor info and feature bits
    cpuid
    mov dword [r8], ecx
    mov dword [r9], edx

    mov rbx, r10            ; restore the caller's rbx
    mov eax, 0
    ret
|
19
src/x64/test64.h
Normal file
19
src/x64/test64.h
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
/**
|
||||||
|
* Part of HEVC Encoder
|
||||||
|
* By Marko Viitanen ( fador at iki.fi ), Tampere University of Technology, Department of Computer Systems.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*! \file test64.h
|
||||||
|
\brief Declaration of the x64 assembly CPUID helper (cpuId64)
|
||||||
|
\author Marko Viitanen
|
||||||
|
\date 2013-04
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _TEST64_H_
|
||||||
|
#define _TEST64_H_
|
||||||
|
|
||||||
|
void __cdecl cpuId64(int* ecx, int *edx );
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
13
src/x86/test.asm
Normal file
13
src/x86/test.asm
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
; Function to get CPUID for identifying CPU capabilities
|
||||||
|
bits 32
|
||||||
|
global _cpuId
|
||||||
|
|
||||||
|
; void __cdecl cpuId(int *ecx, int *edx)
; Executes CPUID leaf 1 and stores the resulting ECX and EDX feature words
; through the two pointer arguments passed on the stack.
; Returns 0 in eax.
_cpuId:
    push ebx                    ; cpuid overwrites ebx, which is callee-saved
    mov eax, 1                  ; leaf 1: processor info and feature bits
    cpuid
    mov eax, dword [esp+8]      ; first argument (offset +4 shifted by the push)
    mov dword [eax], ecx
    mov eax, dword [esp+12]     ; second argument
    mov dword [eax], edx
    pop ebx                     ; restore the caller's ebx
    mov eax, 0
    ret
|
19
src/x86/test.h
Normal file
19
src/x86/test.h
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
/**
|
||||||
|
* Part of HEVC Encoder
|
||||||
|
* By Marko Viitanen ( fador at iki.fi ), Tampere University of Technology, Department of Computer Systems.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*! \file test.h
|
||||||
|
\brief Declaration of the x86 assembly CPUID helper (cpuId)
|
||||||
|
\author Marko Viitanen
|
||||||
|
\date 2013-04
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _TEST_H_
|
||||||
|
#define _TEST_H_
|
||||||
|
|
||||||
|
void __cdecl cpuId(int* ecx, int *edx );
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
Loading…
Reference in a new issue