mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 11:24:05 +00:00
Added CPUID fetch assembly functions (x86 and x64)
This commit is contained in:
parent
560917a532
commit
01c7f267d7
|
@ -48,6 +48,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="..\..\..\..\..\yasm\vsyasm.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
|
@ -99,7 +100,7 @@
|
|||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<PreprocessorDefinitions>WIN32;X64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<CompileAs>CompileAsC</CompileAs>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
|
@ -145,6 +146,7 @@
|
|||
<InlineFunctionExpansion>Default</InlineFunctionExpansion>
|
||||
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
||||
<AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
|
@ -179,8 +181,15 @@
|
|||
<ClInclude Include="..\..\src\nal.h" />
|
||||
<ClInclude Include="..\..\src\picture.h" />
|
||||
<ClInclude Include="..\..\src\transform.h" />
|
||||
<ClInclude Include="..\..\src\x64\test64.h" />
|
||||
<ClInclude Include="..\..\src\x86\test.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<YASM Include="..\..\src\x64\test64.asm" />
|
||||
<YASM Include="..\..\src\x86\test.asm" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="..\..\..\..\..\yasm\vsyasm.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -13,6 +13,18 @@
|
|||
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
|
||||
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
|
||||
</Filter>
|
||||
<Filter Include="Source Files\x86">
|
||||
<UniqueIdentifier>{9e551421-7564-43be-a2b6-1ffb16d744a4}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Header Files\x86">
|
||||
<UniqueIdentifier>{d1533c70-cb9a-419d-9a38-5161894ec359}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Header Files\x64">
|
||||
<UniqueIdentifier>{cbe787b6-15a0-4c72-b658-6f444735fd73}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Source Files\x64">
|
||||
<UniqueIdentifier>{39fb72d7-bfcd-4505-9a00-7ed6bf67d9ad}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\src\encmain.c">
|
||||
|
@ -83,5 +95,19 @@
|
|||
<ClInclude Include="..\..\src\filter.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\src\x86\test.h">
|
||||
<Filter>Header Files\x86</Filter>
|
||||
</ClInclude>
|
||||
<!-- x64 header: filed under the x64 header filter, mirroring test64.asm in "Source Files\x64" -->
<ClInclude Include="..\..\src\x64\test64.h">
  <Filter>Header Files\x64</Filter>
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<YASM Include="..\..\src\x86\test.asm">
|
||||
<Filter>Source Files\x86</Filter>
|
||||
</YASM>
|
||||
<YASM Include="..\..\src\x64\test64.asm">
|
||||
<Filter>Source Files\x64</Filter>
|
||||
</YASM>
|
||||
</ItemGroup>
|
||||
</Project>
|
|
@ -45,6 +45,12 @@
|
|||
#include "picture.h"
|
||||
#include "transform.h"
|
||||
|
||||
/* Assembly optimizations */
|
||||
#ifndef X64
|
||||
#include "x86/test.h"
|
||||
#else
|
||||
#include "x64/test64.h"
|
||||
#endif
|
||||
|
||||
/*!
|
||||
\brief Program main function.
|
||||
|
@ -54,6 +60,8 @@
|
|||
*/
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
int ecx = 0,edx =0;
|
||||
enum { BIT_SSSE3 = 9, BIT_SSE41 = 19, BIT_SSE42 = 20, BIT_MMX = 24, BIT_SSE = 25, BIT_SSE2 = 26};
|
||||
uint32_t curFrame = 0;
|
||||
config *cfg = NULL; /* Global configuration */
|
||||
FILE *input = NULL;
|
||||
|
@ -64,6 +72,25 @@
|
|||
#endif
|
||||
encoder_control* encoder = (encoder_control*)malloc(sizeof(encoder_control));;
|
||||
|
||||
/* CPU id */
|
||||
|
||||
printf("Checking for CPU features...\r\n");
|
||||
#ifndef X64
|
||||
cpuId(&ecx,&edx);
|
||||
#else
|
||||
cpuId64(&ecx,&edx);
|
||||
#endif
|
||||
//printf("CPUID ECX: %X EDX: %X\r\n", ecx, edx);
|
||||
printf("CPU features enabled: ");
|
||||
if(edx & (1<<BIT_MMX)) printf("MMX ");
|
||||
if(edx & (1<<BIT_SSE)) printf("SSE ");
|
||||
if(edx & (1<<BIT_SSE2)) printf("SSE2 ");
|
||||
if(ecx & (1<<BIT_SSSE3)) printf("SSSE3 ");
|
||||
if(ecx & (1<<BIT_SSE41)) printf("SSE4.1 ");
|
||||
if(ecx & (1<<BIT_SSE42)) printf("SSE4.2 ");
|
||||
printf("\r\n");
|
||||
|
||||
|
||||
/* Handle configuration */
|
||||
cfg = config_alloc();
|
||||
|
||||
|
|
|
@ -128,7 +128,7 @@ void initSigLastScan(uint32_t* pBuffD, uint32_t* pBuffH, uint32_t* pBuffV, int32
|
|||
{
|
||||
for(blkY=0; blkY < numBlkSide; blkY++)
|
||||
{
|
||||
uint32_t offset = blkY * 4 * iWidth + blkX * 4;
|
||||
uint32_t offset = blkY * 4 * iWidth + blkX * 4;
|
||||
for(x=0; x < 4; x++)
|
||||
{
|
||||
for(y=0; y < 4; y++)
|
||||
|
@ -640,7 +640,7 @@ void encode_slice_data(encoder_control* encoder)
|
|||
|
||||
void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, uint8_t depth)
|
||||
{
|
||||
uint8_t split_flag = (depth<3)?1:0; /* ToDo: get from CU data */
|
||||
uint8_t split_flag = (depth<1)?1:0; /* ToDo: get from CU data */
|
||||
uint8_t split_model = 0;
|
||||
|
||||
/* Check for slice border */
|
||||
|
@ -707,7 +707,7 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
|
|||
if(cur_CU->type == CU_INTRA)
|
||||
{
|
||||
uint8_t intraPredMode = 1;
|
||||
uint8_t intraPredModeChroma =36; /* 36 = Chroma derived from luma */
|
||||
uint8_t intraPredModeChroma = 1; /* 36 = Chroma derived from luma */
|
||||
int8_t intraPreds[3] = {-1, -1, -1};
|
||||
int8_t mpmPred = -1;
|
||||
int i;
|
||||
|
@ -747,8 +747,14 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
|
|||
intra_DCPredFiltering(recShift,(LCU_WIDTH>>(depth))*2+8,pred,width,LCU_WIDTH>>depth,LCU_WIDTH>>depth);
|
||||
}
|
||||
|
||||
|
||||
/* ToDo: separate chroma prediction(?) */
|
||||
/* intraPredModeChroma = 1; */
|
||||
|
||||
if(intraPredModeChroma != 36 && intraPredModeChroma == intraPredMode)
|
||||
{
|
||||
intraPredModeChroma = 36;
|
||||
}
|
||||
intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth+1))*2+8, rec, (LCU_WIDTH>>(depth+1))*2+8, 1);
|
||||
intra_recon(recShiftU,(LCU_WIDTH>>(depth+1))*2+8,xCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),yCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),width>>1,predU,width>>1,intraPredModeChroma!=36?intraPredModeChroma:intraPredMode,1);
|
||||
intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth+1))*2+8, rec, (LCU_WIDTH>>(depth+1))*2+8, 2);
|
||||
|
|
|
@ -290,7 +290,7 @@ int16_t intra_prediction(uint8_t* orig,int32_t origstride,int16_t* rec,int32_t r
|
|||
if(MIN(abs(i-26),abs(i-10)) <= threshold)
|
||||
{
|
||||
intra_getAngularPred(rec,recstride,pred, width,width,width,i, xpos?1:0, ypos?1:0, filter);
|
||||
//CHECK_FOR_BEST(i);
|
||||
CHECK_FOR_BEST(i);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -34,7 +34,7 @@ enum { CU_NOTSET = 0,CU_PCM, CU_SKIP, CU_SPLIT, CU_INTRA, CU_INTER };
|
|||
#define SET_SPLITDATA(CU,flag) { (CU)->split=(flag); }
|
||||
|
||||
/*!
|
||||
\brief Struct for CU info
|
||||
\brief Struct for CU intra info
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
|
@ -42,6 +42,16 @@ typedef struct
|
|||
uint32_t cost;
|
||||
} CU_info_intra;
|
||||
|
||||
/*!
|
||||
\brief Struct for CU inter info
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint8_t mode; /* presumably the selected inter prediction mode - TODO confirm against users of this field */
|
||||
uint32_t cost; /* presumably the cost of the selected mode - TODO confirm metric and units */
|
||||
int16_t mv[2]; /* two 16-bit components; presumably a motion vector - TODO confirm component order */
|
||||
} CU_info_inter;
|
||||
|
||||
|
||||
/*!
|
||||
\brief Struct for CU info
|
||||
|
@ -49,7 +59,9 @@ typedef struct
|
|||
typedef struct
|
||||
{
|
||||
uint8_t type; /* one of CU_NOTSET, CU_PCM, CU_SKIP, CU_SPLIT, CU_INTRA, CU_INTER */
|
||||
int8_t coded; /* NOTE(review): looks like a "CU already coded" marker - confirm meaning and value range */
|
||||
CU_info_intra intra; /* intra-specific data for this CU */
|
||||
CU_info_inter inter; /* inter-specific data for this CU */
|
||||
uint8_t split; /* split flag; written through the SET_SPLITDATA macro */
|
||||
} CU_info;
|
||||
|
||||
|
|
|
@ -249,10 +249,10 @@ void scalinglist_processDec( int32_t *coeff, int32_t *dequantcoeff, int32_t invQ
|
|||
|
||||
void scalinglist_set(int32_t *coeff, uint32_t listId, uint32_t sizeId, uint32_t qp)
|
||||
{
|
||||
uint32_t width = g_scalingListSizeX[sizeId];
|
||||
uint32_t width = g_scalingListSizeX[sizeId];
|
||||
uint32_t height = g_scalingListSizeX[sizeId];
|
||||
uint32_t ratio = g_scalingListSizeX[sizeId]/MIN(8,g_scalingListSizeX[sizeId]);
|
||||
int32_t *quantcoeff = g_quant_coeff[sizeId][listId][qp];
|
||||
uint32_t ratio = g_scalingListSizeX[sizeId]/MIN(8,g_scalingListSizeX[sizeId]);
|
||||
int32_t *quantcoeff = g_quant_coeff[sizeId][listId][qp];
|
||||
int32_t *dequantcoeff = g_de_quant_coeff[sizeId][listId][qp];
|
||||
|
||||
scalinglist_processEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width,ratio,MIN(8,g_scalingListSizeX[sizeId]),/*SCALING_LIST_DC*/16, 0);
|
||||
|
@ -667,13 +667,13 @@ void transform2d(int16_t *block,int16_t *coeff, int8_t blockSize, int32_t uiMode
|
|||
int16_t tmp[LCU_WIDTH*LCU_WIDTH];
|
||||
|
||||
if(blockSize== 4)
|
||||
{
|
||||
{/*
|
||||
if (uiMode != 65535)
|
||||
{
|
||||
fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
|
||||
fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
|
||||
}
|
||||
else
|
||||
else*/
|
||||
{
|
||||
partialButterfly4(block, tmp, shift_1st, blockSize);
|
||||
partialButterfly4(tmp, coeff, shift_2nd, blockSize);
|
||||
|
@ -718,13 +718,13 @@ void itransform2d(int16_t *block,int16_t *coeff, int8_t blockSize, int32_t uiMod
|
|||
int16_t tmp[LCU_WIDTH*LCU_WIDTH];
|
||||
|
||||
if( blockSize == 4)
|
||||
{
|
||||
{/*
|
||||
if (uiMode != 65535)
|
||||
{
|
||||
fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
|
||||
fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
|
||||
}
|
||||
else
|
||||
else*/
|
||||
{
|
||||
partialButterflyInverse4(coeff,tmp,shift_1st,blockSize);
|
||||
partialButterflyInverse4(tmp,block,shift_2nd,blockSize);
|
||||
|
|
15
src/x64/test64.asm
Normal file
15
src/x64/test64.asm
Normal file
|
@ -0,0 +1,15 @@
|
|||
; Function to get CPUID for identifying CPU capabilities
;
; C prototype: void cpuId64(int* ecx, int* edx);
;   rcx = pointer that receives the ECX feature flags of CPUID leaf 1
;   rdx = pointer that receives the EDX feature flags of CPUID leaf 1
;
; cpuid overwrites eax, ebx, ecx and edx. rbx is a callee-saved register,
; so it has to be restored before returning to the C caller.
bits 64
section .code
global cpuId64

cpuId64:
    mov r8, rcx             ; pointer to ecx-output (rcx is overwritten by cpuid)
    mov r9, rdx             ; pointer to edx-output (rdx is overwritten by cpuid)
    mov r10, rbx            ; keep the caller's rbx in a volatile register; rsp stays untouched

    mov eax,1               ; leaf 1: processor feature flags
    cpuid
    mov dword [r8], ecx
    mov dword [r9], edx

    mov rbx, r10            ; restore the caller's rbx
    mov eax,0
    ret
|
19
src/x64/test64.h
Normal file
19
src/x64/test64.h
Normal file
|
@ -0,0 +1,19 @@
|
|||
/**
|
||||
* Part of HEVC Encoder
|
||||
* By Marko Viitanen ( fador at iki.fi ), Tampere University of Technology, Department of Computer Systems.
|
||||
*/
|
||||
|
||||
/*! \file test64.h
|
||||
\brief Declaration of the x64 CPUID helper implemented in test64.asm
|
||||
\author Marko Viitanen
|
||||
\date 2013-04
|
||||
|
||||
*/
|
||||
|
||||
#ifndef _TEST64_H_
|
||||
#define _TEST64_H_
|
||||
|
||||
/*!
 \brief Executes CPUID with EAX=1 and stores the resulting register values.
 \param ecx receives the value of the ECX register after CPUID
 \param edx receives the value of the EDX register after CPUID
 Implemented in assembly (test64.asm).
*/
void __cdecl cpuId64(int* ecx, int *edx );
|
||||
|
||||
|
||||
#endif
|
13
src/x86/test.asm
Normal file
13
src/x86/test.asm
Normal file
|
@ -0,0 +1,13 @@
|
|||
; Function to get CPUID for identifying CPU capabilities
;
; C prototype: void __cdecl cpuId(int* ecx, int* edx);
;   first stack argument  = pointer that receives the ECX feature flags of CPUID leaf 1
;   second stack argument = pointer that receives the EDX feature flags of CPUID leaf 1
;
; cpuid overwrites eax, ebx, ecx and edx. ebx must be preserved for the
; cdecl caller, so it is saved on the stack around the instruction.
bits 32
global _cpuId

_cpuId:
    push ebx                    ; cpuid overwrites ebx; the caller expects it unchanged
    mov eax,1                   ; leaf 1: processor feature flags
    cpuid
    mov eax, dword [esp+8]      ; first argument (offset grew by 4 because of the push)
    mov dword [eax], ecx
    mov eax, dword [esp+12]     ; second argument
    mov dword [eax], edx
    pop ebx                     ; restore the caller's ebx
    mov eax,0
    ret
|
19
src/x86/test.h
Normal file
19
src/x86/test.h
Normal file
|
@ -0,0 +1,19 @@
|
|||
/**
|
||||
* Part of HEVC Encoder
|
||||
* By Marko Viitanen ( fador at iki.fi ), Tampere University of Technology, Department of Computer Systems.
|
||||
*/
|
||||
|
||||
/*! \file test.h
|
||||
\brief Declaration of the x86 CPUID helper implemented in test.asm
|
||||
\author Marko Viitanen
|
||||
\date 2013-04
|
||||
|
||||
*/
|
||||
|
||||
#ifndef _TEST_H_
|
||||
#define _TEST_H_
|
||||
|
||||
/*!
 \brief Executes CPUID with EAX=1 and stores the resulting register values.
 \param ecx receives the value of the ECX register after CPUID
 \param edx receives the value of the EDX register after CPUID
 Implemented in assembly (test.asm, exported as _cpuId).
*/
void __cdecl cpuId(int* ecx, int *edx );
|
||||
|
||||
|
||||
#endif
|
Loading…
Reference in a new issue