diff --git a/build/VS2010/HEVC_encoder.vcxproj b/build/VS2010/HEVC_encoder.vcxproj
index a3d7e2db..81a70754 100644
--- a/build/VS2010/HEVC_encoder.vcxproj
+++ b/build/VS2010/HEVC_encoder.vcxproj
@@ -48,6 +48,7 @@
+
@@ -99,7 +100,7 @@
Level3
Disabled
- WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ WIN32;X64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
CompileAsC
@@ -145,6 +146,7 @@
Default
Speed
/MP %(AdditionalOptions)
+ Fast
Console
@@ -179,8 +181,15 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/build/VS2010/HEVC_encoder.vcxproj.filters b/build/VS2010/HEVC_encoder.vcxproj.filters
index 8fea39dc..7432deb2 100644
--- a/build/VS2010/HEVC_encoder.vcxproj.filters
+++ b/build/VS2010/HEVC_encoder.vcxproj.filters
@@ -13,6 +13,18 @@
{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+ {9e551421-7564-43be-a2b6-1ffb16d744a4}
+
+
+ {d1533c70-cb9a-419d-9a38-5161894ec359}
+
+
+ {cbe787b6-15a0-4c72-b658-6f444735fd73}
+
+
+ {39fb72d7-bfcd-4505-9a00-7ed6bf67d9ad}
+
@@ -83,5 +95,19 @@
Header Files
+
+ Header Files\x86
+
+
+ Header Files\x86
+
+
+
+
+ Source Files\x86
+
+
+ Source Files\x64
+
\ No newline at end of file
diff --git a/src/encmain.c b/src/encmain.c
index d0d7d3b4..d932b9a5 100644
--- a/src/encmain.c
+++ b/src/encmain.c
@@ -45,6 +45,12 @@
#include "picture.h"
#include "transform.h"
+ /* Assembly optimizations */
+#ifndef X64
+ #include "x86/test.h"
+#else
+ #include "x64/test64.h"
+#endif
/*!
\brief Program main function.
@@ -53,7 +59,9 @@
\return Program exit state
*/
int main(int argc, char* argv[])
- {
+ {
+ int ecx = 0,edx =0;
+ enum { BIT_SSSE3 = 9, BIT_SSE41 = 19, BIT_SSE42 = 20, BIT_MMX = 24, BIT_SSE = 25, BIT_SSE2 = 26};
uint32_t curFrame = 0;
config *cfg = NULL; /* Global configuration */
FILE *input = NULL;
@@ -63,6 +71,25 @@
FILE *recout = fopen("encrec.yuv","wb");
#endif
encoder_control* encoder = (encoder_control*)malloc(sizeof(encoder_control));;
+
+ /* CPU id */
+
+ printf("Checking for CPU features...\r\n");
+ #ifndef X64
+ cpuId(&ecx,&edx);
+ #else
+ cpuId64(&ecx,&edx);
+ #endif
+ //printf("CPUID ECX: %X EDX: %X\r\n", ecx, edx);
+ printf("CPU features enabled: ");
+ if(edx & (1<type == CU_INTRA)
{
uint8_t intraPredMode = 1;
- uint8_t intraPredModeChroma =36; /* 36 = Chroma derived from luma */
+ uint8_t intraPredModeChroma = 1; /* 36 = Chroma derived from luma */
int8_t intraPreds[3] = {-1, -1, -1};
int8_t mpmPred = -1;
int i;
@@ -746,9 +746,15 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
{
intra_DCPredFiltering(recShift,(LCU_WIDTH>>(depth))*2+8,pred,width,LCU_WIDTH>>depth,LCU_WIDTH>>depth);
}
+
/* ToDo: separate chroma prediction(?) */
/* intraPredModeChroma = 1; */
+
+ if(intraPredModeChroma != 36 && intraPredModeChroma == intraPredMode)
+ {
+ intraPredModeChroma = 36;
+ }
intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth+1))*2+8, rec, (LCU_WIDTH>>(depth+1))*2+8, 1);
intra_recon(recShiftU,(LCU_WIDTH>>(depth+1))*2+8,xCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),yCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),width>>1,predU,width>>1,intraPredModeChroma!=36?intraPredModeChroma:intraPredMode,1);
intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth+1))*2+8, rec, (LCU_WIDTH>>(depth+1))*2+8, 2);
diff --git a/src/intra.c b/src/intra.c
index b115e784..f3a216ba 100644
--- a/src/intra.c
+++ b/src/intra.c
@@ -290,7 +290,7 @@ int16_t intra_prediction(uint8_t* orig,int32_t origstride,int16_t* rec,int32_t r
if(MIN(abs(i-26),abs(i-10)) <= threshold)
{
intra_getAngularPred(rec,recstride,pred, width,width,width,i, xpos?1:0, ypos?1:0, filter);
- //CHECK_FOR_BEST(i);
+ CHECK_FOR_BEST(i);
}
}
diff --git a/src/picture.h b/src/picture.h
index 0d9eaa2b..abead2d4 100644
--- a/src/picture.h
+++ b/src/picture.h
@@ -34,7 +34,7 @@ enum { CU_NOTSET = 0,CU_PCM, CU_SKIP, CU_SPLIT, CU_INTRA, CU_INTER };
#define SET_SPLITDATA(CU,flag) { (CU)->split=(flag); }
/*!
- \brief Struct for CU info
+ \brief Struct for CU intra info
*/
typedef struct
{
@@ -42,6 +42,16 @@ typedef struct
uint32_t cost;
} CU_info_intra;
+/*!
+ \brief Struct for CU inter info
+*/
+typedef struct
+{
+ uint8_t mode; /* NOTE(review): value meaning is not visible here; presumably the inter prediction mode - confirm */
+ uint32_t cost; /* presumably the cost associated with this mode - TODO confirm against the search code */
+ int16_t mv[2]; /* two signed 16-bit components; presumably a motion vector - TODO confirm component order and units */
+} CU_info_inter;
+
/*!
\brief Struct for CU info
@@ -49,7 +59,9 @@ typedef struct
typedef struct
{
uint8_t type;
+ int8_t coded; /* NOTE(review): semantics not visible in this change; signed 8-bit - confirm which values are used */
CU_info_intra intra;
+ CU_info_inter inter; /* inter counterpart of the intra member; presumably meaningful when type == CU_INTER - confirm */
uint8_t split;
} CU_info;
diff --git a/src/transform.c b/src/transform.c
index dd243bd0..474d60d7 100644
--- a/src/transform.c
+++ b/src/transform.c
@@ -249,10 +249,10 @@ void scalinglist_processDec( int32_t *coeff, int32_t *dequantcoeff, int32_t invQ
void scalinglist_set(int32_t *coeff, uint32_t listId, uint32_t sizeId, uint32_t qp)
{
- uint32_t width = g_scalingListSizeX[sizeId];
+ uint32_t width = g_scalingListSizeX[sizeId];
uint32_t height = g_scalingListSizeX[sizeId];
- uint32_t ratio = g_scalingListSizeX[sizeId]/MIN(8,g_scalingListSizeX[sizeId]);
- int32_t *quantcoeff = g_quant_coeff[sizeId][listId][qp];
+ uint32_t ratio = g_scalingListSizeX[sizeId]/MIN(8,g_scalingListSizeX[sizeId]);
+ int32_t *quantcoeff = g_quant_coeff[sizeId][listId][qp];
int32_t *dequantcoeff = g_de_quant_coeff[sizeId][listId][qp];
scalinglist_processEnc(coeff,quantcoeff,g_quantScales[qp]<<4,height,width,ratio,MIN(8,g_scalingListSizeX[sizeId]),/*SCALING_LIST_DC*/16, 0);
@@ -667,13 +667,13 @@ void transform2d(int16_t *block,int16_t *coeff, int8_t blockSize, int32_t uiMode
int16_t tmp[LCU_WIDTH*LCU_WIDTH];
if(blockSize== 4)
- {
+ {/*
if (uiMode != 65535)
{
fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
}
- else
+ else*/
{
partialButterfly4(block, tmp, shift_1st, blockSize);
partialButterfly4(tmp, coeff, shift_2nd, blockSize);
@@ -718,13 +718,13 @@ void itransform2d(int16_t *block,int16_t *coeff, int8_t blockSize, int32_t uiMod
int16_t tmp[LCU_WIDTH*LCU_WIDTH];
if( blockSize == 4)
- {
+ {/*
if (uiMode != 65535)
{
fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
}
- else
+ else*/
{
partialButterflyInverse4(coeff,tmp,shift_1st,blockSize);
partialButterflyInverse4(tmp,block,shift_2nd,blockSize);
@@ -762,7 +762,7 @@ void quant(encoder_control* encoder, int16_t* pSrc, int16_t* pDes, int32_t iWidt
//uint32_t scanIdx = SCAN_DIAG;
- scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ];
+ scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ];
{
int32_t deltaU[LCU_WIDTH*LCU_WIDTH] ;
int32_t iQpBase = encoder->QP;
diff --git a/src/x64/test64.asm b/src/x64/test64.asm
new file mode 100644
index 00000000..30963055
--- /dev/null
+++ b/src/x64/test64.asm
@@ -0,0 +1,21 @@
+; Function to get CPUID for identifying CPU capabilities
+bits 64
+section .code
+global cpuId64
+
+; void cpuId64(int* ecx, int* edx)
+; Runs CPUID with EAX=1 and stores the returned ECX and EDX values
+; through the two pointer arguments, which are taken from rcx and rdx.
+; rbx is preserved across the call; eax is 0 on return.
+cpuId64:
+ mov r8, rcx ; pointer to ecx-output
+ mov r9, rdx ; pointer to edx-output
+ mov r10, rbx ; cpuid overwrites ebx and rbx is callee-saved: keep a copy
+
+ mov eax,1
+ cpuid
+ mov rbx, r10 ; put the caller's rbx back before anything else
+ mov dword [r8], ecx
+ mov dword [r9], edx
+ mov eax,0
+ ret
\ No newline at end of file
diff --git a/src/x64/test64.h b/src/x64/test64.h
new file mode 100644
index 00000000..21fe9a71
--- /dev/null
+++ b/src/x64/test64.h
@@ -0,0 +1,19 @@
+/**
+ * Part of HEVC Encoder
+ * By Marko Viitanen ( fador at iki.fi ), Tampere University of Technology, Department of Computer Systems.
+ */
+
+/*! \file test64.h
+ \brief Declaration of the x64 CPUID helper routine
+ \author Marko Viitanen
+ \date 2013-04
+
+*/
+
+#ifndef _TEST64_H_
+#define _TEST64_H_
+
+void __cdecl cpuId64(int* ecx, int *edx ); /* Runs CPUID with EAX=1 and stores the resulting ECX and EDX values through the two pointers */
+
+
+#endif
\ No newline at end of file
diff --git a/src/x86/test.asm b/src/x86/test.asm
new file mode 100644
index 00000000..e9626f5b
--- /dev/null
+++ b/src/x86/test.asm
@@ -0,0 +1,19 @@
+; Function to get CPUID for identifying CPU capabilities
+bits 32
+global _cpuId
+
+; void cpuId(int* ecx, int* edx) (cdecl, arguments on the stack)
+; Runs CPUID with EAX=1 and stores the returned ECX and EDX values
+; through the pointers found at [esp+4] and [esp+8].
+; ebx is preserved across the call; eax is 0 on return.
+_cpuId:
+ push ebx ; cpuid overwrites ebx, which cdecl requires the callee to preserve
+ mov eax,1
+ cpuid
+ pop ebx ; restore ebx; esp is back at its entry value, so the offsets below are unchanged
+ mov eax, dword [esp+4]
+ mov dword [eax], ecx
+ mov eax, dword [esp+8]
+ mov dword [eax], edx
+ mov eax,0
+ ret
\ No newline at end of file
diff --git a/src/x86/test.h b/src/x86/test.h
new file mode 100644
index 00000000..3448e699
--- /dev/null
+++ b/src/x86/test.h
@@ -0,0 +1,19 @@
+/**
+ * Part of HEVC Encoder
+ * By Marko Viitanen ( fador at iki.fi ), Tampere University of Technology, Department of Computer Systems.
+ */
+
+/*! \file test.h
+ \brief Declaration of the x86 CPUID helper routine
+ \author Marko Viitanen
+ \date 2013-04
+
+*/
+
+#ifndef _TEST_H_
+#define _TEST_H_
+
+void __cdecl cpuId(int* ecx, int *edx ); /* Runs CPUID with EAX=1 and stores the resulting ECX and EDX values through the two pointers */
+
+
+#endif
\ No newline at end of file