diff --git a/build/HEVC_encoder_VS2010.sln b/build/HEVC_encoder_VS2010.sln index 09411b73..b304a0c1 100644 --- a/build/HEVC_encoder_VS2010.sln +++ b/build/HEVC_encoder_VS2010.sln @@ -3,6 +3,11 @@ Microsoft Visual Studio Solution File, Format Version 11.00 # Visual Studio 2010 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "HEVC_encoder", "VS2010\HEVC_encoder.vcxproj", "{EEA3BDD1-8A08-41C1-BA57-E05D5C2CD8FF}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{50AB7A17-4885-4D20-BF01-376DE4417FCD}" + ProjectSection(SolutionItems) = preProject + HEVC_encoder_VS2010.vsd = HEVC_encoder_VS2010.vsd + EndProjectSection +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Win32 = Debug|Win32 diff --git a/build/VS2010/HEVC_encoder.vcxproj b/build/VS2010/HEVC_encoder.vcxproj index a02be213..33bc8ced 100644 --- a/build/VS2010/HEVC_encoder.vcxproj +++ b/build/VS2010/HEVC_encoder.vcxproj @@ -102,6 +102,7 @@ Disabled WIN32;X64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) CompileAsC + AssemblyAndSourceCode Console diff --git a/src/encoder.c b/src/encoder.c index c382be5b..a2862303 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -31,6 +31,8 @@ #include "filter.h" #include "search.h" +int16_t g_lambda_cost[55]; + void initSigLastScan(uint32_t* pBuffD, uint32_t* pBuffH, uint32_t* pBuffV, int32_t iWidth, int32_t iHeight) { uint32_t uiNumScanPos = iWidth * iWidth; @@ -191,10 +193,12 @@ void init_tables(void) /* Lambda cost */ /* ToDo: cleanup */ - g_lambda_cost = (int16_t*)malloc(sizeof(int16_t)*55); + //g_lambda_cost = (int16_t*)malloc(sizeof(int16_t)*55); for(i = 0; i < 55; i++) { - g_lambda_cost[i] = sqrt(0.85*pow(2.0,i/3)); + if(i < 12) g_lambda_cost[i]= 0; + else g_lambda_cost[i] = sqrt(0.57*pow(2.0,(i-12)/3)); + //g_lambda_cost[i] = g_lambda_cost[i]*g_lambda_cost[i]; } } @@ -652,20 +656,21 @@ void encode_slice_data(encoder_control* encoder) } void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, uint8_t depth) -{ - uint8_t split_flag = (depth<1)?1:0; /* ToDo: get from CU data */ +{ + CU_info *cur_CU = &encoder->in.cur_pic.CU[depth][(xCtb>>(MAX_DEPTH-depth))+(yCtb>>(MAX_DEPTH-depth))*(encoder->in.width_in_LCU<split;//(depth<1)?1:0; /* ToDo: get from CU data */ uint8_t split_model = 0; /* Check for slice border */ uint8_t border_x = ((encoder->in.width)<( xCtb*(LCU_WIDTH>>MAX_DEPTH) + (LCU_WIDTH>>depth) ))?1:0; uint8_t border_y = ((encoder->in.height)<( yCtb*(LCU_WIDTH>>MAX_DEPTH) + (LCU_WIDTH>>depth) ))?1:0; uint8_t border = border_x | border_y; /*!< are we in any border CU */ - CU_info *cur_CU = &encoder->in.cur_pic.CU[depth][(xCtb>>(MAX_DEPTH-depth))+(yCtb>>(MAX_DEPTH-depth))*(encoder->in.width_in_LCU<type = CU_INTRA; + //cur_CU->type = CU_INTRA; } /* Signal PartSize on max depth */ @@ -719,8 +724,8 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui if(cur_CU->type == CU_INTRA) { - uint8_t intraPredMode = 1; - uint8_t intraPredModeChroma = 1; /* 36 = Chroma derived from luma */ + uint8_t intraPredMode = cur_CU->intra.mode; + uint8_t intraPredModeChroma = 36; /* 36 = Chroma derived from luma */ int8_t intraPreds[3] = {-1, -1, -1}; int8_t mpmPred = -1; int i; @@ -751,8 +756,8 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui /* Build reconstructed block to use in prediction with extrapolated borders */ intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth))*2+8, rec, (LCU_WIDTH>>(depth))*2+8, 0); - - intraPredMode = (uint8_t)intra_prediction(encoder->in.cur_pic.yData,encoder->in.width,recShift,(LCU_WIDTH>>(depth))*2+8,xCtb*(LCU_WIDTH>>(MAX_DEPTH)),yCtb*(LCU_WIDTH>>(MAX_DEPTH)),width,pred,width,&bestSAD); + intra_recon(recShift,(LCU_WIDTH>>(depth))*2+8,xCtb*(LCU_WIDTH>>(MAX_DEPTH)),yCtb*(LCU_WIDTH>>(MAX_DEPTH)),width,pred,width,intraPredMode,0); + //intraPredMode = (uint8_t)intra_prediction(encoder->in.cur_pic.yData,encoder->in.width,recShift,(LCU_WIDTH>>(depth))*2+8,xCtb*(LCU_WIDTH>>(MAX_DEPTH)),yCtb*(LCU_WIDTH>>(MAX_DEPTH)),width,pred,width,&bestSAD); /* Filter DC-prediction */ if(intraPredMode == 1 && (LCU_WIDTH>>depth) < 32) @@ -774,7 +779,9 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui intra_recon(recShiftU,(LCU_WIDTH>>(depth+1))*2+8,xCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),yCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),width>>1,predV,width>>1,intraPredModeChroma!=36?intraPredModeChroma:intraPredMode,1); /* This affects reconstruction, do after that */ - intra_setBlockMode(&encoder->in.cur_pic, xCtb, yCtb, depth, intraPredMode); + //intra_setBlockMode(&encoder->in.cur_pic, xCtb, yCtb, depth, intraPredMode); + //cur_CU->coded = 1; + picture_setBlockCoded(&encoder->in.cur_pic, xCtb, yCtb, depth, 1); /* PREDINFO CODING diff --git a/src/encoder.h b/src/encoder.h index eca4b8f3..f7f40ff2 100644 --- a/src/encoder.h +++ b/src/encoder.h @@ -104,7 +104,7 @@ void encode_CoeffNxN(encoder_control* encoder,int16_t* coeff, uint8_t width, uin void encode_transform_tree(encoder_control* encoder,transform_info* ti,uint8_t depth); void encode_transform_coeff(encoder_control* encoder,transform_info* ti,int8_t depth, int8_t trDepth); -static int16_t* g_lambda_cost; +extern int16_t g_lambda_cost[55]; static uint32_t* g_auiSigLastScan[3][7]; int8_t g_aucConvertToBit[LCU_WIDTH+1]; static int8_t g_bitDepth = 8; diff --git a/src/global.h b/src/global.h index bc2c7750..4c1e021f 100644 --- a/src/global.h +++ b/src/global.h @@ -15,6 +15,10 @@ /* CONFIG VARIABLES */ #define LCU_WIDTH 64 /*!< Largest Coding Unit */ + +#define MAX_SEARCH_DEPTH 2 +#define MIN_SEARCH_DEPTH 1 + #define MAX_DEPTH 3 /*!< smallest CU is LCU_WIDTH>>MAX_DEPTH */ #define MIN_SIZE 3 /*!< log2_min_coding_block_size */ diff --git a/src/intra.c b/src/intra.c index 88af804e..3bbaefb0 100644 --- a/src/intra.c +++ b/src/intra.c @@ -23,6 +23,7 @@ const uint8_t intraHorVerDistThres[4] = {0,7,1,0}; + /*! \brief Set intrablock mode (and init typedata) \param pic picture to use @@ -117,13 +118,13 @@ int8_t intra_getDirLumaPredictor(picture* pic,uint32_t xCtb, uint32_t yCtb, uint int32_t CUpos = yCtb*width_in_SCU+xCtb; // Left PU predictor - if(xCtb && pic->CU[depth][CUpos-1].type == CU_INTRA) + if(xCtb && pic->CU[depth][CUpos-1].type == CU_INTRA && pic->CU[depth][CUpos-1].coded) { iLeftIntraDir = pic->CU[depth][CUpos-1].intra.mode; } // Top PU predictor - if(yCtb && ((yCtb*(LCU_WIDTH>>MAX_DEPTH))%LCU_WIDTH)!=0 && pic->CU[depth][CUpos-width_in_SCU].type == CU_INTRA) + if(yCtb && ((yCtb*(LCU_WIDTH>>MAX_DEPTH))%LCU_WIDTH)!=0 && pic->CU[depth][CUpos-width_in_SCU].type == CU_INTRA && pic->CU[depth][CUpos-width_in_SCU].coded) { iAboveIntraDir = pic->CU[depth][CUpos-width_in_SCU].intra.mode; } @@ -156,7 +157,7 @@ int8_t intra_getDirLumaPredictor(picture* pic,uint32_t xCtb, uint32_t yCtb, uint { preds[2] = (iLeftIntraDir+iAboveIntraDir)<2? 26 : 1; } - } + } return 1; } @@ -398,7 +399,7 @@ void intra_buildReferenceBorder(picture* pic, int32_t xCtb, int32_t yCtb,int16_t for(leftColumn = 1; leftColumn < outwidth/SCU_width; leftColumn++) { /* If over the picture height or block not yet coded, stop */ - if((yCtb+leftColumn)*SCU_width >= srcHeight || pic->CU[0][xCtb-1+(yCtb+leftColumn)*width_in_SCU].type == CU_NOTSET) + if((yCtb+leftColumn)*SCU_width >= srcHeight || !pic->CU[0][xCtb-1+(yCtb+leftColumn)*width_in_SCU].coded) { break; } @@ -434,7 +435,7 @@ void intra_buildReferenceBorder(picture* pic, int32_t xCtb, int32_t yCtb,int16_t /* Loop top SCU's */ for(topRow = 1; topRow < outwidth/SCU_width; topRow++) { - if((xCtb+topRow)*SCU_width >= srcWidth || pic->CU[0][xCtb+topRow+(yCtb-1)*width_in_SCU].type == CU_NOTSET) + if((xCtb+topRow)*SCU_width >= srcWidth || !pic->CU[0][xCtb+topRow+(yCtb-1)*width_in_SCU].coded) { break; } @@ -496,8 +497,8 @@ void intra_getAngularPred(int16_t* pSrc, int32_t srcStride, int16_t* rpDst, int3 int32_t signAng = intraPredAngle < 0 ? -1 : 1; // Set bitshifts and scale the angle parameter to block size - int32_t angTable[9] = {0, 2, 5, 9, 13, 17, 21, 26, 32}; - int32_t invAngTable[9] = {0, 4096, 1638, 910, 630, 482, 390, 315, 256}; // (256 * 32) / Angle + const int32_t angTable[9] = {0, 2, 5, 9, 13, 17, 21, 26, 32}; + const int32_t invAngTable[9] = {0, 4096, 1638, 910, 630, 482, 390, 315, 256}; // (256 * 32) / Angle int32_t invAngle = invAngTable[absAng]; // Do angular predictions @@ -516,11 +517,14 @@ void intra_getAngularPred(int16_t* pSrc, int32_t srcStride, int16_t* rpDst, int3 for (k=0;k> 1) ); + pDst[k*dstStride] = (pDst[k*dstStride] + (( refSide[k+1] - refSide[0] ) >> 1)) & (1<> 1) ); } } } diff --git a/src/picture.c b/src/picture.c index d97bdce7..aa0a0a59 100644 --- a/src/picture.c +++ b/src/picture.c @@ -19,6 +19,38 @@ #include "global.h" #include "picture.h" + + +/*! + \brief Set block coded status + \param pic picture to use + \param xCtb x CU position (smallest CU) + \param yCtb y CU position (smallest CU) + \param depth current CU depth + \param mode mode to set + \returns Void +*/ +void picture_setBlockCoded(picture* pic,uint32_t xCtb, uint32_t yCtb, uint8_t depth, int8_t coded) +{ + uint32_t x,y,d; + //Width in smallest CU + int width_in_SCU = pic->width/(LCU_WIDTH>>MAX_DEPTH); + int block_SCU_width = (LCU_WIDTH>>depth)/(LCU_WIDTH>>MAX_DEPTH); + for(y = yCtb; y < yCtb+block_SCU_width; y++) + { + int CUpos = y*width_in_SCU; + for(x = xCtb; x < xCtb+block_SCU_width; x++) + { + for(d = 0; d < MAX_DEPTH+1; d++) + { + pic->CU[d][CUpos+x].coded = coded; + } + } + } +} + + + /** \defgroup picture_group Picture handler group * This group contains all picture related stuff * @{ diff --git a/src/picture.h b/src/picture.h index 7a3fa8a3..a422ee8f 100644 --- a/src/picture.h +++ b/src/picture.h @@ -57,7 +57,7 @@ typedef struct \brief Struct for CU info */ typedef struct -{ +{ uint8_t type; int8_t coded; CU_info_intra intra; @@ -104,6 +104,8 @@ int picture_list_destroy(picture_list *list); int picture_destroy(picture *pic); +void picture_setBlockCoded(picture* pic,uint32_t xCtb, uint32_t yCtb, uint8_t depth, int8_t coded); + enum { SLICE_P = 0, SLICE_B = 1, SLICE_I = 2 }; diff --git a/src/search.c b/src/search.c index 6475d982..97858690 100644 --- a/src/search.c +++ b/src/search.c @@ -24,8 +24,6 @@ #include "filter.h" #include "search.h" - - void search_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, uint8_t depth) { uint8_t border_x = ((encoder->in.width)<( xCtb*(LCU_WIDTH>>MAX_DEPTH) + (LCU_WIDTH>>depth) ))?1:0; @@ -33,7 +31,7 @@ void search_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, uint8_t d uint8_t border = border_x | border_y; /*!< are we in any border CU */ CU_info *cur_CU = &encoder->in.cur_pic.CU[depth][(xCtb>>(MAX_DEPTH-depth))+(yCtb>>(MAX_DEPTH-depth))*(encoder->in.width_in_LCU<intra.cost = (uint32_t)-1; + cur_CU->intra.cost = 0xffffffff; /* Force split on border */ if(depth != MAX_DEPTH) @@ -60,7 +58,6 @@ void search_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, uint8_t d return; } } - if(encoder->in.cur_pic.slicetype != SLICE_I) { @@ -69,33 +66,45 @@ void search_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, uint8_t d } /* INTRA SEARCH */ - if(depth > 0) + if(depth >= MIN_SEARCH_DEPTH) { + int x,y; uint8_t *base = &encoder->in.cur_pic.yData[xCtb*(LCU_WIDTH>>(MAX_DEPTH)) + (yCtb*(LCU_WIDTH>>(MAX_DEPTH))) *encoder->in.width]; - uint8_t *baseU = &encoder->in.cur_pic.uData[xCtb*(LCU_WIDTH>>(MAX_DEPTH+1)) + (yCtb*(LCU_WIDTH>>(MAX_DEPTH+1)))*(encoder->in.width>>1)]; - uint8_t *baseV = &encoder->in.cur_pic.vData[xCtb*(LCU_WIDTH>>(MAX_DEPTH+1)) + (yCtb*(LCU_WIDTH>>(MAX_DEPTH+1)))*(encoder->in.width>>1)]; uint32_t width = LCU_WIDTH>>depth; /* INTRAPREDICTION */ /* ToDo: split to a function */ int16_t pred[LCU_WIDTH*LCU_WIDTH]; - int16_t predU[LCU_WIDTH*LCU_WIDTH>>2]; - int16_t predV[LCU_WIDTH*LCU_WIDTH>>2]; - int16_t rec[(LCU_WIDTH*2+8)*(LCU_WIDTH*2+8)]; - int16_t *recShift = &rec[(LCU_WIDTH>>(depth))*2+8+1]; - int16_t *recShiftU = &rec[(LCU_WIDTH>>(depth+1))*2+8+1]; - uint8_t *recbase = &encoder->in.cur_pic.yRecData[xCtb*(LCU_WIDTH>>(MAX_DEPTH)) + (yCtb*(LCU_WIDTH>>(MAX_DEPTH))) *encoder->in.width]; - uint8_t *recbaseU = &encoder->in.cur_pic.uRecData[xCtb*(LCU_WIDTH>>(MAX_DEPTH+1)) + (yCtb*(LCU_WIDTH>>(MAX_DEPTH+1)))*(encoder->in.width>>1)]; - uint8_t *recbaseV = &encoder->in.cur_pic.vRecData[xCtb*(LCU_WIDTH>>(MAX_DEPTH+1)) + (yCtb*(LCU_WIDTH>>(MAX_DEPTH+1)))*(encoder->in.width>>1)]; + int16_t *recShift = &rec[(LCU_WIDTH>>(depth))*2+8+1]; + + /* Use original pic as reference border for prediction */ + for(x = 0; x < LCU_WIDTH*2 && x < encoder->in.width-xCtb*(LCU_WIDTH>>(MAX_DEPTH)); x++) + { + rec[x] = base[x]; + } + for(;x < LCU_WIDTH*2; x++) + { + rec[x] = 1<<(g_bitDepth-1); + } + + for(y = 1; y < LCU_WIDTH*2 && y < encoder->in.height-yCtb*(LCU_WIDTH>>(MAX_DEPTH)); y++) + { + rec[y*((LCU_WIDTH>>(depth))*2+8)] = base[y*encoder->in.width]; + } + for(;y < LCU_WIDTH*2; y++) + { + rec[y*((LCU_WIDTH>>(depth))*2+8)] = 1<<(g_bitDepth-1); + } /* Build reconstructed block to use in prediction with extrapolated borders */ - intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth))*2+8, rec, (LCU_WIDTH>>(depth))*2+8, 0); + //intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth))*2+8, rec, (LCU_WIDTH>>(depth))*2+8, 0); + cur_CU->intra.mode = (uint8_t)intra_prediction(encoder->in.cur_pic.yData,encoder->in.width,recShift,(LCU_WIDTH>>(depth))*2+8,xCtb*(LCU_WIDTH>>(MAX_DEPTH)),yCtb*(LCU_WIDTH>>(MAX_DEPTH)),width,pred,width,&cur_CU->intra.cost); } /* Split and search to max_depth */ - if(depth != 2) + if(depth != MAX_SEARCH_DEPTH) { /* Split blocks and remember to change x and y block positions */ uint8_t change = 1<<(MAX_DEPTH-1-depth); @@ -112,28 +121,36 @@ uint32_t search_best_mode(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, uint32_t bestCost = cur_CU->intra.cost; int8_t bestMode = cur_CU->type; uint32_t cost = 0; + uint32_t lambdaCost = g_lambda_cost[encoder->QP]<<9;//<<16; /* Split and search to max_depth */ - if(depth != MAX_DEPTH) + if(depth != MAX_SEARCH_DEPTH) { /* Split blocks and remember to change x and y block positions */ - uint8_t change = 1<<(MAX_DEPTH-1-depth); - cost = 4*g_lambda_cost[encoder->QP]; - cost += search_best_mode(encoder,xCtb,yCtb,depth+1); + uint8_t change = 1<<(MAX_DEPTH-1-depth); + cost = search_best_mode(encoder,xCtb,yCtb,depth+1); cost += search_best_mode(encoder,xCtb+change,yCtb,depth+1); - cost += search_best_mode(encoder,xCtb,yCtb+change,depth+1); + cost += search_best_mode(encoder,xCtb,yCtb+change,depth+1); cost += search_best_mode(encoder,xCtb+change,yCtb+change,depth+1); - if(cost != 0 && cost < bestCost) + /* We split if the cost is better */ + if(cost != 0 && cost+lambdaCost < bestCost) { cur_CU->split = 1; - bestCost = cost; + bestCost = cost+lambdaCost; } + /* Else, dont split and recursively set block mode */ else { cur_CU->split = 0; + intra_setBlockMode(&encoder->in.cur_pic,xCtb,yCtb,depth,cur_CU->intra.mode); } } + else + { + cur_CU->split = 0; + intra_setBlockMode(&encoder->in.cur_pic,xCtb,yCtb,depth,cur_CU->intra.mode); + } return bestCost; }