diff --git a/build/VS2010/HEVC_encoder.vcxproj b/build/VS2010/HEVC_encoder.vcxproj index b51c3dbb..11d8e4b1 100644 --- a/build/VS2010/HEVC_encoder.vcxproj +++ b/build/VS2010/HEVC_encoder.vcxproj @@ -100,7 +100,7 @@ Level3 Disabled - WIN32;X64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + WIN32;WIN64;X64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) CompileAsC AssemblyAndSourceCode diff --git a/src/encmain.c b/src/encmain.c index 7208d407..3eddee1b 100644 --- a/src/encmain.c +++ b/src/encmain.c @@ -68,6 +68,8 @@ FILE *input = NULL; FILE *output = NULL; double PSNR[3] = { 0.0, 0.0, 0.0 }; + fpos_t curpos = 0; + fpos_t lastpos = 0; #ifdef _DEBUG FILE *recout = fopen("encrec.yuv","wb"); #endif @@ -149,7 +151,7 @@ encoder->stream->buffer_pos = 0; encoder->stream->output = 0; /* Alloc 1MB */ - bitstream_alloc(encoder->stream, 1024*1024); + bitstream_alloc(encoder->stream, 1024*2*cfg->width); /* Config pointer to encoder struct */ encoder->cfg = cfg; @@ -161,10 +163,10 @@ /* input init (ToDo: read from commandline / config) */ encoder->bitdepth = 8; encoder->frame = 0; - encoder->QP = 36; + encoder->QP = 32; encoder->in.video_format = FORMAT_420; /* deblocking */ - encoder->deblock_enable = 1; + encoder->deblock_enable = 1; encoder->betaOffsetdiv2 = 0; encoder->tcOffsetdiv2 = 0; /* SAO */ @@ -197,12 +199,17 @@ fwrite(encoder->in.cur_pic.vRecData,cfg->width*cfg->height>>2,1,recout); #endif { + int32_t diff; double temp_PSNR[3]; + fgetpos(output,&curpos); + diff = (int32_t)(curpos-lastpos); + lastpos = curpos; + temp_PSNR[0] = imagePSNR(encoder->in.cur_pic.yData,encoder->in.cur_pic.yRecData,cfg->width,cfg->height); temp_PSNR[1] = imagePSNR(encoder->in.cur_pic.uData,encoder->in.cur_pic.uRecData,cfg->width>>1,cfg->height>>1); temp_PSNR[2] = imagePSNR(encoder->in.cur_pic.vData,encoder->in.cur_pic.vRecData,cfg->width>>1,cfg->height>>1); - printf("[%d] %c-frame PSNR: %2.4f %2.4f %2.4f\n", encoder->frame, "BPI"[encoder->in.cur_pic.slicetype%3], + printf("POC %4d (%c-frame) %10d bits PSNR: %2.4f %2.4f %2.4f\n", encoder->frame, "BPI"[encoder->in.cur_pic.slicetype%3],diff<<3, temp_PSNR[0],temp_PSNR[1],temp_PSNR[2]); PSNR[0]+=temp_PSNR[0]; PSNR[1]+=temp_PSNR[1]; @@ -211,8 +218,9 @@ encoder->frame++; } /* Coding finished */ + fgetpos(output,&curpos); - printf(" Processed %d frames, AVG PSNR: %2.4f %2.4f %2.4f\n", encoder->frame,PSNR[0]/encoder->frame,PSNR[1]/encoder->frame,PSNR[2]/encoder->frame); + printf(" Processed %d frames, %10d bits AVG PSNR: %2.4f %2.4f %2.4f\n", encoder->frame,((int32_t)curpos)<<3,PSNR[0]/encoder->frame,PSNR[1]/encoder->frame,PSNR[2]/encoder->frame); fclose(input); fclose(output); diff --git a/src/encoder.c b/src/encoder.c index bb89656f..8a843e8c 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -32,6 +32,7 @@ #include "search.h" int16_t g_lambda_cost[55]; +uint32_t* g_auiSigLastScan[3][7]; void initSigLastScan(uint32_t* pBuffD, uint32_t* pBuffH, uint32_t* pBuffV, int32_t iWidth, int32_t iHeight) { @@ -229,6 +230,8 @@ void init_encoder_input(encoder_input* input,FILE* inputfile, int32_t width, int input->cur_pic.width = width; input->cur_pic.height = height; + input->cur_pic.width_in_LCU = input->width_in_LCU; + input->cur_pic.height_in_LCU = input->height_in_LCU; input->cur_pic.referenced = 0; /* Allocate buffers */ input->cur_pic.yData = (uint8_t *)malloc(width*height); @@ -859,7 +862,7 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui uint32_t width = LCU_WIDTH>>depth; /* INTRAPREDICTION VARIABLES */ - int16_t pred[LCU_WIDTH*LCU_WIDTH]; + int16_t pred[LCU_WIDTH*LCU_WIDTH+1]; int16_t predU[LCU_WIDTH*LCU_WIDTH>>2]; int16_t predV[LCU_WIDTH*LCU_WIDTH>>2]; @@ -867,11 +870,22 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui uint8_t *recbaseU = &encoder->in.cur_pic.uRecData[xCtb*(LCU_WIDTH>>(MAX_DEPTH+1)) + (yCtb*(LCU_WIDTH>>(MAX_DEPTH+1)))*(encoder->in.width>>1)]; uint8_t *recbaseV = &encoder->in.cur_pic.vRecData[xCtb*(LCU_WIDTH>>(MAX_DEPTH+1)) + (yCtb*(LCU_WIDTH>>(MAX_DEPTH+1)))*(encoder->in.width>>1)]; + + /* SEARCH BEST INTRA MODE (AGAIN) */ + + int16_t rec[(LCU_WIDTH*2+8)*(LCU_WIDTH*2+8)]; + int16_t *recShift = &rec[(LCU_WIDTH>>(depth))*2+8+1]; + intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth))*2+8, rec, (LCU_WIDTH>>(depth))*2+8, 0); + cur_CU->intra.mode = (int8_t)intra_prediction(encoder->in.cur_pic.yData,encoder->in.width,recShift,(LCU_WIDTH>>(depth))*2+8,xCtb*(LCU_WIDTH>>(MAX_DEPTH)),yCtb*(LCU_WIDTH>>(MAX_DEPTH)),width,pred,width,&cur_CU->intra.cost); + intraPredMode = cur_CU->intra.mode; + intra_setBlockMode(&encoder->in.cur_pic,xCtb, yCtb, depth, intraPredMode); + #if ENABLE_PCM == 1 /* Code must start after variable initialization */ cabac_encodeBinTrm(&cabac, 0); /* IPCMFlag == 0 */ #endif - + + /* PREDINFO CODING If intra prediction mode is found from the predictors, @@ -1142,6 +1156,8 @@ void encode_transform_tree(encoder_control* encoder,transform_info* ti,uint8_t d int16_t rec[(LCU_WIDTH*2+8)*(LCU_WIDTH*2+8)]; int16_t *recShift = &rec[(LCU_WIDTH>>(depth))*2+8+1]; int16_t *recShiftU = &rec[(LCU_WIDTH>>(depth+1))*2+8+1]; + + uint32_t ac_sum = 0; /* Build reconstructed block to use in prediction with extrapolated borders */ intra_buildReferenceBorder(&encoder->in.cur_pic, ti->xCtb, ti->yCtb,(LCU_WIDTH>>(depth))*2+8, rec, (LCU_WIDTH>>(depth))*2+8, 0); @@ -1168,17 +1184,19 @@ void encode_transform_tree(encoder_control* encoder,transform_info* ti,uint8_t d /* Get residual by subtracting prediction */ i = 0; + ac_sum = 0; for(y = 0; y < LCU_WIDTH>>depth; y++) { for(x = 0; x < LCU_WIDTH>>depth; x++) { - block[i++]=((int16_t)base[x+y*base_stride])-pred[x+y*pred_stride]; + block[i]=((int16_t)base[x+y*base_stride])-pred[x+y*pred_stride]; + i++; } } /* Transform and quant residual to coeffs */ transform2d(block,pre_quant_coeff,width,0); - quant(encoder,pre_quant_coeff,coeff,width, width,0, 0, SCAN_DIAG); + quant(encoder,pre_quant_coeff,coeff,width, width,&ac_sum, 0, SCAN_DIAG); /* Check for non-zero coeffs */ for(i = 0; i < width*width; i++) @@ -1226,15 +1244,17 @@ void encode_transform_tree(encoder_control* encoder,transform_info* ti,uint8_t d { /* U */ i = 0; + ac_sum = 0; for(y = 0; y < LCU_WIDTH>>(depth+1); y++) { for(x = 0; x < LCU_WIDTH>>(depth+1); x++) { - block[i++]=((int16_t)baseU[x+y*(base_stride>>1)])-predU[x+y*(pred_stride>>1)]; + block[i]=((int16_t)baseU[x+y*(base_stride>>1)])-predU[x+y*(pred_stride>>1)]; + i++; } } transform2d(block,pre_quant_coeff,LCU_WIDTH>>(depth+1),65535); - quant(encoder,pre_quant_coeff,coeffU, width>>1, width>>1, 0,2,SCAN_DIAG); + quant(encoder,pre_quant_coeff,coeffU, width>>1, width>>1, &ac_sum,2,SCAN_DIAG); for(i = 0; i < width*width>>2; i++) { if(coeffU[i] != 0) @@ -1247,15 +1267,17 @@ void encode_transform_tree(encoder_control* encoder,transform_info* ti,uint8_t d /* V */ i = 0; + ac_sum = 0; for(y = 0; y < LCU_WIDTH>>(depth+1); y++) { for(x = 0; x < LCU_WIDTH>>(depth+1); x++) { - block[i++]=((int16_t)baseV[x+y*(base_stride>>1)])-predV[x+y*(pred_stride>>1)]; + block[i]=((int16_t)baseV[x+y*(base_stride>>1)])-predV[x+y*(pred_stride>>1)]; + i++; } } transform2d(block,pre_quant_coeff,LCU_WIDTH>>(depth+1),65535); - quant(encoder,pre_quant_coeff,coeffV, width>>1, width>>1, 0,3,SCAN_DIAG); + quant(encoder,pre_quant_coeff,coeffV, width>>1, width>>1, &ac_sum,3,SCAN_DIAG); for(i = 0; i < width*width>>2; i++) { if(coeffV[i] != 0) diff --git a/src/encoder.h b/src/encoder.h index b979c84a..10add525 100644 --- a/src/encoder.h +++ b/src/encoder.h @@ -33,12 +33,13 @@ enum { FORMAT_400 = 0, FORMAT_420, FORMAT_422, FORMAT_444 }; typedef struct { FILE* file; - int32_t width; - int32_t height; - int32_t height_in_LCU; - int32_t width_in_LCU; + int32_t width; /*!< \brief input picture width */ + int32_t height; /*!< \brief input picture height */ + int32_t height_in_LCU; /*!< \brief input picture width in LCU*/ + int32_t width_in_LCU; /*!< \brief input picture height in LCU */ picture cur_pic; int8_t video_format; + int8_t bitdepth; /*!< \brief input bit depth (8,10) */ } encoder_input; /* Encoder control options, the main struct */ @@ -55,10 +56,10 @@ typedef struct int8_t bitdepth; /* Filtering */ - int8_t deblock_enable; - int8_t sao_enable; - int8_t betaOffsetdiv2; - int8_t tcOffsetdiv2; + int8_t deblock_enable; /*!< \brief Flag to enable deblocking filter */ + int8_t sao_enable; /*!< \brief Flag to enable sample adaptive offset filter */ + int8_t betaOffsetdiv2; /*!< \brief (deblocking) beta offset (div 2), range -6...6 */ + int8_t tcOffsetdiv2; /*!< \brief (deblocking)tc offset (div 2), range -6...6 */ } encoder_control; typedef struct @@ -110,7 +111,7 @@ void encode_transform_tree(encoder_control* encoder,transform_info* ti,uint8_t d void encode_transform_coeff(encoder_control* encoder,transform_info* ti,int8_t depth, int8_t trDepth); extern int16_t g_lambda_cost[55]; -static uint32_t* g_auiSigLastScan[3][7]; +extern uint32_t* g_auiSigLastScan[3][7]; int8_t g_aucConvertToBit[LCU_WIDTH+1]; static int8_t g_bitDepth = 8; static int8_t g_uiBitIncrement = 0; diff --git a/src/global.h b/src/global.h index 8c097278..f05ee8da 100644 --- a/src/global.h +++ b/src/global.h @@ -17,13 +17,13 @@ #define LCU_WIDTH 64 /*!< Largest Coding Unit */ #define MAX_SEARCH_DEPTH 3 -#define MIN_SEARCH_DEPTH 2 +#define MIN_SEARCH_DEPTH 1 #define MAX_DEPTH 3 /*!< smallest CU is LCU_WIDTH>>MAX_DEPTH */ #define MIN_SIZE 3 /*!< log2_min_coding_block_size */ #define ENABLE_PCM 0 /*!< Setting to 1 will enable using PCM blocks (current intra-search does not consider PCM) */ -#define ENABLE_SIGN_HIDING 0 /*!< NEED QUANT CHANGES! */ +#define ENABLE_SIGN_HIDING 1 /*!< NEED QUANT CHANGES! */ /* END OF CONFIG VARIABLES */ diff --git a/src/inter.c b/src/inter.c index d61eccdc..8999262b 100644 --- a/src/inter.c +++ b/src/inter.c @@ -34,7 +34,7 @@ void inter_setBlockMode(picture* pic,uint32_t xCtb, uint32_t yCtb, uint8_t depth { uint32_t x,y,d; /* Width in smallest CU */ - int width_in_SCU = pic->width/(LCU_WIDTH>>MAX_DEPTH); + int width_in_SCU = pic->width_in_LCU<>depth)/(LCU_WIDTH>>MAX_DEPTH); for(y = yCtb; y < yCtb+block_SCU_width; y++) { diff --git a/src/intra.c b/src/intra.c index 46aa014b..2d07c464 100644 --- a/src/intra.c +++ b/src/intra.c @@ -37,7 +37,7 @@ void intra_setBlockMode(picture* pic,uint32_t xCtb, uint32_t yCtb, uint8_t depth { uint32_t x,y,d; /* Width in smallest CU */ - int width_in_SCU = pic->width/(LCU_WIDTH>>MAX_DEPTH); + int width_in_SCU = pic->width_in_LCU<>depth)/(LCU_WIDTH>>MAX_DEPTH); for(y = yCtb; y < yCtb+block_SCU_width; y++) { @@ -65,7 +65,7 @@ void intra_setBlockMode(picture* pic,uint32_t xCtb, uint32_t yCtb, uint8_t depth int8_t intra_getBlockMode(picture* pic,uint32_t xCtb, uint32_t yCtb, uint8_t depth) { //Width in smallest CU - int width_in_SCU = pic->width/(LCU_WIDTH>>MAX_DEPTH); + int width_in_SCU = pic->width_in_LCU<CU[depth][CUpos].type == CU_INTRA) { @@ -115,7 +115,7 @@ int8_t intra_getDirLumaPredictor(picture* pic,uint32_t xCtb, uint32_t yCtb, uint { int32_t iLeftIntraDir = 1; //DC_IDX int32_t iAboveIntraDir = 1; //DC_IDX - int32_t width_in_SCU = pic->width/(LCU_WIDTH>>MAX_DEPTH); + int width_in_SCU = pic->width_in_LCU< threshold) { intra_getAngularPred(recFiltered,recstride,pred, width,width,width,i, xpos?1:0, ypos?1:0, filter); - CHECK_FOR_BEST(i,distance*width); /* Favor modes closer to 26 and 10 */ + CHECK_FOR_BEST(i,0); /* Favor modes closer to 26 and 10 */ } } @@ -399,7 +399,7 @@ void intra_buildReferenceBorder(picture* pic, int32_t xCtb, int32_t yCtb,int16_t uint8_t* srcPic = (!chroma)?pic->yRecData: ((chroma==1)?pic->uRecData: pic->vRecData); /*!< input picture pointer */ int16_t SCU_width = LCU_WIDTH>>(MAX_DEPTH+(chroma?1:0)); /*!< Smallest Coding Unit width */ uint8_t* srcShifted = &srcPic[xCtb*SCU_width+(yCtb*SCU_width)*srcWidth]; /*!< input picture pointer shifted to start from the left-top corner of the current block */ - int32_t width_in_SCU = srcWidth/SCU_width; /*!< picture width in SCU */ + int width_in_SCU = pic->width_in_LCU<width/(LCU_WIDTH>>MAX_DEPTH); + int width_in_SCU = pic->width_in_LCU<>depth)/(LCU_WIDTH>>MAX_DEPTH); for(y = yCtb; y < yCtb+block_SCU_width; y++) { @@ -46,7 +52,6 @@ void picture_setBlockSplit(picture* pic,uint32_t xCtb, uint32_t yCtb, uint8_t de } } - /*! \brief Set block coded status \param pic picture to use @@ -60,7 +65,7 @@ void picture_setBlockCoded(picture* pic,uint32_t xCtb, uint32_t yCtb, uint8_t de { uint32_t x,y,d; //Width in smallest CU - int width_in_SCU = pic->width/(LCU_WIDTH>>MAX_DEPTH); + int width_in_SCU = pic->width_in_LCU<>depth)/(LCU_WIDTH>>MAX_DEPTH); for(y = yCtb; y < yCtb+block_SCU_width; y++) { @@ -76,11 +81,6 @@ void picture_setBlockCoded(picture* pic,uint32_t xCtb, uint32_t yCtb, uint8_t de } -/** \defgroup picture_group Picture handler group - * This group contains all picture related stuff - * @{ - */ - /*! \brief Allocate memory for picture_list @@ -188,10 +188,10 @@ void picture_setBlockCoded(picture* pic,uint32_t xCtb, uint32_t yCtb, uint8_t de //Calculates image PSNR value double imagePSNR(uint8_t *frame1, uint8_t *frame2, int32_t x, int32_t y) { - int64_t MSE=0; - int64_t MSEtemp=0; + uint64_t MSE=0; + int32_t MSEtemp=0; double psnr=0.0; - double pixels = x*y; + int32_t pixels = x*y; int32_t index; //Calculate MSE @@ -205,7 +205,7 @@ double imagePSNR(uint8_t *frame1, uint8_t *frame2, int32_t x, int32_t y) if(MSE==0) return 99.0; //The PSNR - psnr=10*log10(PSNRMAX/((double)MSE/pixels)); + psnr=10*log10((pixels*PSNRMAX)/((double)MSE)); //Thats it. return psnr; @@ -327,6 +327,8 @@ uint32_t SAD64x64(int16_t *block,uint32_t stride1,int16_t* block2, uint32_t stri { int32_t i,ii,y,x; uint32_t sum=0; + /* + for(y=0;y<64;y++) { i = y*stride1; @@ -336,6 +338,17 @@ uint32_t SAD64x64(int16_t *block,uint32_t stride1,int16_t* block2, uint32_t stri sum+=abs((int16_t)block[i+x]-(int16_t)block2[ii+x]); } + }*/ + int32_t iOffsetOrg = stride1<<3; + int32_t iOffsetCur = stride2<<3; + for ( y=0; y<64; y+= 8 ) + { + for ( x=0; x<64; x+= 8 ) + { + sum += Hadamard8x8( &block[x], stride1,&block2[x], stride2 ); + } + block += iOffsetOrg; + block2 += iOffsetCur; } return sum; @@ -344,7 +357,7 @@ uint32_t SAD64x64(int16_t *block,uint32_t stride1,int16_t* block2, uint32_t stri uint32_t SAD32x32(int16_t *block,uint32_t stride1,int16_t* block2, uint32_t stride2) { int32_t y; - /* + int32_t x,sum = 0; int32_t iOffsetOrg = stride1<<3; int32_t iOffsetCur = stride2<<3; @@ -352,13 +365,13 @@ uint32_t SAD32x32(int16_t *block,uint32_t stride1,int16_t* block2, uint32_t stri { for ( x=0; x<32; x+= 8 ) { - sum += Hadamard8x8( &block[x], &block2[x], stride1, stride2 ); + sum += Hadamard8x8( &block[x], stride1,&block2[x], stride2 ); } block += iOffsetOrg; block2 += iOffsetCur; } - */ + /* uint32_t sum=0; int32_t i,ii; for(y=0;y<32;y++) @@ -398,7 +411,7 @@ uint32_t SAD32x32(int16_t *block,uint32_t stride1,int16_t* block2, uint32_t stri sum+=abs((int32_t)block[i+30]-(int32_t)block2[ii+30]); sum+=abs((int32_t)block[i+31]-(int32_t)block2[ii+31]); } - + */ return sum; } @@ -406,22 +419,22 @@ uint32_t SAD32x32(int16_t *block,uint32_t stride1,int16_t* block2, uint32_t stri uint32_t SAD16x16(int16_t *block,uint32_t stride1,int16_t* block2, uint32_t stride2) { int32_t y; - /* + int32_t x,sum = 0; int32_t iOffsetOrg = stride1<<3; int32_t iOffsetCur = stride2<<3; - for ( y=0; y<16; y+= 8 ) + for ( y=0; y<16; y+= 8 ) + { + for ( x=0; x<16; x+= 8 ) { - for ( x=0; x<16; x+= 8 ) - { - sum += Hadamard8x8( &block[x], &block2[x], stride1, stride2 ); - } - block += iOffsetOrg; - block2 += iOffsetCur; + sum += Hadamard8x8( &block[x], stride1,&block2[x], stride2 ); } + block += iOffsetOrg; + block2 += iOffsetCur; + } - */ + /* uint32_t sum=0; int32_t i,ii; for(y=0;y<16;y++) @@ -445,6 +458,7 @@ uint32_t SAD16x16(int16_t *block,uint32_t stride1,int16_t* block2, uint32_t stri sum+=abs((int32_t)block[i+14]-(int32_t)block2[ii+14]); sum+=abs((int32_t)block[i+15]-(int32_t)block2[ii+15]); } + */ return sum; } @@ -453,6 +467,9 @@ uint32_t SAD8x8(int16_t *block,uint32_t stride1,int16_t* block2, uint32_t stride { int32_t i,ii,y; uint32_t sum=0; + sum = Hadamard8x8( block, stride1,block2, stride2 ); + /* + for(y=0;y<8;y++) { i = y*stride1; @@ -466,6 +483,7 @@ uint32_t SAD8x8(int16_t *block,uint32_t stride1,int16_t* block2, uint32_t stride sum+=abs((int32_t)block[i+6]-(int32_t)block2[ii+6]); sum+=abs((int32_t)block[i+7]-(int32_t)block2[ii+7]); } + */ return sum; } diff --git a/src/picture.h b/src/picture.h index e8637952..9d4c46aa 100644 --- a/src/picture.h +++ b/src/picture.h @@ -79,8 +79,10 @@ typedef struct uint8_t* uRecData; /*!< \brief Pointer to reconstructed U-data */ uint8_t* vRecData; /*!< \brief Pointer to reconstructed V-data */ - int width; /*!< \brief Picture width */ - int height; /*!< \brief Picture height */ + int32_t width; /*!< \brief Picture width */ + int32_t height; /*!< \brief Picture height */ + int32_t height_in_LCU; /*!< \brief input picture width in LCU*/ + int32_t width_in_LCU; /*!< \brief input picture height in LCU */ uint8_t referenced; /*!< \brief Is this picture referenced */ CU_info** CU; /*!< \brief info for each CU at each depth */ uint8_t type; diff --git a/src/search.c b/src/search.c index 111b64e3..7b015cd0 100644 --- a/src/search.c +++ b/src/search.c @@ -36,7 +36,7 @@ void search_buildReferenceBorder(picture* pic, int32_t xCtb, int32_t yCtb,int16_ uint8_t* srcPic = (!chroma)?pic->yData: ((chroma==1)?pic->uData: pic->vData); /*!< input picture pointer */ int16_t SCU_width = LCU_WIDTH>>(MAX_DEPTH+(chroma?1:0)); /*!< Smallest Coding Unit width */ uint8_t* srcShifted = &srcPic[xCtb*SCU_width+(yCtb*SCU_width)*srcWidth]; /*!< input picture pointer shifted to start from the left-top corner of the current block */ - int32_t width_in_SCU = srcWidth/SCU_width; /*!< picture width in SCU */ + int32_t width_in_SCU = pic->width_in_LCU<in.cur_pic.CU[depth][xCtb+yCtb*(encoder->in.width_in_LCU<intra.cost; uint32_t cost = 0; - uint32_t lambdaCost = 4*g_lambda_cost[encoder->QP]<<5; + uint32_t lambdaCost = 4*g_lambda_cost[encoder->QP]<<4;//<<5; /* Split and search to max_depth */ if(depth != MAX_SEARCH_DEPTH) diff --git a/src/transform.c b/src/transform.c index 79452309..be9cc620 100644 --- a/src/transform.c +++ b/src/transform.c @@ -781,16 +781,14 @@ void quant(encoder_control* encoder, int16_t* pSrc, int16_t* pDes, int32_t iWidt { int16_t* piCoef = pSrc; int16_t* piQCoef = pDes; - uint32_t* scan; + int8_t useRDOQForTransformSkip = 0; uint32_t log2BlockSize = g_aucConvertToBit[ iWidth ] + 2; - + uint32_t* scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ]; //uint32_t scanIdx = SCAN_DIAG; - scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ]; - { - int32_t deltaU[LCU_WIDTH*LCU_WIDTH] ; + int32_t deltaU[LCU_WIDTH*LCU_WIDTH>>2]; int32_t iQpBase = encoder->QP; int32_t qpScaled; @@ -802,7 +800,7 @@ void quant(encoder_control* encoder, int16_t* pSrc, int16_t* pDes, int32_t iWidt } else { - qpScaled = MAX( -qpBDOffset, MIN(57, iQpBase)); + qpScaled = CLIP(-qpBDOffset, 57, iQpBase); if(qpScaled < 0) { qpScaled = qpScaled + qpBDOffset; @@ -830,22 +828,140 @@ void quant(encoder_control* encoder, int16_t* pSrc, int16_t* pDes, int32_t iWidt int32_t iAdd = ((encoder->in.cur_pic.slicetype == SLICE_I) ? 171 : 85) << (iQBits-9); int32_t qBits8 = iQBits-8; - for( n = 0; n < iWidth*iHeight; n++ ) + for(n = 0; n < iWidth*iHeight; n++) { int32_t iLevel; int32_t iSign; - int64_t tmpLevel; + //int64_t tmpLevel; iLevel = piCoef[n]; iSign = (iLevel < 0 ? -1: 1); - tmpLevel = (int64_t)abs(iLevel) * piQuantCoeff[n]; - iLevel = (int32_t)((tmpLevel + iAdd ) >> iQBits); - deltaU[n] = (int32_t)((tmpLevel - (iLevel<> qBits8); + iLevel = ((int64_t)abs(iLevel) * piQuantCoeff[n] + iAdd ) >> iQBits; + deltaU[n] = (int32_t)( ((int64_t)abs(piCoef[n]) * piQuantCoeff[n] - (iLevel<> qBits8 ); + + #if ENABLE_SIGN_HIDING == 1 + *uiAcSum += iLevel; + #endif - iLevel *= iSign; + iLevel *= iSign; piQCoef[n] = CLIP( -32768, 32767, iLevel); } // for n + + #if ENABLE_SIGN_HIDING == 1 + if(*uiAcSum >= 2) + { + #define SCAN_SET_SIZE 16 + #define LOG2_SCAN_SET_SIZE 4 + int32_t n,lastCG = -1, abssum = 0, subset, subpos; + uint32_t* scan_subpos; + for(subset = (iWidth*iHeight-1)>>LOG2_SCAN_SET_SIZE; subset >= 0; subset--) + { + int32_t firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1; + subpos = subset<=0; n--) + { + if(piQCoef[scan[n + subpos]]) + { + lastNZPosInCG = n; + break; + } + } + + /* First coeff pos */ + for(n = 0; n =0 && lastCG==-1) + { + lastCG = 1; + } + + if(lastNZPosInCG-firstNZPosInCG >= /*SBH_THRESHOLD*/4) + { + uint32_t signbit = (piQCoef[scan[subpos+firstNZPosInCG]]>0?0:1) ; + if(signbit != (abssum&0x1)) /* compare signbit with sum_parity */ + { + int32_t minCostInc = 0x7fffffff, minPos =-1, finalChange=0, curCost=0x7fffffff, curChange=0; + + for(n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; n--) + { + uint32_t blkPos = scan[n+subpos]; + if(piQCoef[blkPos] != 0) + { + if(deltaU[blkPos] > 0) + { + curCost = -deltaU[blkPos]; + curChange=1; + } + else if(n == firstNZPosInCG && abs(piQCoef[blkPos]) == 1) + { + curCost=0x7fffffff; + } + else + { + curCost = deltaU[blkPos]; + curChange =-1; + } + } + else if(n < firstNZPosInCG && ((piCoef[blkPos] >= 0)?0:1) != signbit) + { + curCost = 0x7fffffff; + } + else + { + curCost = -deltaU[blkPos]; + curChange = 1; + } + + if(curCost < minCostInc) + { + minCostInc = curCost; + finalChange = curChange; + minPos = blkPos; + } + } //CG loop + + if(piQCoef[minPos] == 32767 || piQCoef[minPos] == -32768) + { + finalChange = -1; + } + + if(piCoef[minPos] >= 0) + { + piQCoef[minPos] += finalChange; + } + else + { + piQCoef[minPos] -= finalChange; + } + } // Hide + } + if(lastCG == 1) + { + lastCG=0; + } + } + + #undef SCAN_SET_SIZE + #undef LOG2_SCAN_SET_SIZE } + #endif } }