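Fixed dequantization (the shift check in dequant() now compares against qpScaled/6 instead of qpScaled/5) and made preparations for smaller blocks (quant() now takes uiAcSum and scanIdx parameters). This version produces bit-perfect output.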

This commit is contained in:
Marko Viitanen 2013-03-21 17:12:55 +02:00
parent fa4c4acbe5
commit 78894b05d1
5 changed files with 46 additions and 45 deletions

View file

@ -71,6 +71,9 @@
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<CompileAs>CompileAsC</CompileAs>
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
<InlineFunctionExpansion>Default</InlineFunctionExpansion>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>

View file

@ -199,13 +199,19 @@ void init_encoder_input(encoder_input* input,FILE* inputfile, uint32_t width, ui
input->height = height;
input->height_in_LCU = height / LCU_WIDTH;
input->width_in_LCU = width / LCU_WIDTH;
if(input->height_in_LCU * LCU_WIDTH < height)
input->height_in_LCU++;
if(input->width_in_LCU * LCU_WIDTH < width)
input->width_in_LCU++;
input->width_in_LCU = width / LCU_WIDTH;
input->cur_pic.width = width;
/* Add one extra LCU when image not divisible by LCU_WIDTH */
if(input->height_in_LCU * LCU_WIDTH < height)
{
input->height_in_LCU++;
}
if(input->width_in_LCU * LCU_WIDTH < width)
{
input->width_in_LCU++;
}
input->cur_pic.width = width;
input->cur_pic.height = height;
input->cur_pic.referenced = 0;
/* Allocate buffers */
@ -629,7 +635,6 @@ void encode_slice_data(encoder_control* encoder)
void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, uint8_t depth)
{
int x,y;
uint8_t split_flag = (depth<2)?1:0; /* ToDo: get from CU data */
uint8_t split_model = 0;
@ -729,15 +734,23 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth))*2+8, rec, (LCU_WIDTH>>(depth))*2+8, 0);
intraPredMode = (uint8_t)intra_prediction(encoder->in.cur_pic.yData,encoder->in.width,recShift,(LCU_WIDTH>>(depth))*2+8,xCtb*(LCU_WIDTH>>(MAX_DEPTH)),yCtb*(LCU_WIDTH>>(MAX_DEPTH)),width,pred,width,&bestSAD);
/* Filter DC-prediction */
if(intraPredMode == 1 && (LCU_WIDTH>>depth) < 32 && xCtb && yCtb)
{
intra_DCPredFiltering(recbase,encoder->in.width,recbase,encoder->in.width,LCU_WIDTH>>depth,LCU_WIDTH>>depth);
}
/* ToDo: separate chroma prediction(?) */
/* intraPredModeChroma = 1; */
intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth+1))*2+8, rec, (LCU_WIDTH>>(depth+1))*2+8, 1);
intra_recon(recShiftU,(LCU_WIDTH>>(depth+1))*2+8,xCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),yCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),width>>1,predU,width>>1,intraPredModeChroma!=36?intraPredModeChroma:intraPredMode,1);
intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth+1))*2+8, rec, (LCU_WIDTH>>(depth+1))*2+8, 2);
intra_recon(recShiftU,(LCU_WIDTH>>(depth+1))*2+8,xCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),yCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),width>>1,predV,width>>1,intraPredModeChroma!=36?intraPredModeChroma:intraPredMode,1);
/* This affects reconstruction, do after that */
intra_setBlockMode(&encoder->in.cur_pic, xCtb, yCtb, depth, intraPredMode);
/* ToDo: separate chroma prediction(?) */
intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth+1))*2+8, rec, (LCU_WIDTH>>(depth+1))*2+8, 1);
intra_recon(recShiftU,(LCU_WIDTH>>(depth+1))*2+8,xCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),yCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),width>>1,predU,width>>1,intraPredMode,1);
intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth+1))*2+8, rec, (LCU_WIDTH>>(depth+1))*2+8, 2);
intra_recon(recShiftU,(LCU_WIDTH>>(depth+1))*2+8,xCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),yCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),width>>1,predV,width>>1,intraPredMode,1);
/*
PREDINFO CODING
If intra prediction mode is found from the predictors,
@ -866,7 +879,7 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
/* Transform and quant residual to coeffs */
transform2d(block,pre_quant_coeff,width,0);
quant(encoder,pre_quant_coeff,coeff, width, width, 0);
quant(encoder,pre_quant_coeff,coeff,width, width,0, 0, SCAN_DIAG);
/* Check for non-zero coeffs */
for(i = 0; (uint32_t)i < width*width; i++)
@ -911,10 +924,9 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
}
}
if(encoder->in.video_format != FORMAT_400)
{
/* U */
/* U */
i = 0;
for(y = 0; y < LCU_WIDTH>>(depth+1); y++)
{
@ -924,7 +936,7 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
}
}
transform2d(block,pre_quant_coeff,LCU_WIDTH>>(depth+1),0);
quant(encoder,pre_quant_coeff,coeffU, width>>1, width>>1, 2);
quant(encoder,pre_quant_coeff,coeffU, width>>1, width>>1, 0,2,SCAN_DIAG);
for(i = 0; (uint32_t)i < width*width>>2; i++)
{
if(coeffU[i] != 0)
@ -945,7 +957,7 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
}
}
transform2d(block,pre_quant_coeff,LCU_WIDTH>>(depth+1),0);
quant(encoder,pre_quant_coeff,coeffV, width>>1, width>>1, 3);
quant(encoder,pre_quant_coeff,coeffV, width>>1, width>>1, 0,3,SCAN_DIAG);
for(i = 0; (uint32_t)i < width*width>>2; i++)
{
if(coeffV[i] != 0)
@ -984,7 +996,7 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
recbaseU[x+y*(encoder->in.width>>1)] = (uint8_t)CLIP(0,255,predU[x+y*(LCU_WIDTH>>(depth+1))]);
}
}
}
}
if(CbV)
{
@ -1015,17 +1027,8 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
}
}
}
}
/* Filter DC-prediction */
/*
if(intraPredMode == 1 && (LCU_WIDTH>>depth) < 32 && xCtb && yCtb)
{
intra_DCPredFiltering(recbase,encoder->in.width,recbase,encoder->in.width,LCU_WIDTH>>depth,LCU_WIDTH>>depth);
}
*/
/* END INTRAPREDICTION */

View file

@ -236,7 +236,7 @@ int16_t intra_prediction(uint8_t* orig,uint32_t origstride,int16_t* rec,uint32_t
int16_t pred[LCU_WIDTH*LCU_WIDTH>>2];
int16_t origBlock[LCU_WIDTH*LCU_WIDTH>>2];
uint8_t *origShift = &orig[xpos+ypos*origstride];
int8_t filter = (width<32);
int8_t filter = (width<32); //ToDo: chroma support
SADfunction SADarray[4] = {&SAD4x4,&SAD8x8,&SAD16x16,&SAD32x32};
uint8_t threshold = intraHorVerDistThres[g_toBits[width]]; /*!< Intra filtering threshold */
#define COPY_PRED_TO_DST() for(y = 0; y < (int32_t)width; y++) { for(x = 0; x < (int32_t)width; x++) { dst[x+y*dststride] = pred[x+y*width]; } }
@ -261,8 +261,7 @@ int16_t intra_prediction(uint8_t* orig,uint32_t origstride,int16_t* rec,uint32_t
}
}
/* Test DC */
/* Test DC */
/*
x = intra_getDCPred(rec, recstride, xpos, ypos, width);
for(i = 0; i < (int32_t)(width*width); i++)
@ -272,7 +271,6 @@ int16_t intra_prediction(uint8_t* orig,uint32_t origstride,int16_t* rec,uint32_t
CHECK_FOR_BEST(1);
*/
/* Check angular not requiring filtering */
for(i = 2; i < 35; i++)
{
if(MIN(abs(i-26),abs(i-10)) <= threshold)
@ -280,13 +278,11 @@ int16_t intra_prediction(uint8_t* orig,uint32_t origstride,int16_t* rec,uint32_t
intra_getAngularPred(rec,recstride,pred, width,width,width,i, xpos?1:0, ypos?1:0, filter);
CHECK_FOR_BEST(i);
}
}
}
/*Apply filter*/
intra_filter(rec,recstride,width,0);
/* Test planar */
intra_getPlanarPred(rec, recstride, xpos, ypos, width, pred, width);
CHECK_FOR_BEST(0);
@ -295,8 +291,7 @@ int16_t intra_prediction(uint8_t* orig,uint32_t origstride,int16_t* rec,uint32_t
/* ToDo: add conditions to skip some modes on borders */
//chroma can use only 26 and 10
/* Test angular predictions which require filtered samples */
/* Test angular predictions which require filtered samples */
for(i = 2; i < 35; i++)
{
if(MIN(abs(i-26),abs(i-10)) > threshold)
@ -304,8 +299,7 @@ int16_t intra_prediction(uint8_t* orig,uint32_t origstride,int16_t* rec,uint32_t
intra_getAngularPred(rec,recstride,pred, width,width,width,i, xpos?1:0, ypos?1:0, filter);
CHECK_FOR_BEST(i);
}
}
}
*sad = bestSAD;
#undef COPY_PRED_TO_DST
#undef CHECK_FOR_BEST

View file

@ -694,8 +694,8 @@ void itransform2d(int16_t *block,int16_t *coeff, int8_t blockSize, int8_t uiMode
#define QUANT_SHIFT 14
void quant(encoder_control* encoder, int16_t* pSrc, int16_t* pDes, /*int32_t** pArlDes,*/ int32_t iWidth,
int32_t iHeight, /*uint32_t *uiAcSum,*/ int8_t eTType/*, uint32_t uiAbsPartIdx*/ )
void quant(encoder_control* encoder, int16_t* pSrc, int16_t* pDes, int32_t iWidth,
int32_t iHeight, uint32_t *uiAcSum, int8_t eTType, int8_t scanIdx )
{
int16_t* piCoef = pSrc;
int16_t* piQCoef = pDes;
@ -704,7 +704,7 @@ void quant(encoder_control* encoder, int16_t* pSrc, int16_t* pDes, /*int32_t** p
int8_t useRDOQForTransformSkip = 0;
uint32_t log2BlockSize = g_aucConvertToBit[ iWidth ] + 2;
uint32_t scanIdx = SCAN_DIAG;
//uint32_t scanIdx = SCAN_DIAG;
scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ];
{
@ -804,7 +804,7 @@ void dequant(encoder_control* encoder, int16_t* piQCoef, int16_t* piCoef, int32_
iShift += 4;
if(iShift >qpScaled/5)
if(iShift >qpScaled/6)
{
iAdd = 1 << (iShift - qpScaled/6 - 1);

View file

@ -16,7 +16,8 @@
extern int32_t* g_quant_coeff[4][6][6];
extern const int32_t g_quantIntraDefault8x8[64];
void quant(encoder_control* encoder, int16_t* pSrc, int16_t* pDes,int32_t iWidth,int32_t iHeight, int8_t eTType );
void quant(encoder_control* encoder, int16_t* pSrc, int16_t* pDes, int32_t iWidth,
int32_t iHeight, uint32_t *uiAcSum, int8_t eTType, int8_t scanIdx );
void dequant(encoder_control* encoder, int16_t* piQCoef, int16_t* piCoef, int32_t iWidth, int32_t iHeight,int8_t eTType);
void transform2d(int16_t *block,int16_t *coeff, int8_t blockSize, int8_t uiMode);