Cleanup and added 4x4 transform codes

This commit is contained in:
Marko Viitanen 2013-03-22 16:36:35 +02:00
parent 78894b05d1
commit 902ffb6a67
2 changed files with 94 additions and 44 deletions

View file

@ -244,7 +244,6 @@ void encode_one_frame(encoder_control* encoder)
/* output parameters before first frame */
if(encoder->frame == 0)
{
/* Sequence Parameter Set (SPS) */
encode_seq_parameter_set(encoder);
bitstream_align(encoder->stream);
@ -278,19 +277,15 @@ void encode_one_frame(encoder_control* encoder)
bitstream_flush(encoder->stream);
nal_write(encoder->output, encoder->stream->buffer, encoder->stream->buffer_pos, 0, NAL_IDR_SLICE, 0);
bitstream_clear_buffer(encoder->stream);
}
else// if(encoder->frame < 10)
else
{
//if(encoder->QP > 20) encoder->QP-=2;
/* ToDo: add intra/inter search before encoding */
cabac_start(&cabac);
encoder->in.cur_pic.slicetype = SLICE_I;
encoder->in.cur_pic.type = 0;
encode_slice_header(encoder);
encode_slice_header(encoder);
bitstream_align(encoder->stream);
encode_slice_data(encoder);
cabac_flush(&cabac);
@ -316,7 +311,7 @@ void encode_one_frame(encoder_control* encoder)
#endif
/* Clear prediction data */
/* ToDo: store */
/* ToDo: store as reference data */
for(i=0; i < MAX_DEPTH+1; i++)
{
memset(encoder->in.cur_pic.CU[i], 0, (encoder->in.height_in_LCU<<MAX_DEPTH)*(encoder->in.width_in_LCU<<MAX_DEPTH)*sizeof(CU_info));
@ -635,13 +630,13 @@ void encode_slice_data(encoder_control* encoder)
void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, uint8_t depth)
{
uint8_t split_flag = (depth<2)?1:0; /* ToDo: get from CU data */
uint8_t split_flag = (depth<3)?1:0; /* ToDo: get from CU data */
uint8_t split_model = 0;
/* Check for slice border */
uint8_t border_x = ((encoder->in.width)<(uint32_t)( xCtb*(LCU_WIDTH>>MAX_DEPTH) + (LCU_WIDTH>>depth) ))?1:0;
uint8_t border_y = ((encoder->in.height)<(uint32_t)( yCtb*(LCU_WIDTH>>MAX_DEPTH) + (LCU_WIDTH>>depth) ))?1:0;
uint8_t border = border_x | border_y;
uint8_t border = border_x | border_y; /*!< are we in any border CU */
CU_info *cur_CU = &encoder->in.cur_pic.CU[depth][(xCtb>>(MAX_DEPTH-depth))+(yCtb>>(MAX_DEPTH-depth))*(encoder->in.width_in_LCU<<MAX_DEPTH)];
/* When not in MAX_DEPTH, insert split flag and split the blocks if needed */
@ -850,8 +845,11 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
/* Coeff */
/* Transform tree */
cabac.ctx = &g_TransSubdivSCModel[5-(g_aucConvertToBit[LCU_WIDTH]+2-depth)];
CABAC_BIN(&cabac,0,"TransformSubdivFlag");
if(depth < MAX_DEPTH)
{
cabac.ctx = &g_TransSubdivSCModel[5-(g_aucConvertToBit[LCU_WIDTH]+2-depth)];
CABAC_BIN(&cabac,0,"TransformSubdivFlag");
}
/* We don't subdiv and we have 64>>depth transform size */
/* ToDo: allow other sized */
@ -1184,18 +1182,16 @@ void encode_CoeffNxN(encoder_control* encoder,int16_t* coeff, uint8_t width, uin
/* if !intra, scanMode = SCAN_DIAG */
/* CONSTANTS */
const uint32_t uiNumBlkSide = width >> shift;
const uint32_t uiNumBlkSide = width >> shift;
const uint32_t uiLog2BlockSize = g_aucConvertToBit[ width ] + 2;
const uint32_t* scan = g_auiSigLastScan[ scanMode ][ uiLog2BlockSize - 1 ];
const uint32_t* scanCG = NULL;
const uint32_t* scan = g_auiSigLastScan[ scanMode ][ uiLog2BlockSize - 1 ];
const uint32_t* scanCG = NULL;
/* Init base contexts according to block type */
cabac_ctx* baseCoeffGroupCtx = &g_CUSigCoeffGroupSCModel[type];
cabac_ctx* baseCtx = (type==0) ? &g_CUSigSCModel_luma[0] :&g_CUSigSCModel_chroma[0];
cabac_ctx* baseCtx = (type==0) ? &g_CUSigSCModel_luma[0] :&g_CUSigSCModel_chroma[0];
memset(sig_coeffgroup_flag,0,sizeof(uint32_t)*64);
/* Count non-zero coeffs */
for(i = 0; i < width*width; i++)
{
@ -1240,26 +1236,26 @@ void encode_CoeffNxN(encoder_control* encoder,int16_t* coeff, uint8_t width, uin
#endif
encode_lastSignificantXY(encoder,last_coeff_x, last_coeff_y, width, width, type, 0);
iScanPosSig = scanPosLast;
iScanPosSig = scanPosLast;
iLastScanSet = (scanPosLast >> 4);
/* significant_coeff_flag */
for(i = iLastScanSet; i >= 0; i-- )
{
int32_t iSubPos = i << 4 /*LOG2_SCAN_SET_SIZE*/;
int32_t iSubPos = i << 4 /*LOG2_SCAN_SET_SIZE*/;
int32_t abs_coeff[16];
int32_t iCGBlkPos = scanCG[ i ];
int32_t iCGPosY = iCGBlkPos / uiNumBlkSide;
int32_t iCGPosX = iCGBlkPos - (iCGPosY * uiNumBlkSide);
uint32_t coeffSigns = 0;
int32_t iCGBlkPos = scanCG[ i ];
int32_t iCGPosY = iCGBlkPos / uiNumBlkSide;
int32_t iCGPosX = iCGBlkPos - (iCGPosY * uiNumBlkSide);
uint32_t coeffSigns = 0;
int32_t lastNZPosInCG = -1, firstNZPosInCG = 16;
int32_t numNonZero = 0;
uiGoRiceParam = 0;
int32_t numNonZero = 0;
uiGoRiceParam = 0;
if( iScanPosSig == scanPosLast )
{
abs_coeff[ 0 ] = abs( coeff[ posLast ] );
coeffSigns = ( coeff[ posLast ] < 0 )?1:0;
numNonZero = 1;
coeffSigns = ( coeff[ posLast ] < 0 )?1:0;
numNonZero = 1;
lastNZPosInCG = iScanPosSig;
firstNZPosInCG = iScanPosSig;
iScanPosSig--;
@ -1295,26 +1291,25 @@ void encode_CoeffNxN(encoder_control* encoder,int16_t* coeff, uint8_t width, uin
if( uiSig )
{
abs_coeff[ numNonZero ] = abs( coeff[ uiBlkPos ] );
coeffSigns = 2 * coeffSigns + ( coeff[ uiBlkPos ] < 0 );
coeffSigns = 2 * coeffSigns + ( coeff[ uiBlkPos ] < 0 );
numNonZero++;
if( lastNZPosInCG == -1 )
{
lastNZPosInCG = iScanPosSig;
}
firstNZPosInCG = iScanPosSig;
firstNZPosInCG = iScanPosSig;
}
}
}
else
{
iScanPosSig = iSubPos - 1;
}
}
if( numNonZero > 0 )
{
uint8_t signHidden = ( lastNZPosInCG - firstNZPosInCG >= 4 /*SBH_THRESHOLD*/ );
uint32_t uiCtxSet = (i > 0 && type==0) ? 2 : 0;
uint32_t uiCtxSet = (i > 0 && type==0) ? 2 : 0;
cabac_ctx* baseCtxMod;
int32_t numC1Flag,firstC2FlagIdx,idx,iFirstCoeff2;
if( c1 == 0 )
@ -1323,9 +1318,8 @@ void encode_CoeffNxN(encoder_control* encoder,int16_t* coeff, uint8_t width, uin
}
c1 = 1;
baseCtxMod = ( type==0 ) ? &g_CUOneSCModel_luma[4 * uiCtxSet] : &g_CUOneSCModel_chroma[4 * uiCtxSet];
numC1Flag = MIN(numNonZero, C1FLAG_NUMBER);
baseCtxMod = ( type==0 ) ? &g_CUOneSCModel_luma[4 * uiCtxSet] : &g_CUOneSCModel_chroma[4 * uiCtxSet];
numC1Flag = MIN(numNonZero, C1FLAG_NUMBER);
firstC2FlagIdx = -1;
for(idx = 0; idx < numC1Flag; idx++ )
{
@ -1352,7 +1346,7 @@ void encode_CoeffNxN(encoder_control* encoder,int16_t* coeff, uint8_t width, uin
if ( firstC2FlagIdx != -1)
{
uint8_t symbol = (abs_coeff[ firstC2FlagIdx ] > 2)?1:0;
cabac.ctx = &baseCtxMod[0];
cabac.ctx = &baseCtxMod[0];
CABAC_BIN(&cabac,symbol,"first_c2_flag");
}
}

View file

@ -294,6 +294,31 @@ void partialButterfly4(short *src,short *dst,int32_t shift, int32_t line)
}
}
/*
 * Inverse 4-point partial butterfly.
 *
 * For each of `line` iterations, four input samples are read from `src`
 * at a stride of `line` elements (src[0], src[line], src[2*line],
 * src[3*line]) and four consecutive results are written to `dst`.
 * After each iteration `src` advances by one element and `dst` by four.
 *
 * src   - input samples
 * dst   - output samples, four written per iteration
 * shift - right shift applied to every result after adding 1<<(shift-1)
 * line  - iteration count, also used as the stride between input samples
 *
 * The multipliers come from the g_aiT4 table and each result is passed
 * through CLIP with the bounds -32768 and 32767; both are defined
 * elsewhere in the project.
 */
void partialButterflyInverse4(short *src,short *dst,int shift, int line)
{
  const int rounding = 1 << (shift - 1);
  int col;

  for (col = 0; col < line; ++col, ++src, dst += 4)
  {
    /* Fetch the four strided inputs once; all reads happen before any write. */
    const int in0 = src[0];
    const int in1 = src[line];
    const int in2 = src[2 * line];
    const int in3 = src[3 * line];

    /* Odd part: contributions of table rows 1 and 3. */
    const int odd0 = g_aiT4[1][0] * in1 + g_aiT4[3][0] * in3;
    const int odd1 = g_aiT4[1][1] * in1 + g_aiT4[3][1] * in3;

    /* Even part: contributions of table rows 0 and 2. */
    const int even0 = g_aiT4[0][0] * in0 + g_aiT4[2][0] * in2;
    const int even1 = g_aiT4[0][1] * in0 + g_aiT4[2][1] * in2;

    /* Outputs 0/3 and 1/2 are mirrored: same even term, odd term of opposite sign. */
    dst[0] = CLIP( -32768, 32767, (even0 + odd0 + rounding) >> shift );
    dst[1] = CLIP( -32768, 32767, (even1 + odd1 + rounding) >> shift );
    dst[2] = CLIP( -32768, 32767, (even1 - odd1 + rounding) >> shift );
    dst[3] = CLIP( -32768, 32767, (even0 - odd0 + rounding) >> shift );
  }
}
// Fast DST Algorithm. Full matrix multiplication for DST and Fast DST algorithm
// give identical results
void fastForwardDst(short *block,short *coeff,int32_t shift) // input block, output coeff
@ -315,6 +340,24 @@ void fastForwardDst(short *block,short *coeff,int32_t shift) // input block, ou
}
}
/*
 * Fast inverse 4x4 DST pass.
 *
 * For each i in 0..3 the inputs tmp[i], tmp[4+i], tmp[8+i] and tmp[12+i]
 * are combined with the constants 29, 55 and 74, the rounding term
 * 1<<(shift-1) is added, the sum is shifted right by `shift`, and the
 * four results are stored in block[4*i] .. block[4*i+3].
 * Every result is passed through CLIP with the bounds -32768 and 32767
 * (CLIP is defined elsewhere in the project).
 *
 * tmp   - input, 16 values
 * block - output, 16 values
 * shift - right shift applied to each result
 */
void fastInverseDst(short *tmp,short *block,int shift) // input tmp, output block
{
  const int rounding = 1 << (shift - 1);
  int col;

  for (col = 0; col < 4; ++col)
  {
    /* Intermediate sums shared by several of the four outputs */
    const int sum_0_8   = tmp[col]     + tmp[8 + col];
    const int sum_8_12  = tmp[8 + col] + tmp[12 + col];
    const int diff_0_12 = tmp[col]     - tmp[12 + col];
    const int scaled_4  = 74 * tmp[4 + col];

    short *out = &block[4 * col];

    out[0] = CLIP( -32768, 32767, ( 29 * sum_0_8   + 55 * sum_8_12  + scaled_4 + rounding ) >> shift );
    out[1] = CLIP( -32768, 32767, ( 55 * diff_0_12 - 29 * sum_8_12  + scaled_4 + rounding ) >> shift );
    /* This output is formed directly from the inputs rather than from the shared sums. */
    out[2] = CLIP( -32768, 32767, ( 74 * (tmp[col] - tmp[8 + col] + tmp[12 + col]) + rounding ) >> shift );
    out[3] = CLIP( -32768, 32767, ( 55 * sum_0_8   + 29 * diff_0_12 - scaled_4 + rounding ) >> shift );
  }
}
void partialButterfly8(short *src,short *dst,int32_t shift, int32_t line)
@ -622,22 +665,22 @@ void transform2d(int16_t *block,int16_t *coeff, int8_t blockSize, int8_t uiMode)
int32_t shift_2nd = g_aucConvertToBit[blockSize] + 8; // log2(iHeight) + 6
int16_t tmp[LCU_WIDTH*LCU_WIDTH];
/*
if(blockSize== 4)
{
if (uiMode != REG_DCT)
if (uiMode != 65535)
{
fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
}
else
{
partialButterfly4(block, tmp, shift_1st, iHeight);
partialButterfly4(tmp, coeff, shift_2nd, iWidth);
partialButterfly4(block, tmp, shift_1st, blockSize);
partialButterfly4(tmp, coeff, shift_2nd, blockSize);
}
}
else*/
else
switch(blockSize)
{
case 8:
@ -674,7 +717,20 @@ void itransform2d(int16_t *block,int16_t *coeff, int8_t blockSize, int8_t uiMode
int32_t shift_2nd = 12 - (g_uiBitDepth-8);
int16_t tmp[LCU_WIDTH*LCU_WIDTH];
if( blockSize == 8)
if( blockSize == 4)
{
if (uiMode != 65535)
{
fastInverseDst(coeff,tmp,shift_1st); // Inverse DST by FAST Algorithm, coeff input, tmp output
fastInverseDst(tmp,block,shift_2nd); // Inverse DST by FAST Algorithm, tmp input, coeff output
}
else
{
partialButterflyInverse4(coeff,tmp,shift_1st,blockSize);
partialButterflyInverse4(tmp,block,shift_2nd,blockSize);
}
}
else if( blockSize == 8)
{
partialButterflyInverse8(coeff,tmp,shift_1st,blockSize);
partialButterflyInverse8(tmp,block,shift_2nd,blockSize);