Fixed 4x4 chroma deblocking and cleaned deblocking functions

This commit is contained in:
Marko Viitanen 2013-05-30 10:37:47 +03:00
parent 1d7d411709
commit d33304e5ec
2 changed files with 84 additions and 155 deletions

View file

@ -96,93 +96,78 @@ INLINE void filter_luma( uint8_t* piSrc, int32_t iOffset, int32_t tc , int8_t sw
} }
} }
INLINE void filter_chroma( uint8_t* piSrc, int32_t iOffset, int32_t tc ,int8_t bPartPNoFilter, int8_t bPartQNoFilter) INLINE void filter_chroma( uint8_t* src, int32_t offset, int32_t tc ,int8_t part_p_nofilter, int8_t part_q_nofilter)
{ {
int32_t delta; int32_t delta;
int16_t m2 = piSrc[-iOffset*2]; int16_t m2 = src[-offset*2];
int16_t m3 = piSrc[-iOffset]; int16_t m3 = src[-offset];
int16_t m4 = piSrc[0]; int16_t m4 = src[0];
int16_t m5 = piSrc[ iOffset]; int16_t m5 = src[ offset];
delta = CLIP(-tc,tc, (((( m4 - m3 ) << 2 ) + m2 - m5 + 4 ) >> 3) ); delta = CLIP(-tc,tc, (((( m4 - m3 ) << 2 ) + m2 - m5 + 4 ) >> 3) );
piSrc[-iOffset] = CLIP(0,(1 << g_bitDepth)-1,m3+delta); if(!part_p_nofilter)
piSrc[0] = CLIP(0,(1 << g_bitDepth)-1,m4-delta);
if(bPartPNoFilter)
{ {
piSrc[-iOffset] = (uint8_t)m3; src[-offset] = CLIP(0,(1 << g_bitDepth)-1,m3+delta);
} }
if(bPartQNoFilter) if(!part_q_nofilter)
{ {
piSrc[0] = (uint8_t)m4; src[0] = CLIP(0,(1 << g_bitDepth)-1,m4-delta);
} }
} }
void filter_deblock_edge_luma(encoder_control* encoder, int32_t xpos, int32_t ypos, int8_t depth, int8_t dir) void filter_deblock_edge_luma(encoder_control* encoder, int32_t xpos, int32_t ypos, int8_t depth, int8_t dir)
{ {
int32_t iStride = encoder->in.cur_pic.width; int32_t iStride = encoder->in.cur_pic.width;
int32_t iOffset = iStride; int32_t offset = iStride;
int32_t betaOffsetDiv2 = encoder->betaOffsetdiv2; int32_t betaOffsetDiv2 = encoder->betaOffsetdiv2;
int32_t tcOffsetDiv2 = encoder->tcOffsetdiv2; int32_t tcOffsetDiv2 = encoder->tcOffsetdiv2;
const int8_t width = (LCU_WIDTH>>depth); const int8_t width = (LCU_WIDTH>>depth);
const int8_t scu_width = (LCU_WIDTH>>MAX_DEPTH); const int8_t scu_width = (LCU_WIDTH>>MAX_DEPTH);
const int8_t scu_width_log2 = TOBITS(scu_width); const int8_t scu_width_log2 = TOBITS(scu_width);
int8_t uiNumParts = (LCU_WIDTH>>depth)/4;
int8_t uiBs = 2; /* Filter strength */ int8_t uiBs = 2; /* Filter strength */
/* ToDo: support 10+bits */ /* ToDo: support 10+bits */
uint8_t* origsrc = &encoder->in.cur_pic.yRecData[xpos+ypos*iStride]; uint8_t* origsrc = &encoder->in.cur_pic.yRecData[xpos+ypos*iStride];
uint8_t* piTmpSrc = origsrc; uint8_t* src = origsrc;
int32_t iSrcStep = 1; int32_t step = 1;
//CU_info* cu = &encoder->in.cur_pic.CU[depth][(xpos>>scu_width_log2) + (ypos>>scu_width_log2)*(encoder->in.width>>scu_width_log2)]; //CU_info* cu = &encoder->in.cur_pic.CU[depth][(xpos>>scu_width_log2) + (ypos>>scu_width_log2)*(encoder->in.width>>scu_width_log2)];
if(dir == EDGE_VER) if(dir == EDGE_VER)
{ {
iOffset = 1; offset = 1;
iSrcStep = iStride; step = iStride;
//piTmpSrc += edge*4;
} }
/*
else
{
iOffset = iStride;
iSrcStep = 1;
//piTmpSrc += (edge*4)*iStride;
}
*/
/* For each subpart */ /* For each subpart */
//for(iIdx = 0; iIdx < uiNumParts; iIdx++)
{ {
int32_t iQP = encoder->QP; int32_t QP = encoder->QP;
int32_t iBitdepthScale = 1 << (g_bitDepth-8); int32_t bitdepth_scale = 1 << (g_bitDepth-8);
int32_t iIndexTC = CLIP(0, 51+2, (int32_t)(iQP + 2*(uiBs-1) + (tcOffsetDiv2 << 1))); int32_t TC_index = CLIP(0, 51+2, (int32_t)(QP + 2*(uiBs-1) + (tcOffsetDiv2 << 1)));
int32_t iIndexB = CLIP(0, 51, iQP + (betaOffsetDiv2 << 1)); int32_t B_index = CLIP(0, 51, QP + (betaOffsetDiv2 << 1));
int32_t iTc = tctable_8x8[iIndexTC]*iBitdepthScale; int32_t Tc = tctable_8x8[TC_index]*bitdepth_scale;
int32_t iBeta = betatable_8x8[iIndexB]*iBitdepthScale; int32_t Beta = betatable_8x8[B_index]*bitdepth_scale;
int32_t iSideThreshold = (iBeta+(iBeta>>1))>>3; int32_t iSideThreshold = (Beta+(Beta>>1))>>3;
int32_t iThrCut = iTc*10; int32_t iThrCut = Tc*10;
uint32_t uiBlocksInPart= (LCU_WIDTH>>depth) / 4; uint32_t blocks_in_part = (LCU_WIDTH>>depth) / 4;
uint32_t iBlkIdx; uint32_t block_idx;
/* ToDo: add CU based QP calculation */ /* ToDo: add CU based QP calculation */
for (iBlkIdx = 0; iBlkIdx < uiBlocksInPart; iBlkIdx++) for (block_idx = 0; block_idx < blocks_in_part; block_idx++)
{ {
int32_t dp0,dq0,dp3,dq3,d0,d3,dp,dq,d; int32_t dp0,dq0,dp3,dq3,d0,d3,dp,dq,d;
/* Check conditions for filtering */ /* Check conditions for filtering */
#define calc_DP(src, offset) abs( src[-offset*3] - 2*src[-offset*2] + src[-offset] ) #define calc_DP(s,o) abs( s[-o*3] - 2*s[-o*2] + s[-o] )
#define calc_DQ(src, offset) abs( src[0] - 2*src[offset] + src[offset*2] ) #define calc_DQ(s,o) abs( s[0] - 2*s[o] + s[o*2] )
dp0 = calc_DP( (piTmpSrc+iSrcStep*(iBlkIdx*4+0)), iOffset); dp0 = calc_DP( (src+step*(block_idx*4+0)), offset);
dq0 = calc_DQ( (piTmpSrc+iSrcStep*(iBlkIdx*4+0)), iOffset); dq0 = calc_DQ( (src+step*(block_idx*4+0)), offset);
dp3 = calc_DP( (piTmpSrc+iSrcStep*(iBlkIdx*4+3)), iOffset); dp3 = calc_DP( (src+step*(block_idx*4+3)), offset);
dq3 = calc_DQ( (piTmpSrc+iSrcStep*(iBlkIdx*4+3)), iOffset); dq3 = calc_DQ( (src+step*(block_idx*4+3)), offset);
d0 = dp0 + dq0; d0 = dp0 + dq0;
d3 = dp3 + dq3; d3 = dp3 + dq3;
dp = dp0 + dp3; dp = dp0 + dp3;
dq = dq0 + dq3; dq = dq0 + dq3;
d = d0 + d3; d = d0 + d3;
@ -190,18 +175,19 @@ void filter_deblock_edge_luma(encoder_control* encoder, int32_t xpos, int32_t yp
#if ENABLE_PCM == 1 #if ENABLE_PCM == 1
//ToDo: add PCM deblocking //ToDo: add PCM deblocking
#endif #endif
if (d < iBeta) if (d < Beta)
{ {
#define useStrongFiltering(offset,d,beta,tc,src) ( ((abs(src[-offset*4]-src[-offset]) + abs(src[-offset*3]-src[0])) < (beta>>3)) && (d<(beta>>2)) && ( abs(src[-offset]-src[0]) < ((tc*5+1)>>1)) ) #define useStrongFiltering(o,d,s) ( ((abs(s[-o*4]-s[-o]) + abs(s[-o*3]-s[0])) < (Beta>>3)) && (d<(Beta>>2)) && ( abs(s[-o]-s[0]) < ((Tc*5+1)>>1)) )
int8_t bFilterP = (dp < iSideThreshold)?1:0; int8_t filter_P = (dp < iSideThreshold)?1:0;
int8_t bFilterQ = (dq < iSideThreshold)?1:0; int8_t filter_Q = (dq < iSideThreshold)?1:0;
int8_t sw = useStrongFiltering( iOffset, 2*d0, iBeta, iTc, (piTmpSrc+iSrcStep*(iBlkIdx*4+0))) int8_t sw = useStrongFiltering( offset, 2*d0, (src+step*(block_idx*4+0))) &&
&& useStrongFiltering( iOffset, 2*d3, iBeta, iTc, (piTmpSrc+iSrcStep*(iBlkIdx*4+3))); useStrongFiltering( offset, 2*d3, (src+step*(block_idx*4+3)));
/* Filter four rows/columns */ /* Filter four rows/columns */
filter_luma( piTmpSrc+iSrcStep*(iBlkIdx*4), iOffset, iTc, sw, 0, 0, iThrCut, bFilterP, bFilterQ); filter_luma( src+step*(block_idx*4+0), offset, Tc, sw, 0, 0, iThrCut, filter_P, filter_Q);
filter_luma( piTmpSrc+iSrcStep*(iBlkIdx*4+1), iOffset, iTc, sw, 0, 0, iThrCut, bFilterP, bFilterQ); filter_luma( src+step*(block_idx*4+1), offset, Tc, sw, 0, 0, iThrCut, filter_P, filter_Q);
filter_luma( piTmpSrc+iSrcStep*(iBlkIdx*4+2), iOffset, iTc, sw, 0, 0, iThrCut, bFilterP, bFilterQ); filter_luma( src+step*(block_idx*4+2), offset, Tc, sw, 0, 0, iThrCut, filter_P, filter_Q);
filter_luma( piTmpSrc+iSrcStep*(iBlkIdx*4+3), iOffset, iTc, sw, 0, 0, iThrCut, bFilterP, bFilterQ); filter_luma( src+step*(block_idx*4+3), offset, Tc, sw, 0, 0, iThrCut, filter_P, filter_Q);
} }
} }
} }
@ -209,62 +195,49 @@ void filter_deblock_edge_luma(encoder_control* encoder, int32_t xpos, int32_t yp
void filter_deblock_edge_chroma(encoder_control* encoder,int32_t xpos, int32_t ypos, int8_t depth, int8_t dir) void filter_deblock_edge_chroma(encoder_control* encoder,int32_t xpos, int32_t ypos, int8_t depth, int8_t dir)
{ {
//int i,iIdx; int32_t stride = encoder->in.cur_pic.width>>1;
int32_t iStride = encoder->in.cur_pic.width>>1;
int32_t iOffset = 0;
int32_t tcOffsetDiv2 = encoder->betaOffsetdiv2; int32_t tcOffsetDiv2 = encoder->betaOffsetdiv2;
const int8_t scu_width = (LCU_WIDTH>>(MAX_DEPTH+1));
const int8_t width = (LCU_WIDTH>>(depth+1));
const int8_t scu_width_log2 = TOBITS(scu_width);
int8_t uiNumParts = 1; int8_t uiNumParts = 1;
int8_t uiBs = 2; /* Filter strength */
/* ToDo: support 10+bits */ /* ToDo: support 10+bits */
uint8_t* srcU = &encoder->in.cur_pic.uRecData[xpos+ypos*iStride]; uint8_t* srcU = &encoder->in.cur_pic.uRecData[xpos+ypos*stride];
uint8_t* srcV = &encoder->in.cur_pic.vRecData[xpos+ypos*iStride]; uint8_t* srcV = &encoder->in.cur_pic.vRecData[xpos+ypos*stride];
uint8_t* piTmpSrcU = srcU;
uint8_t* piTmpSrcV = srcV; int32_t offset = stride;
//int32_t iSrcStep; int32_t step = 1;
//CU_info* cu = &encoder->in.cur_pic.CU[0][(xpos>>scu_width_log2) + (ypos>>scu_width_log2)*(encoder->in.width>>scu_width_log2)];
int32_t iSrcStep = 1; /* We cannot filter edges not on 8x8 grid */
if( depth == MAX_DEPTH && (( (ypos & 0x7) && dir == EDGE_HOR ) || ( (xpos & 0x7) && dir == EDGE_VER ) ) )
{
return;
}
if(dir == EDGE_VER) if(dir == EDGE_VER)
{ {
iOffset = 1; offset = 1;
iSrcStep = iStride; step = stride;
} }
else
{
iOffset = iStride;
iSrcStep = 1;
}
// For each subpart // For each subpart
{ {
int32_t iQP = encoder->QP; int32_t QP = encoder->QP;
int32_t iBitdepthScale = 1 << (g_bitDepth-8); int32_t bitdepth_scale = 1 << (g_bitDepth-8);
int32_t iIndexTC = CLIP(0, 51+2, (int32_t)(iQP + 2*(uiBs-1) + (tcOffsetDiv2 << 1))); int32_t TC_index = CLIP(0, 51+2, (int32_t)(QP + 2 + (tcOffsetDiv2 << 1)));
int32_t iTc = tctable_8x8[iIndexTC]*iBitdepthScale; int32_t Tc = tctable_8x8[TC_index]*bitdepth_scale;
int32_t iThrCut = iTc*10; uint32_t blocks_in_part= (LCU_WIDTH>>(depth+1)) / 4;
uint32_t uiBlocksInPart= (LCU_WIDTH>>(depth+1)) / 4; uint32_t blk_idx;
uint32_t iBlkIdx;
for (iBlkIdx = 0; iBlkIdx < uiBlocksInPart; iBlkIdx++) for (blk_idx = 0; blk_idx < blocks_in_part; blk_idx++)
{ {
/* Chroma Red */
//for (i = 0; i < 4; i++) filter_chroma( srcU+step*(blk_idx*4+0), offset, Tc,0, 0);
{ filter_chroma( srcU+step*(blk_idx*4+1), offset, Tc,0, 0);
filter_chroma( piTmpSrcU+iSrcStep*(iBlkIdx*4+0), iOffset, iTc,0, 0); filter_chroma( srcU+step*(blk_idx*4+2), offset, Tc,0, 0);
filter_chroma( piTmpSrcU+iSrcStep*(iBlkIdx*4+1), iOffset, iTc,0, 0); filter_chroma( srcU+step*(blk_idx*4+3), offset, Tc,0, 0);
filter_chroma( piTmpSrcU+iSrcStep*(iBlkIdx*4+2), iOffset, iTc,0, 0); /* Chroma Blue */
filter_chroma( piTmpSrcU+iSrcStep*(iBlkIdx*4+3), iOffset, iTc,0, 0); filter_chroma( srcV+step*(blk_idx*4+0), offset, Tc,0, 0);
filter_chroma( srcV+step*(blk_idx*4+1), offset, Tc,0, 0);
filter_chroma( piTmpSrcV+iSrcStep*(iBlkIdx*4+0), iOffset, iTc,0, 0); filter_chroma( srcV+step*(blk_idx*4+2), offset, Tc,0, 0);
filter_chroma( piTmpSrcV+iSrcStep*(iBlkIdx*4+1), iOffset, iTc,0, 0); filter_chroma( srcV+step*(blk_idx*4+3), offset, Tc,0, 0);
filter_chroma( piTmpSrcV+iSrcStep*(iBlkIdx*4+2), iOffset, iTc,0, 0);
filter_chroma( piTmpSrcV+iSrcStep*(iBlkIdx*4+3), iOffset, iTc,0, 0);
//filter_chroma( piTmpSrc+iSrcStep*(iBlkIdx*4+2), iOffset, iTc,bFilterP, bFilterQ);
//filter_chroma( piTmpSrc+iSrcStep*(iBlkIdx*4+3), iOffset, iTc,bFilterP, bFilterQ);
}
} }
} }
} }
@ -299,58 +272,14 @@ void filter_deblock_CU(encoder_control* encoder, int32_t xCtb, int32_t yCtb, int
return; return;
} }
if((!xCtb && edge == EDGE_VER) || (!yCtb && edge == EDGE_HOR)) return; if((!xCtb && edge == EDGE_VER) || (!yCtb && edge == EDGE_HOR)) return;
/*
/* DO THE FILTERING FOR EDGE */
xSetLoopfilterParam( pcCU, uiAbsZorderIdx ); filter_deblock_edge_luma(encoder, xCtb*(LCU_WIDTH>>MAX_DEPTH), yCtb*(LCU_WIDTH>>MAX_DEPTH), depth, edge);
//if ( depth != MAX_DEPTH )
xSetEdgefilterTU ( pcCU, uiAbsZorderIdx , uiAbsZorderIdx, uiDepth );
xSetEdgefilterPU ( pcCU, uiAbsZorderIdx );
Int iDir = Edge;
for( UInt uiPartIdx = uiAbsZorderIdx; uiPartIdx < uiAbsZorderIdx + uiCurNumParts; uiPartIdx++ )
{ {
UInt uiBSCheck; filter_deblock_edge_chroma(encoder, xCtb*(LCU_WIDTH>>(MAX_DEPTH+1)), yCtb*(LCU_WIDTH>>(MAX_DEPTH+1)), depth, edge);
if( (g_uiMaxCUWidth >> g_uiMaxCUDepth) == 4 )
{
uiBSCheck = (iDir == EDGE_VER && uiPartIdx%2 == 0) || (iDir == EDGE_HOR && (uiPartIdx-((uiPartIdx>>2)<<2))/2 == 0);
}
else
{
uiBSCheck = 1;
}
if ( m_aapbEdgeFilter[iDir][uiPartIdx] && uiBSCheck )
{
xGetBoundaryStrengthSingle ( pcCU, iDir, uiPartIdx );
}
}
UInt uiPelsInPart = g_uiMaxCUWidth >> g_uiMaxCUDepth;
UInt PartIdxIncr = 8 / uiPelsInPart ? 8 / uiPelsInPart : 1 ;
UInt uiSizeInPU = pcPic->getNumPartInWidth()>>(uiDepth);
for ( UInt iEdge = 0; iEdge < uiSizeInPU ; iEdge+=PartIdxIncr)
{
xEdgeFilterLuma ( pcCU, uiAbsZorderIdx, uiDepth, iDir, iEdge );
if ( (uiPelsInPart>8) || (iEdge % ( (8<<1)/uiPelsInPart ) ) == 0 )
{
xEdgeFilterChroma ( pcCU, uiAbsZorderIdx, uiDepth, iDir, iEdge );
}
}
*/
{
//int32_t uiSizeInPU = 16>>depth;
//int32_t PartIdxIncr = ((8 / (LCU_WIDTH>>(depth+1))) ? (8 / (LCU_WIDTH>>(depth+1))) : 1);
//for(edge = 0; edge < uiSizeInPU; edge += PartIdxIncr)
{
filter_deblock_edge_luma(encoder, xCtb*(LCU_WIDTH>>MAX_DEPTH), yCtb*(LCU_WIDTH>>MAX_DEPTH), depth, edge);
//if ( depth != 3 )
{
filter_deblock_edge_chroma(encoder, xCtb*(LCU_WIDTH>>(MAX_DEPTH+1)), yCtb*(LCU_WIDTH>>(MAX_DEPTH+1)), depth, edge);
}
}
} }
} }
void filter_deblock(encoder_control* encoder) void filter_deblock(encoder_control* encoder)
{ {

View file

@ -17,7 +17,7 @@
#define LCU_WIDTH 64 /*!< Largest Coding Unit */ #define LCU_WIDTH 64 /*!< Largest Coding Unit */
#define MAX_SEARCH_DEPTH 3 #define MAX_SEARCH_DEPTH 3
#define MIN_SEARCH_DEPTH 1 #define MIN_SEARCH_DEPTH 2
#define MAX_DEPTH 3 /*!< smallest CU is LCU_WIDTH>>MAX_DEPTH */ #define MAX_DEPTH 3 /*!< smallest CU is LCU_WIDTH>>MAX_DEPTH */
#define MIN_SIZE 3 /*!< log2_min_coding_block_size */ #define MIN_SIZE 3 /*!< log2_min_coding_block_size */