From c42b25054a848a9ab6d96f8c4b8b7081d1769a6f Mon Sep 17 00:00:00 2001 From: Ari Koivula Date: Thu, 20 Mar 2014 17:30:20 +0200 Subject: [PATCH] Modify deblocking to be done per-LCU in the encoding loop. - Intra works. There is still something wrong in inter. - Avoid horizontal deblocking of the rightmost 4 pixels in the LCU. This is because vertical deblocking must be done for all pixels before horizontal deblocking, but vertical deblocking can't be done for those pixels before the next LCU is finished. - Add separate deblocking of the rightmost pixels of the previous LCU (the left neighbour) after the edge shared with the current LCU has been deblocked. - This is a pretty ugly hack but will have to do for now. --- src/encoder.c | 25 +++++++++++++++++++------ src/filter.c | 44 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 60 insertions(+), 9 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index bef9dcc3..b60afae3 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -514,7 +514,25 @@ void encode_one_frame(encoder_control* encoder) &ver_buf->v[1], 1, lcu_dim.y / 2, size.x / 2, 1); - //encode_lcu(encoder, x.px, y.px, hor_buf, ver_buf); + if (encoder->deblock_enable) { + filter_deblock_cu(encoder, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_VER); + + // Filter rightmost 4 pixels from last LCU now that they have been + // finally deblocked vertically. 
+ if (lcu.x > 0) { + int y; + for (y = 0; y < 64; y += 8) { + if (lcu.y + y == 0) continue; + filter_deblock_edge_luma(encoder, lcu.x * 64 - 4, lcu.y * 64 + y, 4, EDGE_HOR); + } + for (y = 0; y < 32; y += 8) { + if (lcu.y + y == 0) continue; + filter_deblock_edge_chroma(encoder, lcu.x * 32 - 4, lcu.y * 32 + y, 4, EDGE_HOR); + } + } + + filter_deblock_cu(encoder, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_HOR); + } } } } @@ -1262,11 +1280,6 @@ void encode_slice_data(encoder_control* encoder) picture *pic = encoder->in.cur_pic; const vector2d size_lcu = { encoder->in.width_in_lcu, encoder->in.height_in_lcu }; - // Filtering - if(encoder->deblock_enable) { - filter_deblock(encoder); - } - if (encoder->sao_enable) { pixel *new_y_data = MALLOC(pixel, pic->width * pic->height); pixel *new_u_data = MALLOC(pixel, (pic->width * pic->height) >> 2); diff --git a/src/filter.c b/src/filter.c index 23796227..9334ee2c 100644 --- a/src/filter.c +++ b/src/filter.c @@ -179,6 +179,14 @@ void filter_deblock_edge_luma(encoder_control *encoder, int16_t x_cu = xpos>>MIN_SIZE,y_cu = ypos>>MIN_SIZE; int8_t strength = 0; + { + // Don't do anything if there is no PU or TU edge here. + int cu_width = LCU_WIDTH >> cu_q->depth; + if (dir == EDGE_HOR && ypos % cu_width != 0) { + return; + } + } + if(dir == EDGE_VER) { offset = 1; @@ -199,8 +207,19 @@ void filter_deblock_edge_luma(encoder_control *encoder, // For each 4-pixel part in the edge for (block_idx = 0; block_idx < blocks_in_part; ++block_idx) { int32_t dp0, dq0, dp3, dq3, d0, d3, dp, dq, d; - if((block_idx & 1) == 0) + { + vector2d px = { + (dir == EDGE_HOR ? xpos + block_idx * 4 : xpos), + (dir == EDGE_VER ? ypos + block_idx * 4 : ypos) + }; + + // Don't deblock the last 4x4 block of the LCU. This will be deblocked + // when processing the next LCU. 
+ if (block_idx > 0 && dir == EDGE_HOR && (px.x + 4) % 64 == 0 && (px.x + 4 != encoder->in.width)) { + continue; + } + // CU in the side we are filtering, update every 8-pixels cu_p = &encoder->in.cur_pic->cu_array[MAX_DEPTH][(x_cu - (dir == EDGE_VER) + (dir == EDGE_HOR ? block_idx>>1 : 0)) + (y_cu - (dir == EDGE_HOR) + (dir == EDGE_VER ? block_idx>>1 : 0)) @@ -283,11 +302,19 @@ void filter_deblock_edge_chroma(encoder_control *encoder, int8_t strength = 2; // We cannot filter edges not on 8x8 grid - if((depth == MAX_DEPTH && (( (y & 0x7) && dir == EDGE_HOR ) || ( (x & 0x7) && dir == EDGE_VER ) ) )) + if((depth >= MAX_DEPTH && (( (y & 0x7) && dir == EDGE_HOR ) || ( (x & 0x7) && dir == EDGE_VER ) ) )) { return; } + { + // Don't do anything if there is no PU or TU edge here. + int cu_width = (LCU_WIDTH / 2) >> (cu_q->depth); + if (dir == EDGE_HOR && y % cu_width != 0) { + return; + } + } + if(dir == EDGE_VER) { offset = 1; @@ -300,14 +327,25 @@ void filter_deblock_edge_chroma(encoder_control *encoder, int32_t bitdepth_scale = 1 << (g_bitdepth-8); int32_t TC_index = CLIP(0, 51+2, (int32_t)(QP + 2*(strength-1) + (tc_offset_div2 << 1))); int32_t Tc = g_tc_table_8x8[TC_index]*bitdepth_scale; - uint32_t blocks_in_part= (LCU_WIDTH>>(depth+1)) / 4; + uint32_t blocks_in_part= (LCU_WIDTH>>(depth == 4 ? depth : depth + 1)) / 4; uint32_t blk_idx; for (blk_idx = 0; blk_idx < blocks_in_part; ++blk_idx) { + vector2d px = { + (dir == EDGE_HOR ? x + blk_idx * 4 : x), + (dir == EDGE_VER ? y + blk_idx * 4 : y) + }; cu_p = &encoder->in.cur_pic->cu_array[MAX_DEPTH][(x_cu - (dir == EDGE_VER) + (dir == EDGE_HOR ? blk_idx : 0)) + (y_cu - (dir == EDGE_HOR) + (dir == EDGE_VER ? blk_idx : 0)) * (encoder->in.width_in_lcu << MAX_DEPTH)]; + + // Don't deblock the last 4x4 block of the LCU. This will be deblocked + // when processing the next LCU. 
+ if (depth != 4 && dir == EDGE_HOR && (px.x + 4) % 32 == 0 && (px.x + 4 != encoder->in.width / 2)) { + continue; + } + // Only filter when strenght == 2 (one of the blocks is intra coded) if (cu_q->type == CU_INTRA || cu_p->type == CU_INTRA) { // Chroma U