Modify deblocking to be done per-LCU in the encoding loop.

- Intra prediction works. There is still something wrong with inter prediction.

- Avoid horizontal deblocking of the rightmost 4 pixels in the LCU.
  This is because vertical deblocking must be done for all pixels
  before horizontal, but vertical deblocking can't be done for those
  pixels before the next LCU is finished.

- Add separate deblocking of the rightmost pixels of the previous LCU
  (the one to the left) after the vertical edge it shares with the
  current LCU has been deblocked.

- This is a pretty ugly hack but will have to do for now.
This commit is contained in:
Ari Koivula 2014-03-20 17:30:20 +02:00
parent bbd1202f90
commit c42b25054a
2 changed files with 60 additions and 9 deletions

View file

@ -514,7 +514,25 @@ void encode_one_frame(encoder_control* encoder)
&ver_buf->v[1], &ver_buf->v[1],
1, lcu_dim.y / 2, size.x / 2, 1); 1, lcu_dim.y / 2, size.x / 2, 1);
//encode_lcu(encoder, x.px, y.px, hor_buf, ver_buf); if (encoder->deblock_enable) {
filter_deblock_cu(encoder, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_VER);
// Filter rightmost 4 pixels from last LCU now that they have been
// finally deblocked vertically.
if (lcu.x > 0) {
int y;
for (y = 0; y < 64; y += 8) {
if (lcu.y + y == 0) continue;
filter_deblock_edge_luma(encoder, lcu.x * 64 - 4, lcu.y * 64 + y, 4, EDGE_HOR);
}
for (y = 0; y < 32; y += 8) {
if (lcu.y + y == 0) continue;
filter_deblock_edge_chroma(encoder, lcu.x * 32 - 4, lcu.y * 32 + y, 4, EDGE_HOR);
}
}
filter_deblock_cu(encoder, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_HOR);
}
} }
} }
} }
@ -1262,11 +1280,6 @@ void encode_slice_data(encoder_control* encoder)
picture *pic = encoder->in.cur_pic; picture *pic = encoder->in.cur_pic;
const vector2d size_lcu = { encoder->in.width_in_lcu, encoder->in.height_in_lcu }; const vector2d size_lcu = { encoder->in.width_in_lcu, encoder->in.height_in_lcu };
// Filtering
if(encoder->deblock_enable) {
filter_deblock(encoder);
}
if (encoder->sao_enable) { if (encoder->sao_enable) {
pixel *new_y_data = MALLOC(pixel, pic->width * pic->height); pixel *new_y_data = MALLOC(pixel, pic->width * pic->height);
pixel *new_u_data = MALLOC(pixel, (pic->width * pic->height) >> 2); pixel *new_u_data = MALLOC(pixel, (pic->width * pic->height) >> 2);

View file

@ -179,6 +179,14 @@ void filter_deblock_edge_luma(encoder_control *encoder,
int16_t x_cu = xpos>>MIN_SIZE,y_cu = ypos>>MIN_SIZE; int16_t x_cu = xpos>>MIN_SIZE,y_cu = ypos>>MIN_SIZE;
int8_t strength = 0; int8_t strength = 0;
{
// Don't do anything if there is no PU or TU edge here.
int cu_width = LCU_WIDTH >> cu_q->depth;
if (dir == EDGE_HOR && ypos % cu_width != 0) {
return;
}
}
if(dir == EDGE_VER) { if(dir == EDGE_VER) {
offset = 1; offset = 1;
@ -199,8 +207,19 @@ void filter_deblock_edge_luma(encoder_control *encoder,
// For each 4-pixel part in the edge // For each 4-pixel part in the edge
for (block_idx = 0; block_idx < blocks_in_part; ++block_idx) { for (block_idx = 0; block_idx < blocks_in_part; ++block_idx) {
int32_t dp0, dq0, dp3, dq3, d0, d3, dp, dq, d; int32_t dp0, dq0, dp3, dq3, d0, d3, dp, dq, d;
if((block_idx & 1) == 0)
{ {
vector2d px = {
(dir == EDGE_HOR ? xpos + block_idx * 4 : xpos),
(dir == EDGE_VER ? ypos + block_idx * 4 : ypos)
};
// Don't deblock the last 4x4 block of the LCU. This will be deblocked
// when processing the next LCU.
if (block_idx > 0 && dir == EDGE_HOR && (px.x + 4) % 64 == 0 && (px.x + 4 != encoder->in.width)) {
continue;
}
// CU in the side we are filtering, update every 8-pixels // CU in the side we are filtering, update every 8-pixels
cu_p = &encoder->in.cur_pic->cu_array[MAX_DEPTH][(x_cu - (dir == EDGE_VER) + (dir == EDGE_HOR ? block_idx>>1 : 0)) + cu_p = &encoder->in.cur_pic->cu_array[MAX_DEPTH][(x_cu - (dir == EDGE_VER) + (dir == EDGE_HOR ? block_idx>>1 : 0)) +
(y_cu - (dir == EDGE_HOR) + (dir == EDGE_VER ? block_idx>>1 : 0)) (y_cu - (dir == EDGE_HOR) + (dir == EDGE_VER ? block_idx>>1 : 0))
@ -283,11 +302,19 @@ void filter_deblock_edge_chroma(encoder_control *encoder,
int8_t strength = 2; int8_t strength = 2;
// We cannot filter edges not on 8x8 grid // We cannot filter edges not on 8x8 grid
if((depth == MAX_DEPTH && (( (y & 0x7) && dir == EDGE_HOR ) || ( (x & 0x7) && dir == EDGE_VER ) ) )) if((depth >= MAX_DEPTH && (( (y & 0x7) && dir == EDGE_HOR ) || ( (x & 0x7) && dir == EDGE_VER ) ) ))
{ {
return; return;
} }
{
// Don't do anything if there is no PU or TU edge here.
int cu_width = (LCU_WIDTH / 2) >> (cu_q->depth);
if (dir == EDGE_HOR && y % cu_width != 0) {
return;
}
}
if(dir == EDGE_VER) if(dir == EDGE_VER)
{ {
offset = 1; offset = 1;
@ -300,14 +327,25 @@ void filter_deblock_edge_chroma(encoder_control *encoder,
int32_t bitdepth_scale = 1 << (g_bitdepth-8); int32_t bitdepth_scale = 1 << (g_bitdepth-8);
int32_t TC_index = CLIP(0, 51+2, (int32_t)(QP + 2*(strength-1) + (tc_offset_div2 << 1))); int32_t TC_index = CLIP(0, 51+2, (int32_t)(QP + 2*(strength-1) + (tc_offset_div2 << 1)));
int32_t Tc = g_tc_table_8x8[TC_index]*bitdepth_scale; int32_t Tc = g_tc_table_8x8[TC_index]*bitdepth_scale;
uint32_t blocks_in_part= (LCU_WIDTH>>(depth+1)) / 4; uint32_t blocks_in_part= (LCU_WIDTH>>(depth == 4 ? depth : depth + 1)) / 4;
uint32_t blk_idx; uint32_t blk_idx;
for (blk_idx = 0; blk_idx < blocks_in_part; ++blk_idx) for (blk_idx = 0; blk_idx < blocks_in_part; ++blk_idx)
{ {
vector2d px = {
(dir == EDGE_HOR ? x + blk_idx * 4 : x),
(dir == EDGE_VER ? y + blk_idx * 4 : y)
};
cu_p = &encoder->in.cur_pic->cu_array[MAX_DEPTH][(x_cu - (dir == EDGE_VER) + (dir == EDGE_HOR ? blk_idx : 0)) + cu_p = &encoder->in.cur_pic->cu_array[MAX_DEPTH][(x_cu - (dir == EDGE_VER) + (dir == EDGE_HOR ? blk_idx : 0)) +
(y_cu - (dir == EDGE_HOR) + (dir == EDGE_VER ? blk_idx : 0)) (y_cu - (dir == EDGE_HOR) + (dir == EDGE_VER ? blk_idx : 0))
* (encoder->in.width_in_lcu << MAX_DEPTH)]; * (encoder->in.width_in_lcu << MAX_DEPTH)];
// Don't deblock the last 4x4 block of the LCU. This will be deblocked
// when processing the next LCU.
if (depth != 4 && dir == EDGE_HOR && (px.x + 4) % 32 == 0 && (px.x + 4 != encoder->in.width / 2)) {
continue;
}
// Only filter when strenght == 2 (one of the blocks is intra coded) // Only filter when strenght == 2 (one of the blocks is intra coded)
if (cu_q->type == CU_INTRA || cu_p->type == CU_INTRA) { if (cu_q->type == CU_INTRA || cu_p->type == CU_INTRA) {
// Chroma U // Chroma U