mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-30 12:44:07 +00:00
Modify deblocking to be done per-LCU in the encoding loop.
- Intra works. There is still something wrong in inter.
- Avoid horizontal deblocking of the rightmost 4 pixels in the LCU. Vertical deblocking must be done for all pixels before horizontal deblocking, but it cannot be done for those pixels until the next LCU is finished.
- Add separate deblocking of the rightmost pixels of the previous LCU after the LCU edge has been deblocked.
- This is a pretty ugly hack, but it will have to do for now.
This commit is contained in:
parent
bbd1202f90
commit
c42b25054a
|
@ -514,7 +514,25 @@ void encode_one_frame(encoder_control* encoder)
|
||||||
&ver_buf->v[1],
|
&ver_buf->v[1],
|
||||||
1, lcu_dim.y / 2, size.x / 2, 1);
|
1, lcu_dim.y / 2, size.x / 2, 1);
|
||||||
|
|
||||||
//encode_lcu(encoder, x.px, y.px, hor_buf, ver_buf);
|
if (encoder->deblock_enable) {
|
||||||
|
filter_deblock_cu(encoder, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_VER);
|
||||||
|
|
||||||
|
// Filter rightmost 4 pixels from last LCU now that they have been
|
||||||
|
// finally deblocked vertically.
|
||||||
|
if (lcu.x > 0) {
|
||||||
|
int y;
|
||||||
|
for (y = 0; y < 64; y += 8) {
|
||||||
|
if (lcu.y + y == 0) continue;
|
||||||
|
filter_deblock_edge_luma(encoder, lcu.x * 64 - 4, lcu.y * 64 + y, 4, EDGE_HOR);
|
||||||
|
}
|
||||||
|
for (y = 0; y < 32; y += 8) {
|
||||||
|
if (lcu.y + y == 0) continue;
|
||||||
|
filter_deblock_edge_chroma(encoder, lcu.x * 32 - 4, lcu.y * 32 + y, 4, EDGE_HOR);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
filter_deblock_cu(encoder, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_HOR);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1262,11 +1280,6 @@ void encode_slice_data(encoder_control* encoder)
|
||||||
picture *pic = encoder->in.cur_pic;
|
picture *pic = encoder->in.cur_pic;
|
||||||
const vector2d size_lcu = { encoder->in.width_in_lcu, encoder->in.height_in_lcu };
|
const vector2d size_lcu = { encoder->in.width_in_lcu, encoder->in.height_in_lcu };
|
||||||
|
|
||||||
// Filtering
|
|
||||||
if(encoder->deblock_enable) {
|
|
||||||
filter_deblock(encoder);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (encoder->sao_enable) {
|
if (encoder->sao_enable) {
|
||||||
pixel *new_y_data = MALLOC(pixel, pic->width * pic->height);
|
pixel *new_y_data = MALLOC(pixel, pic->width * pic->height);
|
||||||
pixel *new_u_data = MALLOC(pixel, (pic->width * pic->height) >> 2);
|
pixel *new_u_data = MALLOC(pixel, (pic->width * pic->height) >> 2);
|
||||||
|
|
44
src/filter.c
44
src/filter.c
|
@ -179,6 +179,14 @@ void filter_deblock_edge_luma(encoder_control *encoder,
|
||||||
int16_t x_cu = xpos>>MIN_SIZE,y_cu = ypos>>MIN_SIZE;
|
int16_t x_cu = xpos>>MIN_SIZE,y_cu = ypos>>MIN_SIZE;
|
||||||
int8_t strength = 0;
|
int8_t strength = 0;
|
||||||
|
|
||||||
|
{
|
||||||
|
// Don't do anything if there is no PU or TU edge here.
|
||||||
|
int cu_width = LCU_WIDTH >> cu_q->depth;
|
||||||
|
if (dir == EDGE_HOR && ypos % cu_width != 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if(dir == EDGE_VER) {
|
if(dir == EDGE_VER) {
|
||||||
offset = 1;
|
offset = 1;
|
||||||
|
@ -199,8 +207,19 @@ void filter_deblock_edge_luma(encoder_control *encoder,
|
||||||
// For each 4-pixel part in the edge
|
// For each 4-pixel part in the edge
|
||||||
for (block_idx = 0; block_idx < blocks_in_part; ++block_idx) {
|
for (block_idx = 0; block_idx < blocks_in_part; ++block_idx) {
|
||||||
int32_t dp0, dq0, dp3, dq3, d0, d3, dp, dq, d;
|
int32_t dp0, dq0, dp3, dq3, d0, d3, dp, dq, d;
|
||||||
if((block_idx & 1) == 0)
|
|
||||||
{
|
{
|
||||||
|
vector2d px = {
|
||||||
|
(dir == EDGE_HOR ? xpos + block_idx * 4 : xpos),
|
||||||
|
(dir == EDGE_VER ? ypos + block_idx * 4 : ypos)
|
||||||
|
};
|
||||||
|
|
||||||
|
// Don't deblock the last 4x4 block of the LCU. This will be deblocked
|
||||||
|
// when processing the next LCU.
|
||||||
|
if (block_idx > 0 && dir == EDGE_HOR && (px.x + 4) % 64 == 0 && (px.x + 4 != encoder->in.width)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// CU in the side we are filtering, update every 8-pixels
|
// CU in the side we are filtering, update every 8-pixels
|
||||||
cu_p = &encoder->in.cur_pic->cu_array[MAX_DEPTH][(x_cu - (dir == EDGE_VER) + (dir == EDGE_HOR ? block_idx>>1 : 0)) +
|
cu_p = &encoder->in.cur_pic->cu_array[MAX_DEPTH][(x_cu - (dir == EDGE_VER) + (dir == EDGE_HOR ? block_idx>>1 : 0)) +
|
||||||
(y_cu - (dir == EDGE_HOR) + (dir == EDGE_VER ? block_idx>>1 : 0))
|
(y_cu - (dir == EDGE_HOR) + (dir == EDGE_VER ? block_idx>>1 : 0))
|
||||||
|
@ -283,11 +302,19 @@ void filter_deblock_edge_chroma(encoder_control *encoder,
|
||||||
int8_t strength = 2;
|
int8_t strength = 2;
|
||||||
|
|
||||||
// We cannot filter edges not on 8x8 grid
|
// We cannot filter edges not on 8x8 grid
|
||||||
if((depth == MAX_DEPTH && (( (y & 0x7) && dir == EDGE_HOR ) || ( (x & 0x7) && dir == EDGE_VER ) ) ))
|
if((depth >= MAX_DEPTH && (( (y & 0x7) && dir == EDGE_HOR ) || ( (x & 0x7) && dir == EDGE_VER ) ) ))
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
// Don't do anything if there is no PU or TU edge here.
|
||||||
|
int cu_width = (LCU_WIDTH / 2) >> (cu_q->depth);
|
||||||
|
if (dir == EDGE_HOR && y % cu_width != 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if(dir == EDGE_VER)
|
if(dir == EDGE_VER)
|
||||||
{
|
{
|
||||||
offset = 1;
|
offset = 1;
|
||||||
|
@ -300,14 +327,25 @@ void filter_deblock_edge_chroma(encoder_control *encoder,
|
||||||
int32_t bitdepth_scale = 1 << (g_bitdepth-8);
|
int32_t bitdepth_scale = 1 << (g_bitdepth-8);
|
||||||
int32_t TC_index = CLIP(0, 51+2, (int32_t)(QP + 2*(strength-1) + (tc_offset_div2 << 1)));
|
int32_t TC_index = CLIP(0, 51+2, (int32_t)(QP + 2*(strength-1) + (tc_offset_div2 << 1)));
|
||||||
int32_t Tc = g_tc_table_8x8[TC_index]*bitdepth_scale;
|
int32_t Tc = g_tc_table_8x8[TC_index]*bitdepth_scale;
|
||||||
uint32_t blocks_in_part= (LCU_WIDTH>>(depth+1)) / 4;
|
uint32_t blocks_in_part= (LCU_WIDTH>>(depth == 4 ? depth : depth + 1)) / 4;
|
||||||
uint32_t blk_idx;
|
uint32_t blk_idx;
|
||||||
|
|
||||||
for (blk_idx = 0; blk_idx < blocks_in_part; ++blk_idx)
|
for (blk_idx = 0; blk_idx < blocks_in_part; ++blk_idx)
|
||||||
{
|
{
|
||||||
|
vector2d px = {
|
||||||
|
(dir == EDGE_HOR ? x + blk_idx * 4 : x),
|
||||||
|
(dir == EDGE_VER ? y + blk_idx * 4 : y)
|
||||||
|
};
|
||||||
cu_p = &encoder->in.cur_pic->cu_array[MAX_DEPTH][(x_cu - (dir == EDGE_VER) + (dir == EDGE_HOR ? blk_idx : 0)) +
|
cu_p = &encoder->in.cur_pic->cu_array[MAX_DEPTH][(x_cu - (dir == EDGE_VER) + (dir == EDGE_HOR ? blk_idx : 0)) +
|
||||||
(y_cu - (dir == EDGE_HOR) + (dir == EDGE_VER ? blk_idx : 0))
|
(y_cu - (dir == EDGE_HOR) + (dir == EDGE_VER ? blk_idx : 0))
|
||||||
* (encoder->in.width_in_lcu << MAX_DEPTH)];
|
* (encoder->in.width_in_lcu << MAX_DEPTH)];
|
||||||
|
|
||||||
|
// Don't deblock the last 4x4 block of the LCU. This will be deblocked
|
||||||
|
// when processing the next LCU.
|
||||||
|
if (depth != 4 && dir == EDGE_HOR && (px.x + 4) % 32 == 0 && (px.x + 4 != encoder->in.width / 2)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// Only filter when strength == 2 (one of the blocks is intra coded)
|
// Only filter when strength == 2 (one of the blocks is intra coded)
|
||||||
if (cu_q->type == CU_INTRA || cu_p->type == CU_INTRA) {
|
if (cu_q->type == CU_INTRA || cu_p->type == CU_INTRA) {
|
||||||
// Chroma U
|
// Chroma U
|
||||||
|
|
Loading…
Reference in a new issue