[dual-tree] Fix deblock

This commit is contained in:
Joose Sainio 2022-06-30 12:24:53 +03:00
parent 5fefea025f
commit 3a6414c31d
5 changed files with 119 additions and 81 deletions

View file

@ -264,14 +264,19 @@ static INLINE void uvg_filter_deblock_chroma(const encoder_control_t * const enc
* \param dir direction of the edge to check
* \return true, if the edge is a TU boundary, otherwise false
*/
static bool is_tu_boundary(const encoder_state_t *const state,
static bool is_tu_boundary(
const encoder_state_t *const state,
int32_t x,
int32_t y,
edge_dir dir)
edge_dir dir,
enum uvg_tree_type tree_type)
{
x >>= tree_type == UVG_CHROMA_T;
y >>= tree_type == UVG_CHROMA_T;
// if (x & 3 || y & 3) return false;
const cu_info_t *const scu =
uvg_cu_array_at_const(state->tile->frame->cu_array, x, y);
const int tu_width = LCU_WIDTH >> scu->tr_depth;
uvg_cu_array_at_const(tree_type != UVG_CHROMA_T ? state->tile->frame->cu_array : state->tile->frame->chroma_cu_array, x, y);
const int tu_width = LCU_WIDTH >> (scu->tr_depth + (tree_type == UVG_CHROMA_T));
if (dir == EDGE_HOR) {
return (y & (tu_width - 1)) == 0;
@ -295,28 +300,38 @@ static bool is_pu_boundary(const encoder_state_t *const state,
int32_t y,
edge_dir dir)
{
const cu_info_t *const scu =
uvg_cu_array_at_const(state->tile->frame->cu_array, x, y);
// Get the containing CU.
const int32_t cu_width = LCU_WIDTH >> scu->depth;
const int32_t x_cu = x & ~(cu_width - 1);
const int32_t y_cu = y & ~(cu_width - 1);
const cu_info_t *const cu =
uvg_cu_array_at_const(state->tile->frame->cu_array, x_cu, y_cu);
const int num_pu = uvg_part_mode_num_parts[cu->part_size];
for (int i = 0; i < num_pu; i++) {
if (dir == EDGE_HOR) {
int y_pu = PU_GET_Y(cu->part_size, cu_width, y_cu, i);
if (y_pu == y) return true;
} else {
int x_pu = PU_GET_X(cu->part_size, cu_width, x_cu, i);
if (x_pu == x) return true;
}
}
/*
TODO: it appears that this function can never return true when is_tu_boundary
returns false. Therefore it should be safe to remove this function, but let's
keep it for now, in case some other tool requires it.
*/
return false;
//const cu_info_t *const scu =
// uvg_cu_array_at_const(state->tile->frame->cu_array, x, y);
//// Get the containing CU.
//const int32_t cu_width = LCU_WIDTH >> scu->depth;
//const int32_t x_cu = x & ~(cu_width - 1);
//const int32_t y_cu = y & ~(cu_width - 1);
//const cu_info_t *const cu =
// uvg_cu_array_at_const(state->tile->frame->cu_array, x_cu, y_cu);
//const int num_pu = uvg_part_mode_num_parts[cu->part_size];
//for (int i = 0; i < num_pu; i++) {
// if (dir == EDGE_HOR) {
// int y_pu = PU_GET_Y(cu->part_size, cu_width, y_cu, i);
// if (y_pu == y) {
// return true;
// }
// } else {
// int x_pu = PU_GET_X(cu->part_size, cu_width, x_cu, i);
// if (x_pu == x) {
// return true;
// }
// }
//}
//return false;
}
@ -599,7 +614,8 @@ static INLINE void get_max_filter_length(uint8_t *filt_len_P, uint8_t *filt_len_
const edge_dir dir, const bool transform_edge,
const int tu_size_P_side, const int tu_size_Q_side,
const int pu_pos, const int pu_size,
const bool merge_flag, const color_t comp)
const bool merge_flag, const color_t comp,
enum uvg_tree_type tree_type)
{
//const int tu_size_P_side = 0;
//const int tu_size_Q_side = 0;
@ -612,10 +628,10 @@ static INLINE void get_max_filter_length(uint8_t *filt_len_P, uint8_t *filt_len_
bool transform_edge_4x4[2] = { false, false };
bool transform_edge_8x8[2] = { false, false };
if (pos >= 4) transform_edge_4x4[0] = is_tu_boundary(state, x - x_mul * 4, y - y_mul * 4, dir);
if (pos >= 8) transform_edge_8x8[0] = is_tu_boundary(state, x - x_mul * 8, y - y_mul * 8, dir);
if (pos + 4 < len) transform_edge_4x4[1] = is_tu_boundary(state, x + x_mul * 4, y + y_mul * 4, dir);
if (pos + 8 < len) transform_edge_8x8[1] = is_tu_boundary(state, x + x_mul * 8, y + y_mul * 8, dir);
if (pos >= 4) transform_edge_4x4[0] = is_tu_boundary(state, x - x_mul * 4, y - y_mul * 4, dir, tree_type);
if (pos >= 8) transform_edge_8x8[0] = is_tu_boundary(state, x - x_mul * 8, y - y_mul * 8, dir, tree_type);
if (pos + 4 < len) transform_edge_4x4[1] = is_tu_boundary(state, x + x_mul * 4, y + y_mul * 4, dir, tree_type);
if (pos + 8 < len) transform_edge_8x8[1] = is_tu_boundary(state, x + x_mul * 8, y + y_mul * 8, dir, tree_type);
if (comp == COLOR_Y) {
if (tu_size_P_side <= 4 || tu_size_Q_side <= 4){
@ -845,8 +861,11 @@ static void filter_deblock_edge_luma(encoder_state_t * const state,
const int pu_pos = dir == EDGE_HOR ? y_coord - PU_GET_Y(cu_q->part_size, cu_size, 0, pu_part_idx)
: x_coord - PU_GET_X(cu_q->part_size, cu_size, 0, pu_part_idx);
get_max_filter_length(&max_filter_length_P, &max_filter_length_Q, state, x_coord, y_coord,
dir, tu_boundary, LCU_WIDTH >> cu_p->tr_depth, LCU_WIDTH >> cu_q->tr_depth,
pu_pos, pu_size, cu_q->merged, COLOR_Y);
dir, tu_boundary,
LCU_WIDTH >> cu_p->tr_depth,
LCU_WIDTH >> cu_q->tr_depth,
pu_pos, pu_size, cu_q->merged, COLOR_Y,
UVG_LUMA_T);
if (max_filter_length_P > 3) {
is_side_P_large = dir == EDGE_HOR && y % LCU_WIDTH == 0 ? false : true;
@ -1011,7 +1030,8 @@ static void filter_deblock_edge_chroma(encoder_state_t * const state,
int32_t y,
int32_t length,
edge_dir dir,
bool tu_boundary)
bool tu_boundary,
enum uvg_tree_type tree_type)
{
const encoder_control_t * const encoder = state->encoder_control;
const videoframe_t * const frame = state->tile->frame;
@ -1051,23 +1071,24 @@ static void filter_deblock_edge_chroma(encoder_state_t * const state,
// CUs on both sides of the edge
cu_info_t *cu_p;
cu_info_t *cu_q;
int32_t x_coord = x << 1;
int32_t y_coord = y << 1;
int32_t x_coord = x << (tree_type != UVG_CHROMA_T);
int32_t y_coord = y << (tree_type != UVG_CHROMA_T);
cu_array_t* cua = tree_type != UVG_CHROMA_T ? frame->cu_array : frame->chroma_cu_array;
if (dir == EDGE_VER) {
y_coord = (y + min_chroma_length * blk_idx) << 1;
cu_p = uvg_cu_array_at(frame->cu_array, x_coord - 1, y_coord);
cu_q = uvg_cu_array_at(frame->cu_array, x_coord , y_coord);
y_coord = (y + min_chroma_length * blk_idx) << (tree_type != UVG_CHROMA_T);
cu_p = uvg_cu_array_at(cua, x_coord - 1, y_coord);
cu_q = uvg_cu_array_at(cua, x_coord , y_coord);
} else {
x_coord = (x + min_chroma_length * blk_idx) << 1;
cu_p = uvg_cu_array_at(frame->cu_array, x_coord, y_coord - 1);
cu_q = uvg_cu_array_at(frame->cu_array, x_coord, y_coord );
x_coord = (x + min_chroma_length * blk_idx) << (tree_type != UVG_CHROMA_T);
cu_p = uvg_cu_array_at(cua, x_coord, y_coord - 1);
cu_q = uvg_cu_array_at(cua, x_coord, y_coord );
}
const int cu_size = LCU_WIDTH >> cu_q->depth;
const int pu_part_idx = ((y << 1) + PU_GET_H(cu_q->part_size, cu_size, 0) <= y_coord ?
const int cu_size = LCU_WIDTH >> (cu_q->depth + (tree_type == UVG_CHROMA_T));
const int pu_part_idx = ((y << (tree_type != UVG_CHROMA_T)) + PU_GET_H(cu_q->part_size, cu_size, 0) <= y_coord ?
1 + (uvg_part_mode_num_parts[cu_q->part_size] >> 2) : 0)
+ ((x << 1) + PU_GET_W(cu_q->part_size, cu_size, 0) <= x_coord ? 1 : 0);
+ ((x << (tree_type != UVG_CHROMA_T)) + PU_GET_W(cu_q->part_size, cu_size, 0) <= x_coord ? 1 : 0);
const int pu_size = dir == EDGE_HOR ? PU_GET_H(cu_q->part_size, cu_size, pu_part_idx)
: PU_GET_W(cu_q->part_size, cu_size, pu_part_idx);
const int pu_pos = dir == EDGE_HOR ? y_coord - PU_GET_Y(cu_q->part_size, cu_size, 0, pu_part_idx)
@ -1079,11 +1100,12 @@ static void filter_deblock_edge_chroma(encoder_state_t * const state,
const int tu_q_size = LCU_WIDTH >> (cu_q->tr_depth + (chroma_shift));
get_max_filter_length(&max_filter_length_P, &max_filter_length_Q, state, x_coord, y_coord,
dir, tu_boundary, tu_p_size, tu_q_size,
pu_pos, pu_size, cu_q->merged, COLOR_U);
pu_pos, pu_size, cu_q->merged, COLOR_U,
tree_type);
const bool large_boundary = (max_filter_length_P >= 3 && max_filter_length_Q >= 3);
const bool is_chroma_hor_CTB_boundary = (dir == EDGE_HOR && y_coord % LCU_WIDTH == 0);
const bool is_chroma_hor_CTB_boundary = (dir == EDGE_HOR && y_coord % (LCU_WIDTH >> (tree_type == UVG_CHROMA_T)) == 0);
uint8_t c_strength[2] = { 0, 0 };
@ -1171,14 +1193,16 @@ static void filter_deblock_edge_chroma(encoder_state_t * const state,
* \param dir direction of the edges to filter
* \param tu_boundary whether the edge is a TU boundary
*/
static void filter_deblock_unit(encoder_state_t * const state,
static void filter_deblock_unit(
encoder_state_t * const state,
int x,
int y,
int width,
int height,
edge_dir dir,
bool tu_boundary,
bool previous_ctu)
bool previous_ctu,
enum uvg_tree_type tree_type)
{
// no filtering on borders (where filter would use pixels outside the picture)
if (x == 0 && dir == EDGE_VER) return;
@ -1204,14 +1228,18 @@ static void filter_deblock_unit(encoder_state_t * const state,
length = height;
}
if(tree_type != UVG_CHROMA_T) {
filter_deblock_edge_luma(state, x, y, length, dir, tu_boundary);
}
// Chroma pixel coordinates.
const int32_t x_c = x >> 1;
const int32_t y_c = y >> 1;
if (state->encoder_control->chroma_format != UVG_CSP_400 && (is_on_8x8_grid(x_c, y_c, dir && (x_c + 4) % 32)
|| (x == state->tile->frame->width - 8 && dir == 1 && y_c % 8 == 0))) {
filter_deblock_edge_chroma(state, x_c, y_c, length, dir, tu_boundary);
if (state->encoder_control->chroma_format != UVG_CSP_400 &&
(is_on_8x8_grid(x_c, y_c, dir && (x_c + 4) % 32)
|| (x == state->tile->frame->width - 8 && dir == 1 && y_c % 8 == 0))
&& tree_type != UVG_LUMA_T) {
filter_deblock_edge_chroma(state, x_c, y_c, length, dir, tu_boundary, tree_type);
}
}
@ -1236,11 +1264,17 @@ static void filter_deblock_lcu_inside(encoder_state_t * const state,
const int end_x = MIN(x + LCU_WIDTH, state->tile->frame->width);
const int end_y = MIN(y + LCU_WIDTH, state->tile->frame->height);
const enum uvg_tree_type luma_tree = state->frame->is_irap && state->encoder_control->cfg.dual_tree ? UVG_LUMA_T : UVG_BOTH_T;
const enum uvg_tree_type chroma_tree = state->frame->is_irap && state->encoder_control->cfg.dual_tree ? UVG_CHROMA_T : UVG_BOTH_T;
for (int edge_y = y; edge_y < end_y; edge_y += 4) {
for (int edge_x = x; edge_x < end_x; edge_x += 4) {
bool tu_boundary = is_tu_boundary(state, edge_x, edge_y, dir);
bool tu_boundary = is_tu_boundary(state, edge_x, edge_y, dir, luma_tree);
if (tu_boundary || is_pu_boundary(state, edge_x, edge_y, dir)) {
filter_deblock_unit(state, edge_x, edge_y, 4, 4, dir, tu_boundary, edge_x < x);
filter_deblock_unit(state, edge_x, edge_y, 4, 4, dir, tu_boundary, edge_x < x, luma_tree);
}
if(chroma_tree == UVG_CHROMA_T && is_tu_boundary(state, edge_x, edge_y, dir, chroma_tree)) {
filter_deblock_unit(state, edge_x, edge_y, 4, 4, dir, tu_boundary, edge_x < x, chroma_tree);
}
}
}
@ -1259,13 +1293,15 @@ static void filter_deblock_lcu_rightmost(encoder_state_t * const state,
int32_t y_px)
{
// Luma
const enum uvg_tree_type luma_tree = state->frame->is_irap && state->encoder_control->cfg.dual_tree ? UVG_LUMA_T : UVG_BOTH_T;
const enum uvg_tree_type chroma_tree = state->frame->is_irap && state->encoder_control->cfg.dual_tree ? UVG_CHROMA_T : UVG_BOTH_T;
const int end = MIN(y_px + LCU_WIDTH, state->tile->frame->height);
for (int x = x_px - 8; x < x_px; x += 4) {
for (int y = y_px; y < end; y += 4) {
// The top edge of the whole frame is not filtered.
bool tu_boundary = is_tu_boundary(state, x, y, EDGE_HOR);
bool pu_boundary = is_pu_boundary(state, x, y, EDGE_HOR);
if (y > 0 && (tu_boundary || pu_boundary)) {
bool tu_boundary = is_tu_boundary(state, x, y, EDGE_HOR, luma_tree);
if (y > 0 && (tu_boundary || is_pu_boundary(state, x, y, EDGE_HOR))) {
filter_deblock_edge_luma(state, x, y, 4, EDGE_HOR, tu_boundary);
}
}
@ -1279,10 +1315,9 @@ static void filter_deblock_lcu_rightmost(encoder_state_t * const state,
const int end_c = MIN(y_px_c + LCU_WIDTH_C, state->tile->frame->height >> 1);
for (int y_c = y_px_c; y_c < end_c; y_c += 8) {
// The top edge of the whole frame is not filtered.
bool tu_boundary = is_tu_boundary(state, x_c << 1, y_c << 1, EDGE_HOR);
bool pu_boundary = is_pu_boundary(state, x_c << 1, y_c << 1, EDGE_HOR);
if (y_c > 0 && (tu_boundary || pu_boundary)) {
filter_deblock_edge_chroma(state, x_c , y_c, 4, EDGE_HOR, tu_boundary);
bool tu_boundary = is_tu_boundary(state, x_c << 1, y_c << 1, EDGE_HOR, chroma_tree);
if (y_c > 0 && (tu_boundary || is_pu_boundary(state, x_c << 1, y_c << 1, EDGE_HOR))) {
filter_deblock_edge_chroma(state, x_c , y_c, 4, EDGE_HOR, tu_boundary, chroma_tree);
}
}
}
@ -1323,7 +1358,6 @@ static void filter_deblock_lcu_rightmost(encoder_state_t * const state,
void uvg_filter_deblock_lcu(encoder_state_t * const state, int x_px, int y_px)
{
assert(!state->encoder_control->cfg.lossless);
filter_deblock_lcu_inside(state, x_px, y_px, EDGE_VER);
if (x_px > 0) {
filter_deblock_lcu_rightmost(state, x_px, y_px);

View file

@ -133,11 +133,12 @@ static void work_tree_copy_down(int x_local, int y_local, int depth, lcu_t *work
}
}
void uvg_lcu_fill_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, uint8_t tr_depth)
void uvg_lcu_fill_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, uint8_t tr_depth, enum uvg_tree_type
tree_type)
{
const int x_local = SUB_SCU(x_px);
const int y_local = SUB_SCU(y_px);
const unsigned width = LCU_WIDTH >> depth;
const unsigned width = (tree_type != UVG_CHROMA_T ? LCU_WIDTH : LCU_WIDTH_C) >> depth;
for (unsigned y = 0; y < width; y += SCU_WIDTH) {
for (unsigned x = 0; x < width; x += SCU_WIDTH) {
@ -989,7 +990,9 @@ static double search_cu(
intra_search.pred_cu.intra.mode_chroma = intra_mode;
}
intra_search.pred_cu.intra.mode = intra_mode;
if(tree_type == UVG_CHROMA_T) {
uvg_lcu_fill_trdepth(lcu, x_local, y_local, depth, depth, tree_type);
}
}
if (intra_cost < cost) {
cost = intra_cost;
@ -1041,7 +1044,7 @@ static double search_cu(
if (cur_cu->part_size != SIZE_2Nx2N) {
tr_depth = depth + 1;
}
uvg_lcu_fill_trdepth(lcu, x, y, depth, tr_depth);
uvg_lcu_fill_trdepth(lcu, x, y, depth, tr_depth, tree_type);
const bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400;
uvg_inter_recon_cu(state, lcu, x, y, cu_width, true, has_chroma);
@ -1115,7 +1118,7 @@ static double search_cu(
if (cur_cu->tr_depth != depth) {
// Reset transform depth since there are no coefficients. This
// ensures that CBF is cleared for the whole area of the CU.
uvg_lcu_fill_trdepth(lcu, x, y, depth, depth);
uvg_lcu_fill_trdepth(lcu, x, y, depth, depth, tree_type);
}
cur_cu->cbf = 0;
@ -1236,7 +1239,7 @@ static double search_cu(
// Disable MRL in this case
cur_cu->intra.multi_ref_idx = 0;
uvg_lcu_fill_trdepth(lcu, x, y, depth, cur_cu->tr_depth);
uvg_lcu_fill_trdepth(lcu, x, y, depth, cur_cu->tr_depth, tree_type);
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
intra_search_data_t proxy;

View file

@ -93,7 +93,8 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
cu_info_t *const pred_cu,
lcu_t *const lcu);
void uvg_lcu_fill_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, uint8_t tr_depth);
void uvg_lcu_fill_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, uint8_t tr_depth, enum uvg_tree_type
tree_type);
void uvg_intra_recon_lcu_luma(encoder_state_t * const state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu);
void uvg_intra_recon_lcu_chroma(encoder_state_t * const state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu);

View file

@ -1775,7 +1775,7 @@ static void search_pu_inter(encoder_state_t * const state,
cur_pu->inter.mv[0][1] = info->merge_cand[merge_idx].mv[0][1];
cur_pu->inter.mv[1][0] = info->merge_cand[merge_idx].mv[1][0];
cur_pu->inter.mv[1][1] = info->merge_cand[merge_idx].mv[1][1];
uvg_lcu_fill_trdepth(lcu, x, y, depth, MAX(1, depth));
uvg_lcu_fill_trdepth(lcu, x, y, depth, MAX(1, depth), UVG_BOTH_T);
uvg_inter_recon_cu(state, lcu, x, y, width, true, false);
uvg_quantize_lcu_residual(state, true, false, false, x, y, depth, cur_pu, lcu, true, UVG_BOTH_T);
@ -2097,7 +2097,7 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
if (cur_cu->part_size != SIZE_2Nx2N) {
tr_depth = depth + 1;
}
uvg_lcu_fill_trdepth(lcu, x, y, depth, tr_depth);
uvg_lcu_fill_trdepth(lcu, x, y, depth, tr_depth, UVG_BOTH_T);
const int x_px = SUB_SCU(x);
const int y_px = SUB_SCU(y);

View file

@ -625,7 +625,7 @@ static double search_intra_trdepth(
if (depth == 0 || split_cost < nosplit_cost) {
return split_cost;
} else {
uvg_lcu_fill_trdepth(lcu, x_px, y_px, depth, depth);
uvg_lcu_fill_trdepth(lcu, x_px, y_px, depth, depth, tree_type);
pred_cu->cbf = nosplit_cbf;
@ -1916,7 +1916,7 @@ void uvg_search_cu_intra(
// Set transform depth to current depth, meaning no transform splits.
uvg_lcu_fill_trdepth(lcu, x_px, y_px, depth, depth);
uvg_lcu_fill_trdepth(lcu, x_px, y_px, depth, depth, tree_type);
// Refine results with slower search or get some results if rough search was skipped.
const int32_t rdo_level = state->encoder_control->cfg.rdo;
if (rdo_level >= 2 || skip_rough_search) {