diff --git a/src/filter.c b/src/filter.c index 721d153a..edc9f1e1 100644 --- a/src/filter.c +++ b/src/filter.c @@ -264,14 +264,19 @@ static INLINE void uvg_filter_deblock_chroma(const encoder_control_t * const enc * \param dir direction of the edge to check * \return true, if the edge is a TU boundary, otherwise false */ -static bool is_tu_boundary(const encoder_state_t *const state, - int32_t x, - int32_t y, - edge_dir dir) +static bool is_tu_boundary( + const encoder_state_t *const state, + int32_t x, + int32_t y, + edge_dir dir, + enum uvg_tree_type tree_type) { + x >>= tree_type == UVG_CHROMA_T; + y >>= tree_type == UVG_CHROMA_T; + // if (x & 3 || y & 3) return false; const cu_info_t *const scu = - uvg_cu_array_at_const(state->tile->frame->cu_array, x, y); - const int tu_width = LCU_WIDTH >> scu->tr_depth; + uvg_cu_array_at_const(tree_type != UVG_CHROMA_T ? state->tile->frame->cu_array : state->tile->frame->chroma_cu_array, x, y); + const int tu_width = LCU_WIDTH >> (scu->tr_depth + (tree_type == UVG_CHROMA_T)); if (dir == EDGE_HOR) { return (y & (tu_width - 1)) == 0; @@ -295,28 +300,38 @@ static bool is_pu_boundary(const encoder_state_t *const state, int32_t y, edge_dir dir) { - const cu_info_t *const scu = - uvg_cu_array_at_const(state->tile->frame->cu_array, x, y); - // Get the containing CU. - const int32_t cu_width = LCU_WIDTH >> scu->depth; - const int32_t x_cu = x & ~(cu_width - 1); - const int32_t y_cu = y & ~(cu_width - 1); - const cu_info_t *const cu = - uvg_cu_array_at_const(state->tile->frame->cu_array, x_cu, y_cu); - - const int num_pu = uvg_part_mode_num_parts[cu->part_size]; - for (int i = 0; i < num_pu; i++) { - if (dir == EDGE_HOR) { - int y_pu = PU_GET_Y(cu->part_size, cu_width, y_cu, i); - if (y_pu == y) return true; - - } else { - int x_pu = PU_GET_X(cu->part_size, cu_width, x_cu, i); - if (x_pu == x) return true; - } - } - + /* + TODO: it appears that this function can never be true when is_tu_boundary + is false. Therefore it should be safe to remove this function but let's keep + it for now, in case some other tool requires it. + */ return false; + //const cu_info_t *const scu = + // uvg_cu_array_at_const(state->tile->frame->cu_array, x, y); + //// Get the containing CU. + //const int32_t cu_width = LCU_WIDTH >> scu->depth; + //const int32_t x_cu = x & ~(cu_width - 1); + //const int32_t y_cu = y & ~(cu_width - 1); + //const cu_info_t *const cu = + // uvg_cu_array_at_const(state->tile->frame->cu_array, x_cu, y_cu); + + //const int num_pu = uvg_part_mode_num_parts[cu->part_size]; + //for (int i = 0; i < num_pu; i++) { + // if (dir == EDGE_HOR) { + // int y_pu = PU_GET_Y(cu->part_size, cu_width, y_cu, i); + // if (y_pu == y) { + // return true; + // } + + // } else { + // int x_pu = PU_GET_X(cu->part_size, cu_width, x_cu, i); + // if (x_pu == x) { + // return true; + // } + // } + //} + + //return false; } @@ -599,7 +614,8 @@ static INLINE void get_max_filter_length(uint8_t *filt_len_P, uint8_t *filt_len_ const edge_dir dir, const bool transform_edge, const int tu_size_P_side, const int tu_size_Q_side, const int pu_pos, const int pu_size, - const bool merge_flag, const color_t comp) + const bool merge_flag, const color_t comp, + enum uvg_tree_type tree_type) { //const int tu_size_P_side = 0; //const int tu_size_Q_side = 0; @@ -612,10 +628,10 @@ static INLINE void get_max_filter_length(uint8_t *filt_len_P, uint8_t *filt_len_ bool transform_edge_4x4[2] = { false, false }; bool transform_edge_8x8[2] = { false, false }; - if (pos >= 4) transform_edge_4x4[0] = is_tu_boundary(state, x - x_mul * 4, y - y_mul * 4, dir); - if (pos >= 8) transform_edge_8x8[0] = is_tu_boundary(state, x - x_mul * 8, y - y_mul * 8, dir); - if (pos + 4 < len) transform_edge_4x4[1] = is_tu_boundary(state, x + x_mul * 4, y + y_mul * 4, dir); - if (pos + 8 < len) transform_edge_8x8[1] = is_tu_boundary(state, x + x_mul * 8, y + y_mul * 8, dir); + if (pos >= 4) transform_edge_4x4[0] = is_tu_boundary(state, x - x_mul * 4, y - y_mul * 4, dir, tree_type); + if (pos >= 8) transform_edge_8x8[0] = is_tu_boundary(state, x - x_mul * 8, y - y_mul * 8, dir, tree_type); + if (pos + 4 < len) transform_edge_4x4[1] = is_tu_boundary(state, x + x_mul * 4, y + y_mul * 4, dir, tree_type); + if (pos + 8 < len) transform_edge_8x8[1] = is_tu_boundary(state, x + x_mul * 8, y + y_mul * 8, dir, tree_type); if (comp == COLOR_Y) { if (tu_size_P_side <= 4 || tu_size_Q_side <= 4){ @@ -845,8 +861,11 @@ static void filter_deblock_edge_luma(encoder_state_t * const state, const int pu_pos = dir == EDGE_HOR ? y_coord - PU_GET_Y(cu_q->part_size, cu_size, 0, pu_part_idx) : x_coord - PU_GET_X(cu_q->part_size, cu_size, 0, pu_part_idx); get_max_filter_length(&max_filter_length_P, &max_filter_length_Q, state, x_coord, y_coord, - dir, tu_boundary, LCU_WIDTH >> cu_p->tr_depth, LCU_WIDTH >> cu_q->tr_depth, - pu_pos, pu_size, cu_q->merged, COLOR_Y); + dir, tu_boundary, + LCU_WIDTH >> cu_p->tr_depth, + LCU_WIDTH >> cu_q->tr_depth, + pu_pos, pu_size, cu_q->merged, COLOR_Y, + UVG_LUMA_T); if (max_filter_length_P > 3) { is_side_P_large = dir == EDGE_HOR && y % LCU_WIDTH == 0 ? false : true; @@ -1011,7 +1030,8 @@ static void filter_deblock_edge_chroma(encoder_state_t * const state, int32_t y, int32_t length, edge_dir dir, - bool tu_boundary) + bool tu_boundary, + enum uvg_tree_type tree_type) { const encoder_control_t * const encoder = state->encoder_control; const videoframe_t * const frame = state->tile->frame; @@ -1051,23 +1071,24 @@ static void filter_deblock_edge_chroma(encoder_state_t * const state, // CUs on both sides of the edge cu_info_t *cu_p; cu_info_t *cu_q; - int32_t x_coord = x << 1; - int32_t y_coord = y << 1; + int32_t x_coord = x << (tree_type != UVG_CHROMA_T); + int32_t y_coord = y << (tree_type != UVG_CHROMA_T); + cu_array_t* cua = tree_type != UVG_CHROMA_T ? frame->cu_array : frame->chroma_cu_array; if (dir == EDGE_VER) { - y_coord = (y + min_chroma_length * blk_idx) << 1; - cu_p = uvg_cu_array_at(frame->cu_array, x_coord - 1, y_coord); - cu_q = uvg_cu_array_at(frame->cu_array, x_coord , y_coord); + y_coord = (y + min_chroma_length * blk_idx) << (tree_type != UVG_CHROMA_T); + cu_p = uvg_cu_array_at(cua, x_coord - 1, y_coord); + cu_q = uvg_cu_array_at(cua, x_coord , y_coord); } else { - x_coord = (x + min_chroma_length * blk_idx) << 1; - cu_p = uvg_cu_array_at(frame->cu_array, x_coord, y_coord - 1); - cu_q = uvg_cu_array_at(frame->cu_array, x_coord, y_coord ); + x_coord = (x + min_chroma_length * blk_idx) << (tree_type != UVG_CHROMA_T); + cu_p = uvg_cu_array_at(cua, x_coord, y_coord - 1); + cu_q = uvg_cu_array_at(cua, x_coord, y_coord ); } - const int cu_size = LCU_WIDTH >> cu_q->depth; - const int pu_part_idx = ((y << 1) + PU_GET_H(cu_q->part_size, cu_size, 0) <= y_coord ? + const int cu_size = LCU_WIDTH >> (cu_q->depth + (tree_type == UVG_CHROMA_T)); + const int pu_part_idx = ((y << (tree_type != UVG_CHROMA_T)) + PU_GET_H(cu_q->part_size, cu_size, 0) <= y_coord ? 1 + (uvg_part_mode_num_parts[cu_q->part_size] >> 2) : 0) - + ((x << 1) + PU_GET_W(cu_q->part_size, cu_size, 0) <= x_coord ? 1 : 0); + + ((x << (tree_type != UVG_CHROMA_T)) + PU_GET_W(cu_q->part_size, cu_size, 0) <= x_coord ? 1 : 0); const int pu_size = dir == EDGE_HOR ? PU_GET_H(cu_q->part_size, cu_size, pu_part_idx) : PU_GET_W(cu_q->part_size, cu_size, pu_part_idx); const int pu_pos = dir == EDGE_HOR ? y_coord - PU_GET_Y(cu_q->part_size, cu_size, 0, pu_part_idx) @@ -1079,11 +1100,12 @@ static void filter_deblock_edge_chroma(encoder_state_t * const state, const int tu_q_size = LCU_WIDTH >> (cu_q->tr_depth + (chroma_shift)); get_max_filter_length(&max_filter_length_P, &max_filter_length_Q, state, x_coord, y_coord, dir, tu_boundary, tu_p_size, tu_q_size, - pu_pos, pu_size, cu_q->merged, COLOR_U); + pu_pos, pu_size, cu_q->merged, COLOR_U, + tree_type); const bool large_boundary = (max_filter_length_P >= 3 && max_filter_length_Q >= 3); - const bool is_chroma_hor_CTB_boundary = (dir == EDGE_HOR && y_coord % LCU_WIDTH == 0); + const bool is_chroma_hor_CTB_boundary = (dir == EDGE_HOR && y_coord % (LCU_WIDTH >> (tree_type == UVG_CHROMA_T)) == 0); uint8_t c_strength[2] = { 0, 0 }; @@ -1171,14 +1193,16 @@ static void filter_deblock_edge_chroma(encoder_state_t * const state, * \param dir direction of the edges to filter * \param tu_boundary whether the edge is a TU boundary */ -static void filter_deblock_unit(encoder_state_t * const state, - int x, - int y, - int width, - int height, - edge_dir dir, - bool tu_boundary, - bool previous_ctu) +static void filter_deblock_unit( + encoder_state_t * const state, + int x, + int y, + int width, + int height, + edge_dir dir, + bool tu_boundary, + bool previous_ctu, + enum uvg_tree_type tree_type) { // no filtering on borders (where filter would use pixels outside the picture) if (x == 0 && dir == EDGE_VER) return; @@ -1204,14 +1228,18 @@ static void filter_deblock_unit(encoder_state_t * const state, length = height; } - filter_deblock_edge_luma(state, x, y, length, dir, tu_boundary); + if(tree_type != UVG_CHROMA_T) { + filter_deblock_edge_luma(state, x, y, length, dir, tu_boundary); + } // Chroma pixel coordinates. const int32_t x_c = x >> 1; const int32_t y_c = y >> 1; - if (state->encoder_control->chroma_format != UVG_CSP_400 && (is_on_8x8_grid(x_c, y_c, dir && (x_c + 4) % 32) - || (x == state->tile->frame->width - 8 && dir == 1 && y_c % 8 == 0))) { - filter_deblock_edge_chroma(state, x_c, y_c, length, dir, tu_boundary); + if (state->encoder_control->chroma_format != UVG_CSP_400 && + (is_on_8x8_grid(x_c, y_c, dir && (x_c + 4) % 32) + || (x == state->tile->frame->width - 8 && dir == 1 && y_c % 8 == 0)) + && tree_type != UVG_LUMA_T) { + filter_deblock_edge_chroma(state, x_c, y_c, length, dir, tu_boundary, tree_type); } } @@ -1236,11 +1264,17 @@ static void filter_deblock_lcu_inside(encoder_state_t * const state, const int end_x = MIN(x + LCU_WIDTH, state->tile->frame->width); const int end_y = MIN(y + LCU_WIDTH, state->tile->frame->height); + const enum uvg_tree_type luma_tree = state->frame->is_irap && state->encoder_control->cfg.dual_tree ? UVG_LUMA_T : UVG_BOTH_T; + const enum uvg_tree_type chroma_tree = state->frame->is_irap && state->encoder_control->cfg.dual_tree ? UVG_CHROMA_T : UVG_BOTH_T; + for (int edge_y = y; edge_y < end_y; edge_y += 4) { for (int edge_x = x; edge_x < end_x; edge_x += 4) { - bool tu_boundary = is_tu_boundary(state, edge_x, edge_y, dir); + bool tu_boundary = is_tu_boundary(state, edge_x, edge_y, dir, luma_tree); if (tu_boundary || is_pu_boundary(state, edge_x, edge_y, dir)) { - filter_deblock_unit(state, edge_x, edge_y, 4, 4, dir, tu_boundary, edge_x < x); + filter_deblock_unit(state, edge_x, edge_y, 4, 4, dir, tu_boundary, edge_x < x, luma_tree); + } + if(chroma_tree == UVG_CHROMA_T && is_tu_boundary(state, edge_x, edge_y, dir, chroma_tree)) { + filter_deblock_unit(state, edge_x, edge_y, 4, 4, dir, tu_boundary, edge_x < x, chroma_tree); } } } @@ -1259,13 +1293,15 @@ static void filter_deblock_lcu_rightmost(encoder_state_t * const state, int32_t y_px) { // Luma + const enum uvg_tree_type luma_tree = state->frame->is_irap && state->encoder_control->cfg.dual_tree ? UVG_LUMA_T : UVG_BOTH_T; + const enum uvg_tree_type chroma_tree = state->frame->is_irap && state->encoder_control->cfg.dual_tree ? UVG_CHROMA_T : UVG_BOTH_T; + const int end = MIN(y_px + LCU_WIDTH, state->tile->frame->height); for (int x = x_px - 8; x < x_px; x += 4) { for (int y = y_px; y < end; y += 4) { // The top edge of the whole frame is not filtered. - bool tu_boundary = is_tu_boundary(state, x, y, EDGE_HOR); - bool pu_boundary = is_pu_boundary(state, x, y, EDGE_HOR); - if (y > 0 && (tu_boundary || pu_boundary)) { + bool tu_boundary = is_tu_boundary(state, x, y, EDGE_HOR, luma_tree); + if (y > 0 && (tu_boundary || is_pu_boundary(state, x, y, EDGE_HOR))) { filter_deblock_edge_luma(state, x, y, 4, EDGE_HOR, tu_boundary); } } @@ -1279,10 +1315,9 @@ static void filter_deblock_lcu_rightmost(encoder_state_t * const state, const int end_c = MIN(y_px_c + LCU_WIDTH_C, state->tile->frame->height >> 1); for (int y_c = y_px_c; y_c < end_c; y_c += 8) { // The top edge of the whole frame is not filtered. - bool tu_boundary = is_tu_boundary(state, x_c << 1, y_c << 1, EDGE_HOR); - bool pu_boundary = is_pu_boundary(state, x_c << 1, y_c << 1, EDGE_HOR); - if (y_c > 0 && (tu_boundary || pu_boundary)) { - filter_deblock_edge_chroma(state, x_c , y_c, 4, EDGE_HOR, tu_boundary); + bool tu_boundary = is_tu_boundary(state, x_c << 1, y_c << 1, EDGE_HOR, chroma_tree); + if (y_c > 0 && (tu_boundary || is_pu_boundary(state, x_c << 1, y_c << 1, EDGE_HOR))) { + filter_deblock_edge_chroma(state, x_c , y_c, 4, EDGE_HOR, tu_boundary, chroma_tree); } } } @@ -1323,7 +1358,6 @@ static void filter_deblock_lcu_rightmost(encoder_state_t * const state, void uvg_filter_deblock_lcu(encoder_state_t * const state, int x_px, int y_px) { assert(!state->encoder_control->cfg.lossless); - filter_deblock_lcu_inside(state, x_px, y_px, EDGE_VER); if (x_px > 0) { filter_deblock_lcu_rightmost(state, x_px, y_px); diff --git a/src/search.c b/src/search.c index b6710ff2..f26a0f0a 100644 --- a/src/search.c +++ b/src/search.c @@ -133,11 +133,12 @@ static void work_tree_copy_down(int x_local, int y_local, int depth, lcu_t *work } } -void uvg_lcu_fill_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, uint8_t tr_depth) +void uvg_lcu_fill_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, uint8_t tr_depth, enum uvg_tree_type + tree_type) { const int x_local = SUB_SCU(x_px); const int y_local = SUB_SCU(y_px); - const unsigned width = LCU_WIDTH >> depth; + const unsigned width = (tree_type != UVG_CHROMA_T ? LCU_WIDTH : LCU_WIDTH_C) >> depth; for (unsigned y = 0; y < width; y += SCU_WIDTH) { for (unsigned x = 0; x < width; x += SCU_WIDTH) { @@ -989,7 +990,9 @@ static double search_cu( intra_search.pred_cu.intra.mode_chroma = intra_mode; } intra_search.pred_cu.intra.mode = intra_mode; - + if(tree_type == UVG_CHROMA_T) { + uvg_lcu_fill_trdepth(lcu, x_local, y_local, depth, depth, tree_type); + } } if (intra_cost < cost) { cost = intra_cost; @@ -1041,7 +1044,7 @@ static double search_cu( if (cur_cu->part_size != SIZE_2Nx2N) { tr_depth = depth + 1; } - uvg_lcu_fill_trdepth(lcu, x, y, depth, tr_depth); + uvg_lcu_fill_trdepth(lcu, x, y, depth, tr_depth, tree_type); const bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400; uvg_inter_recon_cu(state, lcu, x, y, cu_width, true, has_chroma); @@ -1115,7 +1118,7 @@ static double search_cu( if (cur_cu->tr_depth != depth) { // Reset transform depth since there are no coefficients. This // ensures that CBF is cleared for the whole area of the CU. - uvg_lcu_fill_trdepth(lcu, x, y, depth, depth); + uvg_lcu_fill_trdepth(lcu, x, y, depth, depth, tree_type); } cur_cu->cbf = 0; @@ -1236,7 +1239,7 @@ static double search_cu( // Disable MRL in this case cur_cu->intra.multi_ref_idx = 0; - uvg_lcu_fill_trdepth(lcu, x, y, depth, cur_cu->tr_depth); + uvg_lcu_fill_trdepth(lcu, x, y, depth, cur_cu->tr_depth, tree_type); lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); intra_search_data_t proxy; diff --git a/src/search.h b/src/search.h index 60dea9e3..7566fb96 100644 --- a/src/search.h +++ b/src/search.h @@ -93,7 +93,8 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state, cu_info_t *const pred_cu, lcu_t *const lcu); -void uvg_lcu_fill_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, uint8_t tr_depth); +void uvg_lcu_fill_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, uint8_t tr_depth, enum uvg_tree_type + tree_type); void uvg_intra_recon_lcu_luma(encoder_state_t * const state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu); void uvg_intra_recon_lcu_chroma(encoder_state_t * const state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu); diff --git a/src/search_inter.c b/src/search_inter.c index 6f3869b3..2eed1ca8 100644 --- a/src/search_inter.c +++ b/src/search_inter.c @@ -1775,7 +1775,7 @@ static void search_pu_inter(encoder_state_t * const state, cur_pu->inter.mv[0][1] = info->merge_cand[merge_idx].mv[0][1]; cur_pu->inter.mv[1][0] = info->merge_cand[merge_idx].mv[1][0]; cur_pu->inter.mv[1][1] = info->merge_cand[merge_idx].mv[1][1]; - uvg_lcu_fill_trdepth(lcu, x, y, depth, MAX(1, depth)); + uvg_lcu_fill_trdepth(lcu, x, y, depth, MAX(1, depth), UVG_BOTH_T); uvg_inter_recon_cu(state, lcu, x, y, width, true, false); uvg_quantize_lcu_residual(state, true, false, false, x, y, depth, cur_pu, lcu, true, UVG_BOTH_T); @@ -2097,7 +2097,7 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state, if (cur_cu->part_size != SIZE_2Nx2N) { tr_depth = depth + 1; } - uvg_lcu_fill_trdepth(lcu, x, y, depth, tr_depth); + uvg_lcu_fill_trdepth(lcu, x, y, depth, tr_depth, UVG_BOTH_T); const int x_px = SUB_SCU(x); const int y_px = SUB_SCU(y); diff --git a/src/search_intra.c b/src/search_intra.c index fe22ad46..215e64ab 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -625,7 +625,7 @@ static double search_intra_trdepth( if (depth == 0 || split_cost < nosplit_cost) { return split_cost; } else { - uvg_lcu_fill_trdepth(lcu, x_px, y_px, depth, depth); + uvg_lcu_fill_trdepth(lcu, x_px, y_px, depth, depth, tree_type); pred_cu->cbf = nosplit_cbf; @@ -1916,7 +1916,7 @@ void uvg_search_cu_intra( // Set transform depth to current depth, meaning no transform splits. - uvg_lcu_fill_trdepth(lcu, x_px, y_px, depth, depth); + uvg_lcu_fill_trdepth(lcu, x_px, y_px, depth, depth, tree_type); // Refine results with slower search or get some results if rough search was skipped. const int32_t rdo_level = state->encoder_control->cfg.rdo; if (rdo_level >= 2 || skip_rough_search) {