From b988c60dd12fe67400ea0fb6c8bc1b0662d0509a Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Fri, 2 Dec 2022 13:56:38 +0200 Subject: [PATCH] [mtt] search works completely with everything except RDOQ deblock and ISP --- src/encode_coding_tree.c | 9 +++++---- src/search.c | 41 ++++++++++++++++++++++++++++------------ src/search_intra.c | 3 ++- 3 files changed, 36 insertions(+), 17 deletions(-) diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index 4f0c3f10..ab79ee92 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -657,7 +657,7 @@ static void encode_transform_coeff( split_cu_loc[i].chroma_height = split_cu_loc[i].height; } encode_transform_coeff(state, &split_cu_loc[i], only_chroma, - coeff, NULL, tree_type, true, false, luma_cbf_ctx, &split_cu_loc[i], chroma_loc); + coeff, NULL, tree_type, true, false, luma_cbf_ctx, &split_cu_loc[i], chroma_loc ? &split_cu_loc[i] : NULL); } return; } @@ -1399,7 +1399,7 @@ void uvg_encode_coding_tree( DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_CU_TYPE, abs_x, abs_y, cu_width, cu_height, cur_cu->type-1); - fprintf(stderr, "%4d %4d %2d %2d %d\n", x, y, cu_width, cu_height, has_chroma); + // fprintf(stderr, "%4d %4d %2d %2d %d\n", x, y, cu_width, cu_height, has_chroma); if (ctrl->cfg.lossless) { cabac->cur_ctx = &cabac->ctx.cu_transquant_bypass; @@ -1616,8 +1616,9 @@ void uvg_encode_coding_tree( if (tree_type != UVG_CHROMA_T) { encode_lfnst_idx(state, cabac, cur_cu, is_local_dual_tree && state->encoder_control->chroma_format != UVG_CSP_400 ? UVG_LUMA_T : tree_type, COLOR_Y, cu_loc); + + encode_mts_idx(state, cabac, cur_cu, cu_loc); } - encode_mts_idx(state, cabac, cur_cu, cu_loc); // For 4x4 the chroma PU/TU is coded after the last if (state->encoder_control->chroma_format != UVG_CSP_400 && @@ -1777,7 +1778,7 @@ double uvg_mock_encode_coding_unit( int8_t luma_dir = uvg_get_co_located_luma_mode(chroma_loc,cu_loc , cur_cu, tree_type != UVG_CHROMA_T ? lcu : NULL, tree_type == UVG_CHROMA_T ? 
state->tile->frame->cu_array : NULL, is_separate_tree ? UVG_CHROMA_T : tree_type); - encode_chroma_intra_cu(cabac, cur_cu, state->encoder_control->cfg.cclm && uvg_cclm_is_allowed(state, cu_loc, cur_cu, tree_type), luma_dir, &bits); + encode_chroma_intra_cu(cabac, cur_cu, state->encoder_control->cfg.cclm && uvg_cclm_is_allowed(state, chroma_loc, cur_cu, tree_type), luma_dir, &bits); } } else { diff --git a/src/search.c b/src/search.c index f325e785..c5480a2e 100644 --- a/src/search.c +++ b/src/search.c @@ -76,7 +76,9 @@ static INLINE void copy_cu_info(lcu_t *from, lcu_t *to, const cu_loc_t* const cu } -static INLINE void initialize_partial_work_tree(lcu_t* from, lcu_t *to, const cu_loc_t * const cu_loc, const enum uvg_tree_type tree_type) { +static INLINE void initialize_partial_work_tree(lcu_t* from, lcu_t *to, const cu_loc_t * const cu_loc, const cu_loc_t* const + chroma_loc, + const enum uvg_tree_type tree_type) { const int y_limit = LCU_WIDTH >> (tree_type == UVG_CHROMA_T); const int x_limit = LCU_WIDTH >> (tree_type == UVG_CHROMA_T); @@ -90,8 +92,8 @@ static INLINE void initialize_partial_work_tree(lcu_t* from, lcu_t *to, const cu uvg_pixels_blit(from->rec.y, to->rec.y, cu_loc->local_x, LCU_WIDTH, LCU_WIDTH, LCU_WIDTH); } if(tree_type != UVG_LUMA_T && from->ref.chroma_format != UVG_CSP_400) { - uvg_pixels_blit(from->rec.u, to->rec.u, cu_loc->local_x / 2, LCU_WIDTH_C, LCU_WIDTH_C, LCU_WIDTH_C); - uvg_pixels_blit(from->rec.v, to->rec.v, cu_loc->local_x / 2, LCU_WIDTH_C, LCU_WIDTH_C, LCU_WIDTH_C); + uvg_pixels_blit(from->rec.u, to->rec.u, chroma_loc->local_x / 2, LCU_WIDTH_C, LCU_WIDTH_C, LCU_WIDTH_C); + uvg_pixels_blit(from->rec.v, to->rec.v, chroma_loc->local_x / 2, LCU_WIDTH_C, LCU_WIDTH_C, LCU_WIDTH_C); } } @@ -106,11 +108,11 @@ static INLINE void initialize_partial_work_tree(lcu_t* from, lcu_t *to, const cu LCU_WIDTH, LCU_WIDTH); } if (tree_type != UVG_LUMA_T && from->ref.chroma_format != UVG_CSP_400) { - uvg_pixels_blit(&from->rec.u[cu_loc->local_x / 2], 
&to->rec.u[cu_loc->local_x / 2], - LCU_WIDTH_C - cu_loc->local_x / 2, cu_loc->local_y / 2, + uvg_pixels_blit(&from->rec.u[chroma_loc->local_x / 2], &to->rec.u[chroma_loc->local_x / 2], + LCU_WIDTH_C - chroma_loc->local_x / 2, chroma_loc->local_y / 2, LCU_WIDTH_C, LCU_WIDTH_C); - uvg_pixels_blit(&from->rec.v[cu_loc->local_x / 2], &to->rec.v[cu_loc->local_x / 2], - LCU_WIDTH_C - cu_loc->local_x / 2, cu_loc->local_y / 2, + uvg_pixels_blit(&from->rec.v[chroma_loc->local_x / 2], &to->rec.v[chroma_loc->local_x / 2], + LCU_WIDTH_C - chroma_loc->local_x / 2, chroma_loc->local_y / 2, LCU_WIDTH_C, LCU_WIDTH_C); } } @@ -129,16 +131,15 @@ static INLINE void initialize_partial_work_tree(lcu_t* from, lcu_t *to, const cu } if(tree_type != UVG_LUMA_T && from->ref.chroma_format != UVG_CSP_400) { - const int offset = cu_loc->local_x / 2 + cu_loc->local_y / 2 * LCU_WIDTH_C; - uvg_pixels_blit(&from->ref.u[offset], &to->ref.u[offset], cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C, LCU_WIDTH_C); - uvg_pixels_blit(&from->ref.v[offset], &to->ref.v[offset], cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C, LCU_WIDTH_C); + const int offset = chroma_loc->local_x / 2 + chroma_loc->local_y / 2 * LCU_WIDTH_C; + uvg_pixels_blit(&from->ref.u[offset], &to->ref.u[offset], chroma_loc->chroma_width, chroma_loc->chroma_height, LCU_WIDTH_C, LCU_WIDTH_C); + uvg_pixels_blit(&from->ref.v[offset], &to->ref.v[offset], chroma_loc->chroma_width, chroma_loc->chroma_height, LCU_WIDTH_C, LCU_WIDTH_C); } const int y_start = (cu_loc->local_y >> (tree_type == UVG_CHROMA_T)) - 4; const int x_start = (cu_loc->local_x >> (tree_type == UVG_CHROMA_T)) - 4; for (int y = y_start; y < y_limit; y += SCU_WIDTH) { *LCU_GET_CU_AT_PX(to, x_start, y) = *LCU_GET_CU_AT_PX(from, x_start, y); - } for (int x = x_start; x < x_limit; x += SCU_WIDTH) { *LCU_GET_CU_AT_PX(to, x, y_start) = *LCU_GET_CU_AT_PX(from, x, y_start); @@ -149,6 +150,22 @@ static INLINE void initialize_partial_work_tree(lcu_t* from, lcu_t *to, 
const cu memset(LCU_GET_CU_AT_PX(to, x, y), 0, sizeof(cu_info_t)); } } + + if((chroma_loc->local_y != cu_loc->local_y || chroma_loc->local_x != cu_loc->local_x) && tree_type == UVG_BOTH_T) { + const int y_start = (chroma_loc->local_y >> (tree_type == UVG_CHROMA_T)) - 4; + const int x_start = (chroma_loc->local_x >> (tree_type == UVG_CHROMA_T)) - 4; + for (int y = y_start; y < y_limit; y += SCU_WIDTH) { + *LCU_GET_CU_AT_PX(to, x_start, y) = *LCU_GET_CU_AT_PX(from, x_start, y); + } + if (chroma_loc->local_x == 0) { + to->left_ref = from->left_ref; + *LCU_GET_TOP_RIGHT_CU(to) = *LCU_GET_TOP_RIGHT_CU(from); + } + if (chroma_loc->local_y == 0) { + to->top_ref = from->top_ref; + *LCU_GET_TOP_RIGHT_CU(to) = *LCU_GET_TOP_RIGHT_CU(from); + } + } } static INLINE void copy_cu_pixels( @@ -1425,7 +1442,7 @@ static double search_cu( cu_loc_t new_cu_loc[4]; uint8_t separate_chroma = 0; const int splits = uvg_get_split_locs(cu_loc, split_type, new_cu_loc, &separate_chroma); - initialize_partial_work_tree(lcu, &split_lcu[split_type - 1], cu_loc, tree_type); + initialize_partial_work_tree(lcu, &split_lcu[split_type - 1], cu_loc, separate_chroma ? chroma_loc : cu_loc , tree_type); for (int split = 0; split < splits; ++split) { new_split.part_index = split; split_cost += search_cu(state, diff --git a/src/search_intra.c b/src/search_intra.c index 4ee36f95..2856a7d4 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -377,7 +377,8 @@ static double search_intra_trdepth( pred_cu->mts_last_scan_pos = 0; pred_cu->violates_mts_coeff_constraint = 0; - if (trafo == MTS_SKIP && (width > (1 << state->encoder_control->cfg.trskip_max_size) + if (trafo == MTS_SKIP && ((width > (1 << state->encoder_control->cfg.trskip_max_size) + || (height > (1 << state->encoder_control->cfg.trskip_max_size))) || !state->encoder_control->cfg.trskip_enable)) { continue; }