From 76b0b35c0565748ad632dbf5b4dff6d0acef6970 Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Wed, 30 Nov 2022 13:26:48 +0200 Subject: [PATCH] [mtt] fix lfnst --- src/encode_coding_tree.c | 2 +- src/encoderstate.h | 7 ++++++- src/search.c | 1 + src/strategies/avx2/quant-avx2.c | 4 ++-- src/strategies/generic/quant-generic.c | 10 +++++----- src/transform.c | 22 ++++++++++++---------- src/transform.h | 6 ++++-- 7 files changed, 31 insertions(+), 21 deletions(-) diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index eeb0e816..13321248 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -124,7 +124,7 @@ bool uvg_is_lfnst_allowed( if ((isp_mode && !uvg_can_use_isp_with_lfnst(cu_width, cu_height, isp_mode, tree_type)) || (pred_cu->type == CU_INTRA && mip_flag && !can_use_lfnst_with_mip) || (is_sep_tree && MIN(cu_width, cu_height) < 4) || - (cu_width > TR_MAX_WIDTH || cu_height > TR_MAX_WIDTH)) { + (cu_width > (TR_MAX_WIDTH >> (tree_type == UVG_CHROMA_T)) || cu_height > (TR_MAX_WIDTH >> (tree_type == UVG_CHROMA_T)))) { return false; } bool luma_flag = tree_type != UVG_CHROMA_T; diff --git a/src/encoderstate.h b/src/encoderstate.h index 8f88a7ce..027c7a00 100644 --- a/src/encoderstate.h +++ b/src/encoderstate.h @@ -362,7 +362,12 @@ typedef struct encoder_state_t { //Constraint structure void * constraint; - + // Since lfnst needs the collocated luma intra mode for + // dual tree if the chroma mode is cclm mode, and getting all of + // the information that would be necessary to get the collocated + // luma mode in the lfnst functions is cumbersome, instead store the + // current collocated luma mode in the state. 
+ int8_t collocated_luma_mode; } encoder_state_t; void uvg_encode_one_frame(encoder_state_t * const state, uvg_picture* frame); diff --git a/src/search.c b/src/search.c index 71d7144a..bd0d0cc5 100644 --- a/src/search.c +++ b/src/search.c @@ -1107,6 +1107,7 @@ static double search_cu( chroma_loc, cu_loc, &intra_search.pred_cu, is_separate_tree ? lcu : NULL, tree_type == UVG_CHROMA_T ? state->tile->frame->cu_array : NULL, UVG_CHROMA_T); + state->collocated_luma_mode = intra_mode; intra_search.pred_cu.type = CU_INTRA; } else if (intra_search.pred_cu.intra.mip_flag) { intra_mode = 0; diff --git a/src/strategies/avx2/quant-avx2.c b/src/strategies/avx2/quant-avx2.c index a7e0f2f6..d49b2f8f 100644 --- a/src/strategies/avx2/quant-avx2.c +++ b/src/strategies/avx2/quant-avx2.c @@ -703,7 +703,7 @@ int uvg_quantize_residual_avx2(encoder_state_t *const state, if (state->encoder_control->cfg.lfnst && cur_cu->type == CU_INTRA) { // Forward low frequency non-separable transform - uvg_fwd_lfnst(cur_cu, width, height, color, lfnst_index, coeff, tree_type); + uvg_fwd_lfnst(cur_cu, width, height, color, lfnst_index, coeff, tree_type, state->collocated_luma_mode); } // Quantize coeffs. 
(coeff -> coeff_out) @@ -739,7 +739,7 @@ int uvg_quantize_residual_avx2(encoder_state_t *const state, if (state->encoder_control->cfg.lfnst && cur_cu->type == CU_INTRA) { // Inverse low frequency non-separable transform - uvg_inv_lfnst(cur_cu, width, height, color, lfnst_index, coeff, tree_type); + uvg_inv_lfnst(cur_cu, width, height, color, lfnst_index, coeff, tree_type, state->collocated_luma_mode); } if (use_trskip) { uvg_itransformskip(state->encoder_control, residual, coeff, width, height); diff --git a/src/strategies/generic/quant-generic.c b/src/strategies/generic/quant-generic.c index 04a668f3..8c5649dc 100644 --- a/src/strategies/generic/quant-generic.c +++ b/src/strategies/generic/quant-generic.c @@ -314,7 +314,7 @@ int uvg_quant_cbcr_residual_generic( uvg_transform2d(state->encoder_control, combined_residual, coeff, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, cur_cu); if(cur_cu->cr_lfnst_idx) { - uvg_fwd_lfnst(cur_cu, width, height, COLOR_UV, cur_cu->cr_lfnst_idx, coeff, tree_type); + uvg_fwd_lfnst(cur_cu, width, height, COLOR_UV, cur_cu->cr_lfnst_idx, coeff, tree_type, state->collocated_luma_mode); } if (state->encoder_control->cfg.rdoq_enable && @@ -329,7 +329,7 @@ int uvg_quant_cbcr_residual_generic( } else { uvg_quant(state, coeff, coeff_out, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, - scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false, cur_cu->lfnst_idx); + scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false, cur_cu->cr_lfnst_idx); } int8_t has_coeffs = 0; @@ -349,7 +349,7 @@ int uvg_quant_cbcr_residual_generic( uvg_dequant(state, coeff_out, coeff, width, height, cur_cu->joint_cb_cr == 1 ? 
COLOR_V : COLOR_U, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false); if (cur_cu->cr_lfnst_idx) { - uvg_inv_lfnst(cur_cu, width, height, COLOR_UV, cur_cu->cr_lfnst_idx, coeff, tree_type); + uvg_inv_lfnst(cur_cu, width, height, COLOR_UV, cur_cu->cr_lfnst_idx, coeff, tree_type, state->collocated_luma_mode); } uvg_itransform2d(state->encoder_control, combined_residual, coeff, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, cur_cu); @@ -491,7 +491,7 @@ int uvg_quantize_residual_generic(encoder_state_t *const state, if (state->encoder_control->cfg.lfnst && cur_cu->type == CU_INTRA) { // Forward low frequency non-separable transform - uvg_fwd_lfnst(cur_cu, width, height, color, lfnst_index, coeff, tree_type); + uvg_fwd_lfnst(cur_cu, width, height, color, lfnst_index, coeff, tree_type, state->collocated_luma_mode); } @@ -533,7 +533,7 @@ int uvg_quantize_residual_generic(encoder_state_t *const state, if (state->encoder_control->cfg.lfnst && cur_cu->type == CU_INTRA) { // Inverse low frequency non-separable transform - uvg_inv_lfnst(cur_cu, width, height, color, lfnst_index, coeff, tree_type); + uvg_inv_lfnst(cur_cu, width, height, color, lfnst_index, coeff, tree_type, state->collocated_luma_mode); } if (use_trskip) { uvg_itransformskip(state->encoder_control, residual, coeff, width, height); diff --git a/src/transform.c b/src/transform.c index 7e2b64ee..54ec2ecd 100644 --- a/src/transform.c +++ b/src/transform.c @@ -554,9 +554,9 @@ void uvg_chroma_transform_search( bool v_has_coeffs = false; bool is_jccr = IS_JCCR_MODE(transforms[i]); if(pred_cu->cr_lfnst_idx) { - uvg_fwd_lfnst(pred_cu, width, height, COLOR_U, pred_cu->cr_lfnst_idx, &u_coeff[i * trans_offset], tree_type); + uvg_fwd_lfnst(pred_cu, width, height, COLOR_U, pred_cu->cr_lfnst_idx, &u_coeff[i * trans_offset], tree_type, state->collocated_luma_mode); if (!is_jccr) { - uvg_fwd_lfnst(pred_cu, width, height, COLOR_V, pred_cu->cr_lfnst_idx, &v_coeff[i * trans_offset], tree_type); + 
uvg_fwd_lfnst(pred_cu, width, height, COLOR_V, pred_cu->cr_lfnst_idx, &v_coeff[i * trans_offset], tree_type, state->collocated_luma_mode); } } quantize_chroma( @@ -572,7 +572,7 @@ void uvg_chroma_transform_search( &u_has_coeffs, &v_has_coeffs, pred_cu->cr_lfnst_idx); - if(pred_cu->cr_lfnst_idx !=0 && !u_has_coeffs && !v_has_coeffs) continue; + if(pred_cu->cr_lfnst_idx !=0 && !u_has_coeffs && !v_has_coeffs) continue; if(pred_cu->type == CU_INTRA && transforms[i] != CHROMA_TS && tree_type == UVG_CHROMA_T) { bool constraints[2] = { false, false }; @@ -591,7 +591,7 @@ void uvg_chroma_transform_search( if (transforms[i] != CHROMA_TS) { if (pred_cu->cr_lfnst_idx) { - uvg_inv_lfnst(pred_cu, width, height, COLOR_U, pred_cu->cr_lfnst_idx, &u_coeff[i * trans_offset], tree_type); + uvg_inv_lfnst(pred_cu, width, height, COLOR_U, pred_cu->cr_lfnst_idx, &u_coeff[i * trans_offset], tree_type, state->collocated_luma_mode); } uvg_itransform2d(state->encoder_control, u_recon_resi, &u_coeff[i * trans_offset], width, height, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V, pred_cu); @@ -622,7 +622,7 @@ void uvg_chroma_transform_search( if (transforms[i] != CHROMA_TS) { if (pred_cu->cr_lfnst_idx) { - uvg_inv_lfnst(pred_cu, width, height, COLOR_V, pred_cu->cr_lfnst_idx, &v_coeff[i * trans_offset], tree_type); + uvg_inv_lfnst(pred_cu, width, height, COLOR_V, pred_cu->cr_lfnst_idx, &v_coeff[i * trans_offset], tree_type, state->collocated_luma_mode); } uvg_itransform2d(state->encoder_control, v_recon_resi, &v_coeff[i * trans_offset], width, height, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V, pred_cu); @@ -856,7 +856,8 @@ void uvg_fwd_lfnst( const color_t color, const uint16_t lfnst_idx, coeff_t *coeffs, - enum uvg_tree_type tree_type) + enum uvg_tree_type tree_type, + int8_t luma_mode) { const uint16_t lfnst_index = lfnst_idx; const uint32_t log2_width = uvg_g_convert_to_log2[width]; @@ -879,7 +880,7 @@ void uvg_fwd_lfnst( const uint32_t* scan = whge3 ? 
uvg_coef_top_left_diag_scan_8x8[log2_width] : uvg_g_sig_last_scan[scan_order][log2_width - 1]; if (is_cclm_mode) { - intra_mode = cur_cu->intra.mode; + intra_mode = luma_mode; } if (is_mip && color == COLOR_Y) { intra_mode = 0; // Set to planar mode @@ -989,7 +990,8 @@ void uvg_inv_lfnst( const color_t color, const uint16_t lfnst_idx, coeff_t *coeffs, - enum uvg_tree_type tree_type) + enum uvg_tree_type tree_type, + int8_t luma_mode) { // In VTM, max log2 dynamic range is something in range [15, 20] depending on whether extended precision processing is enabled // Such is not yet present in uvg266 so use 15 for now @@ -1010,7 +1012,7 @@ void uvg_inv_lfnst( const uint32_t* scan = whge3 ? uvg_coef_top_left_diag_scan_8x8[log2_width] : uvg_g_sig_last_scan[scan_order][log2_width - 1]; if (is_cclm_mode) { - intra_mode = cur_cu->intra.mip_flag ? 0 : cur_cu->intra.mode; + intra_mode = luma_mode; } if (is_mip && color == COLOR_Y) { intra_mode = 0; // Set to planar mode @@ -1299,7 +1301,7 @@ static void quantize_tr_residual( for (int j = 0; j < tr_height; ++j) { memcpy(&dst_coeff[j * lcu_width], &coeff[j * tr_width], tr_width * sizeof(coeff_t)); } - cbf_set(&cur_pu->cbf, color); + cbf_set(&cur_pu->cbf, COLOR_U); } else { for (int j = 0; j < tr_height; ++j) { diff --git a/src/transform.h b/src/transform.h index e96a2893..d2b95ca8 100644 --- a/src/transform.h +++ b/src/transform.h @@ -131,7 +131,8 @@ void uvg_fwd_lfnst( const color_t color, const uint16_t lfnst_idx, coeff_t *coeffs, - enum uvg_tree_type tree_type); + enum uvg_tree_type tree_type, + int8_t luma_mode); void uvg_inv_lfnst( const cu_info_t* cur_cu, @@ -140,6 +141,7 @@ void uvg_inv_lfnst( const color_t color, const uint16_t lfnst_idx, coeff_t* coeffs, - enum uvg_tree_type tree_type); + enum uvg_tree_type tree_type, + int8_t luma_mode); #endif