diff --git a/src/cu.c b/src/cu.c index e84526bd..b0cb2a63 100644 --- a/src/cu.c +++ b/src/cu.c @@ -312,15 +312,16 @@ void uvg_cu_loc_ctor(cu_loc_t* loc, int x, int y, int width, int height) loc->height = height; // TODO: when MTT is implemented, chroma dimensions can be minimum 2. // Chroma width is half of luma width, when not at maximum depth. - loc->chroma_width = MAX(width >> 1, 4); - loc->chroma_height = MAX(height >> 1, 4); + loc->chroma_width = width >> 1; + loc->chroma_height = height >> 1; } int uvg_get_split_locs( const cu_loc_t* const origin, enum split_type split, - cu_loc_t out[4]) + cu_loc_t out[4], + uint8_t* separate_chroma) { const int half_width = origin->width >> 1; const int half_height = origin->height >> 1; @@ -336,24 +337,29 @@ int uvg_get_split_locs( uvg_cu_loc_ctor(&out[1], origin->x + half_width, origin->y, half_width, half_height); uvg_cu_loc_ctor(&out[2], origin->x, origin->y + half_height, half_width, half_height); uvg_cu_loc_ctor(&out[3], origin->x + half_width, origin->y + half_height, half_width, half_height); + if (half_height == 4 && separate_chroma) *separate_chroma = 1; return 4; case BT_HOR_SPLIT: uvg_cu_loc_ctor(&out[0], origin->x, origin->y, origin->width, half_height); uvg_cu_loc_ctor(&out[1], origin->x, origin->y + half_height, origin->width, half_height); + if (half_height * origin->width < 64 && separate_chroma) *separate_chroma = 1; return 2; case BT_VER_SPLIT: uvg_cu_loc_ctor(&out[0], origin->x, origin->y, half_width, origin->height); uvg_cu_loc_ctor(&out[1], origin->x + half_width, origin->y, half_width, origin->height); + if (half_width == 4 && separate_chroma) *separate_chroma = 1; return 2; case TT_HOR_SPLIT: uvg_cu_loc_ctor(&out[0], origin->x, origin->y, origin->width, quarter_height); uvg_cu_loc_ctor(&out[1], origin->x, origin->y + quarter_height, origin->width, half_height); uvg_cu_loc_ctor(&out[2], origin->x, origin->y + quarter_height + half_height, origin->width, quarter_height); + if (quarter_height * origin->width < 64 && separate_chroma) *separate_chroma = 1; return 3; case TT_VER_SPLIT: uvg_cu_loc_ctor(&out[0], origin->x, origin->y, quarter_width, origin->height); uvg_cu_loc_ctor(&out[1], origin->x + quarter_width, origin->y, half_width, origin->height); uvg_cu_loc_ctor(&out[2], origin->x + quarter_width + half_width, origin->y, quarter_width, origin->height); + if (quarter_width == 4 && separate_chroma) *separate_chroma = 1; return 3; } return 0; diff --git a/src/cu.h b/src/cu.h index 6440f6f2..48a021c3 100644 --- a/src/cu.h +++ b/src/cu.h @@ -189,7 +189,8 @@ void uvg_cu_loc_ctor(cu_loc_t *loc, int x, int y, int width, int height); int uvg_get_split_locs( const cu_loc_t* const origin, enum split_type split, - cu_loc_t out[4]); + cu_loc_t out[4], + uint8_t* separate_chroma); #define CU_GET_MV_CAND(cu_info_ptr, reflist) \ diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index f93b6cf7..c908449d 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -466,7 +466,6 @@ static void encode_chroma_tu( uvg_get_sub_coeff(coeff_v, coeff->v, x_local, y_local, cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C); if (cbf_is_set(cur_pu->cbf, COLOR_U)) { - // TODO: height for this check and the others below if(state->encoder_control->cfg.trskip_enable && width_c <= (1 << state->encoder_control->cfg.trskip_max_size) && height_c <= (1 << state->encoder_control->cfg.trskip_max_size)){ @@ -505,8 +504,9 @@ static void encode_chroma_tu( static void encode_transform_unit( encoder_state_t * const state, const cu_loc_t *cu_loc, - bool only_chroma, + const cu_info_t* cur_pu, lcu_coeff_t* coeff, + bool only_chroma, enum uvg_tree_type tree_type, bool last_split, const cu_loc_t *original_loc) // Original cu dimensions, before CU split @@ -524,7 +524,9 @@ static void encode_transform_unit( int isp_x = x; int isp_y = y; uvg_get_isp_cu_arr_coords(&isp_x, &isp_y); - const cu_info_t *cur_pu = uvg_cu_array_at_const(used_cu_array, isp_x, isp_y); + if(cur_pu == NULL) { + cur_pu = uvg_cu_array_at_const(used_cu_array, isp_x, isp_y); + } int8_t scan_idx = SCAN_DIAG; @@ -540,7 +542,10 @@ static void encode_transform_unit( // CoeffNxN // Residual Coding - if(state->encoder_control->cfg.trskip_enable && width <= (1 << state->encoder_control->cfg.trskip_max_size) && !(cur_pu->type == CU_INTRA && cur_pu->intra.isp_mode != ISP_MODE_NO_ISP)) { + if(state->encoder_control->cfg.trskip_enable + && width <= (1 << state->encoder_control->cfg.trskip_max_size) + && height <= (1 << state->encoder_control->cfg.trskip_max_size) + && !(cur_pu->type == CU_INTRA && cur_pu->intra.isp_mode != ISP_MODE_NO_ISP)) { cabac->cur_ctx = &cabac->ctx.transform_skip_model_luma; CABAC_BIN(cabac, cur_pu->tr_idx == MTS_SKIP, "transform_skip_flag"); DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_TR_SKIP, x, y, width, height, (cur_pu->tr_idx == MTS_SKIP) ? 1 : 0); @@ -561,7 +566,7 @@ static void encode_transform_unit( } bool joint_chroma = cur_pu->joint_cb_cr != 0; - if (cur_pu->log2_height + cur_pu->log2_width < 6 && tree_type != UVG_CHROMA_T) { + if (cur_pu->log2_height + cur_pu->log2_width < 6 && tree_type != UVG_CHROMA_T && !only_chroma) { // For size 4x4 luma transform the corresponding chroma transforms are // also of size 4x4 covering 8x8 luma pixels. The residual is coded in // the last transform unit. @@ -597,6 +602,7 @@ static void encode_transform_coeff( const cu_loc_t * cu_loc, bool only_chroma, lcu_coeff_t* coeff, + const cu_info_t* cur_tu, enum uvg_tree_type tree_type, bool last_split, bool can_skip_last_cbf, @@ -604,10 +610,6 @@ static void encode_transform_coeff( const cu_loc_t * const original_loc) // Original dimensions before ISP split { cabac_data_t * const cabac = &state->cabac; - int x = cu_loc->x; - int y = cu_loc->y; - const int width = cu_loc->width; - const int height = cu_loc->height; bool isp_split = cu_loc->x != original_loc->x || cu_loc->y != original_loc->y; @@ -618,20 +620,16 @@ static void encode_transform_coeff( //const encoder_control_t *const ctrl = state->encoder_control; const videoframe_t * const frame = state->tile->frame; const cu_array_t* used_array = tree_type != UVG_CHROMA_T ? frame->cu_array : frame->chroma_cu_array; - - const cu_info_t *cur_pu = uvg_cu_array_at_const(used_array, x, y); - // Round coordinates down to a multiple of 8 to get the location of the - // containing CU. - const int x_cu = 8 * (x / 8); - const int y_cu = 8 * (y / 8); - const cu_info_t *cur_cu = uvg_cu_array_at_const(used_array, x, y); // TODO: very suspect, chroma cbfs stored in upper left corner, everything else in bottom right for depth 4 + if(cur_tu == NULL) { + cur_tu = uvg_cu_array_at_const(used_array, cu_loc->x, cu_loc->y); + } const bool ver_split = cu_loc->height > TR_MAX_WIDTH; const bool hor_split = cu_loc->width > TR_MAX_WIDTH; - const int cb_flag_y = tree_type != UVG_CHROMA_T ? cbf_is_set(cur_pu->cbf, COLOR_Y) : 0; - const int cb_flag_u = tree_type != UVG_LUMA_T ?( cur_pu->joint_cb_cr ? (cur_pu->joint_cb_cr >> 1) & 1 : cbf_is_set(cur_cu->cbf, COLOR_U)) : 0; - const int cb_flag_v = tree_type != UVG_LUMA_T ? (cur_pu->joint_cb_cr ? cur_pu->joint_cb_cr & 1 : cbf_is_set(cur_cu->cbf, COLOR_V)) : 0; + const int cb_flag_y = tree_type != UVG_CHROMA_T ? cbf_is_set(cur_tu->cbf, COLOR_Y) : 0; + const int cb_flag_u = tree_type != UVG_LUMA_T ?(cur_tu->joint_cb_cr ? (cur_tu->joint_cb_cr >> 1) & 1 : cbf_is_set(cur_tu->cbf, COLOR_U)) : 0; + const int cb_flag_v = tree_type != UVG_LUMA_T ? (cur_tu->joint_cb_cr ? cur_tu->joint_cb_cr & 1 : cbf_is_set(cur_tu->cbf, COLOR_V)) : 0; if (hor_split || ver_split) { @@ -647,9 +645,9 @@ static void encode_transform_coeff( } cu_loc_t split_cu_loc[4]; - const int split_count = uvg_get_split_locs(cu_loc, split, split_cu_loc); + const int split_count = uvg_get_split_locs(cu_loc, split, split_cu_loc,NULL); for (int i = 0; i < split_count; ++i) { - encode_transform_coeff(state, &split_cu_loc[i], only_chroma, coeff, tree_type, true, luma_cbf_ctx, &split_cu_loc[i]); + encode_transform_coeff(state, &split_cu_loc[i], only_chroma, coeff, NULL, tree_type, true, luma_cbf_ctx, &split_cu_loc[i]); } return; } @@ -658,7 +656,7 @@ static void encode_transform_coeff( // Not the last CU for area of 64 pixels cowered by more than one luma CU. // Not the last ISP Split if (state->encoder_control->chroma_format != UVG_CSP_400 - && (cur_pu->log2_height + cur_pu->log2_width >= 6 || only_chroma) + && (cur_tu->log2_height + cur_tu->log2_width >= 6 || only_chroma) && tree_type != UVG_LUMA_T && last_split) { cabac->cur_ctx = &(cabac->ctx.qt_cbf_model_cb[0]); @@ -684,22 +682,22 @@ static void encode_transform_coeff( // - transform depth > 0 // - we have chroma coefficients at this level // When it is not present, it is inferred to be 1. - if ((cur_cu->type == CU_INTRA || !PU_IS_TU(cur_cu) || cb_flag_u || cb_flag_v) && !only_chroma && tree_type != UVG_CHROMA_T) { + if ((cur_tu->type == CU_INTRA || !PU_IS_TU(cur_tu) || cb_flag_u || cb_flag_v) && !only_chroma && tree_type != UVG_CHROMA_T) { if (can_skip_last_cbf && isp_split && last_split) { // Do not write luma cbf if first three isp splits have luma cbf 0 } else { cabac->cur_ctx = &(cabac->ctx.qt_cbf_model_luma[*luma_cbf_ctx]); CABAC_BIN(cabac, cb_flag_y, "cbf_luma"); - if (PU_IS_TU(cur_cu)) { + if (PU_IS_TU(cur_tu)) { *luma_cbf_ctx = 2 + cb_flag_y; } } } if (cb_flag_y | cb_flag_u | cb_flag_v) { - if (state->must_code_qp_delta && (only_chroma || cb_flag_y || cur_pu->log2_height + cur_pu->log2_width >= 6) ) { - const int qp_pred = uvg_get_cu_ref_qp(state, x_cu, y_cu, state->last_qp); - const int qp_delta = cur_cu->qp - qp_pred; + if (state->must_code_qp_delta && (only_chroma || cb_flag_y || cur_tu->log2_height + cur_tu->log2_width >= 6) ) { + const int qp_pred = uvg_get_cu_ref_qp(state, cu_loc->x, cu_loc->y, state->last_qp); + const int qp_delta = cur_tu->qp - qp_pred; // Possible deltaQP range depends on bit depth as stated in HEVC specification. assert(qp_delta >= UVG_QP_DELTA_MIN && qp_delta <= UVG_QP_DELTA_MAX && "QP delta not in valid range."); @@ -722,18 +720,18 @@ static void encode_transform_coeff( } if(( ((cb_flag_u || cb_flag_v ) - && cur_cu->type == CU_INTRA) + && cur_tu->type == CU_INTRA) || (cb_flag_u && cb_flag_v)) - && (cur_pu->log2_height + cur_pu->log2_width >= 6 || only_chroma || tree_type == UVG_CHROMA_T) + && (cur_tu->log2_height + cur_tu->log2_width >= 6 || only_chroma || tree_type == UVG_CHROMA_T) && state->encoder_control->cfg.jccr && last_split ) { - assert(cur_pu->joint_cb_cr < 4 && "JointCbCr is in search state."); + assert(cur_tu->joint_cb_cr < 4 && "JointCbCr is in search state."); cabac->cur_ctx = &cabac->ctx.joint_cb_cr[cb_flag_u * 2 + cb_flag_v - 1]; - CABAC_BIN(cabac, cur_pu->joint_cb_cr != 0, "tu_joint_cbcr_residual_flag"); + CABAC_BIN(cabac, cur_tu->joint_cb_cr != 0, "tu_joint_cbcr_residual_flag"); } - encode_transform_unit(state, cu_loc, only_chroma, coeff, tree_type, last_split, original_loc); + encode_transform_unit(state, cu_loc, only_chroma ? cur_tu : NULL, coeff, only_chroma, tree_type, last_split, original_loc); } } @@ -1261,10 +1259,10 @@ uint8_t uvg_write_split_flag( bh_split = bv_split = th_split = tv_split = false; } else { - bv_split &= cu_width <= state->encoder_control->cfg.max_bt_size[slice_type]; - tv_split &= cu_width <= state->encoder_control->cfg.max_tt_size[slice_type]; - bh_split &= cu_height <= state->encoder_control->cfg.max_bt_size[slice_type]; - th_split &= cu_height <= state->encoder_control->cfg.max_tt_size[slice_type]; + bv_split &= cu_width <= state->encoder_control->cfg.max_bt_size[slice_type] && cu_width > state->encoder_control->cfg.min_qt_size[slice_type]; + tv_split &= cu_width <= state->encoder_control->cfg.max_tt_size[slice_type] && cu_width > 2 * state->encoder_control->cfg.min_qt_size[slice_type]; + bh_split &= cu_height <= state->encoder_control->cfg.max_bt_size[slice_type] && cu_height > state->encoder_control->cfg.min_qt_size[slice_type]; + th_split &= cu_height <= state->encoder_control->cfg.max_tt_size[slice_type] && cu_height > 2 * state->encoder_control->cfg.min_qt_size[slice_type]; } bool allow_split = allow_qt | bh_split | bv_split | th_split | tv_split; @@ -1307,12 +1305,12 @@ uint8_t uvg_write_split_flag( unsigned left_qt_depth = 0; unsigned top_qt_depth = 0; if(left_cu) { - while (((left_cu->split_tree >> left_qt_depth) & 7u) == QT_SPLIT) { + while (((left_cu->split_tree >> (left_qt_depth * 3)) & 7u) == QT_SPLIT) { left_qt_depth++; } } if(above_cu) { - while (((above_cu->split_tree >> top_qt_depth) & 7u) == QT_SPLIT) { + while (((above_cu->split_tree >> (top_qt_depth * 3)) & 7u) == QT_SPLIT) { top_qt_depth++; } } @@ -1353,7 +1351,9 @@ void uvg_encode_coding_tree( lcu_coeff_t *coeff, enum uvg_tree_type tree_type, const cu_loc_t* const cu_loc, - split_tree_t split_tree) + const cu_loc_t* const chroma_loc, + split_tree_t split_tree, + bool has_chroma) { cabac_data_t * const cabac = &state->cabac; const encoder_control_t * const ctrl = state->encoder_control; @@ -1410,13 +1410,15 @@ void uvg_encode_coding_tree( NULL); if (split_flag || border) { - const int half_luma = cu_loc->width / 2; const split_tree_t new_split_tree = { cur_cu->split_tree, split_tree.current_depth + 1, split_tree.mtt_depth + (split_flag != QT_SPLIT)}; cu_loc_t new_cu_loc[4]; - const int splits = uvg_get_split_locs(cu_loc, split_flag, new_cu_loc); + uint8_t separate_chroma = 0; + const int splits = uvg_get_split_locs(cu_loc, split_flag, new_cu_loc, &separate_chroma); for (int split = 0; split encoder_control->chroma_format != UVG_CSP_400 ? UVG_LUMA_T : tree_type, COLOR_Y, cu_loc); } encode_mts_idx(state, cabac, cur_cu, cu_loc); // For 4x4 the chroma PU/TU is coded after the last if (state->encoder_control->chroma_format != UVG_CSP_400 && - ((depth == 4 && x % 8 && y % 8) || tree_type == UVG_CHROMA_T) && + (has_chroma || tree_type == UVG_CHROMA_T) && tree_type != UVG_LUMA_T) { encode_chroma_intra_cu(cabac, cur_cu, state->encoder_control->cfg.cclm, NULL); // LFNST constraints must be reset here. Otherwise the left over values will interfere when calculating new constraints - cu_info_t* tmp = uvg_cu_array_at((cu_array_t*)used_array, x, y); + cu_info_t* tmp = (cu_info_t*)cur_cu; tmp->violates_lfnst_constrained_luma = false; tmp->violates_lfnst_constrained_chroma = false; tmp->lfnst_last_scan_pos = false; - encode_transform_coeff(state, cu_loc, 1, coeff, tree_type, true, false, &luma_cbf_ctx, cu_loc); + encode_transform_coeff(state, chroma_loc, 1, coeff, cur_cu, tree_type, true, false, &luma_cbf_ctx, chroma_loc); // Write LFNST only once for single tree structure - encode_lfnst_idx(state, cabac, tmp, is_local_dual_tree ? UVG_CHROMA_T : tree_type, COLOR_UV, cu_loc); + encode_lfnst_idx(state, cabac, tmp, is_local_dual_tree ? UVG_CHROMA_T : tree_type, COLOR_UV, chroma_loc); } } diff --git a/src/encode_coding_tree.h b/src/encode_coding_tree.h index 357e059a..5a9b4023 100644 --- a/src/encode_coding_tree.h +++ b/src/encode_coding_tree.h @@ -54,7 +54,9 @@ void uvg_encode_coding_tree( lcu_coeff_t *coeff, enum uvg_tree_type tree_type, const cu_loc_t* const cu_loc, - split_tree_t split_tree); + const cu_loc_t* const chroma_loc, + split_tree_t split_tree, + bool has_chroma); void uvg_encode_ts_residual(encoder_state_t* const state, cabac_data_t* const cabac, diff --git a/src/encoder_state-bitstream.c b/src/encoder_state-bitstream.c index 8e9f7c52..920331a5 100644 --- a/src/encoder_state-bitstream.c +++ b/src/encoder_state-bitstream.c @@ -1125,7 +1125,7 @@ static void uvg_encoder_state_write_bitstream_picture_header( WRITE_U(stream, 0, 1, "ph_mvd_l1_zero_flag"); } - if (encoder->cfg.jccr) { + if (encoder->cfg.jccr && encoder->chroma_format != UVG_CSP_400) { WRITE_U(stream, state->frame->jccr_sign, 1, "ph_joint_cbcr_sign_flag"); } // END PICTURE HEADER diff --git a/src/encoderstate.c b/src/encoderstate.c index e8af6add..6c7517d8 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -885,11 +885,11 @@ static void encoder_state_worker_encode_lcu_bitstream(void * opaque) uvg_cu_loc_ctor(&start, lcu->position.x * LCU_WIDTH, lcu->position.y * LCU_WIDTH, LCU_WIDTH, LCU_WIDTH); split_tree_t split_tree = { 0, 0, 0 }; - uvg_encode_coding_tree(state, lcu->coeff, tree_type, &start, split_tree); + uvg_encode_coding_tree(state, lcu->coeff, tree_type, &start, &start, split_tree, true); if(tree_type == UVG_LUMA_T && state->encoder_control->chroma_format != UVG_CSP_400) { uvg_cu_loc_ctor(&start, lcu->position.x * LCU_WIDTH_C, lcu->position.y * LCU_WIDTH_C, LCU_WIDTH, LCU_WIDTH); - uvg_encode_coding_tree(state, lcu->coeff, UVG_CHROMA_T, &start, split_tree); + uvg_encode_coding_tree(state, lcu->coeff, UVG_CHROMA_T, &start, &start, split_tree, true); } if (!state->cabac.only_count) { diff --git a/src/global.h b/src/global.h index 87ca92ee..a6a109c5 100644 --- a/src/global.h +++ b/src/global.h @@ -128,9 +128,9 @@ typedef int16_t coeff_t; typedef int32_t mv_t; -//#define VERBOSE 1 +#define VERBOSE 1 #define UVG_DEBUG_PRINT_CABAC 1 -//#define UVG_DEBUG 1 +#define UVG_DEBUG 1 //#define UVG_DEBUG_PRINT_YUVIEW_CSV 1 //#define UVG_DEBUG_PRINT_MV_INFO 1 diff --git a/src/intra.c b/src/intra.c index 398ebc39..5db5abe5 100644 --- a/src/intra.c +++ b/src/intra.c @@ -916,7 +916,8 @@ static void mip_predict( } -int8_t uvg_wide_angle_correction(int_fast8_t mode, const bool is_isp, const int log2_width, const int log2_height) +int8_t uvg_wide_angle_correction(int_fast8_t mode, const bool is_isp, const int log2_width, const int log2_height, const + bool account_for_dc_planar) { int8_t pred_mode = mode; if (!is_isp && log2_width != log2_height) { @@ -927,7 +928,7 @@ int8_t uvg_wide_angle_correction(int_fast8_t mode, const bool is_isp, const int pred_mode += (66 - 1); } else if (log2_height > log2_width && mode > 66 - modeShift[deltaSize]) { - pred_mode -= (66 - 1); + pred_mode -= (66 - 1) + (account_for_dc_planar ? 2 : 0); } } } @@ -958,7 +959,8 @@ static void intra_predict_regular( int8_t pred_mode = uvg_wide_angle_correction(mode, is_isp, log2_width, - log2_height); + log2_height, + false); const uvg_intra_ref *used_ref = &refs->ref; if (cfg->intra_smoothing_disabled || color != COLOR_Y || mode == 1 || (width == 4 && height == 4) || multi_ref_index || isp_mode /*ISP_TODO: replace this fake ISP check*/) { @@ -1817,12 +1819,7 @@ void uvg_intra_recon_cu( cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); } - cu_loc_t chroma_cu_loc; - if(!recon_luma && recon_chroma) { - uvg_cu_loc_ctor(&chroma_cu_loc, cu_loc->x & ~7, cu_loc->y & ~7, width, height); - cu_loc = &chroma_cu_loc; - } - + // Reset CBFs because CBFs might have been set // for depth earlier if (recon_luma) { @@ -1846,7 +1843,7 @@ void uvg_intra_recon_cu( } cu_loc_t split_cu_loc[4]; - const int split_count = uvg_get_split_locs(cu_loc, split, split_cu_loc); + const int split_count = uvg_get_split_locs(cu_loc, split, split_cu_loc,NULL); for (int i = 0; i < split_count; ++i) { uvg_intra_recon_cu(state, search_data, &split_cu_loc[i], NULL, lcu, tree_type, recon_luma, recon_chroma); } @@ -1876,7 +1873,7 @@ void uvg_intra_recon_cu( } } const bool has_luma = recon_luma && search_data->pred_cu.intra.isp_mode == ISP_MODE_NO_ISP; - const bool has_chroma = recon_chroma && (cu_loc->x % 8 == 0 && cu_loc->y % 8 == 0); + const bool has_chroma = recon_chroma; // Process a leaf TU. if (has_luma) { diff --git a/src/intra.h b/src/intra.h index 7c4c8852..022b8ce1 100644 --- a/src/intra.h +++ b/src/intra.h @@ -169,7 +169,8 @@ int8_t uvg_wide_angle_correction( int_fast8_t mode, const bool is_isp, const int log2_width, - const int log2_height); + const int log2_height, + const bool account_for_dc_planar); // ISP related defines #define NUM_ISP_MODES 3 diff --git a/src/search.c b/src/search.c index 5166e47f..e11d6d15 100644 --- a/src/search.c +++ b/src/search.c @@ -380,18 +380,23 @@ double uvg_cu_rd_cost_luma( if (cu_loc->width > TR_MAX_WIDTH || cu_loc->height > TR_MAX_WIDTH) { double sum = 0; - const int half_width = cu_loc->width >> 1; - const int half_height = cu_loc->height >> 1; - cu_loc_t split_cu_loc; + // Recursively process sub-CUs. + enum split_type split; + if (cu_loc->width > TR_MAX_WIDTH && cu_loc->height > TR_MAX_WIDTH) { + split = QT_SPLIT; + } + else if (cu_loc->width > TR_MAX_WIDTH) { + split = BT_VER_SPLIT; + } + else { + split = BT_HOR_SPLIT; + } - uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y, half_width, half_height); - sum += uvg_cu_rd_cost_luma(state, &split_cu_loc, pred_cu, lcu, isp_cbf); - uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y, half_width, half_height); - sum += uvg_cu_rd_cost_luma(state, &split_cu_loc, pred_cu, lcu, isp_cbf); - uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y+ half_height, half_width, half_height); - sum += uvg_cu_rd_cost_luma(state, &split_cu_loc, pred_cu, lcu, isp_cbf); - uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y + half_height, half_width, half_height); - sum += uvg_cu_rd_cost_luma(state, &split_cu_loc, pred_cu, lcu, isp_cbf); + cu_loc_t split_cu_loc[4]; + const int split_count = uvg_get_split_locs(cu_loc, split, split_cu_loc,NULL); + for (int i = 0; i < split_count; ++i) { + sum += uvg_cu_rd_cost_luma(state, &split_cu_loc[i], pred_cu, lcu, isp_cbf); + } return sum + tr_tree_bits * state->lambda; } @@ -478,20 +483,12 @@ double uvg_cu_rd_cost_chroma( const cu_loc_t * const cu_loc) { const vector2d_t lcu_px = { (cu_loc->local_x & ~7) / 2, (cu_loc->local_y & ~7) / 2 }; - const int width = cu_loc->chroma_width; - const int height = cu_loc->chroma_height; cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0); double tr_tree_bits = 0; double coeff_bits = 0; - if (cu_loc->width == 4 && cu_loc->height == 4 && (cu_loc->x % 8 == 0 || cu_loc->y % 8 == 0)) { - // For MAX_PU_DEPTH calculate chroma for previous depth for the first - // block and return 0 cost for all others. - return 0; - } - const int depth = 6 - uvg_g_convert_to_log2[cu_loc->width]; int u_is_set = pred_cu->joint_cb_cr ? (pred_cu->joint_cb_cr & 2) >> 1 : cbf_is_set(pred_cu->cbf, COLOR_U); int v_is_set = pred_cu->joint_cb_cr ? (pred_cu->joint_cb_cr & 1) : cbf_is_set(pred_cu->cbf, COLOR_V); @@ -499,18 +496,22 @@ double uvg_cu_rd_cost_chroma( if (cu_loc->width > TR_MAX_WIDTH || cu_loc->height > TR_MAX_WIDTH) { double sum = 0; // Recursively process sub-CUs. - const int half_width = cu_loc->width >> 1; - const int half_height = cu_loc->height >> 1; - cu_loc_t split_cu_loc; + enum split_type split; + if (cu_loc->width > TR_MAX_WIDTH && cu_loc->height > TR_MAX_WIDTH) { + split = QT_SPLIT; + } + else if (cu_loc->width > TR_MAX_WIDTH) { + split = BT_VER_SPLIT; + } + else { + split = BT_HOR_SPLIT; + } - uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y, half_width, half_height); - sum += uvg_cu_rd_cost_chroma(state, pred_cu, lcu, &split_cu_loc); - uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y, half_width, half_height); - sum += uvg_cu_rd_cost_chroma(state, pred_cu, lcu, &split_cu_loc); - uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y + half_height, half_width, half_height); - sum += uvg_cu_rd_cost_chroma(state, pred_cu, lcu, &split_cu_loc); - uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y + half_height, half_width, half_height); - sum += uvg_cu_rd_cost_chroma(state, pred_cu, lcu, &split_cu_loc); + cu_loc_t split_cu_loc[4]; + const int split_count = uvg_get_split_locs(cu_loc, split, split_cu_loc,NULL); + for (int i = 0; i < split_count; ++i) { + sum += uvg_cu_rd_cost_chroma(state, pred_cu, lcu, &split_cu_loc[i]); + } return sum + tr_tree_bits * state->lambda; } @@ -544,10 +545,10 @@ double uvg_cu_rd_cost_chroma( int index = lcu_px.y * LCU_WIDTH_C + lcu_px.x; int ssd_u = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index], LCU_WIDTH_C, LCU_WIDTH_C, - width); + cu_loc->chroma_width); int ssd_v = uvg_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.v[index], LCU_WIDTH_C, LCU_WIDTH_C, - width); + cu_loc->chroma_width); ssd = ssd_u + ssd_v; } @@ -580,7 +581,9 @@ static double cu_rd_cost_tr_split_accurate( lcu_t* const lcu, enum uvg_tree_type tree_type, uint8_t isp_cbf, - const cu_loc_t* const cu_loc) { + const cu_loc_t* const cu_loc, + const cu_loc_t* const chroma_loc, + bool has_chroma) { const int width = cu_loc->width; const int height = cu_loc->height; // TODO: height for non-square blocks @@ -590,8 +593,6 @@ static double cu_rd_cost_tr_split_accurate( double coeff_bits = 0; double tr_tree_bits = 0; - - const int depth = 6 - uvg_g_convert_to_log2[cu_loc->width]; const int cb_flag_u = tr_cu->joint_cb_cr ? tr_cu->joint_cb_cr >> 1 : cbf_is_set(tr_cu->cbf, COLOR_U); const int cb_flag_v = tr_cu->joint_cb_cr ? tr_cu->joint_cb_cr & 1 : cbf_is_set(tr_cu->cbf, COLOR_V); @@ -610,22 +611,24 @@ static double cu_rd_cost_tr_split_accurate( if (cu_loc->width > TR_MAX_WIDTH || cu_loc->height > TR_MAX_WIDTH) { double sum = 0; - - const int half_width = cu_loc->width >> 1; - const int half_height = cu_loc->height >> 1; - cu_loc_t split_cu_loc; - uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y, half_width, half_height); - sum += cu_rd_cost_tr_split_accurate(state, pred_cu, lcu, tree_type, isp_cbf, &split_cu_loc); - uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y, half_width, half_height); - sum += cu_rd_cost_tr_split_accurate(state, pred_cu, lcu, tree_type, isp_cbf, &split_cu_loc); - uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y + half_height, half_width, half_height); - sum += cu_rd_cost_tr_split_accurate(state, pred_cu, lcu, tree_type, isp_cbf, &split_cu_loc); - uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y + half_height, half_width, half_height); - sum += cu_rd_cost_tr_split_accurate(state, pred_cu, lcu, tree_type, isp_cbf, &split_cu_loc); + enum split_type split; + if(cu_loc->width > TR_MAX_WIDTH && cu_loc->height > TR_MAX_WIDTH) { + split = QT_SPLIT; + } else if(cu_loc->width > TR_MAX_WIDTH) { + split = BT_VER_SPLIT; + } else { + split = BT_HOR_SPLIT; + } + + cu_loc_t split_cu_loc[4]; + const int split_count= uvg_get_split_locs(cu_loc, split, split_cu_loc,NULL); + for (int i = 0; i < split_count; ++i) { + sum += cu_rd_cost_tr_split_accurate(state, pred_cu, lcu, tree_type, isp_cbf, &split_cu_loc[i], &split_cu_loc[i], has_chroma); + } return sum + tr_tree_bits * state->lambda; } - bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400 && (depth != 4 || (cu_loc->x % 8 && cu_loc->y % 8)) && tree_type != UVG_LUMA_T; + has_chroma = state->encoder_control->chroma_format != UVG_CSP_400 && has_chroma && tree_type != UVG_LUMA_T; if (!skip_residual_coding && has_chroma) { CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cb[0]), cb_flag_u, tr_tree_bits, "cbf_cb"); CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cr[cb_flag_u]), cb_flag_v, tr_tree_bits, "cbf_cr"); @@ -712,7 +715,7 @@ static double cu_rd_cost_tr_split_accurate( } } - const bool is_local_sep_tree = pred_cu->log2_width + pred_cu->log2_height < 6 && tree_type == UVG_BOTH_T; + const bool is_local_sep_tree = (cu_loc->width != chroma_loc->width || cu_loc->height != chroma_loc->height) && state->encoder_control->chroma_format != UVG_CSP_400; if(is_local_sep_tree || tree_type == UVG_LUMA_T) { @@ -738,11 +741,11 @@ static double cu_rd_cost_tr_split_accurate( unsigned chroma_ssd = 0; if(has_chroma) { - cu_loc_t chroma_loc; - const vector2d_t lcu_px = { (cu_loc->local_x >> 1) & ~3, (cu_loc->local_y >> 1) &~3 }; - uvg_cu_loc_ctor(&chroma_loc, lcu_px.x, lcu_px.y, width, height); - const int chroma_width = cu_loc->chroma_width; - const int chroma_height = cu_loc->chroma_height; // TODO: height for non-square blocks + cu_loc_t temp_chroma_loc; + const vector2d_t lcu_px = { chroma_loc->local_x >> 1, chroma_loc->local_y >> 1}; + uvg_cu_loc_ctor(&temp_chroma_loc, lcu_px.x, lcu_px.y, chroma_loc->width, chroma_loc->height); + const int chroma_width = chroma_loc->chroma_width; + const int chroma_height = chroma_loc->chroma_height; int8_t scan_order = SCAN_DIAG; //const unsigned index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y); @@ -766,8 +769,8 @@ static double cu_rd_cost_tr_split_accurate( if(chroma_can_use_tr_skip && cb_flag_v) { CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 4, tr_tree_bits, "transform_skip_flag"); } - coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.u, tr_cu, &chroma_loc, COLOR_U, scan_order, tr_cu->tr_skip & 2, COEFF_ORDER_CU); - coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.v, tr_cu, &chroma_loc, COLOR_V, scan_order, tr_cu->tr_skip & 4, COEFF_ORDER_CU); + coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.u, tr_cu, &temp_chroma_loc, COLOR_U, scan_order, tr_cu->tr_skip & 2, COEFF_ORDER_CU); + coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.v, tr_cu, &temp_chroma_loc, COLOR_V, scan_order, tr_cu->tr_skip & 4, COEFF_ORDER_CU); } else { @@ -789,7 +792,7 @@ static double cu_rd_cost_tr_split_accurate( } const bool is_chroma_tree = is_local_sep_tree || tree_type == UVG_CHROMA_T; - if (uvg_is_lfnst_allowed(state, tr_cu, is_local_sep_tree ? UVG_CHROMA_T : tree_type, is_chroma_tree ? COLOR_UV : COLOR_Y, cu_loc)) { + if (uvg_is_lfnst_allowed(state, tr_cu, is_local_sep_tree ? UVG_CHROMA_T : tree_type, is_chroma_tree ? COLOR_UV : COLOR_Y, is_chroma_tree ? cu_loc : chroma_loc)) { const int lfnst_idx = is_chroma_tree ? tr_cu->cr_lfnst_idx : tr_cu->lfnst_idx; CABAC_FBITS_UPDATE( cabac, @@ -931,10 +934,11 @@ void uvg_sort_keys_by_cost(unit_stats_map_t *__restrict map) static double search_cu( encoder_state_t* const state, const cu_loc_t* const cu_loc, + const cu_loc_t* const chroma_loc, lcu_t* lcu, - enum uvg_tree_type - tree_type, - const split_tree_t split_tree) + enum uvg_tree_type tree_type, + const split_tree_t split_tree, + bool has_chroma) { const int depth = split_tree.current_depth; const encoder_control_t* ctrl = state->encoder_control; @@ -1091,9 +1095,8 @@ static double search_cu( double intra_cost = intra_search.cost; if (intra_cost < cost && tree_type != UVG_LUMA_T) { int8_t intra_mode = intra_search.pred_cu.intra.mode; - - // TODO: This heavily relies to square CUs - if ((cur_cu->log2_height + cur_cu->log2_width >= 6 || (x % 8 && y % 8) || tree_type == UVG_CHROMA_T) + + if ((has_chroma || tree_type == UVG_CHROMA_T) && state->encoder_control->chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) { intra_search.pred_cu.joint_cb_cr = 0; @@ -1104,7 +1107,7 @@ static double search_cu( } intra_search.pred_cu.intra.mode_chroma = intra_search.pred_cu.intra.mode; if (ctrl->cfg.rdo >= 2 || ctrl->cfg.jccr || ctrl->cfg.lfnst) { - uvg_search_cu_intra_chroma(state, cu_loc, lcu, &intra_search, tree_type); + uvg_search_cu_intra_chroma(state, chroma_loc, lcu, &intra_search, tree_type, cu_loc->x != chroma_loc->x || cu_loc->y != chroma_loc->y); if (intra_search.pred_cu.joint_cb_cr == 0) { intra_search.pred_cu.joint_cb_cr = 4; @@ -1118,13 +1121,13 @@ static double search_cu( intra_search.pred_cu.intra.mode_chroma = 0; } uvg_intra_recon_cu(state, - &intra_search, cu_loc, + &intra_search, chroma_loc, &intra_search.pred_cu, lcu, tree_type, false, true); if(tree_type != UVG_CHROMA_T) { - intra_cost += uvg_cu_rd_cost_chroma(state, &intra_search.pred_cu, lcu, cu_loc); + intra_cost += uvg_cu_rd_cost_chroma(state, &intra_search.pred_cu, lcu, chroma_loc); } else { intra_cost = intra_search.cost; @@ -1178,7 +1181,7 @@ static double search_cu( bool recon_chroma = true; bool recon_luma = tree_type != UVG_CHROMA_T; - if ((cur_cu->log2_height + cur_cu->log2_width < 6) || state->encoder_control->chroma_format == UVG_CSP_400 || tree_type == UVG_LUMA_T) { + if ((cur_cu->log2_height + cur_cu->log2_width < 6) || !has_chroma || state->encoder_control->chroma_format == UVG_CSP_400 || tree_type == UVG_LUMA_T) { recon_chroma = false; } lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu); @@ -1189,12 +1192,12 @@ static double search_cu( recon_luma, recon_chroma); - if((cur_cu->log2_height + cur_cu->log2_width < 6 && x % 8 && y % 8 && tree_type != UVG_LUMA_T && state->encoder_control->chroma_format != UVG_CSP_400 ) + if((cur_cu->log2_height + cur_cu->log2_width < 6 && has_chroma && tree_type != UVG_LUMA_T && state->encoder_control->chroma_format != UVG_CSP_400 ) || tree_type == UVG_CHROMA_T) { intra_search.pred_cu.intra.mode_chroma = cur_cu->intra.mode_chroma; uvg_intra_recon_cu(state, - &intra_search, cu_loc, - NULL, lcu, + &intra_search, chroma_loc, + cur_cu, lcu, tree_type, false, true); @@ -1279,9 +1282,9 @@ static double search_cu( // The cabac functions assume chroma locations whereas the search uses luma locations // for the chroma tree, therefore we need to shift the chroma coordinates here for // passing to the bit cost calculating functions. - cu_loc_t chroma_loc = *cu_loc; - chroma_loc.y >>= 1; - chroma_loc.x >>= 1; + cu_loc_t separate_tree_chroma_loc = *cu_loc; + separate_tree_chroma_loc.y >>= 1; + separate_tree_chroma_loc.x >>= 1; if (cur_cu->type == CU_INTRA || cur_cu->type == CU_INTER) { double bits = 0; @@ -1291,7 +1294,7 @@ static double search_cu( bits += uvg_mock_encode_coding_unit( state, cabac, - tree_type != UVG_CHROMA_T ? cu_loc : &chroma_loc, + tree_type != UVG_CHROMA_T ? cu_loc : &separate_tree_chroma_loc, lcu, cur_cu, tree_type, @@ -1300,7 +1303,7 @@ static double search_cu( cost = bits * state->lambda; - cost += cu_rd_cost_tr_split_accurate(state, cur_cu, lcu, tree_type, 0, cu_loc); + cost += cu_rd_cost_tr_split_accurate(state, cur_cu, lcu, tree_type, 0, cu_loc, chroma_loc, has_chroma); //if (ctrl->cfg.zero_coeff_rdo && inter_zero_coeff_cost <= cost) { // cost = inter_zero_coeff_cost; @@ -1335,7 +1338,7 @@ static double search_cu( // Recursively split all the way to max search depth. if (can_split_cu) { - const int split_type = depth == 0 ? QT_SPLIT : TT_HOR_SPLIT; + const int split_type = depth == 2 ? TT_HOR_SPLIT : QT_SPLIT; const split_tree_t new_split = { split_tree.split_tree | split_type << (split_tree.current_depth * 3), split_tree.current_depth + 1, @@ -1378,7 +1381,7 @@ static double search_cu( &state->search_cabac, left_cu, above_cu, - tree_type != UVG_CHROMA_T ? cu_loc : &chroma_loc, + tree_type != UVG_CHROMA_T ? cu_loc : &separate_tree_chroma_loc, split_tree, tree_type, &split_bits); @@ -1393,11 +1396,17 @@ static double search_cu( // It is ok to interrupt the search as soon as it is known that // the split costs at least as much as not splitting. if (cur_cu->type == CU_NOTSET || cbf || state->encoder_control->cfg.cu_split_termination == UVG_CU_SPLIT_TERMINATION_OFF) { - initialize_partial_work_tree(lcu, &split_lcu, cu_loc, tree_type); cu_loc_t new_cu_loc[4]; - const int splits = uvg_get_split_locs(cu_loc, split_type, new_cu_loc); + uint8_t separate_chroma = 0; + const int splits = uvg_get_split_locs(cu_loc, split_type, new_cu_loc, &separate_chroma); + initialize_partial_work_tree(lcu, &split_lcu, cu_loc, tree_type); for (int split = 0; split < splits; ++split) { - split_cost += search_cu(state, &new_cu_loc[split], &split_lcu, tree_type, new_split); + split_cost += search_cu(state, + &new_cu_loc[split], separate_chroma ? cu_loc : &new_cu_loc[split], + &split_lcu, + tree_type, new_split, + !separate_chroma || split == splits - 1); + // If there is no separate chroma the block will always have chroma, otherwise it is the last block of the split that has the chroma if (split_cost > cost) { break; } @@ -1460,7 +1469,7 @@ static double search_cu( double mode_bits = calc_mode_bits(state, lcu, cur_cu, cu_loc) + bits; cost += mode_bits * state->lambda; - cost += cu_rd_cost_tr_split_accurate(state, cur_cu, lcu, tree_type, 0, cu_loc); + cost += cu_rd_cost_tr_split_accurate(state, cur_cu, lcu, tree_type, 0, cu_loc, chroma_loc, has_chroma); memcpy(&post_seach_cabac, &state->search_cabac, sizeof(post_seach_cabac)); memcpy(&state->search_cabac, &temp_cabac, sizeof(temp_cabac)); @@ -1724,9 +1733,11 @@ void uvg_search_lcu(encoder_state_t * const state, const int x, const int y, con double cost = search_cu( state, &start, + NULL, &work_tree, tree_type, - split_tree); + split_tree, + false); // Save squared cost for rate control. if(state->encoder_control->cfg.rc_algorithm == UVG_LAMBDA) { @@ -1743,8 +1754,10 @@ void uvg_search_lcu(encoder_state_t * const state, const int x, const int y, con if(state->frame->slicetype == UVG_SLICE_I && state->encoder_control->cfg.dual_tree) { cost = search_cu( state, &start, - &work_tree, - UVG_CHROMA_T, split_tree); + NULL, + &work_tree, UVG_CHROMA_T, + split_tree, + false); if (state->encoder_control->cfg.rc_algorithm == UVG_LAMBDA) { uvg_get_lcu_stats(state, x / LCU_WIDTH, y / LCU_WIDTH)->weight += cost * cost; diff --git a/src/search_intra.c b/src/search_intra.c index 07826cec..92c4903f 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -590,7 +590,7 @@ static double search_intra_trdepth( } cu_loc_t split_cu_loc[4]; - const int split_count = uvg_get_split_locs(cu_loc, split, split_cu_loc); + const int split_count = uvg_get_split_locs(cu_loc, split, split_cu_loc,NULL); for (int i = 0; i < split_count; ++i) { split_cost += search_intra_trdepth(state, &split_cu_loc[i], nosplit_cost, search_data, lcu, tree_type); } @@ -1418,10 +1418,11 @@ int8_t uvg_search_intra_chroma_rdo( encoder_state_t * const state, int8_t num_modes, lcu_t *const lcu, + const cu_loc_t* const cu_loc, intra_search_data_t* chroma_data, int8_t luma_mode, enum uvg_tree_type tree_type, - const cu_loc_t* const cu_loc) + bool is_separate) { const bool reconstruct_chroma = true; @@ -1446,7 +1447,7 @@ int8_t uvg_search_intra_chroma_rdo( const int offset = ((cu_loc->local_x & ~7) >> 1) + ((cu_loc->local_y & ~7) >> 1)* LCU_WIDTH_C; int lfnst_modes_to_check[3]; - if((cu_loc->width == 4 || tree_type == UVG_CHROMA_T) && state->encoder_control->cfg.lfnst) { + if((is_separate || tree_type == UVG_CHROMA_T) && state->encoder_control->cfg.lfnst) { for (int i = 0; i < 3; ++i) { lfnst_modes_to_check[i] = i; } @@ -1528,7 +1529,7 @@ int8_t uvg_search_intra_chroma_rdo( u_resi, v_resi, &chorma_ts_out, - tree_type); + is_separate ? UVG_CHROMA_T : tree_type); // LFNST constraint failed if(chorma_ts_out.best_u_index == -1 && chorma_ts_out.best_combined_index == -1) { @@ -1590,7 +1591,8 @@ int8_t uvg_search_cu_intra_chroma( const cu_loc_t* const cu_loc, lcu_t *lcu, intra_search_data_t *search_data, - enum uvg_tree_type tree_type) + enum uvg_tree_type tree_type, + bool is_separate) { const cu_info_t *cur_pu = &search_data->pred_cu; @@ -1604,9 +1606,7 @@ int8_t uvg_search_cu_intra_chroma( break; } } - - cu_loc_t chroma_loc; - uvg_cu_loc_ctor(&chroma_loc, cu_loc->x & ~7, cu_loc->y & ~7, cu_loc->width, cu_loc->height); + // The number of modes to select for slower chroma search. Luma mode // is always one of the modes, so 2 means the final decision is made @@ -1638,11 +1638,11 @@ int8_t uvg_search_cu_intra_chroma( num_modes = search_intra_chroma_rough(state, chroma_data, lcu, intra_mode, tree_type, - &chroma_loc); + cu_loc); } if (num_modes > 1 || state->encoder_control->cfg.jccr) { - uvg_search_intra_chroma_rdo(state, num_modes, lcu, chroma_data, intra_mode, tree_type, &chroma_loc); + uvg_search_intra_chroma_rdo(state, num_modes, lcu, cu_loc, chroma_data, intra_mode, tree_type, is_separate); } else if(cur_pu->lfnst_idx) { chroma_data[0].pred_cu.cr_lfnst_idx = cur_pu->lfnst_idx; diff --git a/src/search_intra.h b/src/search_intra.h index faa26ff1..390187b2 100644 --- a/src/search_intra.h +++ b/src/search_intra.h @@ -55,7 +55,8 @@ int8_t uvg_search_cu_intra_chroma( const cu_loc_t* const cu_loc, lcu_t *lcu, intra_search_data_t* best_cclm, - enum uvg_tree_type tree_type); + enum uvg_tree_type tree_type, + bool is_separate); void uvg_search_cu_intra( encoder_state_t * const state, diff --git a/src/strategies/generic/dct-generic.c b/src/strategies/generic/dct-generic.c index c352b395..d5fdb88e 100644 --- a/src/strategies/generic/dct-generic.c +++ b/src/strategies/generic/dct-generic.c @@ -2586,7 +2586,7 @@ static void mts_dct_generic( //const int log2_width_minus2 = uvg_g_convert_to_bit[width]; //const int log2_height_minus2 = uvg_g_convert_to_bit[height]; - if(tu->lfnst_idx || tu->cr_lfnst_idx) { + if((tu->lfnst_idx && color == COLOR_Y) || (tu->cr_lfnst_idx && color != COLOR_Y)) { if ((width == 4 && height > 4) || (width > 4 && height == 4)) { skip_width = width - 4; @@ -2639,7 +2639,7 @@ static void mts_idct_generic( const int log2_width_minus1 = uvg_g_convert_to_log2[width] - 1; const int log2_height_minus1 = uvg_g_convert_to_log2[height] - 1; - if (tu->lfnst_idx || tu->cr_lfnst_idx) { + if ((tu->lfnst_idx && color == COLOR_Y) || (tu->cr_lfnst_idx && color != COLOR_Y)) { if ((width == 4 && height > 4) || (width > 4 && height == 4)) { skip_width = width - 4; skip_height = height - 4; diff --git a/src/strategies/generic/intra-generic.c b/src/strategies/generic/intra-generic.c index 84373d21..01364ab1 100644 --- a/src/strategies/generic/intra-generic.c +++ b/src/strategies/generic/intra-generic.c @@ -66,8 +66,7 @@ static void uvg_angular_pred_generic( const int log2_width = uvg_g_convert_to_log2[width]; const int log2_height = uvg_g_convert_to_log2[height]; - // Log2_dim 1 is possible with ISP blocks - assert((log2_width >= 1 && log2_width <= 5) && (log2_height >= 1 && log2_height <= 5)); + assert((log2_width >= 2 && log2_width <= 5) && log2_height <= 5); // assert(intra_mode >= 2 && intra_mode <= 66); static const int16_t modedisp2sampledisp[32] = { 0, 1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 23, 26, 29, 32, 35, 39, 45, 51, 57, 64, 73, 86, 102, 128, 171, 256, 341, 512, 1024 }; @@ -249,7 +248,7 @@ static void uvg_angular_pred_generic( // PDPC - bool PDPC_filter = ((tmp_width >= TR_MIN_WIDTH && tmp_height >= TR_MIN_WIDTH) || channel_type != 0); + bool PDPC_filter = ((tmp_width >= TR_MIN_WIDTH && tmp_height >= TR_MIN_WIDTH) || channel_type != 0) && multi_ref_index == 0; if (pred_mode > 1 && pred_mode < 67) { if (mode_disp < 0 || multi_ref_index) { // Cannot be used with MRL. PDPC_filter = false; diff --git a/src/transform.c b/src/transform.c index 968ae440..34514e82 100644 --- a/src/transform.c +++ b/src/transform.c @@ -574,7 +574,7 @@ void uvg_chroma_transform_search( pred_cu->cr_lfnst_idx); if(pred_cu->cr_lfnst_idx !=0 && !u_has_coeffs && !v_has_coeffs) continue; - if(pred_cu->type == CU_INTRA && transforms[i] != CHROMA_TS && (cu_loc->width == 4 || tree_type == UVG_CHROMA_T)) { + if(pred_cu->type == CU_INTRA && transforms[i] != CHROMA_TS && tree_type == UVG_CHROMA_T) { bool constraints[2] = { false, false }; uvg_derive_lfnst_constraints(pred_cu, constraints, u_quant_coeff, width, height, NULL, COLOR_U); if(!is_jccr) { @@ -863,6 +863,8 @@ void uvg_fwd_lfnst( const uint32_t log2_height = uvg_g_convert_to_log2[height]; int8_t intra_mode = (color == COLOR_Y) ? cur_cu->intra.mode : cur_cu->intra.mode_chroma; bool mts_skip = cur_cu->tr_idx == MTS_SKIP; + // This check is safe for 8x16 cus split with TT, since it is checking the dimensions of the + // last luma CU which will be 8x4, i.e., 3 + 2 < 6 bool is_separate_tree = cur_cu->log2_height + cur_cu->log2_width < 6 || tree_type != UVG_BOTH_T; bool is_cclm_mode = (intra_mode >= 81 && intra_mode <= 83); // CCLM modes are in [81, 83] @@ -879,12 +881,12 @@ void uvg_fwd_lfnst( if (is_cclm_mode) { intra_mode = cur_cu->intra.mode; } - if (is_mip) { + if (is_mip && color == COLOR_Y) { intra_mode = 0; // Set to planar mode } assert(intra_mode < NUM_INTRA_MODE && "LFNST: Invalid intra mode."); assert(lfnst_index < 3 && "LFNST: Invalid LFNST index. Must be in [0, 2]"); - int32_t wide_adjusted_mode = uvg_wide_angle_correction(intra_mode, cur_cu->intra.isp_mode != 0, log2_width, log2_height); + int32_t wide_adjusted_mode = uvg_wide_angle_correction(intra_mode, cur_cu->intra.isp_mode != 0, log2_width, log2_height, true); // Transform wide angle mode to intra mode intra_mode = get_lfnst_intra_mode(wide_adjusted_mode); @@ -1010,12 +1012,12 @@ void uvg_inv_lfnst( if (is_cclm_mode) { intra_mode = cur_cu->intra.mip_flag ? 0 : cur_cu->intra.mode; } - if (is_mip) { + if (is_mip && color == COLOR_Y) { intra_mode = 0; // Set to planar mode } assert(intra_mode < NUM_INTRA_MODE && "LFNST: Invalid intra mode."); assert(lfnst_index < 3 && "LFNST: Invalid LFNST index. Must be in [0, 2]"); - int32_t wide_adjusted_mode = uvg_wide_angle_correction(intra_mode, cur_cu->intra.isp_mode != 0, log2_width, log2_height); + int32_t wide_adjusted_mode = uvg_wide_angle_correction(intra_mode, cur_cu->intra.isp_mode != 0, log2_width, log2_height, true); intra_mode = get_lfnst_intra_mode(wide_adjusted_mode); @@ -1175,6 +1177,7 @@ static void quantize_tr_residual( cur_pu->log2_width + cur_pu-> log2_height < 6&& (x % 4 != 0 || y % 4 != 0); if (handled_elsewhere) { + assert(0); return; } @@ -1413,7 +1416,7 @@ void uvg_quantize_lcu_residual( cu_loc_t split_cu_loc[4]; uint16_t child_cbfs[3]; - const int split_count = uvg_get_split_locs(cu_loc, split, split_cu_loc); + const int split_count = uvg_get_split_locs(cu_loc, split, split_cu_loc,NULL); for (int i = 0; i < split_count; ++i) { uvg_quantize_lcu_residual(state, luma, chroma, 0, &split_cu_loc[i], NULL, lcu, early_skip, tree_type); if(i != 0) {