diff --git a/src/cu.c b/src/cu.c index 1159bc5e..d7c37108 100644 --- a/src/cu.c +++ b/src/cu.c @@ -100,7 +100,7 @@ cu_info_t* uvg_cu_array_at(cu_array_t *cua, unsigned x_px, unsigned y_px) } -void uvg_get_isp_cu_arr_coords(int *x, int *y) +void uvg_get_isp_cu_arr_coords(int *x, int *y, int dim) { // Do nothing if dimensions are divisible by 4 if (*y % 4 == 0 && *x % 4 == 0) return; @@ -109,7 +109,7 @@ void uvg_get_isp_cu_arr_coords(int *x, int *y) if (remainder_y != 0) { // Horizontal ISP split - if (remainder_y % 2 == 0) { + if (remainder_y % 2 == 0 && dim == 8) { // 8x2 block *y -= 2; *x += 4; @@ -122,7 +122,7 @@ void uvg_get_isp_cu_arr_coords(int *x, int *y) } else { // Vertical ISP split - if (*x % 2 == 0) { + if (*x % 2 == 0 && dim == 8) { // 2x8 block *y += 4; *x -= 2; diff --git a/src/cu.h b/src/cu.h index 46c1c4e2..8f3ec8bf 100644 --- a/src/cu.h +++ b/src/cu.h @@ -245,7 +245,7 @@ typedef struct cu_array_t { } cu_array_t; cu_info_t* uvg_cu_array_at(cu_array_t *cua, unsigned x_px, unsigned y_px); -void uvg_get_isp_cu_arr_coords(int* x, int* y); +void uvg_get_isp_cu_arr_coords(int* x, int* y, int dim); const cu_info_t* uvg_cu_array_at_const(const cu_array_t *cua, unsigned x_px, unsigned y_px); cu_array_t * uvg_cu_array_alloc(const int width, const int height); diff --git a/src/dep_quant.c b/src/dep_quant.c index a41bf6c5..8cb01860 100644 --- a/src/dep_quant.c +++ b/src/dep_quant.c @@ -343,7 +343,7 @@ static void init_quant_block( const bool needsSqrt2ScaleAdjustment, const int gValue) { - double lambda = state->lambda; + double lambda = color == COLOR_Y ? state->lambda : state->c_lambda; const int qpDQ = state->qp + 1; const int qpPer = qpDQ / 6; @@ -475,7 +475,7 @@ static void xSetLastCoeffOffset( cbf_ctx = &state->search_cabac.ctx.qt_cbf_model_cr[cbf_is_set(cur_tu->cbf, COLOR_U)]; break; } - cbfDeltaBits = (int32_t)CTX_ENTROPY_BITS(cbf_ctx, 1) - (int32_t)CTX_ENTROPY_BITS(cbf_ctx, 0); + cbfDeltaBits = compID != COLOR_Y && cur_tu->joint_cb_cr ? 0 : (int32_t)CTX_ENTROPY_BITS(cbf_ctx, 1) - (int32_t)CTX_ENTROPY_BITS(cbf_ctx, 0); } } diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index 6cf99a61..858d89f4 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -143,7 +143,7 @@ bool uvg_is_lfnst_allowed( uvg_get_isp_split_loc(&split_loc, cu_loc->x, cu_loc->y, cu_width, cu_height, i, isp_mode, false); int local_split_x = lcu ? split_loc.local_x : split_loc.x; int local_split_y = lcu ? split_loc.local_y : split_loc.y; - uvg_get_isp_cu_arr_coords(&local_split_x, &local_split_y); + uvg_get_isp_cu_arr_coords(&local_split_x, &local_split_y, MAX(cu_width, cu_height)); const cu_info_t* split_cu = lcu ? LCU_GET_CU_AT_PX(lcu, local_split_x, local_split_y) : uvg_cu_array_at_const(frame->cu_array, local_split_x, local_split_y); @@ -550,7 +550,7 @@ static void encode_transform_unit( cu_array_t* used_cu_array = tree_type != UVG_CHROMA_T ? frame->cu_array : frame->chroma_cu_array; int isp_x = x; int isp_y = y; - uvg_get_isp_cu_arr_coords(&isp_x, &isp_y); + uvg_get_isp_cu_arr_coords(&isp_x, &isp_y, MAX(width, height)); if(cur_pu == NULL) { cur_pu = uvg_cu_array_at_const(used_cu_array, isp_x, isp_y); } @@ -645,7 +645,7 @@ static void encode_transform_coeff( int x = cu_loc->x; int y = cu_loc->y; if (isp_split) { - uvg_get_isp_cu_arr_coords(&x, &y); + uvg_get_isp_cu_arr_coords(&x, &y, MAX(cu_loc->width, cu_loc->height)); } //const encoder_control_t *const ctrl = state->encoder_control; diff --git a/src/encoderstate.h b/src/encoderstate.h index 6df843d7..7afa78ab 100644 --- a/src/encoderstate.h +++ b/src/encoderstate.h @@ -332,6 +332,7 @@ typedef struct encoder_state_t { int8_t qp; double c_lambda; + double chroma_weights[4]; /** * \brief Whether a QP delta value must be coded for the current LCU. diff --git a/src/intra.c b/src/intra.c index 314f44ed..1b7026e5 100644 --- a/src/intra.c +++ b/src/intra.c @@ -2001,10 +2001,10 @@ bool uvg_can_use_isp_with_lfnst(const int width, const int height, const int isp double uvg_recon_and_estimate_cost_isp(encoder_state_t* const state, - const cu_loc_t* const cu_loc, - double cost_treshold, - intra_search_data_t* const search_data, - lcu_t* const lcu) { + const cu_loc_t* const cu_loc, + double cost_treshold, + intra_search_data_t* const search_data, + lcu_t* const lcu, bool* violates_lfnst) { assert(state->search_cabac.update && "ISP reconstruction must be done with CABAC update"); double cost = 0; @@ -2012,6 +2012,7 @@ double uvg_recon_and_estimate_cost_isp(encoder_state_t* const state, const int height = cu_loc->height; search_data->best_isp_cbfs = 0; + search_data->pred_cu.intra.isp_cbfs = 0; // ISP split is done horizontally or vertically depending on ISP mode, 2 or 4 times depending on block dimensions. // Small blocks are split only twice. int split_type = search_data->pred_cu.intra.isp_mode; @@ -2020,11 +2021,11 @@ double uvg_recon_and_estimate_cost_isp(encoder_state_t* const state, int cbf_context = 2; for (int i = 0; i < split_limit; ++i) { + search_data->pred_cu.intra.isp_index = i; cu_loc_t tu_loc; uvg_get_isp_split_loc(&tu_loc, cu_loc->x, cu_loc->y, width, height, i, split_type, true); cu_loc_t pu_loc; uvg_get_isp_split_loc(&pu_loc, cu_loc->x, cu_loc->y, width, height, i, split_type, false); - search_data->pred_cu.intra.isp_index = 0; if (tu_loc.x % 4 == 0) { intra_recon_tb_leaf(state, &pu_loc, cu_loc, lcu, COLOR_Y, search_data); } @@ -2036,20 +2037,23 @@ double uvg_recon_and_estimate_cost_isp(encoder_state_t* const state, int ssd = uvg_pixels_calc_ssd(&lcu->ref.y[index], &lcu->rec.y[index], LCU_WIDTH, LCU_WIDTH, tu_loc.width, tu_loc.height); - double coeff_bits = uvg_get_coeff_cost(state, lcu->coeff.y, NULL, &tu_loc, 0, SCAN_DIAG, false, COEFF_ORDER_CU); + double coeff_bits = uvg_get_coeff_cost(state, lcu->coeff.y, &search_data->pred_cu, &tu_loc, 0, SCAN_DIAG, false, COEFF_ORDER_CU); int cbf = cbf_is_set(search_data->pred_cu.cbf, COLOR_Y); - if (i + 1 != split_limit || search_data->best_isp_cbfs != 1 << (split_limit - 1)) { + if (i + 1 != split_limit || search_data->best_isp_cbfs != 0) { CABAC_FBITS_UPDATE(&state->search_cabac, &state->search_cabac.ctx.qt_cbf_model_luma[cbf_context], cbf, coeff_bits, "cbf_luma_isp_recon"); } cost += ssd + coeff_bits * state->lambda; cbf_context = 2 + cbf; + if(violates_lfnst) *violates_lfnst |= search_data->pred_cu.violates_lfnst_constrained_luma; + search_data->pred_cu.violates_lfnst_constrained_luma = false; search_data->best_isp_cbfs |= cbf << i; search_data->pred_cu.intra.isp_cbfs = search_data->best_isp_cbfs; } + search_data->pred_cu.intra.isp_index = 0; return cost; } \ No newline at end of file diff --git a/src/intra.h b/src/intra.h index 676588ec..c15b182a 100644 --- a/src/intra.h +++ b/src/intra.h @@ -152,10 +152,10 @@ void uvg_intra_recon_cu( bool recon_chroma); double uvg_recon_and_estimate_cost_isp(encoder_state_t* const state, - const cu_loc_t* const cu_loc, - double cost_treshold, - intra_search_data_t* const search_data, - lcu_t* const lcu); + const cu_loc_t* const cu_loc, + double cost_treshold, + intra_search_data_t* const search_data, + lcu_t* const lcu, bool* violates_lfnst); int8_t uvg_get_co_located_luma_mode( const cu_loc_t* const chroma_loc, diff --git a/src/rate_control.c b/src/rate_control.c index 0660f0ac..3dfa35fe 100644 --- a/src/rate_control.c +++ b/src/rate_control.c @@ -804,6 +804,11 @@ static double qp_to_lambda(encoder_state_t* const state, int qp) state->qp = est_qp; int8_t chroma_qp = encoder->qp_map[0][est_qp]; double tmpWeight = pow(2.0, (est_qp - chroma_qp) / 3.0); + if (state->encoder_control->cfg.dep_quant) + { + tmpWeight *= (state->encoder_control->cfg.gop_len >= 8 ? pow(2.0, 0.1 / 3.0) : pow(2.0, 0.2 / 3.0)); // increase chroma weight for dependent quantization (in order to reduce bit rate shift from chroma to luma) + } + state->chroma_weights[1] = state->chroma_weights[2] = state->chroma_weights[3] = tmpWeight; state->c_lambda = est_lambda / tmpWeight; ctu->qp = est_qp; ctu->lambda = est_lambda; @@ -1174,6 +1179,11 @@ void uvg_set_lcu_lambda_and_qp(encoder_state_t * const state, int8_t chroma_qp = ctrl->qp_map[0][state->qp]; double tmpWeight = pow(2.0, (state->qp - chroma_qp) / 3.0); + if (state->encoder_control->cfg.dep_quant) + { + tmpWeight *= (state->encoder_control->cfg.gop_len >= 8 ? pow(2.0, 0.1 / 3.0) : pow(2.0, 0.2 / 3.0)); // increase chroma weight for dependent quantization (in order to reduce bit rate shift from chroma to luma) + } + state->chroma_weights[1] = state->chroma_weights[2] = state->chroma_weights[3] = tmpWeight; state->c_lambda = state->lambda / tmpWeight; // Apply variance adaptive quantization @@ -1201,3 +1211,23 @@ void uvg_set_lcu_lambda_and_qp(encoder_state_t * const state, lcu->adjust_qp = state->qp; } } + + +double uvg_calculate_chroma_lambda(encoder_state_t *state, bool use_jccr, int jccr_mode) +{ + const encoder_control_t * const ctrl = state->encoder_control; + double lambda = state->lambda; + int8_t chroma_qp = ctrl->qp_map[0][state->qp]; + double tmpWeight = pow(2.0, (state->qp - chroma_qp) / 3.0); + if (state->encoder_control->cfg.dep_quant) { + tmpWeight *= (state->encoder_control->cfg.gop_len >= 8 ? pow(2.0, 0.1 / 3.0) : pow(2.0, 0.2 / 3.0)); // increase chroma weight for dependent quantization (in order to reduce bit rate shift from chroma to luma) + } + lambda /= tmpWeight; + lambda *= use_jccr && state->qp > 18 ? 1.3 : 1.0; + if (jccr_mode == 1 || jccr_mode == 2) { + lambda *= 0.8; + } else if (jccr_mode == 3) { + lambda *= 0.5; + } + return lambda; +} \ No newline at end of file diff --git a/src/rate_control.h b/src/rate_control.h index f397e2a2..644d7fc4 100644 --- a/src/rate_control.h +++ b/src/rate_control.h @@ -76,4 +76,6 @@ void uvg_set_ctu_qp_lambda(encoder_state_t * const state, vector2d_t pos); void uvg_update_after_picture(encoder_state_t * const state); void uvg_estimate_pic_lambda(encoder_state_t * const state); +double uvg_calculate_chroma_lambda(encoder_state_t *state, bool use_jccr, int jccr_mode); + #endif // RATE_CONTROL_H_ diff --git a/src/search.c b/src/search.c index f7cc5bc6..b1ac6944 100644 --- a/src/search.c +++ b/src/search.c @@ -43,6 +43,7 @@ #include "imagelist.h" #include "inter.h" #include "intra.h" +#include "rate_control.h" #include "uvg266.h" #include "rdo.h" #include "search_inter.h" @@ -731,7 +732,8 @@ static double cu_rd_cost_tr_split_accurate( cu_info_t* const tr_cu = LCU_GET_CU_AT_PX(lcu, cu_loc->local_x, cu_loc->local_y); double coeff_bits = 0; - double tr_tree_bits = 0; + double luma_bits = 0; + double chroma_bits = 0; const int cb_flag_u = tr_cu->joint_cb_cr ? tr_cu->joint_cb_cr >> 1 : cbf_is_set(tr_cu->cbf, COLOR_U); const int cb_flag_v = tr_cu->joint_cb_cr ? tr_cu->joint_cb_cr & 1 : cbf_is_set(tr_cu->cbf, COLOR_V); @@ -743,7 +745,7 @@ static double cu_rd_cost_tr_split_accurate( // Only need to signal coded block flag if not skipped or merged // skip = no coded residual, merge = coded residual if (pred_cu->type != CU_INTRA && (!pred_cu->merged)) { - CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_qt_root_cbf_model), cbf, tr_tree_bits, "rqt_root_cbf"); + CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_qt_root_cbf_model), cbf, luma_bits, "rqt_root_cbf"); } } @@ -768,13 +770,13 @@ static double cu_rd_cost_tr_split_accurate( for (int i = 0; i < split_count; ++i) { sum += cu_rd_cost_tr_split_accurate(state, pred_cu, lcu, tree_type, isp_cbf, &split_cu_loc[i], chroma_loc ? &split_chroma_cu_loc[i] : NULL, has_chroma); } - return sum + tr_tree_bits * state->lambda; + return sum + luma_bits * state->lambda; } has_chroma = state->encoder_control->chroma_format != UVG_CSP_400 && has_chroma && tree_type != UVG_LUMA_T; if (!skip_residual_coding && has_chroma) { - CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cb[0]), cb_flag_u, tr_tree_bits, "cbf_cb"); - CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cr[cb_flag_u]), cb_flag_v, tr_tree_bits, "cbf_cr"); + CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cb[0]), cb_flag_u, chroma_bits, "cbf_cb"); + CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cr[cb_flag_u]), cb_flag_v, chroma_bits, "cbf_cr"); } const int cb_flag_y = cbf_is_set(tr_cu->cbf, COLOR_Y) && tree_type != UVG_CHROMA_T; @@ -791,7 +793,7 @@ static double cu_rd_cost_tr_split_accurate( { cabac_ctx_t* ctx = &(cabac->ctx.qt_cbf_model_luma[0]); - CABAC_FBITS_UPDATE(cabac, ctx, cb_flag_y, tr_tree_bits, "cbf_y_search"); + CABAC_FBITS_UPDATE(cabac, ctx, cb_flag_y, luma_bits, "cbf_y_search"); } } else { @@ -802,7 +804,7 @@ static double cu_rd_cost_tr_split_accurate( for (int i = 0; i < split_limit; i++) { if (i != split_limit_minus_one || isp_cbf != 1 << split_limit_minus_one) { const int flag = (isp_cbf >> i) & 1; - CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_luma[luma_ctx]), flag, tr_tree_bits, "cbf_y_search"); + CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_luma[luma_ctx]), flag, luma_bits, "cbf_y_search"); luma_ctx = 2 + flag; } } @@ -812,7 +814,7 @@ static double cu_rd_cost_tr_split_accurate( // TODO qp_delta_sign_flag if ((cb_flag_u || cb_flag_v) && has_chroma && state->encoder_control->cfg.jccr) { - CABAC_FBITS_UPDATE(cabac, &cabac->ctx.joint_cb_cr[cb_flag_u * 2 + cb_flag_v - 1], tr_cu->joint_cb_cr != 0, tr_tree_bits, "tu_joint_cbcr_residual_flag"); + CABAC_FBITS_UPDATE(cabac, &cabac->ctx.joint_cb_cr[cb_flag_u * 2 + cb_flag_v - 1], tr_cu->joint_cb_cr != 0, chroma_bits, "tu_joint_cbcr_residual_flag"); } } @@ -834,7 +836,7 @@ static double cu_rd_cost_tr_split_accurate( if(cb_flag_y || is_isp){ if (can_use_tr_skip) { - CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_luma, tr_cu->tr_idx == MTS_SKIP, tr_tree_bits, "transform_skip_flag"); + CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_luma, tr_cu->tr_idx == MTS_SKIP, luma_bits, "transform_skip_flag"); } int8_t luma_scan_mode = SCAN_DIAG; if (pred_cu->type == CU_INTER || pred_cu->intra.isp_mode == ISP_MODE_NO_ISP) { @@ -872,14 +874,14 @@ static double cu_rd_cost_tr_split_accurate( cabac, &cabac->ctx.lfnst_idx_model[1], lfnst_idx != 0, - tr_tree_bits, + luma_bits, "lfnst_idx"); if (lfnst_idx > 0) { CABAC_FBITS_UPDATE( cabac, &cabac->ctx.lfnst_idx_model[2], lfnst_idx == 2, - tr_tree_bits, + luma_bits, "lfnst_idx"); } } @@ -903,38 +905,34 @@ static double cu_rd_cost_tr_split_accurate( if (!state->encoder_control->cfg.lossless) { int index = lcu_px.y * LCU_WIDTH_C + lcu_px.x; unsigned ssd_u = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index], - LCU_WIDTH_C, LCU_WIDTH_C, - chroma_width, chroma_height); + LCU_WIDTH_C, LCU_WIDTH_C, chroma_width, chroma_height) * state->chroma_weights[1]; unsigned ssd_v = uvg_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.v[index], - LCU_WIDTH_C, LCU_WIDTH_C, - chroma_width, chroma_height); + LCU_WIDTH_C, LCU_WIDTH_C, chroma_width, chroma_height) * state->chroma_weights[2]; chroma_ssd = ssd_u + ssd_v; } if(chroma_can_use_tr_skip && cb_flag_u) { - CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 2, tr_tree_bits, "transform_skip_flag"); + CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 2, chroma_bits, "transform_skip_flag"); } if(chroma_can_use_tr_skip && cb_flag_v) { - CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 4, tr_tree_bits, "transform_skip_flag"); + CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 4, chroma_bits, "transform_skip_flag"); } - coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.u, tr_cu, &temp_chroma_loc, COLOR_U, scan_order, tr_cu->tr_skip & 2, COEFF_ORDER_CU); - coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.v, tr_cu, &temp_chroma_loc, COLOR_V, scan_order, tr_cu->tr_skip & 4, COEFF_ORDER_CU); + chroma_bits += uvg_get_coeff_cost(state, lcu->coeff.u, tr_cu, &temp_chroma_loc, COLOR_U, scan_order, tr_cu->tr_skip & 2, COEFF_ORDER_CU); + chroma_bits += uvg_get_coeff_cost(state, lcu->coeff.v, tr_cu, &temp_chroma_loc, COLOR_V, scan_order, tr_cu->tr_skip & 4, COEFF_ORDER_CU); } else { { int index = lcu_px.y * LCU_WIDTH_C + lcu_px.x; int ssd_u_joint = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index], - LCU_WIDTH_C, LCU_WIDTH_C, - chroma_width, chroma_height); + LCU_WIDTH_C, LCU_WIDTH_C, chroma_width, chroma_height) * state->chroma_weights[3]; int ssd_v_joint = uvg_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.v[index], - LCU_WIDTH_C, LCU_WIDTH_C, - chroma_width, chroma_height); + LCU_WIDTH_C, LCU_WIDTH_C, chroma_width, chroma_height) * state->chroma_weights[3]; chroma_ssd = ssd_u_joint + ssd_v_joint; } if (chroma_can_use_tr_skip) { - CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 2, tr_tree_bits, "transform_skip_flag"); + CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 2, chroma_bits, "transform_skip_flag"); } - coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.joint_uv, tr_cu, &temp_chroma_loc, COLOR_U, scan_order, 0, COEFF_ORDER_CU); + chroma_bits += uvg_get_coeff_cost(state, lcu->coeff.joint_uv, tr_cu, &temp_chroma_loc, COLOR_U, scan_order, 0, COEFF_ORDER_CU); } } @@ -945,14 +943,14 @@ static double cu_rd_cost_tr_split_accurate( cabac, &cabac->ctx.lfnst_idx_model[is_chroma_tree], lfnst_idx != 0, - tr_tree_bits, + luma_bits, "lfnst_idx"); if (lfnst_idx > 0) { CABAC_FBITS_UPDATE( cabac, &cabac->ctx.lfnst_idx_model[2], lfnst_idx == 2, - tr_tree_bits, + luma_bits, "lfnst_idx"); } } @@ -963,20 +961,20 @@ static double cu_rd_cost_tr_split_accurate( bool symbol = tr_cu->tr_idx != 0; int ctx_idx = 0; - CABAC_FBITS_UPDATE(cabac, &cabac->ctx.mts_idx_model[ctx_idx], symbol, tr_tree_bits, "mts_idx"); + CABAC_FBITS_UPDATE(cabac, &cabac->ctx.mts_idx_model[ctx_idx], symbol, luma_bits, "mts_idx"); ctx_idx++; for (int i = 0; i < 3 && symbol; i++, ctx_idx++) { symbol = tr_cu->tr_idx > i + MTS_DST7_DST7 ? 1 : 0; - CABAC_FBITS_UPDATE(cabac, &cabac->ctx.mts_idx_model[ctx_idx], symbol, tr_tree_bits, "mts_idx"); + CABAC_FBITS_UPDATE(cabac, &cabac->ctx.mts_idx_model[ctx_idx], symbol, luma_bits, "mts_idx"); } tr_cu->mts_last_scan_pos = false; tr_cu->violates_mts_coeff_constraint = false; } - double bits = tr_tree_bits + coeff_bits; - return luma_ssd * UVG_LUMA_MULT + chroma_ssd * UVG_CHROMA_MULT + bits * state->lambda; + double bits = luma_bits + coeff_bits; + return luma_ssd * UVG_LUMA_MULT + chroma_ssd * UVG_CHROMA_MULT + (bits + chroma_bits) * state->lambda; } @@ -1378,7 +1376,8 @@ static double search_cu( cu_loc, 0, &intra_search, - lcu + lcu, + NULL ); memcpy(&state->search_cabac, &temp_cabac, sizeof(cabac_data_t)); } @@ -1478,20 +1477,23 @@ static double search_cu( recon_chroma = false; } lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu); - uvg_intra_recon_cu(state, - &intra_search, cu_loc, - NULL, lcu, - tree_type, - recon_luma, recon_chroma); if (!state->encoder_control->cfg.cclm && cur_cu->intra.isp_mode != ISP_MODE_NO_ISP) { uvg_recon_and_estimate_cost_isp( state, cu_loc, 0, &intra_search, - lcu + lcu, + NULL ); } + else { + uvg_intra_recon_cu(state, + &intra_search, cu_loc, + NULL, lcu, + tree_type, + recon_luma, recon_chroma); + } if((!recon_chroma && state->encoder_control->chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) @@ -1518,7 +1520,7 @@ static double search_cu( // Set isp split cbfs here const int split_type = intra_search.pred_cu.intra.isp_mode; - const int split_num = split_type == ISP_MODE_NO_ISP ? 0 : uvg_get_isp_split_num(cu_width, cu_height, split_type, true); + const int split_num = split_type == ISP_MODE_NO_ISP || tree_type == UVG_CHROMA_T ? 0 : uvg_get_isp_split_num(cu_width, cu_height, split_type, true); const int cbf_cb = cbf_is_set(cur_cu->cbf, COLOR_U); const int cbf_cr = cbf_is_set(cur_cu->cbf, COLOR_V); @@ -1530,7 +1532,7 @@ static double search_cu( // Fetch proper x, y coords for isp blocks int tmp_x = isp_loc.x; int tmp_y = isp_loc.y; - uvg_get_isp_cu_arr_coords(&tmp_x, &tmp_y); + uvg_get_isp_cu_arr_coords(&tmp_x, &tmp_y, MAX(cu_width, cu_height)); cu_info_t* split_cu = LCU_GET_CU_AT_PX(lcu, tmp_x % LCU_WIDTH, tmp_y % LCU_WIDTH); bool cur_cbf = (intra_search.best_isp_cbfs >> i) & 1; cbf_clear(&split_cu->cbf, COLOR_Y); @@ -1733,6 +1735,13 @@ static double search_cu( for (int split_type = QT_SPLIT; split_type <= TT_VER_SPLIT; ++split_type) { if (!can_split[split_type]) continue; + split_tree_t new_split = { + split_tree.split_tree | split_type << (split_tree.current_depth * 3), + split_tree.current_depth + 1, + split_tree.mtt_depth + (split_type != QT_SPLIT), + split_tree.implicit_mtt_depth + (split_type != QT_SPLIT && is_implicit), + 0 + }; if (completely_inside && check_for_early_termission( cu_width, @@ -1798,13 +1807,6 @@ static double search_cu( continue; } - split_tree_t new_split = { - split_tree.split_tree | split_type << (split_tree.current_depth * 3), - split_tree.current_depth + 1, - split_tree.mtt_depth + (split_type != QT_SPLIT), - split_tree.implicit_mtt_depth + (split_type != QT_SPLIT && is_implicit), - 0 - }; state->search_cabac.update = 0; split_cost += split_bits * state->lambda; @@ -2166,7 +2168,9 @@ void uvg_search_lcu(encoder_state_t * const state, const int x, const int y, con } int tree_type = state->frame->slicetype == UVG_SLICE_I - && state->encoder_control->cfg.dual_tree ? UVG_LUMA_T : UVG_BOTH_T; + && state->encoder_control->cfg.dual_tree + ? UVG_LUMA_T + : UVG_BOTH_T; cu_loc_t start; uvg_cu_loc_ctor(&start, x, y, LCU_WIDTH, LCU_WIDTH); diff --git a/src/search_intra.c b/src/search_intra.c index 9416f122..2e507f95 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -365,6 +365,7 @@ static double search_intra_trdepth( for (trafo = mts_start; trafo < num_transforms; trafo++) { for (int lfnst_idx = start_idx; lfnst_idx <= end_lfnst_idx; lfnst_idx++) { // Initialize lfnst variables + search_data->best_isp_cbfs = 0; pred_cu->tr_idx = trafo; pred_cu->tr_skip = trafo == MTS_SKIP; pred_cu->lfnst_idx = lfnst_idx; @@ -400,8 +401,10 @@ static double search_intra_trdepth( cu_loc, cost_treshold, search_data, - lcu + lcu, + &constraints[0] ); + constraints[1] = search_data->best_isp_cbfs != 0; } else { uvg_intra_recon_cu( @@ -427,7 +430,7 @@ static double search_intra_trdepth( } } - if (trafo != MTS_SKIP && end_lfnst_idx != 0) { + if (trafo != MTS_SKIP && end_lfnst_idx != 0 && pred_cu->intra.isp_mode == ISP_MODE_NO_ISP) { uvg_derive_lfnst_constraints( pred_cu, constraints, @@ -438,7 +441,7 @@ static double search_intra_trdepth( COLOR_Y); } - if (!constraints[1] && (cbf_is_set(pred_cu->cbf, COLOR_Y) || pred_cu->intra.isp_mode != ISP_MODE_NO_ISP)) { + if (!constraints[1] && cbf_is_set(pred_cu->cbf, COLOR_Y)) { //end_idx = 0; if (pred_cu->lfnst_idx > 0) { continue; @@ -456,8 +459,8 @@ static double search_intra_trdepth( } double transform_bits = 0; if (state->encoder_control->cfg.lfnst && PU_IS_TU(pred_cu) && - trafo != MTS_SKIP && end_lfnst_idx != 0) { - if ((!constraints[0] && constraints[1]) || lfnst_idx != 0) { + trafo != MTS_SKIP && end_lfnst_idx != 0 && (cbf_is_set(pred_cu->cbf, COLOR_Y) || search_data->best_isp_cbfs != 0)) { + if ((!constraints[0] && (constraints[1] || pred_cu->intra.isp_mode != ISP_MODE_NO_ISP))) { transform_bits += CTX_ENTROPY_FBITS( &state->search_cabac.ctx.lfnst_idx_model[tree_type == UVG_LUMA_T], lfnst_idx != 0); @@ -469,6 +472,7 @@ static double search_intra_trdepth( } } if (num_transforms > 2 && trafo != MTS_SKIP + && (cbf_is_set(pred_cu->cbf, COLOR_Y) || search_data->best_isp_cbfs != 0) && pred_cu->intra.isp_mode == ISP_MODE_NO_ISP && lfnst_idx == 0 && width <= 32 @@ -952,8 +956,9 @@ static INLINE double count_bits( const double not_mpm_mode_bit, const double planar_mode_flag, const double not_planar_mode_flag, + const double not_isp_flag, int8_t mode - ) +) { int i = 0; int smaller_than_pred = 0; @@ -975,7 +980,7 @@ static INLINE double count_bits( else { bits = not_mpm_mode_bit + 5 + (mode - smaller_than_pred > 2); } - bits += not_mrl + not_mip; + bits += not_mrl + not_mip + not_isp_flag; return bits; } @@ -1023,13 +1028,14 @@ static uint8_t search_intra_rough( int8_t mode; double cost; }; - + const double not_mrl = state->encoder_control->cfg.mrl && (cu_loc->y % LCU_WIDTH) ? CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.multi_ref_line[0]), 0) : 0; const double not_mip = state->encoder_control->cfg.mip ? CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.mip_flag[mip_ctx]), 0) : 0; const double mpm_mode_bit = CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.intra_luma_mpm_flag_model), 1); const double not_mpm_mode_bit = CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.intra_luma_mpm_flag_model), 0); const double planar_mode_flag = CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.luma_planar_model[1]), 0); const double not_planar_mode_flag = CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.luma_planar_model[1]), 1); + const double not_isp_flag = state->encoder_control->cfg.isp && uvg_can_use_isp(width, height) ? CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.intra_subpart_model[0]), 0) : 0; const uint8_t mode_list_size = state->encoder_control->cfg.mip ? 6 : 3; struct mode_cost best_six_modes[6]; @@ -1059,7 +1065,7 @@ static uint8_t search_intra_rough( not_mpm_mode_bit, planar_mode_flag, not_planar_mode_flag, - 0) * state->lambda_sqrt; + not_isp_flag, 0) * state->lambda_sqrt; costs[1] += count_bits( state, intra_preds, @@ -1069,7 +1075,7 @@ static uint8_t search_intra_rough( not_mpm_mode_bit, planar_mode_flag, not_planar_mode_flag, - 1) * state->lambda_sqrt; + not_isp_flag, 1) * state->lambda_sqrt; if(costs[0] < costs[1]) { min_cost = costs[0]; max_cost = costs[1]; @@ -1113,7 +1119,7 @@ static uint8_t search_intra_rough( not_mpm_mode_bit, planar_mode_flag, not_planar_mode_flag, - mode + i * offset) * state->lambda_sqrt; + not_isp_flag, mode + i * offset) * state->lambda_sqrt; } } @@ -1184,7 +1190,7 @@ static uint8_t search_intra_rough( not_mpm_mode_bit, planar_mode_flag, not_planar_mode_flag, - modes_to_check[block + i]) * state->lambda_sqrt; + not_isp_flag, modes_to_check[block + i]) * state->lambda_sqrt; } @@ -1327,7 +1333,8 @@ static int8_t search_intra_rdo( for (int mode = 0; mode < modes_to_check; mode++) { bool can_do_isp_search = search_data[mode].pred_cu.intra.mip_flag ? false : true; // Cannot use ISP with MIP - can_do_isp_search = search_data[mode].pred_cu.intra.multi_ref_idx == 0 ? can_do_isp_search : false; // Cannot use ISP with MRL + // can_do_isp_search = search_data[mode].pred_cu.intra.multi_ref_idx == 0 ? can_do_isp_search : false; // Cannot use ISP with MRL + const uint8_t mrl_idx = search_data[mode].pred_cu.intra.multi_ref_idx; double best_isp_cost = MAX_DOUBLE; double best_bits = MAX_DOUBLE; int8_t best_isp_mode = 0; @@ -1340,6 +1347,7 @@ static int8_t search_intra_rdo( search_data[mode].pred_cu.intra.isp_mode = isp_mode; + search_data[mode].pred_cu.intra.multi_ref_idx = isp_mode == ISP_MODE_NO_ISP ? mrl_idx : 0; double rdo_bitcost = uvg_luma_mode_bits(state, &search_data[mode].pred_cu, cu_loc, lcu); search_data[mode].pred_cu.tr_idx = MTS_TR_NUM; search_data[mode].bits = rdo_bitcost; @@ -1362,6 +1370,7 @@ static int8_t search_intra_rdo( search_data[mode].cost = best_isp_cost; search_data[mode].bits = best_bits; search_data[mode].pred_cu.intra.isp_mode = best_isp_mode; + search_data[mode].pred_cu.intra.multi_ref_idx = best_isp_mode == ISP_MODE_NO_ISP ? mrl_idx : 0; search_data[mode].pred_cu.tr_idx = best_mts_mode_for_isp[best_isp_mode]; search_data[mode].pred_cu.tr_skip = best_mts_mode_for_isp[best_isp_mode] == MTS_SKIP; search_data[mode].pred_cu.lfnst_idx = best_lfnst_mode_for_isp[best_isp_mode]; @@ -1482,11 +1491,13 @@ int8_t uvg_search_intra_chroma_rdo( ALIGNED(64) int16_t u_resi[LCU_WIDTH_C * LCU_WIDTH_C]; ALIGNED(64) int16_t v_resi[LCU_WIDTH_C * LCU_WIDTH_C]; + double original_c_lambda = state->c_lambda; for (int8_t mode_i = 0; mode_i < num_modes; ++mode_i) { const uint8_t mode = chroma_data[mode_i].pred_cu.intra.mode_chroma; double mode_bits = uvg_chroma_mode_bits(state, mode, luma_mode); - chroma_data[mode_i].cost = mode_bits * state->lambda; + chroma_data[mode_i].cost = mode_bits * state->c_lambda; + chroma_data[mode_i].bits = mode_bits; cu_info_t* pred_cu = &chroma_data[mode_i].pred_cu; uint8_t best_lfnst_index = 0; for (int lfnst_i = 0; lfnst_i < 3; ++lfnst_i) { @@ -1494,9 +1505,10 @@ int8_t uvg_search_intra_chroma_rdo( if (lfnst == -1) { continue; } + state->c_lambda = original_c_lambda * (state->encoder_control->cfg.jccr && state->qp > 18 ? 1.3 : 1.0); pred_cu->cr_lfnst_idx = lfnst; - chroma_data[mode_i].lfnst_costs[lfnst] += mode_bits * state->lambda; - if (PU_IS_TU(pred_cu) && (tree_type != UVG_CHROMA_T || (pred_cu->log2_width < 5 && pred_cu->log2_height < 5))) { + chroma_data[mode_i].lfnst_costs[lfnst] += mode_bits * state->c_lambda; + if (PU_IS_TU(pred_cu) && (tree_type != UVG_CHROMA_T || (pred_cu->log2_chroma_width < 5 && pred_cu->log2_chroma_height < 5))) { uvg_intra_predict( state, &refs[COLOR_U - 1], @@ -1552,8 +1564,9 @@ int8_t uvg_search_intra_chroma_rdo( continue; } + double actual_cost = state->lambda * (chorma_ts_out.u_bits + chorma_ts_out.v_bits + mode_bits) + (chorma_ts_out.u_distortion + chorma_ts_out.v_distortion); if(chorma_ts_out.best_u_cost + chorma_ts_out.best_v_cost < chorma_ts_out.best_combined_cost) { - chroma_data[mode_i].lfnst_costs[lfnst] += chorma_ts_out.best_u_cost + chorma_ts_out.best_v_cost; + chroma_data[mode_i].lfnst_costs[lfnst] = actual_cost; if( chroma_data[mode_i].lfnst_costs[lfnst] < chroma_data[mode_i].lfnst_costs[best_lfnst_index] || lfnst_i == 0) { chroma_data[mode_i].pred_cu.joint_cb_cr = 0; @@ -1565,7 +1578,7 @@ int8_t uvg_search_intra_chroma_rdo( } } else { - chroma_data[mode_i].lfnst_costs[lfnst] += chorma_ts_out.best_combined_cost; + chroma_data[mode_i].lfnst_costs[lfnst] = actual_cost; if (chroma_data[mode_i].lfnst_costs[lfnst] < chroma_data[mode_i].lfnst_costs[best_lfnst_index] || lfnst_i == 0) { chroma_data[mode_i].pred_cu.joint_cb_cr = chorma_ts_out.best_combined_index; @@ -1574,10 +1587,11 @@ int8_t uvg_search_intra_chroma_rdo( chroma_data[mode_i].cost = chroma_data[mode_i].lfnst_costs[lfnst]; } } + } else { state->search_cabac.update = 1; - chroma_data[mode_i].cost = mode_bits * state->lambda; + chroma_data[mode_i].cost = mode_bits * state->c_lambda; uvg_intra_recon_cu(state, &chroma_data[mode_i], cu_loc, pred_cu, lcu, @@ -1593,6 +1607,7 @@ int8_t uvg_search_intra_chroma_rdo( } sort_modes(chroma_data, num_modes); + state->c_lambda = original_c_lambda; return chroma_data[0].pred_cu.intra.mode_chroma; } diff --git a/src/strategies/generic/dct-generic.c b/src/strategies/generic/dct-generic.c index 2a673d21..ccddf17a 100644 --- a/src/strategies/generic/dct-generic.c +++ b/src/strategies/generic/dct-generic.c @@ -2608,7 +2608,7 @@ static void mts_dct_generic( if (height == 1) { dct_hor(input, output, shift_1st, height, 0, skip_width); } else if (width == 1) { - dct_ver(input, output, shift_2nd, width, 0, skip_height); + dct_ver(input, output, log2_height_minus1 + 1 + bitdepth + 6 - 15, width, 0, skip_height); } else { dct_hor(input, tmp, shift_1st, height, 0, skip_width); dct_ver(tmp, output, shift_2nd, width, skip_width, skip_height); @@ -2666,9 +2666,9 @@ static void mts_idct_generic( const int32_t shift_2nd = (transform_matrix_shift + max_log2_tr_dynamic_range - 1) - bitdepth; if (height == 1) { - idct_hor(input, output, shift_1st, height, 0, skip_width); + idct_hor(input, output, shift_2nd + 1, height, 0, skip_width); } else if (width == 1) { - idct_ver(input, output, shift_2nd, width, 0, skip_height); + idct_ver(input, output, shift_2nd + 1, width, 0, skip_height); } else { idct_ver(input, tmp, shift_1st, width, skip_width, skip_height); idct_hor(tmp, output, shift_2nd, height, 0, skip_width); diff --git a/src/transform.c b/src/transform.c index 783d9f2b..58051a87 100644 --- a/src/transform.c +++ b/src/transform.c @@ -37,6 +37,7 @@ #include "intra.h" #include "uvg266.h" #include "lfnst_tables.h" +#include "rate_control.h" #include "rdo.h" #include "strategies/strategies-dct.h" #include "strategies/strategies-quant.h" @@ -362,7 +363,7 @@ static void generate_jccr_transforms( } } } - costs[jccr] = d2 != 0 ? MIN(d1, d2) : d1; + costs[jccr] = jccr == 0 ? MIN(d1, d2) : d1; } int64_t min_dist1 = costs[0]; int64_t min_dist2 = INT64_MAX; @@ -418,8 +419,7 @@ static void generate_jccr_transforms( static void quantize_chroma( encoder_state_t* const state, cu_info_t * const cur_tu, - int8_t width, - int8_t height, + const cu_loc_t* const cu_loc, coeff_t u_coeff[5120], coeff_t v_coeff[2048], enum uvg_chroma_transforms transform, @@ -428,9 +428,13 @@ static void quantize_chroma( const coeff_scan_order_t scan_order, bool* u_has_coeffs, bool* v_has_coeffs, - uint8_t lfnst_idx, - enum uvg_tree_type tree_type) + uint8_t lfnst_idx, + enum uvg_tree_type tree_type, + double* u_coeff_cost, + double* v_coeff_cost) { + int8_t width = cu_loc->chroma_width; + int8_t height = cu_loc->chroma_height; if(state->encoder_control->cfg.dep_quant && transform != CHROMA_TS) { int abs_sum = 0; uvg_dep_quant( @@ -445,10 +449,23 @@ static void quantize_chroma( &abs_sum, state->encoder_control->cfg.scaling_list ); + + cbf_clear(&cur_tu->cbf, COLOR_U); if (abs_sum > 0) { *u_has_coeffs = 1; cbf_set(&cur_tu->cbf, COLOR_U); } + + *u_coeff_cost = uvg_get_coeff_cost( + state, + u_quant_coeff, + cur_tu, + cu_loc, + COLOR_U, + SCAN_DIAG, + false, + COEFF_ORDER_LINEAR); + if (transform == DCT7_CHROMA) { abs_sum = 0; uvg_dep_quant( @@ -463,10 +480,24 @@ static void quantize_chroma( &abs_sum, state->encoder_control->cfg.scaling_list ); + + cbf_clear(&cur_tu->cbf, COLOR_V); if (abs_sum > 0) { *v_has_coeffs = 1; + cbf_set(&cur_tu->cbf, COLOR_V); } + + *v_coeff_cost = uvg_get_coeff_cost( + state, + v_quant_coeff, + cur_tu, + cu_loc, + COLOR_V, + SCAN_DIAG, + false, + COEFF_ORDER_LINEAR); cbf_clear(&cur_tu->cbf, COLOR_U); + cbf_clear(&cur_tu->cbf, COLOR_V); } return; } @@ -580,6 +611,9 @@ void uvg_chroma_transform_search( trans_offset, &num_transforms); } + + double lambda = state->c_lambda; + chorma_ts_out->best_u_cost = MAX_DOUBLE; chorma_ts_out->best_v_cost = MAX_DOUBLE; chorma_ts_out->best_combined_cost = MAX_DOUBLE; @@ -600,11 +634,27 @@ void uvg_chroma_transform_search( uvg_fwd_lfnst(pred_cu, width, height, COLOR_V, pred_cu->cr_lfnst_idx, &v_coeff[i * trans_offset], tree_type, state->collocated_luma_mode); } } + uint8_t old_jccr = pred_cu->joint_cb_cr; + pred_cu->joint_cb_cr = 0; + if(is_jccr) { + state->c_lambda = lambda * (transforms[i] == JCCR_3 ? 0.5 : 0.8); + pred_cu->joint_cb_cr = transforms[i]; + } + else if(state->encoder_control->cfg.dep_quant) { + state->search_cabac.update = 1; + } + + double u_coeff_cost = 0; + double v_coeff_cost = 0; + unsigned ssd_u = 0; + unsigned ssd_v = 0; + double u_bits = 0; + double v_bits = 0; + quantize_chroma( state, pred_cu, - width, - height, + cu_loc, &u_coeff[i * trans_offset], &v_coeff[i * trans_offset], transforms[i], @@ -612,8 +662,12 @@ void uvg_chroma_transform_search( v_quant_coeff, SCAN_DIAG, &u_has_coeffs, - &v_has_coeffs, tree_type == UVG_CHROMA_T ? pred_cu->cr_lfnst_idx : pred_cu->lfnst_idx, tree_type); - if(pred_cu->cr_lfnst_idx !=0 && !u_has_coeffs && !v_has_coeffs) continue; + &v_has_coeffs, tree_type == UVG_CHROMA_T ? pred_cu->cr_lfnst_idx : pred_cu->lfnst_idx, + tree_type, + &u_coeff_cost, + &v_coeff_cost); + pred_cu->joint_cb_cr = old_jccr; + if (pred_cu->cr_lfnst_idx != 0 && !u_has_coeffs && !v_has_coeffs) goto reset_cabac; if(pred_cu->type == CU_INTRA && transforms[i] != CHROMA_TS && tree_type == UVG_CHROMA_T) { bool constraints[2] = { false, false }; @@ -621,10 +675,10 @@ void uvg_chroma_transform_search( if(!is_jccr) { uvg_derive_lfnst_constraints(pred_cu, constraints, v_quant_coeff, width, height, NULL, COLOR_V); } - if (!constraints[1] && (u_has_coeffs || v_has_coeffs) && pred_cu->cr_lfnst_idx != 0) continue; + if (!constraints[1] && (u_has_coeffs || v_has_coeffs) && pred_cu->cr_lfnst_idx != 0) goto reset_cabac; } - if (is_jccr && !u_has_coeffs) continue; + if (is_jccr && !u_has_coeffs) goto reset_cabac; if (u_has_coeffs) { uvg_dequant(state, u_quant_coeff, &u_coeff[i * trans_offset], width, height, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V, @@ -697,8 +751,6 @@ void uvg_chroma_transform_search( uvg_pixels_blit(v_pred, &v_recon[trans_offset * i], width, height, width, width); } - unsigned ssd_u = 0; - unsigned ssd_v = 0; if (!state->encoder_control->cfg.lossless) { ssd_u = uvg_pixels_calc_ssd(&lcu->ref.u[offset], &u_recon[trans_offset * i], LCU_WIDTH_C, width, @@ -706,10 +758,10 @@ void uvg_chroma_transform_search( ssd_v = uvg_pixels_calc_ssd(&lcu->ref.v[offset], &v_recon[trans_offset * i], LCU_WIDTH_C, width, width, height); + ssd_u = (double)ssd_u * state->chroma_weights[1]; + ssd_v = (double)ssd_v * state->chroma_weights[2]; } - double u_bits = 0; - double v_bits = 0; state->search_cabac.update = 1; int cbf_u = transforms[i] & 2 || (u_has_coeffs && !(transforms[i] & 1)); @@ -733,16 +785,17 @@ void uvg_chroma_transform_search( transforms[i] == CHROMA_TS, u_bits, "tr_skip_u" ); } - double coeff_cost = uvg_get_coeff_cost( - state, - u_quant_coeff, - pred_cu, - cu_loc, - COLOR_U, - SCAN_DIAG, - transforms[i] == CHROMA_TS, - COEFF_ORDER_LINEAR); - u_bits += coeff_cost; + if(u_coeff_cost == 0) { + u_coeff_cost = uvg_get_coeff_cost( + state, + u_quant_coeff, + pred_cu, + cu_loc, + COLOR_U, + SCAN_DIAG, + transforms[i] == CHROMA_TS, + COEFF_ORDER_LINEAR); + } } if (cbf_v && !is_jccr) { if (can_use_tr_skip) { @@ -750,16 +803,20 @@ void uvg_chroma_transform_search( transforms[i] == CHROMA_TS, v_bits, "tr_skip_v" ); } - v_bits += uvg_get_coeff_cost( - state, - v_quant_coeff, - pred_cu, - cu_loc, - COLOR_V, - SCAN_DIAG, - transforms[i] == CHROMA_TS, - COEFF_ORDER_LINEAR); + if (v_coeff_cost == 0) { + v_coeff_cost = uvg_get_coeff_cost( + state, + v_quant_coeff, + pred_cu, + cu_loc, + COLOR_V, + SCAN_DIAG, + transforms[i] == CHROMA_TS, + COEFF_ORDER_LINEAR); + } } + u_bits += u_coeff_cost; + v_bits += v_coeff_cost; if((depth == 4 || tree_type == UVG_CHROMA_T) && state->encoder_control->cfg.lfnst && 0) { if(uvg_is_lfnst_allowed(state, pred_cu, UVG_CHROMA_T, COLOR_UV, cu_loc, lcu)) { const int lfnst_idx = pred_cu->cr_lfnst_idx; @@ -781,25 +838,35 @@ void uvg_chroma_transform_search( pred_cu->lfnst_last_scan_pos = false; pred_cu->violates_lfnst_constrained_chroma = false; } + if (!is_jccr) { - double u_cost = UVG_CHROMA_MULT * ssd_u + u_bits * state->c_lambda; - double v_cost = UVG_CHROMA_MULT * ssd_v + v_bits * state->c_lambda; + double u_cost = UVG_CHROMA_MULT * ssd_u + u_bits * state->lambda; + double v_cost = UVG_CHROMA_MULT * ssd_v + v_bits * state->lambda; if (u_cost < chorma_ts_out->best_u_cost) { chorma_ts_out->best_u_cost = u_cost; chorma_ts_out->best_u_index = u_has_coeffs ? transforms[i] : NO_RESIDUAL; + chorma_ts_out->u_bits = u_bits; + chorma_ts_out->u_distortion = ssd_u; } if (v_cost < chorma_ts_out->best_v_cost) { chorma_ts_out->best_v_cost = v_cost; chorma_ts_out->best_v_index = v_has_coeffs ? transforms[i] : NO_RESIDUAL; + chorma_ts_out->v_bits = v_bits; + chorma_ts_out->v_distortion = ssd_v; } } else { - double cost = UVG_CHROMA_MULT * (ssd_u + ssd_v) + (u_bits + v_bits) * state->c_lambda; - if (cost < chorma_ts_out->best_combined_cost) { + double cost = UVG_CHROMA_MULT * (ssd_u + ssd_v) + (u_bits + v_bits) * state->lambda; + if (cost < chorma_ts_out->best_combined_cost && cost < chorma_ts_out->best_u_cost + chorma_ts_out->best_v_cost) { chorma_ts_out->best_combined_cost = cost; chorma_ts_out->best_combined_index = transforms[i]; + chorma_ts_out->u_bits = u_bits; + chorma_ts_out->u_distortion = ssd_u; + chorma_ts_out->v_bits = v_bits; + chorma_ts_out->v_distortion = ssd_v; } } +reset_cabac: memcpy(&state->search_cabac, temp_cabac, sizeof(cabac_data_t)); } } @@ -1493,9 +1560,24 @@ void uvg_quantize_lcu_residual( if (luma) { quantize_tr_residual(state, COLOR_Y, &loc, cur_pu, lcu, early_skip, tree_type); } + double c_lambda = state->c_lambda; + state->c_lambda = uvg_calculate_chroma_lambda(state, state->encoder_control->cfg.jccr, cur_pu->joint_cb_cr); if (chroma) { - quantize_tr_residual(state, COLOR_U, &loc, cur_pu, lcu, early_skip, tree_type); - quantize_tr_residual(state, COLOR_V, &loc, cur_pu, lcu, early_skip, tree_type); + if(state->encoder_control->cfg.dep_quant) { + cabac_data_t temp_cabac; + memcpy(&temp_cabac, &state->search_cabac, sizeof(cabac_data_t)); + state->search_cabac.update = 1; + quantize_tr_residual(state, COLOR_U, &loc, cur_pu, lcu, early_skip, tree_type); + cu_loc_t temp_chroma_loc; + uvg_cu_loc_ctor(&temp_chroma_loc, (cu_loc->x >> 1) % LCU_WIDTH_C, (cu_loc->y >> 1) % LCU_WIDTH_C, cu_loc->width, cu_loc->height); + uvg_get_coeff_cost(state, lcu->coeff.u, NULL, &temp_chroma_loc, COLOR_U, 0, (cur_pu->tr_skip & 2) >> 1, COEFF_ORDER_CU); + quantize_tr_residual(state, COLOR_V, &loc, cur_pu, lcu, early_skip, tree_type); + memcpy(&state->search_cabac, &temp_cabac, sizeof(cabac_data_t)); + } + else { + quantize_tr_residual(state, COLOR_U, &loc, cur_pu, lcu, early_skip, tree_type); + quantize_tr_residual(state, COLOR_V, &loc, cur_pu, lcu, early_skip, tree_type); + } } if (jccr && PU_IS_TU(cur_pu)) { quantize_tr_residual(state, COLOR_UV, &loc, cur_pu, lcu, early_skip, tree_type); @@ -1503,5 +1585,6 @@ void uvg_quantize_lcu_residual( if(chroma && jccr && PU_IS_TU(cur_pu)) { assert( 0 && "Trying to quantize both jccr and regular at the same time.\n"); } + state->c_lambda = c_lambda; } } diff --git a/src/transform.h b/src/transform.h index d2b95ca8..be485f46 100644 --- a/src/transform.h +++ b/src/transform.h @@ -88,6 +88,10 @@ typedef struct { int best_u_index; int best_v_index; int best_combined_index; + uint64_t u_distortion; + uint64_t v_distortion; + double u_bits; + double v_bits; } uvg_chorma_ts_out_t; void uvg_quantize_lcu_residual(