[mtt] Fix small issues with luma and chroma searches

This commit is contained in:
Joose Sainio 2023-03-02 15:31:34 +02:00 committed by Marko Viitanen
parent 7a5245c5a4
commit 8c14fa94ba
14 changed files with 273 additions and 130 deletions

View file

@ -100,7 +100,7 @@ cu_info_t* uvg_cu_array_at(cu_array_t *cua, unsigned x_px, unsigned y_px)
}
void uvg_get_isp_cu_arr_coords(int *x, int *y)
void uvg_get_isp_cu_arr_coords(int *x, int *y, int dim)
{
// Do nothing if dimensions are divisible by 4
if (*y % 4 == 0 && *x % 4 == 0) return;
@ -109,7 +109,7 @@ void uvg_get_isp_cu_arr_coords(int *x, int *y)
if (remainder_y != 0) {
// Horizontal ISP split
if (remainder_y % 2 == 0) {
if (remainder_y % 2 == 0 && dim == 8) {
// 8x2 block
*y -= 2;
*x += 4;
@ -122,7 +122,7 @@ void uvg_get_isp_cu_arr_coords(int *x, int *y)
}
else {
// Vertical ISP split
if (*x % 2 == 0) {
if (*x % 2 == 0 && dim == 8) {
// 2x8 block
*y += 4;
*x -= 2;

View file

@ -245,7 +245,7 @@ typedef struct cu_array_t {
} cu_array_t;
cu_info_t* uvg_cu_array_at(cu_array_t *cua, unsigned x_px, unsigned y_px);
void uvg_get_isp_cu_arr_coords(int* x, int* y);
void uvg_get_isp_cu_arr_coords(int* x, int* y, int dim);
const cu_info_t* uvg_cu_array_at_const(const cu_array_t *cua, unsigned x_px, unsigned y_px);
cu_array_t * uvg_cu_array_alloc(const int width, const int height);

View file

@ -343,7 +343,7 @@ static void init_quant_block(
const bool needsSqrt2ScaleAdjustment,
const int gValue)
{
double lambda = state->lambda;
double lambda = color == COLOR_Y ? state->lambda : state->c_lambda;
const int qpDQ = state->qp + 1;
const int qpPer = qpDQ / 6;
@ -475,7 +475,7 @@ static void xSetLastCoeffOffset(
cbf_ctx = &state->search_cabac.ctx.qt_cbf_model_cr[cbf_is_set(cur_tu->cbf, COLOR_U)];
break;
}
cbfDeltaBits = (int32_t)CTX_ENTROPY_BITS(cbf_ctx, 1) - (int32_t)CTX_ENTROPY_BITS(cbf_ctx, 0);
cbfDeltaBits = compID != COLOR_Y && cur_tu->joint_cb_cr ? 0 : (int32_t)CTX_ENTROPY_BITS(cbf_ctx, 1) - (int32_t)CTX_ENTROPY_BITS(cbf_ctx, 0);
}
}

View file

@ -143,7 +143,7 @@ bool uvg_is_lfnst_allowed(
uvg_get_isp_split_loc(&split_loc, cu_loc->x, cu_loc->y, cu_width, cu_height, i, isp_mode, false);
int local_split_x = lcu ? split_loc.local_x : split_loc.x;
int local_split_y = lcu ? split_loc.local_y : split_loc.y;
uvg_get_isp_cu_arr_coords(&local_split_x, &local_split_y);
uvg_get_isp_cu_arr_coords(&local_split_x, &local_split_y, MAX(cu_width, cu_height));
const cu_info_t* split_cu = lcu ? LCU_GET_CU_AT_PX(lcu, local_split_x, local_split_y) :
uvg_cu_array_at_const(frame->cu_array, local_split_x, local_split_y);
@ -550,7 +550,7 @@ static void encode_transform_unit(
cu_array_t* used_cu_array = tree_type != UVG_CHROMA_T ? frame->cu_array : frame->chroma_cu_array;
int isp_x = x;
int isp_y = y;
uvg_get_isp_cu_arr_coords(&isp_x, &isp_y);
uvg_get_isp_cu_arr_coords(&isp_x, &isp_y, MAX(width, height));
if(cur_pu == NULL) {
cur_pu = uvg_cu_array_at_const(used_cu_array, isp_x, isp_y);
}
@ -645,7 +645,7 @@ static void encode_transform_coeff(
int x = cu_loc->x;
int y = cu_loc->y;
if (isp_split) {
uvg_get_isp_cu_arr_coords(&x, &y);
uvg_get_isp_cu_arr_coords(&x, &y, MAX(cu_loc->width, cu_loc->height));
}
//const encoder_control_t *const ctrl = state->encoder_control;

View file

@ -332,6 +332,7 @@ typedef struct encoder_state_t {
int8_t qp;
double c_lambda;
double chroma_weights[4];
/**
* \brief Whether a QP delta value must be coded for the current LCU.

View file

@ -2004,7 +2004,7 @@ double uvg_recon_and_estimate_cost_isp(encoder_state_t* const state,
const cu_loc_t* const cu_loc,
double cost_treshold,
intra_search_data_t* const search_data,
lcu_t* const lcu) {
lcu_t* const lcu, bool* violates_lfnst) {
assert(state->search_cabac.update && "ISP reconstruction must be done with CABAC update");
double cost = 0;
@ -2012,6 +2012,7 @@ double uvg_recon_and_estimate_cost_isp(encoder_state_t* const state,
const int height = cu_loc->height;
search_data->best_isp_cbfs = 0;
search_data->pred_cu.intra.isp_cbfs = 0;
// ISP split is done horizontally or vertically depending on ISP mode, 2 or 4 times depending on block dimensions.
// Small blocks are split only twice.
int split_type = search_data->pred_cu.intra.isp_mode;
@ -2020,11 +2021,11 @@ double uvg_recon_and_estimate_cost_isp(encoder_state_t* const state,
int cbf_context = 2;
for (int i = 0; i < split_limit; ++i) {
search_data->pred_cu.intra.isp_index = i;
cu_loc_t tu_loc;
uvg_get_isp_split_loc(&tu_loc, cu_loc->x, cu_loc->y, width, height, i, split_type, true);
cu_loc_t pu_loc;
uvg_get_isp_split_loc(&pu_loc, cu_loc->x, cu_loc->y, width, height, i, split_type, false);
search_data->pred_cu.intra.isp_index = 0;
if (tu_loc.x % 4 == 0) {
intra_recon_tb_leaf(state, &pu_loc, cu_loc, lcu, COLOR_Y, search_data);
}
@ -2036,20 +2037,23 @@ double uvg_recon_and_estimate_cost_isp(encoder_state_t* const state,
int ssd = uvg_pixels_calc_ssd(&lcu->ref.y[index], &lcu->rec.y[index],
LCU_WIDTH, LCU_WIDTH,
tu_loc.width, tu_loc.height);
double coeff_bits = uvg_get_coeff_cost(state, lcu->coeff.y, NULL, &tu_loc, 0, SCAN_DIAG, false, COEFF_ORDER_CU);
double coeff_bits = uvg_get_coeff_cost(state, lcu->coeff.y, &search_data->pred_cu, &tu_loc, 0, SCAN_DIAG, false, COEFF_ORDER_CU);
int cbf = cbf_is_set(search_data->pred_cu.cbf, COLOR_Y);
if (i + 1 != split_limit || search_data->best_isp_cbfs != 1 << (split_limit - 1)) {
if (i + 1 != split_limit || search_data->best_isp_cbfs != 0) {
CABAC_FBITS_UPDATE(&state->search_cabac, &state->search_cabac.ctx.qt_cbf_model_luma[cbf_context], cbf, coeff_bits, "cbf_luma_isp_recon");
}
cost += ssd + coeff_bits * state->lambda;
cbf_context = 2 + cbf;
if(violates_lfnst) *violates_lfnst |= search_data->pred_cu.violates_lfnst_constrained_luma;
search_data->pred_cu.violates_lfnst_constrained_luma = false;
search_data->best_isp_cbfs |= cbf << i;
search_data->pred_cu.intra.isp_cbfs = search_data->best_isp_cbfs;
}
search_data->pred_cu.intra.isp_index = 0;
return cost;
}

View file

@ -155,7 +155,7 @@ double uvg_recon_and_estimate_cost_isp(encoder_state_t* const state,
const cu_loc_t* const cu_loc,
double cost_treshold,
intra_search_data_t* const search_data,
lcu_t* const lcu);
lcu_t* const lcu, bool* violates_lfnst);
int8_t uvg_get_co_located_luma_mode(
const cu_loc_t* const chroma_loc,

View file

@ -804,6 +804,11 @@ static double qp_to_lambda(encoder_state_t* const state, int qp)
state->qp = est_qp;
int8_t chroma_qp = encoder->qp_map[0][est_qp];
double tmpWeight = pow(2.0, (est_qp - chroma_qp) / 3.0);
if (state->encoder_control->cfg.dep_quant)
{
tmpWeight *= (state->encoder_control->cfg.gop_len >= 8 ? pow(2.0, 0.1 / 3.0) : pow(2.0, 0.2 / 3.0)); // increase chroma weight for dependent quantization (in order to reduce bit rate shift from chroma to luma)
}
state->chroma_weights[1] = state->chroma_weights[2] = state->chroma_weights[3] = tmpWeight;
state->c_lambda = est_lambda / tmpWeight;
ctu->qp = est_qp;
ctu->lambda = est_lambda;
@ -1174,6 +1179,11 @@ void uvg_set_lcu_lambda_and_qp(encoder_state_t * const state,
int8_t chroma_qp = ctrl->qp_map[0][state->qp];
double tmpWeight = pow(2.0, (state->qp - chroma_qp) / 3.0);
if (state->encoder_control->cfg.dep_quant)
{
tmpWeight *= (state->encoder_control->cfg.gop_len >= 8 ? pow(2.0, 0.1 / 3.0) : pow(2.0, 0.2 / 3.0)); // increase chroma weight for dependent quantization (in order to reduce bit rate shift from chroma to luma)
}
state->chroma_weights[1] = state->chroma_weights[2] = state->chroma_weights[3] = tmpWeight;
state->c_lambda = state->lambda / tmpWeight;
// Apply variance adaptive quantization
@ -1201,3 +1211,23 @@ void uvg_set_lcu_lambda_and_qp(encoder_state_t * const state,
lcu->adjust_qp = state->qp;
}
}
/**
 * \brief Compute a lambda value for chroma from the state's current lambda.
 *
 * The state's lambda is divided by a weight of 2^((qp - chroma_qp) / 3),
 * where chroma_qp is looked up from the encoder's qp_map[0] using the
 * state's qp. The result is then scaled according to the JCCR arguments.
 *
 * \param state      Encoder state; lambda, qp and encoder_control are read.
 * \param use_jccr   When true and the state's qp is above 18, the lambda
 *                   is multiplied by 1.3.
 * \param jccr_mode  1 or 2 scales the lambda by 0.8, 3 scales it by 0.5,
 *                   any other value leaves it unscaled.
 * \return The resulting lambda. The state itself is not modified.
 */
double uvg_calculate_chroma_lambda(encoder_state_t *state, bool use_jccr, int jccr_mode)
{
  const encoder_control_t * const ctrl = state->encoder_control;
  const int8_t chroma_qp = ctrl->qp_map[0][state->qp];

  double chroma_weight = pow(2.0, (state->qp - chroma_qp) / 3.0);
  if (ctrl->cfg.dep_quant) {
    // increase chroma weight for dependent quantization
    // (in order to reduce bit rate shift from chroma to luma)
    if (ctrl->cfg.gop_len >= 8) {
      chroma_weight *= pow(2.0, 0.1 / 3.0);
    } else {
      chroma_weight *= pow(2.0, 0.2 / 3.0);
    }
  }

  double chroma_lambda = state->lambda;
  chroma_lambda /= chroma_weight;

  // Extra factor applied only when JCCR is in use and qp is above 18.
  if (use_jccr && state->qp > 18) {
    chroma_lambda *= 1.3;
  }

  // Per-mode scaling; modes other than 1, 2 and 3 keep the lambda as is.
  switch (jccr_mode) {
    case 1:
    case 2:
      chroma_lambda *= 0.8;
      break;
    case 3:
      chroma_lambda *= 0.5;
      break;
    default:
      break;
  }

  return chroma_lambda;
}

View file

@ -76,4 +76,6 @@ void uvg_set_ctu_qp_lambda(encoder_state_t * const state, vector2d_t pos);
void uvg_update_after_picture(encoder_state_t * const state);
void uvg_estimate_pic_lambda(encoder_state_t * const state);
double uvg_calculate_chroma_lambda(encoder_state_t *state, bool use_jccr, int jccr_mode);
#endif // RATE_CONTROL_H_

View file

@ -43,6 +43,7 @@
#include "imagelist.h"
#include "inter.h"
#include "intra.h"
#include "rate_control.h"
#include "uvg266.h"
#include "rdo.h"
#include "search_inter.h"
@ -731,7 +732,8 @@ static double cu_rd_cost_tr_split_accurate(
cu_info_t* const tr_cu = LCU_GET_CU_AT_PX(lcu, cu_loc->local_x, cu_loc->local_y);
double coeff_bits = 0;
double tr_tree_bits = 0;
double luma_bits = 0;
double chroma_bits = 0;
const int cb_flag_u = tr_cu->joint_cb_cr ? tr_cu->joint_cb_cr >> 1 : cbf_is_set(tr_cu->cbf, COLOR_U);
const int cb_flag_v = tr_cu->joint_cb_cr ? tr_cu->joint_cb_cr & 1 : cbf_is_set(tr_cu->cbf, COLOR_V);
@ -743,7 +745,7 @@ static double cu_rd_cost_tr_split_accurate(
// Only need to signal coded block flag if not skipped or merged
// skip = no coded residual, merge = coded residual
if (pred_cu->type != CU_INTRA && (!pred_cu->merged)) {
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_qt_root_cbf_model), cbf, tr_tree_bits, "rqt_root_cbf");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_qt_root_cbf_model), cbf, luma_bits, "rqt_root_cbf");
}
}
@ -768,13 +770,13 @@ static double cu_rd_cost_tr_split_accurate(
for (int i = 0; i < split_count; ++i) {
sum += cu_rd_cost_tr_split_accurate(state, pred_cu, lcu, tree_type, isp_cbf, &split_cu_loc[i], chroma_loc ? &split_chroma_cu_loc[i] : NULL, has_chroma);
}
return sum + tr_tree_bits * state->lambda;
return sum + luma_bits * state->lambda;
}
has_chroma = state->encoder_control->chroma_format != UVG_CSP_400 && has_chroma && tree_type != UVG_LUMA_T;
if (!skip_residual_coding && has_chroma) {
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cb[0]), cb_flag_u, tr_tree_bits, "cbf_cb");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cr[cb_flag_u]), cb_flag_v, tr_tree_bits, "cbf_cr");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cb[0]), cb_flag_u, chroma_bits, "cbf_cb");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cr[cb_flag_u]), cb_flag_v, chroma_bits, "cbf_cr");
}
const int cb_flag_y = cbf_is_set(tr_cu->cbf, COLOR_Y) && tree_type != UVG_CHROMA_T;
@ -791,7 +793,7 @@ static double cu_rd_cost_tr_split_accurate(
{
cabac_ctx_t* ctx = &(cabac->ctx.qt_cbf_model_luma[0]);
CABAC_FBITS_UPDATE(cabac, ctx, cb_flag_y, tr_tree_bits, "cbf_y_search");
CABAC_FBITS_UPDATE(cabac, ctx, cb_flag_y, luma_bits, "cbf_y_search");
}
}
else {
@ -802,7 +804,7 @@ static double cu_rd_cost_tr_split_accurate(
for (int i = 0; i < split_limit; i++) {
if (i != split_limit_minus_one || isp_cbf != 1 << split_limit_minus_one) {
const int flag = (isp_cbf >> i) & 1;
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_luma[luma_ctx]), flag, tr_tree_bits, "cbf_y_search");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_luma[luma_ctx]), flag, luma_bits, "cbf_y_search");
luma_ctx = 2 + flag;
}
}
@ -812,7 +814,7 @@ static double cu_rd_cost_tr_split_accurate(
// TODO qp_delta_sign_flag
if ((cb_flag_u || cb_flag_v) && has_chroma && state->encoder_control->cfg.jccr) {
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.joint_cb_cr[cb_flag_u * 2 + cb_flag_v - 1], tr_cu->joint_cb_cr != 0, tr_tree_bits, "tu_joint_cbcr_residual_flag");
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.joint_cb_cr[cb_flag_u * 2 + cb_flag_v - 1], tr_cu->joint_cb_cr != 0, chroma_bits, "tu_joint_cbcr_residual_flag");
}
}
@ -834,7 +836,7 @@ static double cu_rd_cost_tr_split_accurate(
if(cb_flag_y || is_isp){
if (can_use_tr_skip) {
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_luma, tr_cu->tr_idx == MTS_SKIP, tr_tree_bits, "transform_skip_flag");
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_luma, tr_cu->tr_idx == MTS_SKIP, luma_bits, "transform_skip_flag");
}
int8_t luma_scan_mode = SCAN_DIAG;
if (pred_cu->type == CU_INTER || pred_cu->intra.isp_mode == ISP_MODE_NO_ISP) {
@ -872,14 +874,14 @@ static double cu_rd_cost_tr_split_accurate(
cabac,
&cabac->ctx.lfnst_idx_model[1],
lfnst_idx != 0,
tr_tree_bits,
luma_bits,
"lfnst_idx");
if (lfnst_idx > 0) {
CABAC_FBITS_UPDATE(
cabac,
&cabac->ctx.lfnst_idx_model[2],
lfnst_idx == 2,
tr_tree_bits,
luma_bits,
"lfnst_idx");
}
}
@ -903,38 +905,34 @@ static double cu_rd_cost_tr_split_accurate(
if (!state->encoder_control->cfg.lossless) {
int index = lcu_px.y * LCU_WIDTH_C + lcu_px.x;
unsigned ssd_u = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index],
LCU_WIDTH_C, LCU_WIDTH_C,
chroma_width, chroma_height);
LCU_WIDTH_C, LCU_WIDTH_C, chroma_width, chroma_height) * state->chroma_weights[1];
unsigned ssd_v = uvg_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.v[index],
LCU_WIDTH_C, LCU_WIDTH_C,
chroma_width, chroma_height);
LCU_WIDTH_C, LCU_WIDTH_C, chroma_width, chroma_height) * state->chroma_weights[2];
chroma_ssd = ssd_u + ssd_v;
}
if(chroma_can_use_tr_skip && cb_flag_u) {
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 2, tr_tree_bits, "transform_skip_flag");
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 2, chroma_bits, "transform_skip_flag");
}
if(chroma_can_use_tr_skip && cb_flag_v) {
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 4, tr_tree_bits, "transform_skip_flag");
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 4, chroma_bits, "transform_skip_flag");
}
coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.u, tr_cu, &temp_chroma_loc, COLOR_U, scan_order, tr_cu->tr_skip & 2, COEFF_ORDER_CU);
coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.v, tr_cu, &temp_chroma_loc, COLOR_V, scan_order, tr_cu->tr_skip & 4, COEFF_ORDER_CU);
chroma_bits += uvg_get_coeff_cost(state, lcu->coeff.u, tr_cu, &temp_chroma_loc, COLOR_U, scan_order, tr_cu->tr_skip & 2, COEFF_ORDER_CU);
chroma_bits += uvg_get_coeff_cost(state, lcu->coeff.v, tr_cu, &temp_chroma_loc, COLOR_V, scan_order, tr_cu->tr_skip & 4, COEFF_ORDER_CU);
}
else {
{
int index = lcu_px.y * LCU_WIDTH_C + lcu_px.x;
int ssd_u_joint = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index],
LCU_WIDTH_C, LCU_WIDTH_C,
chroma_width, chroma_height);
LCU_WIDTH_C, LCU_WIDTH_C, chroma_width, chroma_height) * state->chroma_weights[3];
int ssd_v_joint = uvg_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.v[index],
LCU_WIDTH_C, LCU_WIDTH_C,
chroma_width, chroma_height);
LCU_WIDTH_C, LCU_WIDTH_C, chroma_width, chroma_height) * state->chroma_weights[3];
chroma_ssd = ssd_u_joint + ssd_v_joint;
}
if (chroma_can_use_tr_skip) {
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 2, tr_tree_bits, "transform_skip_flag");
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 2, chroma_bits, "transform_skip_flag");
}
coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.joint_uv, tr_cu, &temp_chroma_loc, COLOR_U, scan_order, 0, COEFF_ORDER_CU);
chroma_bits += uvg_get_coeff_cost(state, lcu->coeff.joint_uv, tr_cu, &temp_chroma_loc, COLOR_U, scan_order, 0, COEFF_ORDER_CU);
}
}
@ -945,14 +943,14 @@ static double cu_rd_cost_tr_split_accurate(
cabac,
&cabac->ctx.lfnst_idx_model[is_chroma_tree],
lfnst_idx != 0,
tr_tree_bits,
luma_bits,
"lfnst_idx");
if (lfnst_idx > 0) {
CABAC_FBITS_UPDATE(
cabac,
&cabac->ctx.lfnst_idx_model[2],
lfnst_idx == 2,
tr_tree_bits,
luma_bits,
"lfnst_idx");
}
}
@ -963,20 +961,20 @@ static double cu_rd_cost_tr_split_accurate(
bool symbol = tr_cu->tr_idx != 0;
int ctx_idx = 0;
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.mts_idx_model[ctx_idx], symbol, tr_tree_bits, "mts_idx");
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.mts_idx_model[ctx_idx], symbol, luma_bits, "mts_idx");
ctx_idx++;
for (int i = 0; i < 3 && symbol; i++, ctx_idx++)
{
symbol = tr_cu->tr_idx > i + MTS_DST7_DST7 ? 1 : 0;
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.mts_idx_model[ctx_idx], symbol, tr_tree_bits, "mts_idx");
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.mts_idx_model[ctx_idx], symbol, luma_bits, "mts_idx");
}
tr_cu->mts_last_scan_pos = false;
tr_cu->violates_mts_coeff_constraint = false;
}
double bits = tr_tree_bits + coeff_bits;
return luma_ssd * UVG_LUMA_MULT + chroma_ssd * UVG_CHROMA_MULT + bits * state->lambda;
double bits = luma_bits + coeff_bits;
return luma_ssd * UVG_LUMA_MULT + chroma_ssd * UVG_CHROMA_MULT + (bits + chroma_bits) * state->lambda;
}
@ -1378,7 +1376,8 @@ static double search_cu(
cu_loc,
0,
&intra_search,
lcu
lcu,
NULL
);
memcpy(&state->search_cabac, &temp_cabac, sizeof(cabac_data_t));
}
@ -1478,20 +1477,23 @@ static double search_cu(
recon_chroma = false;
}
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
uvg_intra_recon_cu(state,
&intra_search, cu_loc,
NULL, lcu,
tree_type,
recon_luma, recon_chroma);
if (!state->encoder_control->cfg.cclm && cur_cu->intra.isp_mode != ISP_MODE_NO_ISP) {
uvg_recon_and_estimate_cost_isp(
state,
cu_loc,
0,
&intra_search,
lcu
lcu,
NULL
);
}
else {
uvg_intra_recon_cu(state,
&intra_search, cu_loc,
NULL, lcu,
tree_type,
recon_luma, recon_chroma);
}
if((!recon_chroma && state->encoder_control->chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T)
@ -1518,7 +1520,7 @@ static double search_cu(
// Set isp split cbfs here
const int split_type = intra_search.pred_cu.intra.isp_mode;
const int split_num = split_type == ISP_MODE_NO_ISP ? 0 : uvg_get_isp_split_num(cu_width, cu_height, split_type, true);
const int split_num = split_type == ISP_MODE_NO_ISP || tree_type == UVG_CHROMA_T ? 0 : uvg_get_isp_split_num(cu_width, cu_height, split_type, true);
const int cbf_cb = cbf_is_set(cur_cu->cbf, COLOR_U);
const int cbf_cr = cbf_is_set(cur_cu->cbf, COLOR_V);
@ -1530,7 +1532,7 @@ static double search_cu(
// Fetch proper x, y coords for isp blocks
int tmp_x = isp_loc.x;
int tmp_y = isp_loc.y;
uvg_get_isp_cu_arr_coords(&tmp_x, &tmp_y);
uvg_get_isp_cu_arr_coords(&tmp_x, &tmp_y, MAX(cu_width, cu_height));
cu_info_t* split_cu = LCU_GET_CU_AT_PX(lcu, tmp_x % LCU_WIDTH, tmp_y % LCU_WIDTH);
bool cur_cbf = (intra_search.best_isp_cbfs >> i) & 1;
cbf_clear(&split_cu->cbf, COLOR_Y);
@ -1733,6 +1735,13 @@ static double search_cu(
for (int split_type = QT_SPLIT; split_type <= TT_VER_SPLIT; ++split_type) {
if (!can_split[split_type])
continue;
split_tree_t new_split = {
split_tree.split_tree | split_type << (split_tree.current_depth * 3),
split_tree.current_depth + 1,
split_tree.mtt_depth + (split_type != QT_SPLIT),
split_tree.implicit_mtt_depth + (split_type != QT_SPLIT && is_implicit),
0
};
if (completely_inside && check_for_early_termission(
cu_width,
@ -1798,13 +1807,6 @@ static double search_cu(
continue;
}
split_tree_t new_split = {
split_tree.split_tree | split_type << (split_tree.current_depth * 3),
split_tree.current_depth + 1,
split_tree.mtt_depth + (split_type != QT_SPLIT),
split_tree.implicit_mtt_depth + (split_type != QT_SPLIT && is_implicit),
0
};
state->search_cabac.update = 0;
split_cost += split_bits * state->lambda;
@ -2166,7 +2168,9 @@ void uvg_search_lcu(encoder_state_t * const state, const int x, const int y, con
}
int tree_type = state->frame->slicetype == UVG_SLICE_I
&& state->encoder_control->cfg.dual_tree ? UVG_LUMA_T : UVG_BOTH_T;
&& state->encoder_control->cfg.dual_tree
? UVG_LUMA_T
: UVG_BOTH_T;
cu_loc_t start;
uvg_cu_loc_ctor(&start, x, y, LCU_WIDTH, LCU_WIDTH);

View file

@ -365,6 +365,7 @@ static double search_intra_trdepth(
for (trafo = mts_start; trafo < num_transforms; trafo++) {
for (int lfnst_idx = start_idx; lfnst_idx <= end_lfnst_idx; lfnst_idx++) {
// Initialize lfnst variables
search_data->best_isp_cbfs = 0;
pred_cu->tr_idx = trafo;
pred_cu->tr_skip = trafo == MTS_SKIP;
pred_cu->lfnst_idx = lfnst_idx;
@ -400,8 +401,10 @@ static double search_intra_trdepth(
cu_loc,
cost_treshold,
search_data,
lcu
lcu,
&constraints[0]
);
constraints[1] = search_data->best_isp_cbfs != 0;
}
else {
uvg_intra_recon_cu(
@ -427,7 +430,7 @@ static double search_intra_trdepth(
}
}
if (trafo != MTS_SKIP && end_lfnst_idx != 0) {
if (trafo != MTS_SKIP && end_lfnst_idx != 0 && pred_cu->intra.isp_mode == ISP_MODE_NO_ISP) {
uvg_derive_lfnst_constraints(
pred_cu,
constraints,
@ -438,7 +441,7 @@ static double search_intra_trdepth(
COLOR_Y);
}
if (!constraints[1] && (cbf_is_set(pred_cu->cbf, COLOR_Y) || pred_cu->intra.isp_mode != ISP_MODE_NO_ISP)) {
if (!constraints[1] && cbf_is_set(pred_cu->cbf, COLOR_Y)) {
//end_idx = 0;
if (pred_cu->lfnst_idx > 0) {
continue;
@ -456,8 +459,8 @@ static double search_intra_trdepth(
}
double transform_bits = 0;
if (state->encoder_control->cfg.lfnst && PU_IS_TU(pred_cu) &&
trafo != MTS_SKIP && end_lfnst_idx != 0) {
if ((!constraints[0] && constraints[1]) || lfnst_idx != 0) {
trafo != MTS_SKIP && end_lfnst_idx != 0 && (cbf_is_set(pred_cu->cbf, COLOR_Y) || search_data->best_isp_cbfs != 0)) {
if ((!constraints[0] && (constraints[1] || pred_cu->intra.isp_mode != ISP_MODE_NO_ISP))) {
transform_bits += CTX_ENTROPY_FBITS(
&state->search_cabac.ctx.lfnst_idx_model[tree_type == UVG_LUMA_T],
lfnst_idx != 0);
@ -469,6 +472,7 @@ static double search_intra_trdepth(
}
}
if (num_transforms > 2 && trafo != MTS_SKIP
&& (cbf_is_set(pred_cu->cbf, COLOR_Y) || search_data->best_isp_cbfs != 0)
&& pred_cu->intra.isp_mode == ISP_MODE_NO_ISP
&& lfnst_idx == 0
&& width <= 32
@ -952,8 +956,9 @@ static INLINE double count_bits(
const double not_mpm_mode_bit,
const double planar_mode_flag,
const double not_planar_mode_flag,
const double not_isp_flag,
int8_t mode
)
)
{
int i = 0;
int smaller_than_pred = 0;
@ -975,7 +980,7 @@ static INLINE double count_bits(
else {
bits = not_mpm_mode_bit + 5 + (mode - smaller_than_pred > 2);
}
bits += not_mrl + not_mip;
bits += not_mrl + not_mip + not_isp_flag;
return bits;
}
@ -1030,6 +1035,7 @@ static uint8_t search_intra_rough(
const double not_mpm_mode_bit = CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.intra_luma_mpm_flag_model), 0);
const double planar_mode_flag = CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.luma_planar_model[1]), 0);
const double not_planar_mode_flag = CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.luma_planar_model[1]), 1);
const double not_isp_flag = state->encoder_control->cfg.isp && uvg_can_use_isp(width, height) ? CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.intra_subpart_model[0]), 0) : 0;
const uint8_t mode_list_size = state->encoder_control->cfg.mip ? 6 : 3;
struct mode_cost best_six_modes[6];
@ -1059,7 +1065,7 @@ static uint8_t search_intra_rough(
not_mpm_mode_bit,
planar_mode_flag,
not_planar_mode_flag,
0) * state->lambda_sqrt;
not_isp_flag, 0) * state->lambda_sqrt;
costs[1] += count_bits(
state,
intra_preds,
@ -1069,7 +1075,7 @@ static uint8_t search_intra_rough(
not_mpm_mode_bit,
planar_mode_flag,
not_planar_mode_flag,
1) * state->lambda_sqrt;
not_isp_flag, 1) * state->lambda_sqrt;
if(costs[0] < costs[1]) {
min_cost = costs[0];
max_cost = costs[1];
@ -1113,7 +1119,7 @@ static uint8_t search_intra_rough(
not_mpm_mode_bit,
planar_mode_flag,
not_planar_mode_flag,
mode + i * offset) * state->lambda_sqrt;
not_isp_flag, mode + i * offset) * state->lambda_sqrt;
}
}
@ -1184,7 +1190,7 @@ static uint8_t search_intra_rough(
not_mpm_mode_bit,
planar_mode_flag,
not_planar_mode_flag,
modes_to_check[block + i]) * state->lambda_sqrt;
not_isp_flag, modes_to_check[block + i]) * state->lambda_sqrt;
}
@ -1327,7 +1333,8 @@ static int8_t search_intra_rdo(
for (int mode = 0; mode < modes_to_check; mode++) {
bool can_do_isp_search = search_data[mode].pred_cu.intra.mip_flag ? false : true; // Cannot use ISP with MIP
can_do_isp_search = search_data[mode].pred_cu.intra.multi_ref_idx == 0 ? can_do_isp_search : false; // Cannot use ISP with MRL
// can_do_isp_search = search_data[mode].pred_cu.intra.multi_ref_idx == 0 ? can_do_isp_search : false; // Cannot use ISP with MRL
const uint8_t mrl_idx = search_data[mode].pred_cu.intra.multi_ref_idx;
double best_isp_cost = MAX_DOUBLE;
double best_bits = MAX_DOUBLE;
int8_t best_isp_mode = 0;
@ -1340,6 +1347,7 @@ static int8_t search_intra_rdo(
search_data[mode].pred_cu.intra.isp_mode = isp_mode;
search_data[mode].pred_cu.intra.multi_ref_idx = isp_mode == ISP_MODE_NO_ISP ? mrl_idx : 0;
double rdo_bitcost = uvg_luma_mode_bits(state, &search_data[mode].pred_cu, cu_loc, lcu);
search_data[mode].pred_cu.tr_idx = MTS_TR_NUM;
search_data[mode].bits = rdo_bitcost;
@ -1362,6 +1370,7 @@ static int8_t search_intra_rdo(
search_data[mode].cost = best_isp_cost;
search_data[mode].bits = best_bits;
search_data[mode].pred_cu.intra.isp_mode = best_isp_mode;
search_data[mode].pred_cu.intra.multi_ref_idx = best_isp_mode == ISP_MODE_NO_ISP ? mrl_idx : 0;
search_data[mode].pred_cu.tr_idx = best_mts_mode_for_isp[best_isp_mode];
search_data[mode].pred_cu.tr_skip = best_mts_mode_for_isp[best_isp_mode] == MTS_SKIP;
search_data[mode].pred_cu.lfnst_idx = best_lfnst_mode_for_isp[best_isp_mode];
@ -1482,11 +1491,13 @@ int8_t uvg_search_intra_chroma_rdo(
ALIGNED(64) int16_t u_resi[LCU_WIDTH_C * LCU_WIDTH_C];
ALIGNED(64) int16_t v_resi[LCU_WIDTH_C * LCU_WIDTH_C];
double original_c_lambda = state->c_lambda;
for (int8_t mode_i = 0; mode_i < num_modes; ++mode_i) {
const uint8_t mode = chroma_data[mode_i].pred_cu.intra.mode_chroma;
double mode_bits = uvg_chroma_mode_bits(state, mode, luma_mode);
chroma_data[mode_i].cost = mode_bits * state->lambda;
chroma_data[mode_i].cost = mode_bits * state->c_lambda;
chroma_data[mode_i].bits = mode_bits;
cu_info_t* pred_cu = &chroma_data[mode_i].pred_cu;
uint8_t best_lfnst_index = 0;
for (int lfnst_i = 0; lfnst_i < 3; ++lfnst_i) {
@ -1494,9 +1505,10 @@ int8_t uvg_search_intra_chroma_rdo(
if (lfnst == -1) {
continue;
}
state->c_lambda = original_c_lambda * (state->encoder_control->cfg.jccr && state->qp > 18 ? 1.3 : 1.0);
pred_cu->cr_lfnst_idx = lfnst;
chroma_data[mode_i].lfnst_costs[lfnst] += mode_bits * state->lambda;
if (PU_IS_TU(pred_cu) && (tree_type != UVG_CHROMA_T || (pred_cu->log2_width < 5 && pred_cu->log2_height < 5))) {
chroma_data[mode_i].lfnst_costs[lfnst] += mode_bits * state->c_lambda;
if (PU_IS_TU(pred_cu) && (tree_type != UVG_CHROMA_T || (pred_cu->log2_chroma_width < 5 && pred_cu->log2_chroma_height < 5))) {
uvg_intra_predict(
state,
&refs[COLOR_U - 1],
@ -1552,8 +1564,9 @@ int8_t uvg_search_intra_chroma_rdo(
continue;
}
double actual_cost = state->lambda * (chorma_ts_out.u_bits + chorma_ts_out.v_bits + mode_bits) + (chorma_ts_out.u_distortion + chorma_ts_out.v_distortion);
if(chorma_ts_out.best_u_cost + chorma_ts_out.best_v_cost < chorma_ts_out.best_combined_cost) {
chroma_data[mode_i].lfnst_costs[lfnst] += chorma_ts_out.best_u_cost + chorma_ts_out.best_v_cost;
chroma_data[mode_i].lfnst_costs[lfnst] = actual_cost;
if( chroma_data[mode_i].lfnst_costs[lfnst]
< chroma_data[mode_i].lfnst_costs[best_lfnst_index] || lfnst_i == 0) {
chroma_data[mode_i].pred_cu.joint_cb_cr = 0;
@ -1565,7 +1578,7 @@ int8_t uvg_search_intra_chroma_rdo(
}
}
else {
chroma_data[mode_i].lfnst_costs[lfnst] += chorma_ts_out.best_combined_cost;
chroma_data[mode_i].lfnst_costs[lfnst] = actual_cost;
if (chroma_data[mode_i].lfnst_costs[lfnst]
< chroma_data[mode_i].lfnst_costs[best_lfnst_index] || lfnst_i == 0) {
chroma_data[mode_i].pred_cu.joint_cb_cr = chorma_ts_out.best_combined_index;
@ -1574,10 +1587,11 @@ int8_t uvg_search_intra_chroma_rdo(
chroma_data[mode_i].cost = chroma_data[mode_i].lfnst_costs[lfnst];
}
}
}
else {
state->search_cabac.update = 1;
chroma_data[mode_i].cost = mode_bits * state->lambda;
chroma_data[mode_i].cost = mode_bits * state->c_lambda;
uvg_intra_recon_cu(state,
&chroma_data[mode_i], cu_loc,
pred_cu, lcu,
@ -1593,6 +1607,7 @@ int8_t uvg_search_intra_chroma_rdo(
}
sort_modes(chroma_data, num_modes);
state->c_lambda = original_c_lambda;
return chroma_data[0].pred_cu.intra.mode_chroma;
}

View file

@ -2608,7 +2608,7 @@ static void mts_dct_generic(
if (height == 1) {
dct_hor(input, output, shift_1st, height, 0, skip_width);
} else if (width == 1) {
dct_ver(input, output, shift_2nd, width, 0, skip_height);
dct_ver(input, output, log2_height_minus1 + 1 + bitdepth + 6 - 15, width, 0, skip_height);
} else {
dct_hor(input, tmp, shift_1st, height, 0, skip_width);
dct_ver(tmp, output, shift_2nd, width, skip_width, skip_height);
@ -2666,9 +2666,9 @@ static void mts_idct_generic(
const int32_t shift_2nd = (transform_matrix_shift + max_log2_tr_dynamic_range - 1) - bitdepth;
if (height == 1) {
idct_hor(input, output, shift_1st, height, 0, skip_width);
idct_hor(input, output, shift_2nd + 1, height, 0, skip_width);
} else if (width == 1) {
idct_ver(input, output, shift_2nd, width, 0, skip_height);
idct_ver(input, output, shift_2nd + 1, width, 0, skip_height);
} else {
idct_ver(input, tmp, shift_1st, width, skip_width, skip_height);
idct_hor(tmp, output, shift_2nd, height, 0, skip_width);

View file

@ -37,6 +37,7 @@
#include "intra.h"
#include "uvg266.h"
#include "lfnst_tables.h"
#include "rate_control.h"
#include "rdo.h"
#include "strategies/strategies-dct.h"
#include "strategies/strategies-quant.h"
@ -362,7 +363,7 @@ static void generate_jccr_transforms(
}
}
}
costs[jccr] = d2 != 0 ? MIN(d1, d2) : d1;
costs[jccr] = jccr == 0 ? MIN(d1, d2) : d1;
}
int64_t min_dist1 = costs[0];
int64_t min_dist2 = INT64_MAX;
@ -418,8 +419,7 @@ static void generate_jccr_transforms(
static void quantize_chroma(
encoder_state_t* const state,
cu_info_t * const cur_tu,
int8_t width,
int8_t height,
const cu_loc_t* const cu_loc,
coeff_t u_coeff[5120],
coeff_t v_coeff[2048],
enum uvg_chroma_transforms transform,
@ -429,8 +429,12 @@ static void quantize_chroma(
bool* u_has_coeffs,
bool* v_has_coeffs,
uint8_t lfnst_idx,
enum uvg_tree_type tree_type)
enum uvg_tree_type tree_type,
double* u_coeff_cost,
double* v_coeff_cost)
{
int8_t width = cu_loc->chroma_width;
int8_t height = cu_loc->chroma_height;
if(state->encoder_control->cfg.dep_quant && transform != CHROMA_TS) {
int abs_sum = 0;
uvg_dep_quant(
@ -445,10 +449,23 @@ static void quantize_chroma(
&abs_sum,
state->encoder_control->cfg.scaling_list
);
cbf_clear(&cur_tu->cbf, COLOR_U);
if (abs_sum > 0) {
*u_has_coeffs = 1;
cbf_set(&cur_tu->cbf, COLOR_U);
}
*u_coeff_cost = uvg_get_coeff_cost(
state,
u_quant_coeff,
cur_tu,
cu_loc,
COLOR_U,
SCAN_DIAG,
false,
COEFF_ORDER_LINEAR);
if (transform == DCT7_CHROMA) {
abs_sum = 0;
uvg_dep_quant(
@ -463,10 +480,24 @@ static void quantize_chroma(
&abs_sum,
state->encoder_control->cfg.scaling_list
);
cbf_clear(&cur_tu->cbf, COLOR_V);
if (abs_sum > 0) {
*v_has_coeffs = 1;
cbf_set(&cur_tu->cbf, COLOR_V);
}
*v_coeff_cost = uvg_get_coeff_cost(
state,
v_quant_coeff,
cur_tu,
cu_loc,
COLOR_V,
SCAN_DIAG,
false,
COEFF_ORDER_LINEAR);
cbf_clear(&cur_tu->cbf, COLOR_U);
cbf_clear(&cur_tu->cbf, COLOR_V);
}
return;
}
@ -580,6 +611,9 @@ void uvg_chroma_transform_search(
trans_offset,
&num_transforms);
}
double lambda = state->c_lambda;
chorma_ts_out->best_u_cost = MAX_DOUBLE;
chorma_ts_out->best_v_cost = MAX_DOUBLE;
chorma_ts_out->best_combined_cost = MAX_DOUBLE;
@ -600,11 +634,27 @@ void uvg_chroma_transform_search(
uvg_fwd_lfnst(pred_cu, width, height, COLOR_V, pred_cu->cr_lfnst_idx, &v_coeff[i * trans_offset], tree_type, state->collocated_luma_mode);
}
}
uint8_t old_jccr = pred_cu->joint_cb_cr;
pred_cu->joint_cb_cr = 0;
if(is_jccr) {
state->c_lambda = lambda * (transforms[i] == JCCR_3 ? 0.5 : 0.8);
pred_cu->joint_cb_cr = transforms[i];
}
else if(state->encoder_control->cfg.dep_quant) {
state->search_cabac.update = 1;
}
double u_coeff_cost = 0;
double v_coeff_cost = 0;
unsigned ssd_u = 0;
unsigned ssd_v = 0;
double u_bits = 0;
double v_bits = 0;
quantize_chroma(
state,
pred_cu,
width,
height,
cu_loc,
&u_coeff[i * trans_offset],
&v_coeff[i * trans_offset],
transforms[i],
@ -612,8 +662,12 @@ void uvg_chroma_transform_search(
v_quant_coeff,
SCAN_DIAG,
&u_has_coeffs,
&v_has_coeffs, tree_type == UVG_CHROMA_T ? pred_cu->cr_lfnst_idx : pred_cu->lfnst_idx, tree_type);
if(pred_cu->cr_lfnst_idx !=0 && !u_has_coeffs && !v_has_coeffs) continue;
&v_has_coeffs, tree_type == UVG_CHROMA_T ? pred_cu->cr_lfnst_idx : pred_cu->lfnst_idx,
tree_type,
&u_coeff_cost,
&v_coeff_cost);
pred_cu->joint_cb_cr = old_jccr;
if (pred_cu->cr_lfnst_idx != 0 && !u_has_coeffs && !v_has_coeffs) goto reset_cabac;
if(pred_cu->type == CU_INTRA && transforms[i] != CHROMA_TS && tree_type == UVG_CHROMA_T) {
bool constraints[2] = { false, false };
@ -621,10 +675,10 @@ void uvg_chroma_transform_search(
if(!is_jccr) {
uvg_derive_lfnst_constraints(pred_cu, constraints, v_quant_coeff, width, height, NULL, COLOR_V);
}
if (!constraints[1] && (u_has_coeffs || v_has_coeffs) && pred_cu->cr_lfnst_idx != 0) continue;
if (!constraints[1] && (u_has_coeffs || v_has_coeffs) && pred_cu->cr_lfnst_idx != 0) goto reset_cabac;
}
if (is_jccr && !u_has_coeffs) continue;
if (is_jccr && !u_has_coeffs) goto reset_cabac;
if (u_has_coeffs) {
uvg_dequant(state, u_quant_coeff, &u_coeff[i * trans_offset], width, height, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V,
@ -697,8 +751,6 @@ void uvg_chroma_transform_search(
uvg_pixels_blit(v_pred, &v_recon[trans_offset * i], width, height, width, width);
}
unsigned ssd_u = 0;
unsigned ssd_v = 0;
if (!state->encoder_control->cfg.lossless) {
ssd_u = uvg_pixels_calc_ssd(&lcu->ref.u[offset], &u_recon[trans_offset * i],
LCU_WIDTH_C, width,
@ -706,10 +758,10 @@ void uvg_chroma_transform_search(
ssd_v = uvg_pixels_calc_ssd(&lcu->ref.v[offset], &v_recon[trans_offset * i],
LCU_WIDTH_C, width,
width, height);
ssd_u = (double)ssd_u * state->chroma_weights[1];
ssd_v = (double)ssd_v * state->chroma_weights[2];
}
double u_bits = 0;
double v_bits = 0;
state->search_cabac.update = 1;
int cbf_u = transforms[i] & 2 || (u_has_coeffs && !(transforms[i] & 1));
@ -733,7 +785,8 @@ void uvg_chroma_transform_search(
transforms[i] == CHROMA_TS, u_bits, "tr_skip_u"
);
}
double coeff_cost = uvg_get_coeff_cost(
if(u_coeff_cost == 0) {
u_coeff_cost = uvg_get_coeff_cost(
state,
u_quant_coeff,
pred_cu,
@ -742,7 +795,7 @@ void uvg_chroma_transform_search(
SCAN_DIAG,
transforms[i] == CHROMA_TS,
COEFF_ORDER_LINEAR);
u_bits += coeff_cost;
}
}
if (cbf_v && !is_jccr) {
if (can_use_tr_skip) {
@ -750,7 +803,8 @@ void uvg_chroma_transform_search(
transforms[i] == CHROMA_TS, v_bits, "tr_skip_v"
);
}
v_bits += uvg_get_coeff_cost(
if (v_coeff_cost == 0) {
v_coeff_cost = uvg_get_coeff_cost(
state,
v_quant_coeff,
pred_cu,
@ -760,6 +814,9 @@ void uvg_chroma_transform_search(
transforms[i] == CHROMA_TS,
COEFF_ORDER_LINEAR);
}
}
u_bits += u_coeff_cost;
v_bits += v_coeff_cost;
if((depth == 4 || tree_type == UVG_CHROMA_T) && state->encoder_control->cfg.lfnst && 0) {
if(uvg_is_lfnst_allowed(state, pred_cu, UVG_CHROMA_T, COLOR_UV, cu_loc, lcu)) {
const int lfnst_idx = pred_cu->cr_lfnst_idx;
@ -781,25 +838,35 @@ void uvg_chroma_transform_search(
pred_cu->lfnst_last_scan_pos = false;
pred_cu->violates_lfnst_constrained_chroma = false;
}
if (!is_jccr) {
double u_cost = UVG_CHROMA_MULT * ssd_u + u_bits * state->c_lambda;
double v_cost = UVG_CHROMA_MULT * ssd_v + v_bits * state->c_lambda;
double u_cost = UVG_CHROMA_MULT * ssd_u + u_bits * state->lambda;
double v_cost = UVG_CHROMA_MULT * ssd_v + v_bits * state->lambda;
if (u_cost < chorma_ts_out->best_u_cost) {
chorma_ts_out->best_u_cost = u_cost;
chorma_ts_out->best_u_index = u_has_coeffs ? transforms[i] : NO_RESIDUAL;
chorma_ts_out->u_bits = u_bits;
chorma_ts_out->u_distortion = ssd_u;
}
if (v_cost < chorma_ts_out->best_v_cost) {
chorma_ts_out->best_v_cost = v_cost;
chorma_ts_out->best_v_index = v_has_coeffs ? transforms[i] : NO_RESIDUAL;
chorma_ts_out->v_bits = v_bits;
chorma_ts_out->v_distortion = ssd_v;
}
}
else {
double cost = UVG_CHROMA_MULT * (ssd_u + ssd_v) + (u_bits + v_bits) * state->c_lambda;
if (cost < chorma_ts_out->best_combined_cost) {
double cost = UVG_CHROMA_MULT * (ssd_u + ssd_v) + (u_bits + v_bits) * state->lambda;
if (cost < chorma_ts_out->best_combined_cost && cost < chorma_ts_out->best_u_cost + chorma_ts_out->best_v_cost) {
chorma_ts_out->best_combined_cost = cost;
chorma_ts_out->best_combined_index = transforms[i];
chorma_ts_out->u_bits = u_bits;
chorma_ts_out->u_distortion = ssd_u;
chorma_ts_out->v_bits = v_bits;
chorma_ts_out->v_distortion = ssd_v;
}
}
reset_cabac:
memcpy(&state->search_cabac, temp_cabac, sizeof(cabac_data_t));
}
}
@ -1493,15 +1560,31 @@ void uvg_quantize_lcu_residual(
if (luma) {
quantize_tr_residual(state, COLOR_Y, &loc, cur_pu, lcu, early_skip, tree_type);
}
double c_lambda = state->c_lambda;
state->c_lambda = uvg_calculate_chroma_lambda(state, state->encoder_control->cfg.jccr, cur_pu->joint_cb_cr);
if (chroma) {
if(state->encoder_control->cfg.dep_quant) {
cabac_data_t temp_cabac;
memcpy(&temp_cabac, &state->search_cabac, sizeof(cabac_data_t));
state->search_cabac.update = 1;
quantize_tr_residual(state, COLOR_U, &loc, cur_pu, lcu, early_skip, tree_type);
cu_loc_t temp_chroma_loc;
uvg_cu_loc_ctor(&temp_chroma_loc, (cu_loc->x >> 1) % LCU_WIDTH_C, (cu_loc->y >> 1) % LCU_WIDTH_C, cu_loc->width, cu_loc->height);
uvg_get_coeff_cost(state, lcu->coeff.u, NULL, &temp_chroma_loc, COLOR_U, 0, (cur_pu->tr_skip & 2) >> 1, COEFF_ORDER_CU);
quantize_tr_residual(state, COLOR_V, &loc, cur_pu, lcu, early_skip, tree_type);
memcpy(&state->search_cabac, &temp_cabac, sizeof(cabac_data_t));
}
else {
quantize_tr_residual(state, COLOR_U, &loc, cur_pu, lcu, early_skip, tree_type);
quantize_tr_residual(state, COLOR_V, &loc, cur_pu, lcu, early_skip, tree_type);
}
}
if (jccr && PU_IS_TU(cur_pu)) {
quantize_tr_residual(state, COLOR_UV, &loc, cur_pu, lcu, early_skip, tree_type);
}
if(chroma && jccr && PU_IS_TU(cur_pu)) {
assert( 0 && "Trying to quantize both jccr and regular at the same time.\n");
}
state->c_lambda = c_lambda;
}
}

View file

@ -88,6 +88,10 @@ typedef struct {
int best_u_index;
int best_v_index;
int best_combined_index;
uint64_t u_distortion;
uint64_t v_distortion;
double u_bits;
double v_bits;
} uvg_chorma_ts_out_t;
void uvg_quantize_lcu_residual(