mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 19:24:06 +00:00
[mtt] Fix small issues with luma and chroma searches
This commit is contained in:
parent
7022e44b5c
commit
6f41929e5a
6
src/cu.c
6
src/cu.c
|
@ -100,7 +100,7 @@ cu_info_t* uvg_cu_array_at(cu_array_t *cua, unsigned x_px, unsigned y_px)
|
|||
}
|
||||
|
||||
|
||||
void uvg_get_isp_cu_arr_coords(int *x, int *y)
|
||||
void uvg_get_isp_cu_arr_coords(int *x, int *y, int dim)
|
||||
{
|
||||
// Do nothing if dimensions are divisible by 4
|
||||
if (*y % 4 == 0 && *x % 4 == 0) return;
|
||||
|
@ -109,7 +109,7 @@ void uvg_get_isp_cu_arr_coords(int *x, int *y)
|
|||
|
||||
if (remainder_y != 0) {
|
||||
// Horizontal ISP split
|
||||
if (remainder_y % 2 == 0) {
|
||||
if (remainder_y % 2 == 0 && dim == 8) {
|
||||
// 8x2 block
|
||||
*y -= 2;
|
||||
*x += 4;
|
||||
|
@ -122,7 +122,7 @@ void uvg_get_isp_cu_arr_coords(int *x, int *y)
|
|||
}
|
||||
else {
|
||||
// Vertical ISP split
|
||||
if (*x % 2 == 0) {
|
||||
if (*x % 2 == 0 && dim == 8) {
|
||||
// 2x8 block
|
||||
*y += 4;
|
||||
*x -= 2;
|
||||
|
|
2
src/cu.h
2
src/cu.h
|
@ -244,7 +244,7 @@ typedef struct cu_array_t {
|
|||
} cu_array_t;
|
||||
|
||||
cu_info_t* uvg_cu_array_at(cu_array_t *cua, unsigned x_px, unsigned y_px);
|
||||
void uvg_get_isp_cu_arr_coords(int* x, int* y);
|
||||
void uvg_get_isp_cu_arr_coords(int* x, int* y, int dim);
|
||||
const cu_info_t* uvg_cu_array_at_const(const cu_array_t *cua, unsigned x_px, unsigned y_px);
|
||||
|
||||
cu_array_t * uvg_cu_array_alloc(const int width, const int height);
|
||||
|
|
|
@ -343,7 +343,7 @@ static void init_quant_block(
|
|||
const bool needsSqrt2ScaleAdjustment,
|
||||
const int gValue)
|
||||
{
|
||||
double lambda = state->lambda;
|
||||
double lambda = color == COLOR_Y ? state->lambda : state->c_lambda;
|
||||
|
||||
const int qpDQ = state->qp + 1;
|
||||
const int qpPer = qpDQ / 6;
|
||||
|
@ -475,7 +475,7 @@ static void xSetLastCoeffOffset(
|
|||
cbf_ctx = &state->search_cabac.ctx.qt_cbf_model_cr[cbf_is_set(cur_tu->cbf, COLOR_U)];
|
||||
break;
|
||||
}
|
||||
cbfDeltaBits = (int32_t)CTX_ENTROPY_BITS(cbf_ctx, 1) - (int32_t)CTX_ENTROPY_BITS(cbf_ctx, 0);
|
||||
cbfDeltaBits = compID != COLOR_Y && cur_tu->joint_cb_cr ? 0 : (int32_t)CTX_ENTROPY_BITS(cbf_ctx, 1) - (int32_t)CTX_ENTROPY_BITS(cbf_ctx, 0);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -143,7 +143,7 @@ bool uvg_is_lfnst_allowed(
|
|||
uvg_get_isp_split_loc(&split_loc, cu_loc->x, cu_loc->y, cu_width, cu_height, i, isp_mode, false);
|
||||
int local_split_x = lcu ? split_loc.local_x : split_loc.x;
|
||||
int local_split_y = lcu ? split_loc.local_y : split_loc.y;
|
||||
uvg_get_isp_cu_arr_coords(&local_split_x, &local_split_y);
|
||||
uvg_get_isp_cu_arr_coords(&local_split_x, &local_split_y, MAX(cu_width, cu_height));
|
||||
const cu_info_t* split_cu = lcu ? LCU_GET_CU_AT_PX(lcu, local_split_x, local_split_y) :
|
||||
uvg_cu_array_at_const(frame->cu_array, local_split_x, local_split_y);
|
||||
|
||||
|
@ -550,7 +550,7 @@ static void encode_transform_unit(
|
|||
cu_array_t* used_cu_array = tree_type != UVG_CHROMA_T ? frame->cu_array : frame->chroma_cu_array;
|
||||
int isp_x = x;
|
||||
int isp_y = y;
|
||||
uvg_get_isp_cu_arr_coords(&isp_x, &isp_y);
|
||||
uvg_get_isp_cu_arr_coords(&isp_x, &isp_y, MAX(width, height));
|
||||
if(cur_pu == NULL) {
|
||||
cur_pu = uvg_cu_array_at_const(used_cu_array, isp_x, isp_y);
|
||||
}
|
||||
|
@ -645,7 +645,7 @@ static void encode_transform_coeff(
|
|||
int x = cu_loc->x;
|
||||
int y = cu_loc->y;
|
||||
if (isp_split) {
|
||||
uvg_get_isp_cu_arr_coords(&x, &y);
|
||||
uvg_get_isp_cu_arr_coords(&x, &y, MAX(cu_loc->width, cu_loc->height));
|
||||
}
|
||||
|
||||
//const encoder_control_t *const ctrl = state->encoder_control;
|
||||
|
|
|
@ -335,6 +335,7 @@ typedef struct encoder_state_t {
|
|||
int8_t qp;
|
||||
|
||||
double c_lambda;
|
||||
double chroma_weights[4];
|
||||
|
||||
/**
|
||||
* \brief Whether a QP delta value must be coded for the current LCU.
|
||||
|
|
12
src/intra.c
12
src/intra.c
|
@ -2004,7 +2004,7 @@ double uvg_recon_and_estimate_cost_isp(encoder_state_t* const state,
|
|||
const cu_loc_t* const cu_loc,
|
||||
double cost_treshold,
|
||||
intra_search_data_t* const search_data,
|
||||
lcu_t* const lcu) {
|
||||
lcu_t* const lcu, bool* violates_lfnst) {
|
||||
assert(state->search_cabac.update && "ISP reconstruction must be done with CABAC update");
|
||||
double cost = 0;
|
||||
|
||||
|
@ -2012,6 +2012,7 @@ double uvg_recon_and_estimate_cost_isp(encoder_state_t* const state,
|
|||
const int height = cu_loc->height;
|
||||
|
||||
search_data->best_isp_cbfs = 0;
|
||||
search_data->pred_cu.intra.isp_cbfs = 0;
|
||||
// ISP split is done horizontally or vertically depending on ISP mode, 2 or 4 times depending on block dimensions.
|
||||
// Small blocks are split only twice.
|
||||
int split_type = search_data->pred_cu.intra.isp_mode;
|
||||
|
@ -2020,11 +2021,11 @@ double uvg_recon_and_estimate_cost_isp(encoder_state_t* const state,
|
|||
int cbf_context = 2;
|
||||
|
||||
for (int i = 0; i < split_limit; ++i) {
|
||||
search_data->pred_cu.intra.isp_index = i;
|
||||
cu_loc_t tu_loc;
|
||||
uvg_get_isp_split_loc(&tu_loc, cu_loc->x, cu_loc->y, width, height, i, split_type, true);
|
||||
cu_loc_t pu_loc;
|
||||
uvg_get_isp_split_loc(&pu_loc, cu_loc->x, cu_loc->y, width, height, i, split_type, false);
|
||||
search_data->pred_cu.intra.isp_index = 0;
|
||||
if (tu_loc.x % 4 == 0) {
|
||||
intra_recon_tb_leaf(state, &pu_loc, cu_loc, lcu, COLOR_Y, search_data);
|
||||
}
|
||||
|
@ -2036,20 +2037,23 @@ double uvg_recon_and_estimate_cost_isp(encoder_state_t* const state,
|
|||
int ssd = uvg_pixels_calc_ssd(&lcu->ref.y[index], &lcu->rec.y[index],
|
||||
LCU_WIDTH, LCU_WIDTH,
|
||||
tu_loc.width, tu_loc.height);
|
||||
double coeff_bits = uvg_get_coeff_cost(state, lcu->coeff.y, NULL, &tu_loc, 0, SCAN_DIAG, false, COEFF_ORDER_CU);
|
||||
double coeff_bits = uvg_get_coeff_cost(state, lcu->coeff.y, &search_data->pred_cu, &tu_loc, 0, SCAN_DIAG, false, COEFF_ORDER_CU);
|
||||
|
||||
|
||||
int cbf = cbf_is_set(search_data->pred_cu.cbf, COLOR_Y);
|
||||
if (i + 1 != split_limit || search_data->best_isp_cbfs != 1 << (split_limit - 1)) {
|
||||
if (i + 1 != split_limit || search_data->best_isp_cbfs != 0) {
|
||||
CABAC_FBITS_UPDATE(&state->search_cabac, &state->search_cabac.ctx.qt_cbf_model_luma[cbf_context], cbf, coeff_bits, "cbf_luma_isp_recon");
|
||||
}
|
||||
cost += ssd + coeff_bits * state->lambda;
|
||||
|
||||
cbf_context = 2 + cbf;
|
||||
if(violates_lfnst) *violates_lfnst |= search_data->pred_cu.violates_lfnst_constrained_luma;
|
||||
search_data->pred_cu.violates_lfnst_constrained_luma = false;
|
||||
|
||||
search_data->best_isp_cbfs |= cbf << i;
|
||||
search_data->pred_cu.intra.isp_cbfs = search_data->best_isp_cbfs;
|
||||
|
||||
}
|
||||
search_data->pred_cu.intra.isp_index = 0;
|
||||
return cost;
|
||||
}
|
|
@ -155,7 +155,7 @@ double uvg_recon_and_estimate_cost_isp(encoder_state_t* const state,
|
|||
const cu_loc_t* const cu_loc,
|
||||
double cost_treshold,
|
||||
intra_search_data_t* const search_data,
|
||||
lcu_t* const lcu);
|
||||
lcu_t* const lcu, bool* violates_lfnst);
|
||||
|
||||
int8_t uvg_get_co_located_luma_mode(
|
||||
const cu_loc_t* const chroma_loc,
|
||||
|
|
|
@ -804,6 +804,11 @@ static double qp_to_lambda(encoder_state_t* const state, int qp)
|
|||
state->qp = est_qp;
|
||||
int8_t chroma_qp = encoder->qp_map[0][est_qp];
|
||||
double tmpWeight = pow(2.0, (est_qp - chroma_qp) / 3.0);
|
||||
if (state->encoder_control->cfg.dep_quant)
|
||||
{
|
||||
tmpWeight *= (state->encoder_control->cfg.gop_len >= 8 ? pow(2.0, 0.1 / 3.0) : pow(2.0, 0.2 / 3.0)); // increase chroma weight for dependent quantization (in order to reduce bit rate shift from chroma to luma)
|
||||
}
|
||||
state->chroma_weights[1] = state->chroma_weights[2] = state->chroma_weights[3] = tmpWeight;
|
||||
state->c_lambda = est_lambda / tmpWeight;
|
||||
ctu->qp = est_qp;
|
||||
ctu->lambda = est_lambda;
|
||||
|
@ -1174,6 +1179,11 @@ void uvg_set_lcu_lambda_and_qp(encoder_state_t * const state,
|
|||
|
||||
int8_t chroma_qp = ctrl->qp_map[0][state->qp];
|
||||
double tmpWeight = pow(2.0, (state->qp - chroma_qp) / 3.0);
|
||||
if (state->encoder_control->cfg.dep_quant)
|
||||
{
|
||||
tmpWeight *= (state->encoder_control->cfg.gop_len >= 8 ? pow(2.0, 0.1 / 3.0) : pow(2.0, 0.2 / 3.0)); // increase chroma weight for dependent quantization (in order to reduce bit rate shift from chroma to luma)
|
||||
}
|
||||
state->chroma_weights[1] = state->chroma_weights[2] = state->chroma_weights[3] = tmpWeight;
|
||||
state->c_lambda = state->lambda / tmpWeight;
|
||||
|
||||
// Apply variance adaptive quantization
|
||||
|
@ -1201,3 +1211,23 @@ void uvg_set_lcu_lambda_and_qp(encoder_state_t * const state,
|
|||
lcu->adjust_qp = state->qp;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * \brief Derive a chroma lambda from the luma lambda of the given state.
 *
 * The luma lambda (state->lambda) is divided by a weight computed from the
 * difference between state->qp and the chroma QP looked up in qp_map[0].
 * When dependent quantization is enabled the weight is increased slightly,
 * with the amount depending on whether gop_len is at least 8. The result is
 * then scaled by the JCCR related factors described below.
 *
 * \param state      encoder state; lambda, qp and encoder_control are read,
 *                   nothing is written
 * \param use_jccr   when true and state->qp > 18 the lambda is scaled by 1.3
 * \param jccr_mode  1 or 2 scales the lambda by 0.8, 3 scales it by 0.5,
 *                   any other value applies no extra scaling
 * \return the resulting chroma lambda
 */
double uvg_calculate_chroma_lambda(encoder_state_t *state, bool use_jccr, int jccr_mode)
{
  const encoder_control_t * const ctrl = state->encoder_control;
  const int8_t chroma_qp = ctrl->qp_map[0][state->qp];

  double chroma_weight = pow(2.0, (state->qp - chroma_qp) / 3.0);
  if (ctrl->cfg.dep_quant) {
    // increase chroma weight for dependent quantization
    // (in order to reduce bit rate shift from chroma to luma)
    if (ctrl->cfg.gop_len >= 8) {
      chroma_weight *= pow(2.0, 0.1 / 3.0);
    } else {
      chroma_weight *= pow(2.0, 0.2 / 3.0);
    }
  }

  double chroma_lambda = state->lambda / chroma_weight;

  // Enabled JCCR raises the lambda only above QP 18.
  if (use_jccr && state->qp > 18) {
    chroma_lambda *= 1.3;
  }

  // Mode specific scaling; other modes keep the lambda as it is.
  switch (jccr_mode) {
    case 1:
    case 2:
      chroma_lambda *= 0.8;
      break;
    case 3:
      chroma_lambda *= 0.5;
      break;
    default:
      break;
  }

  return chroma_lambda;
}
|
|
@ -76,4 +76,6 @@ void uvg_set_ctu_qp_lambda(encoder_state_t * const state, vector2d_t pos);
|
|||
void uvg_update_after_picture(encoder_state_t * const state);
|
||||
void uvg_estimate_pic_lambda(encoder_state_t * const state);
|
||||
|
||||
double uvg_calculate_chroma_lambda(encoder_state_t *state, bool use_jccr, int jccr_mode);
|
||||
|
||||
#endif // RATE_CONTROL_H_
|
||||
|
|
100
src/search.c
100
src/search.c
|
@ -43,6 +43,7 @@
|
|||
#include "imagelist.h"
|
||||
#include "inter.h"
|
||||
#include "intra.h"
|
||||
#include "rate_control.h"
|
||||
#include "uvg266.h"
|
||||
#include "rdo.h"
|
||||
#include "search_inter.h"
|
||||
|
@ -730,7 +731,8 @@ static double cu_rd_cost_tr_split_accurate(
|
|||
cu_info_t* const tr_cu = LCU_GET_CU_AT_PX(lcu, cu_loc->local_x, cu_loc->local_y);
|
||||
|
||||
double coeff_bits = 0;
|
||||
double tr_tree_bits = 0;
|
||||
double luma_bits = 0;
|
||||
double chroma_bits = 0;
|
||||
|
||||
const int cb_flag_u = tr_cu->joint_cb_cr ? tr_cu->joint_cb_cr >> 1 : cbf_is_set(tr_cu->cbf, COLOR_U);
|
||||
const int cb_flag_v = tr_cu->joint_cb_cr ? tr_cu->joint_cb_cr & 1 : cbf_is_set(tr_cu->cbf, COLOR_V);
|
||||
|
@ -742,7 +744,7 @@ static double cu_rd_cost_tr_split_accurate(
|
|||
// Only need to signal coded block flag if not skipped or merged
|
||||
// skip = no coded residual, merge = coded residual
|
||||
if (pred_cu->type == CU_INTER && (!pred_cu->merged)) {
|
||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_qt_root_cbf_model), cbf, tr_tree_bits, "rqt_root_cbf");
|
||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_qt_root_cbf_model), cbf, luma_bits, "rqt_root_cbf");
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -767,13 +769,13 @@ static double cu_rd_cost_tr_split_accurate(
|
|||
for (int i = 0; i < split_count; ++i) {
|
||||
sum += cu_rd_cost_tr_split_accurate(state, pred_cu, lcu, tree_type, isp_cbf, &split_cu_loc[i], chroma_loc ? &split_chroma_cu_loc[i] : NULL, has_chroma);
|
||||
}
|
||||
return sum + tr_tree_bits * state->lambda;
|
||||
return sum + luma_bits * state->lambda;
|
||||
}
|
||||
|
||||
has_chroma = state->encoder_control->chroma_format != UVG_CSP_400 && has_chroma && tree_type != UVG_LUMA_T;
|
||||
if (!skip_residual_coding && has_chroma) {
|
||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cb[0]), cb_flag_u, tr_tree_bits, "cbf_cb");
|
||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cr[cb_flag_u]), cb_flag_v, tr_tree_bits, "cbf_cr");
|
||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cb[0]), cb_flag_u, chroma_bits, "cbf_cb");
|
||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cr[cb_flag_u]), cb_flag_v, chroma_bits, "cbf_cr");
|
||||
}
|
||||
|
||||
const int cb_flag_y = cbf_is_set(tr_cu->cbf, COLOR_Y) && tree_type != UVG_CHROMA_T;
|
||||
|
@ -790,7 +792,7 @@ static double cu_rd_cost_tr_split_accurate(
|
|||
{
|
||||
cabac_ctx_t* ctx = &(cabac->ctx.qt_cbf_model_luma[0]);
|
||||
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, cb_flag_y, tr_tree_bits, "cbf_y_search");
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, cb_flag_y, luma_bits, "cbf_y_search");
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
@ -801,7 +803,7 @@ static double cu_rd_cost_tr_split_accurate(
|
|||
for (int i = 0; i < split_limit; i++) {
|
||||
if (i != split_limit_minus_one || isp_cbf != 1 << split_limit_minus_one) {
|
||||
const int flag = (isp_cbf >> i) & 1;
|
||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_luma[luma_ctx]), flag, tr_tree_bits, "cbf_y_search");
|
||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_luma[luma_ctx]), flag, luma_bits, "cbf_y_search");
|
||||
luma_ctx = 2 + flag;
|
||||
}
|
||||
}
|
||||
|
@ -811,7 +813,7 @@ static double cu_rd_cost_tr_split_accurate(
|
|||
// TODO qp_delta_sign_flag
|
||||
|
||||
if ((cb_flag_u || cb_flag_v) && has_chroma && state->encoder_control->cfg.jccr) {
|
||||
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.joint_cb_cr[cb_flag_u * 2 + cb_flag_v - 1], tr_cu->joint_cb_cr != 0, tr_tree_bits, "tu_joint_cbcr_residual_flag");
|
||||
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.joint_cb_cr[cb_flag_u * 2 + cb_flag_v - 1], tr_cu->joint_cb_cr != 0, chroma_bits, "tu_joint_cbcr_residual_flag");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -833,7 +835,7 @@ static double cu_rd_cost_tr_split_accurate(
|
|||
|
||||
if(cb_flag_y || is_isp){
|
||||
if (can_use_tr_skip) {
|
||||
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_luma, tr_cu->tr_idx == MTS_SKIP, tr_tree_bits, "transform_skip_flag");
|
||||
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_luma, tr_cu->tr_idx == MTS_SKIP, luma_bits, "transform_skip_flag");
|
||||
}
|
||||
int8_t luma_scan_mode = SCAN_DIAG;
|
||||
if (pred_cu->type == CU_INTER || pred_cu->intra.isp_mode == ISP_MODE_NO_ISP) {
|
||||
|
@ -871,14 +873,14 @@ static double cu_rd_cost_tr_split_accurate(
|
|||
cabac,
|
||||
&cabac->ctx.lfnst_idx_model[1],
|
||||
lfnst_idx != 0,
|
||||
tr_tree_bits,
|
||||
luma_bits,
|
||||
"lfnst_idx");
|
||||
if (lfnst_idx > 0) {
|
||||
CABAC_FBITS_UPDATE(
|
||||
cabac,
|
||||
&cabac->ctx.lfnst_idx_model[2],
|
||||
lfnst_idx == 2,
|
||||
tr_tree_bits,
|
||||
luma_bits,
|
||||
"lfnst_idx");
|
||||
}
|
||||
}
|
||||
|
@ -902,38 +904,34 @@ static double cu_rd_cost_tr_split_accurate(
|
|||
if (!state->encoder_control->cfg.lossless) {
|
||||
int index = lcu_px.y * LCU_WIDTH_C + lcu_px.x;
|
||||
unsigned ssd_u = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
chroma_width, chroma_height);
|
||||
LCU_WIDTH_C, LCU_WIDTH_C, chroma_width, chroma_height) * state->chroma_weights[1];
|
||||
unsigned ssd_v = uvg_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.v[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
chroma_width, chroma_height);
|
||||
LCU_WIDTH_C, LCU_WIDTH_C, chroma_width, chroma_height) * state->chroma_weights[2];
|
||||
chroma_ssd = ssd_u + ssd_v;
|
||||
}
|
||||
if(chroma_can_use_tr_skip && cb_flag_u) {
|
||||
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 2, tr_tree_bits, "transform_skip_flag");
|
||||
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 2, chroma_bits, "transform_skip_flag");
|
||||
}
|
||||
if(chroma_can_use_tr_skip && cb_flag_v) {
|
||||
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 4, tr_tree_bits, "transform_skip_flag");
|
||||
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 4, chroma_bits, "transform_skip_flag");
|
||||
}
|
||||
coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.u, tr_cu, &temp_chroma_loc, COLOR_U, scan_order, tr_cu->tr_skip & 2, COEFF_ORDER_CU);
|
||||
coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.v, tr_cu, &temp_chroma_loc, COLOR_V, scan_order, tr_cu->tr_skip & 4, COEFF_ORDER_CU);
|
||||
chroma_bits += uvg_get_coeff_cost(state, lcu->coeff.u, tr_cu, &temp_chroma_loc, COLOR_U, scan_order, tr_cu->tr_skip & 2, COEFF_ORDER_CU);
|
||||
chroma_bits += uvg_get_coeff_cost(state, lcu->coeff.v, tr_cu, &temp_chroma_loc, COLOR_V, scan_order, tr_cu->tr_skip & 4, COEFF_ORDER_CU);
|
||||
|
||||
}
|
||||
else {
|
||||
{
|
||||
int index = lcu_px.y * LCU_WIDTH_C + lcu_px.x;
|
||||
int ssd_u_joint = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
chroma_width, chroma_height);
|
||||
LCU_WIDTH_C, LCU_WIDTH_C, chroma_width, chroma_height) * state->chroma_weights[3];
|
||||
int ssd_v_joint = uvg_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.v[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
chroma_width, chroma_height);
|
||||
LCU_WIDTH_C, LCU_WIDTH_C, chroma_width, chroma_height) * state->chroma_weights[3];
|
||||
chroma_ssd = ssd_u_joint + ssd_v_joint;
|
||||
}
|
||||
if (chroma_can_use_tr_skip) {
|
||||
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 2, tr_tree_bits, "transform_skip_flag");
|
||||
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 2, chroma_bits, "transform_skip_flag");
|
||||
}
|
||||
coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.joint_uv, tr_cu, &temp_chroma_loc, COLOR_U, scan_order, 0, COEFF_ORDER_CU);
|
||||
chroma_bits += uvg_get_coeff_cost(state, lcu->coeff.joint_uv, tr_cu, &temp_chroma_loc, COLOR_U, scan_order, 0, COEFF_ORDER_CU);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -944,14 +942,14 @@ static double cu_rd_cost_tr_split_accurate(
|
|||
cabac,
|
||||
&cabac->ctx.lfnst_idx_model[is_chroma_tree],
|
||||
lfnst_idx != 0,
|
||||
tr_tree_bits,
|
||||
luma_bits,
|
||||
"lfnst_idx");
|
||||
if (lfnst_idx > 0) {
|
||||
CABAC_FBITS_UPDATE(
|
||||
cabac,
|
||||
&cabac->ctx.lfnst_idx_model[2],
|
||||
lfnst_idx == 2,
|
||||
tr_tree_bits,
|
||||
luma_bits,
|
||||
"lfnst_idx");
|
||||
}
|
||||
}
|
||||
|
@ -962,20 +960,20 @@ static double cu_rd_cost_tr_split_accurate(
|
|||
|
||||
bool symbol = tr_cu->tr_idx != 0;
|
||||
int ctx_idx = 0;
|
||||
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.mts_idx_model[ctx_idx], symbol, tr_tree_bits, "mts_idx");
|
||||
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.mts_idx_model[ctx_idx], symbol, luma_bits, "mts_idx");
|
||||
|
||||
ctx_idx++;
|
||||
for (int i = 0; i < 3 && symbol; i++, ctx_idx++)
|
||||
{
|
||||
symbol = tr_cu->tr_idx > i + MTS_DST7_DST7 ? 1 : 0;
|
||||
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.mts_idx_model[ctx_idx], symbol, tr_tree_bits, "mts_idx");
|
||||
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.mts_idx_model[ctx_idx], symbol, luma_bits, "mts_idx");
|
||||
}
|
||||
tr_cu->mts_last_scan_pos = false;
|
||||
tr_cu->violates_mts_coeff_constraint = false;
|
||||
}
|
||||
|
||||
double bits = tr_tree_bits + coeff_bits;
|
||||
return luma_ssd * UVG_LUMA_MULT + chroma_ssd * UVG_CHROMA_MULT + bits * state->lambda;
|
||||
double bits = luma_bits + coeff_bits;
|
||||
return luma_ssd * UVG_LUMA_MULT + chroma_ssd * UVG_CHROMA_MULT + (bits + chroma_bits) * state->lambda;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1374,7 +1372,8 @@ static double search_cu(
|
|||
cu_loc,
|
||||
0,
|
||||
&intra_search,
|
||||
lcu
|
||||
lcu,
|
||||
NULL
|
||||
);
|
||||
memcpy(&state->search_cabac, &temp_cabac, sizeof(cabac_data_t));
|
||||
}
|
||||
|
@ -1447,20 +1446,23 @@ static double search_cu(
|
|||
recon_chroma = false;
|
||||
}
|
||||
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
|
||||
uvg_intra_recon_cu(state,
|
||||
&intra_search, cu_loc,
|
||||
NULL, lcu,
|
||||
tree_type,
|
||||
recon_luma, recon_chroma);
|
||||
if (!state->encoder_control->cfg.cclm && cur_cu->intra.isp_mode != ISP_MODE_NO_ISP) {
|
||||
uvg_recon_and_estimate_cost_isp(
|
||||
state,
|
||||
cu_loc,
|
||||
0,
|
||||
&intra_search,
|
||||
lcu
|
||||
lcu,
|
||||
NULL
|
||||
);
|
||||
}
|
||||
else {
|
||||
uvg_intra_recon_cu(state,
|
||||
&intra_search, cu_loc,
|
||||
NULL, lcu,
|
||||
tree_type,
|
||||
recon_luma, recon_chroma);
|
||||
}
|
||||
|
||||
|
||||
if((!recon_chroma && state->encoder_control->chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T)
|
||||
|
@ -1487,7 +1489,7 @@ static double search_cu(
|
|||
|
||||
// Set isp split cbfs here
|
||||
const int split_type = intra_search.pred_cu.intra.isp_mode;
|
||||
const int split_num = split_type == ISP_MODE_NO_ISP ? 0 : uvg_get_isp_split_num(cu_width, cu_height, split_type, true);
|
||||
const int split_num = split_type == ISP_MODE_NO_ISP || tree_type == UVG_CHROMA_T ? 0 : uvg_get_isp_split_num(cu_width, cu_height, split_type, true);
|
||||
|
||||
const int cbf_cb = cbf_is_set(cur_cu->cbf, COLOR_U);
|
||||
const int cbf_cr = cbf_is_set(cur_cu->cbf, COLOR_V);
|
||||
|
@ -1499,7 +1501,7 @@ static double search_cu(
|
|||
// Fetch proper x, y coords for isp blocks
|
||||
int tmp_x = isp_loc.x;
|
||||
int tmp_y = isp_loc.y;
|
||||
uvg_get_isp_cu_arr_coords(&tmp_x, &tmp_y);
|
||||
uvg_get_isp_cu_arr_coords(&tmp_x, &tmp_y, MAX(cu_width, cu_height));
|
||||
cu_info_t* split_cu = LCU_GET_CU_AT_PX(lcu, tmp_x % LCU_WIDTH, tmp_y % LCU_WIDTH);
|
||||
bool cur_cbf = (intra_search.best_isp_cbfs >> i) & 1;
|
||||
cbf_clear(&split_cu->cbf, COLOR_Y);
|
||||
|
@ -1701,6 +1703,13 @@ static double search_cu(
|
|||
for (int split_type = QT_SPLIT; split_type <= TT_VER_SPLIT; ++split_type) {
|
||||
if (!can_split[split_type])
|
||||
continue;
|
||||
split_tree_t new_split = {
|
||||
split_tree.split_tree | split_type << (split_tree.current_depth * 3),
|
||||
split_tree.current_depth + 1,
|
||||
split_tree.mtt_depth + (split_type != QT_SPLIT),
|
||||
split_tree.implicit_mtt_depth + (split_type != QT_SPLIT && is_implicit),
|
||||
0
|
||||
};
|
||||
|
||||
if (completely_inside && check_for_early_termission(
|
||||
cu_width,
|
||||
|
@ -1766,13 +1775,6 @@ static double search_cu(
|
|||
continue;
|
||||
}
|
||||
|
||||
split_tree_t new_split = {
|
||||
split_tree.split_tree | split_type << (split_tree.current_depth * 3),
|
||||
split_tree.current_depth + 1,
|
||||
split_tree.mtt_depth + (split_type != QT_SPLIT),
|
||||
split_tree.implicit_mtt_depth + (split_type != QT_SPLIT && is_implicit),
|
||||
0
|
||||
};
|
||||
|
||||
state->search_cabac.update = 0;
|
||||
split_cost += split_bits * state->lambda;
|
||||
|
@ -2120,7 +2122,9 @@ void uvg_search_lcu(encoder_state_t * const state, const int x, const int y, con
|
|||
}
|
||||
|
||||
int tree_type = state->frame->slicetype == UVG_SLICE_I
|
||||
&& state->encoder_control->cfg.dual_tree ? UVG_LUMA_T : UVG_BOTH_T;
|
||||
&& state->encoder_control->cfg.dual_tree
|
||||
? UVG_LUMA_T
|
||||
: UVG_BOTH_T;
|
||||
|
||||
cu_loc_t start;
|
||||
uvg_cu_loc_ctor(&start, x, y, LCU_WIDTH, LCU_WIDTH);
|
||||
|
|
|
@ -365,6 +365,7 @@ static double search_intra_trdepth(
|
|||
for (trafo = mts_start; trafo < num_transforms; trafo++) {
|
||||
for (int lfnst_idx = start_idx; lfnst_idx <= end_lfnst_idx; lfnst_idx++) {
|
||||
// Initialize lfnst variables
|
||||
search_data->best_isp_cbfs = 0;
|
||||
pred_cu->tr_idx = trafo;
|
||||
pred_cu->tr_skip = trafo == MTS_SKIP;
|
||||
pred_cu->lfnst_idx = lfnst_idx;
|
||||
|
@ -400,8 +401,10 @@ static double search_intra_trdepth(
|
|||
cu_loc,
|
||||
cost_treshold,
|
||||
search_data,
|
||||
lcu
|
||||
lcu,
|
||||
&constraints[0]
|
||||
);
|
||||
constraints[1] = search_data->best_isp_cbfs != 0;
|
||||
}
|
||||
else {
|
||||
uvg_intra_recon_cu(
|
||||
|
@ -427,7 +430,7 @@ static double search_intra_trdepth(
|
|||
}
|
||||
}
|
||||
|
||||
if (trafo != MTS_SKIP && end_lfnst_idx != 0) {
|
||||
if (trafo != MTS_SKIP && end_lfnst_idx != 0 && pred_cu->intra.isp_mode == ISP_MODE_NO_ISP) {
|
||||
uvg_derive_lfnst_constraints(
|
||||
pred_cu,
|
||||
constraints,
|
||||
|
@ -438,7 +441,7 @@ static double search_intra_trdepth(
|
|||
COLOR_Y);
|
||||
}
|
||||
|
||||
if (!constraints[1] && (cbf_is_set(pred_cu->cbf, COLOR_Y) || pred_cu->intra.isp_mode != ISP_MODE_NO_ISP)) {
|
||||
if (!constraints[1] && cbf_is_set(pred_cu->cbf, COLOR_Y)) {
|
||||
//end_idx = 0;
|
||||
if (pred_cu->lfnst_idx > 0) {
|
||||
continue;
|
||||
|
@ -456,8 +459,8 @@ static double search_intra_trdepth(
|
|||
}
|
||||
double transform_bits = 0;
|
||||
if (state->encoder_control->cfg.lfnst && PU_IS_TU(pred_cu) &&
|
||||
trafo != MTS_SKIP && end_lfnst_idx != 0) {
|
||||
if ((!constraints[0] && constraints[1]) || lfnst_idx != 0) {
|
||||
trafo != MTS_SKIP && end_lfnst_idx != 0 && (cbf_is_set(pred_cu->cbf, COLOR_Y) || search_data->best_isp_cbfs != 0)) {
|
||||
if ((!constraints[0] && (constraints[1] || pred_cu->intra.isp_mode != ISP_MODE_NO_ISP))) {
|
||||
transform_bits += CTX_ENTROPY_FBITS(
|
||||
&state->search_cabac.ctx.lfnst_idx_model[tree_type == UVG_LUMA_T],
|
||||
lfnst_idx != 0);
|
||||
|
@ -469,6 +472,7 @@ static double search_intra_trdepth(
|
|||
}
|
||||
}
|
||||
if (num_transforms > 2 && trafo != MTS_SKIP
|
||||
&& (cbf_is_set(pred_cu->cbf, COLOR_Y) || search_data->best_isp_cbfs != 0)
|
||||
&& pred_cu->intra.isp_mode == ISP_MODE_NO_ISP
|
||||
&& lfnst_idx == 0
|
||||
&& width <= 32
|
||||
|
@ -952,8 +956,9 @@ static double count_bits(
|
|||
const double not_mpm_mode_bit,
|
||||
const double planar_mode_flag,
|
||||
const double not_planar_mode_flag,
|
||||
const double not_isp_flag,
|
||||
int8_t mode
|
||||
)
|
||||
)
|
||||
{
|
||||
int i = 0;
|
||||
int smaller_than_pred = 0;
|
||||
|
@ -975,7 +980,7 @@ static double count_bits(
|
|||
else {
|
||||
bits = not_mpm_mode_bit + 5 + (mode - smaller_than_pred > 2);
|
||||
}
|
||||
bits += not_mrl + not_mip;
|
||||
bits += not_mrl + not_mip + not_isp_flag;
|
||||
return bits;
|
||||
}
|
||||
|
||||
|
@ -1030,6 +1035,7 @@ static uint8_t search_intra_rough(
|
|||
const double not_mpm_mode_bit = CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.intra_luma_mpm_flag_model), 0);
|
||||
const double planar_mode_flag = CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.luma_planar_model[1]), 0);
|
||||
const double not_planar_mode_flag = CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.luma_planar_model[1]), 1);
|
||||
const double not_isp_flag = state->encoder_control->cfg.isp && uvg_can_use_isp(width, height) ? CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.intra_subpart_model[0]), 0) : 0;
|
||||
|
||||
const uint8_t mode_list_size = state->encoder_control->cfg.mip ? 6 : 3;
|
||||
struct mode_cost best_six_modes[6];
|
||||
|
@ -1059,7 +1065,7 @@ static uint8_t search_intra_rough(
|
|||
not_mpm_mode_bit,
|
||||
planar_mode_flag,
|
||||
not_planar_mode_flag,
|
||||
0) * state->lambda_sqrt;
|
||||
not_isp_flag, 0) * state->lambda_sqrt;
|
||||
costs[1] += count_bits(
|
||||
state,
|
||||
intra_preds,
|
||||
|
@ -1069,7 +1075,7 @@ static uint8_t search_intra_rough(
|
|||
not_mpm_mode_bit,
|
||||
planar_mode_flag,
|
||||
not_planar_mode_flag,
|
||||
1) * state->lambda_sqrt;
|
||||
not_isp_flag, 1) * state->lambda_sqrt;
|
||||
if(costs[0] < costs[1]) {
|
||||
min_cost = costs[0];
|
||||
max_cost = costs[1];
|
||||
|
@ -1113,7 +1119,7 @@ static uint8_t search_intra_rough(
|
|||
not_mpm_mode_bit,
|
||||
planar_mode_flag,
|
||||
not_planar_mode_flag,
|
||||
mode + i * offset) * state->lambda_sqrt;
|
||||
not_isp_flag, mode + i * offset) * state->lambda_sqrt;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1184,7 +1190,7 @@ static uint8_t search_intra_rough(
|
|||
not_mpm_mode_bit,
|
||||
planar_mode_flag,
|
||||
not_planar_mode_flag,
|
||||
modes_to_check[block + i]) * state->lambda_sqrt;
|
||||
not_isp_flag, modes_to_check[block + i]) * state->lambda_sqrt;
|
||||
|
||||
}
|
||||
|
||||
|
@ -1327,7 +1333,8 @@ static int8_t search_intra_rdo(
|
|||
|
||||
for (int mode = 0; mode < modes_to_check; mode++) {
|
||||
bool can_do_isp_search = search_data[mode].pred_cu.intra.mip_flag ? false : true; // Cannot use ISP with MIP
|
||||
can_do_isp_search = search_data[mode].pred_cu.intra.multi_ref_idx == 0 ? can_do_isp_search : false; // Cannot use ISP with MRL
|
||||
// can_do_isp_search = search_data[mode].pred_cu.intra.multi_ref_idx == 0 ? can_do_isp_search : false; // Cannot use ISP with MRL
|
||||
const uint8_t mrl_idx = search_data[mode].pred_cu.intra.multi_ref_idx;
|
||||
double best_isp_cost = MAX_DOUBLE;
|
||||
double best_bits = MAX_DOUBLE;
|
||||
int8_t best_isp_mode = 0;
|
||||
|
@ -1340,6 +1347,7 @@ static int8_t search_intra_rdo(
|
|||
|
||||
|
||||
search_data[mode].pred_cu.intra.isp_mode = isp_mode;
|
||||
search_data[mode].pred_cu.intra.multi_ref_idx = isp_mode == ISP_MODE_NO_ISP ? mrl_idx : 0;
|
||||
double rdo_bitcost = uvg_luma_mode_bits(state, &search_data[mode].pred_cu, cu_loc, lcu);
|
||||
search_data[mode].pred_cu.tr_idx = MTS_TR_NUM;
|
||||
search_data[mode].bits = rdo_bitcost;
|
||||
|
@ -1362,6 +1370,7 @@ static int8_t search_intra_rdo(
|
|||
search_data[mode].cost = best_isp_cost;
|
||||
search_data[mode].bits = best_bits;
|
||||
search_data[mode].pred_cu.intra.isp_mode = best_isp_mode;
|
||||
search_data[mode].pred_cu.intra.multi_ref_idx = best_isp_mode == ISP_MODE_NO_ISP ? mrl_idx : 0;
|
||||
search_data[mode].pred_cu.tr_idx = best_mts_mode_for_isp[best_isp_mode];
|
||||
search_data[mode].pred_cu.tr_skip = best_mts_mode_for_isp[best_isp_mode] == MTS_SKIP;
|
||||
search_data[mode].pred_cu.lfnst_idx = best_lfnst_mode_for_isp[best_isp_mode];
|
||||
|
@ -1482,11 +1491,13 @@ int8_t uvg_search_intra_chroma_rdo(
|
|||
ALIGNED(64) int16_t u_resi[LCU_WIDTH_C * LCU_WIDTH_C];
|
||||
ALIGNED(64) int16_t v_resi[LCU_WIDTH_C * LCU_WIDTH_C];
|
||||
|
||||
double original_c_lambda = state->c_lambda;
|
||||
|
||||
for (int8_t mode_i = 0; mode_i < num_modes; ++mode_i) {
|
||||
const uint8_t mode = chroma_data[mode_i].pred_cu.intra.mode_chroma;
|
||||
double mode_bits = uvg_chroma_mode_bits(state, mode, luma_mode);
|
||||
chroma_data[mode_i].cost = mode_bits * state->lambda;
|
||||
chroma_data[mode_i].cost = mode_bits * state->c_lambda;
|
||||
chroma_data[mode_i].bits = mode_bits;
|
||||
cu_info_t* pred_cu = &chroma_data[mode_i].pred_cu;
|
||||
uint8_t best_lfnst_index = 0;
|
||||
for (int lfnst_i = 0; lfnst_i < 3; ++lfnst_i) {
|
||||
|
@ -1494,9 +1505,10 @@ int8_t uvg_search_intra_chroma_rdo(
|
|||
if (lfnst == -1) {
|
||||
continue;
|
||||
}
|
||||
state->c_lambda = original_c_lambda * (state->encoder_control->cfg.jccr && state->qp > 18 ? 1.3 : 1.0);
|
||||
pred_cu->cr_lfnst_idx = lfnst;
|
||||
chroma_data[mode_i].lfnst_costs[lfnst] += mode_bits * state->lambda;
|
||||
if (PU_IS_TU(pred_cu) && (tree_type != UVG_CHROMA_T || (pred_cu->log2_width < 5 && pred_cu->log2_height < 5))) {
|
||||
chroma_data[mode_i].lfnst_costs[lfnst] += mode_bits * state->c_lambda;
|
||||
if (PU_IS_TU(pred_cu) && (tree_type != UVG_CHROMA_T || (pred_cu->log2_chroma_width < 5 && pred_cu->log2_chroma_height < 5))) {
|
||||
uvg_intra_predict(
|
||||
state,
|
||||
&refs[COLOR_U - 1],
|
||||
|
@ -1552,8 +1564,9 @@ int8_t uvg_search_intra_chroma_rdo(
|
|||
continue;
|
||||
}
|
||||
|
||||
double actual_cost = state->lambda * (chorma_ts_out.u_bits + chorma_ts_out.v_bits + mode_bits) + (chorma_ts_out.u_distortion + chorma_ts_out.v_distortion);
|
||||
if(chorma_ts_out.best_u_cost + chorma_ts_out.best_v_cost < chorma_ts_out.best_combined_cost) {
|
||||
chroma_data[mode_i].lfnst_costs[lfnst] += chorma_ts_out.best_u_cost + chorma_ts_out.best_v_cost;
|
||||
chroma_data[mode_i].lfnst_costs[lfnst] = actual_cost;
|
||||
if( chroma_data[mode_i].lfnst_costs[lfnst]
|
||||
< chroma_data[mode_i].lfnst_costs[best_lfnst_index] || lfnst_i == 0) {
|
||||
chroma_data[mode_i].pred_cu.joint_cb_cr = 0;
|
||||
|
@ -1565,7 +1578,7 @@ int8_t uvg_search_intra_chroma_rdo(
|
|||
}
|
||||
}
|
||||
else {
|
||||
chroma_data[mode_i].lfnst_costs[lfnst] += chorma_ts_out.best_combined_cost;
|
||||
chroma_data[mode_i].lfnst_costs[lfnst] = actual_cost;
|
||||
if (chroma_data[mode_i].lfnst_costs[lfnst]
|
||||
< chroma_data[mode_i].lfnst_costs[best_lfnst_index] || lfnst_i == 0) {
|
||||
chroma_data[mode_i].pred_cu.joint_cb_cr = chorma_ts_out.best_combined_index;
|
||||
|
@ -1574,10 +1587,11 @@ int8_t uvg_search_intra_chroma_rdo(
|
|||
chroma_data[mode_i].cost = chroma_data[mode_i].lfnst_costs[lfnst];
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
state->search_cabac.update = 1;
|
||||
chroma_data[mode_i].cost = mode_bits * state->lambda;
|
||||
chroma_data[mode_i].cost = mode_bits * state->c_lambda;
|
||||
uvg_intra_recon_cu(state,
|
||||
&chroma_data[mode_i], cu_loc,
|
||||
pred_cu, lcu,
|
||||
|
@ -1593,6 +1607,7 @@ int8_t uvg_search_intra_chroma_rdo(
|
|||
}
|
||||
sort_modes(chroma_data, num_modes);
|
||||
|
||||
state->c_lambda = original_c_lambda;
|
||||
return chroma_data[0].pred_cu.intra.mode_chroma;
|
||||
}
|
||||
|
||||
|
|
|
@ -2608,7 +2608,7 @@ static void mts_dct_generic(
|
|||
if (height == 1) {
|
||||
dct_hor(input, output, shift_1st, height, 0, skip_width);
|
||||
} else if (width == 1) {
|
||||
dct_ver(input, output, shift_2nd, width, 0, skip_height);
|
||||
dct_ver(input, output, log2_height_minus1 + 1 + bitdepth + 6 - 15, width, 0, skip_height);
|
||||
} else {
|
||||
dct_hor(input, tmp, shift_1st, height, 0, skip_width);
|
||||
dct_ver(tmp, output, shift_2nd, width, skip_width, skip_height);
|
||||
|
@ -2666,9 +2666,9 @@ static void mts_idct_generic(
|
|||
const int32_t shift_2nd = (transform_matrix_shift + max_log2_tr_dynamic_range - 1) - bitdepth;
|
||||
|
||||
if (height == 1) {
|
||||
idct_hor(input, output, shift_1st, height, 0, skip_width);
|
||||
idct_hor(input, output, shift_2nd + 1, height, 0, skip_width);
|
||||
} else if (width == 1) {
|
||||
idct_ver(input, output, shift_2nd, width, 0, skip_height);
|
||||
idct_ver(input, output, shift_2nd + 1, width, 0, skip_height);
|
||||
} else {
|
||||
idct_ver(input, tmp, shift_1st, width, skip_width, skip_height);
|
||||
idct_hor(tmp, output, shift_2nd, height, 0, skip_width);
|
||||
|
|
125
src/transform.c
125
src/transform.c
|
@ -37,6 +37,7 @@
|
|||
#include "intra.h"
|
||||
#include "uvg266.h"
|
||||
#include "lfnst_tables.h"
|
||||
#include "rate_control.h"
|
||||
#include "rdo.h"
|
||||
#include "strategies/strategies-dct.h"
|
||||
#include "strategies/strategies-quant.h"
|
||||
|
@ -362,7 +363,7 @@ static void generate_jccr_transforms(
|
|||
}
|
||||
}
|
||||
}
|
||||
costs[jccr] = d2 != 0 ? MIN(d1, d2) : d1;
|
||||
costs[jccr] = jccr == 0 ? MIN(d1, d2) : d1;
|
||||
}
|
||||
int64_t min_dist1 = costs[0];
|
||||
int64_t min_dist2 = INT64_MAX;
|
||||
|
@ -418,8 +419,7 @@ static void generate_jccr_transforms(
|
|||
static void quantize_chroma(
|
||||
encoder_state_t* const state,
|
||||
cu_info_t * const cur_tu,
|
||||
int8_t width,
|
||||
int8_t height,
|
||||
const cu_loc_t* const cu_loc,
|
||||
coeff_t u_coeff[5120],
|
||||
coeff_t v_coeff[2048],
|
||||
enum uvg_chroma_transforms transform,
|
||||
|
@ -429,8 +429,12 @@ static void quantize_chroma(
|
|||
bool* u_has_coeffs,
|
||||
bool* v_has_coeffs,
|
||||
uint8_t lfnst_idx,
|
||||
enum uvg_tree_type tree_type)
|
||||
enum uvg_tree_type tree_type,
|
||||
double* u_coeff_cost,
|
||||
double* v_coeff_cost)
|
||||
{
|
||||
int8_t width = cu_loc->chroma_width;
|
||||
int8_t height = cu_loc->chroma_height;
|
||||
if(state->encoder_control->cfg.dep_quant && transform != CHROMA_TS) {
|
||||
int abs_sum = 0;
|
||||
uvg_dep_quant(
|
||||
|
@ -445,10 +449,23 @@ static void quantize_chroma(
|
|||
&abs_sum,
|
||||
state->encoder_control->cfg.scaling_list
|
||||
);
|
||||
|
||||
cbf_clear(&cur_tu->cbf, COLOR_U);
|
||||
if (abs_sum > 0) {
|
||||
*u_has_coeffs = 1;
|
||||
cbf_set(&cur_tu->cbf, COLOR_U);
|
||||
}
|
||||
|
||||
*u_coeff_cost = uvg_get_coeff_cost(
|
||||
state,
|
||||
u_quant_coeff,
|
||||
cur_tu,
|
||||
cu_loc,
|
||||
COLOR_U,
|
||||
SCAN_DIAG,
|
||||
false,
|
||||
COEFF_ORDER_LINEAR);
|
||||
|
||||
if (transform == DCT7_CHROMA) {
|
||||
abs_sum = 0;
|
||||
uvg_dep_quant(
|
||||
|
@ -463,10 +480,24 @@ static void quantize_chroma(
|
|||
&abs_sum,
|
||||
state->encoder_control->cfg.scaling_list
|
||||
);
|
||||
|
||||
cbf_clear(&cur_tu->cbf, COLOR_V);
|
||||
if (abs_sum > 0) {
|
||||
*v_has_coeffs = 1;
|
||||
cbf_set(&cur_tu->cbf, COLOR_V);
|
||||
}
|
||||
|
||||
*v_coeff_cost = uvg_get_coeff_cost(
|
||||
state,
|
||||
v_quant_coeff,
|
||||
cur_tu,
|
||||
cu_loc,
|
||||
COLOR_V,
|
||||
SCAN_DIAG,
|
||||
false,
|
||||
COEFF_ORDER_LINEAR);
|
||||
cbf_clear(&cur_tu->cbf, COLOR_U);
|
||||
cbf_clear(&cur_tu->cbf, COLOR_V);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -580,6 +611,9 @@ void uvg_chroma_transform_search(
|
|||
trans_offset,
|
||||
&num_transforms);
|
||||
}
|
||||
|
||||
double lambda = state->c_lambda;
|
||||
|
||||
chorma_ts_out->best_u_cost = MAX_DOUBLE;
|
||||
chorma_ts_out->best_v_cost = MAX_DOUBLE;
|
||||
chorma_ts_out->best_combined_cost = MAX_DOUBLE;
|
||||
|
@ -600,11 +634,27 @@ void uvg_chroma_transform_search(
|
|||
uvg_fwd_lfnst(pred_cu, width, height, COLOR_V, pred_cu->cr_lfnst_idx, &v_coeff[i * trans_offset], tree_type, state->collocated_luma_mode);
|
||||
}
|
||||
}
|
||||
uint8_t old_jccr = pred_cu->joint_cb_cr;
|
||||
pred_cu->joint_cb_cr = 0;
|
||||
if(is_jccr) {
|
||||
state->c_lambda = lambda * (transforms[i] == JCCR_3 ? 0.5 : 0.8);
|
||||
pred_cu->joint_cb_cr = transforms[i];
|
||||
}
|
||||
else if(state->encoder_control->cfg.dep_quant) {
|
||||
state->search_cabac.update = 1;
|
||||
}
|
||||
|
||||
double u_coeff_cost = 0;
|
||||
double v_coeff_cost = 0;
|
||||
unsigned ssd_u = 0;
|
||||
unsigned ssd_v = 0;
|
||||
double u_bits = 0;
|
||||
double v_bits = 0;
|
||||
|
||||
quantize_chroma(
|
||||
state,
|
||||
pred_cu,
|
||||
width,
|
||||
height,
|
||||
cu_loc,
|
||||
&u_coeff[i * trans_offset],
|
||||
&v_coeff[i * trans_offset],
|
||||
transforms[i],
|
||||
|
@ -612,8 +662,12 @@ void uvg_chroma_transform_search(
|
|||
v_quant_coeff,
|
||||
SCAN_DIAG,
|
||||
&u_has_coeffs,
|
||||
&v_has_coeffs, tree_type == UVG_CHROMA_T ? pred_cu->cr_lfnst_idx : pred_cu->lfnst_idx, tree_type);
|
||||
if(pred_cu->cr_lfnst_idx !=0 && !u_has_coeffs && !v_has_coeffs) continue;
|
||||
&v_has_coeffs, tree_type == UVG_CHROMA_T ? pred_cu->cr_lfnst_idx : pred_cu->lfnst_idx,
|
||||
tree_type,
|
||||
&u_coeff_cost,
|
||||
&v_coeff_cost);
|
||||
pred_cu->joint_cb_cr = old_jccr;
|
||||
if (pred_cu->cr_lfnst_idx != 0 && !u_has_coeffs && !v_has_coeffs) goto reset_cabac;
|
||||
|
||||
if(pred_cu->type == CU_INTRA && transforms[i] != CHROMA_TS && tree_type == UVG_CHROMA_T) {
|
||||
bool constraints[2] = { false, false };
|
||||
|
@ -621,10 +675,10 @@ void uvg_chroma_transform_search(
|
|||
if(!is_jccr) {
|
||||
uvg_derive_lfnst_constraints(pred_cu, constraints, v_quant_coeff, width, height, NULL, COLOR_V);
|
||||
}
|
||||
if (!constraints[1] && (u_has_coeffs || v_has_coeffs) && pred_cu->cr_lfnst_idx != 0) continue;
|
||||
if (!constraints[1] && (u_has_coeffs || v_has_coeffs) && pred_cu->cr_lfnst_idx != 0) goto reset_cabac;
|
||||
}
|
||||
|
||||
if (is_jccr && !u_has_coeffs) continue;
|
||||
if (is_jccr && !u_has_coeffs) goto reset_cabac;
|
||||
|
||||
if (u_has_coeffs) {
|
||||
uvg_dequant(state, u_quant_coeff, &u_coeff[i * trans_offset], width, height, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V,
|
||||
|
@ -697,8 +751,6 @@ void uvg_chroma_transform_search(
|
|||
uvg_pixels_blit(v_pred, &v_recon[trans_offset * i], width, height, width, width);
|
||||
}
|
||||
|
||||
unsigned ssd_u = 0;
|
||||
unsigned ssd_v = 0;
|
||||
if (!state->encoder_control->cfg.lossless) {
|
||||
ssd_u = uvg_pixels_calc_ssd(&lcu->ref.u[offset], &u_recon[trans_offset * i],
|
||||
LCU_WIDTH_C, width,
|
||||
|
@ -706,10 +758,10 @@ void uvg_chroma_transform_search(
|
|||
ssd_v = uvg_pixels_calc_ssd(&lcu->ref.v[offset], &v_recon[trans_offset * i],
|
||||
LCU_WIDTH_C, width,
|
||||
width, height);
|
||||
ssd_u = (double)ssd_u * state->chroma_weights[1];
|
||||
ssd_v = (double)ssd_v * state->chroma_weights[2];
|
||||
}
|
||||
|
||||
double u_bits = 0;
|
||||
double v_bits = 0;
|
||||
state->search_cabac.update = 1;
|
||||
|
||||
int cbf_u = transforms[i] & 2 || (u_has_coeffs && !(transforms[i] & 1));
|
||||
|
@ -733,7 +785,8 @@ void uvg_chroma_transform_search(
|
|||
transforms[i] == CHROMA_TS, u_bits, "tr_skip_u"
|
||||
);
|
||||
}
|
||||
double coeff_cost = uvg_get_coeff_cost(
|
||||
if(u_coeff_cost == 0) {
|
||||
u_coeff_cost = uvg_get_coeff_cost(
|
||||
state,
|
||||
u_quant_coeff,
|
||||
pred_cu,
|
||||
|
@ -742,7 +795,7 @@ void uvg_chroma_transform_search(
|
|||
SCAN_DIAG,
|
||||
transforms[i] == CHROMA_TS,
|
||||
COEFF_ORDER_LINEAR);
|
||||
u_bits += coeff_cost;
|
||||
}
|
||||
}
|
||||
if (cbf_v && !is_jccr) {
|
||||
if (can_use_tr_skip) {
|
||||
|
@ -750,7 +803,8 @@ void uvg_chroma_transform_search(
|
|||
transforms[i] == CHROMA_TS, v_bits, "tr_skip_v"
|
||||
);
|
||||
}
|
||||
v_bits += uvg_get_coeff_cost(
|
||||
if (v_coeff_cost == 0) {
|
||||
v_coeff_cost = uvg_get_coeff_cost(
|
||||
state,
|
||||
v_quant_coeff,
|
||||
pred_cu,
|
||||
|
@ -760,6 +814,9 @@ void uvg_chroma_transform_search(
|
|||
transforms[i] == CHROMA_TS,
|
||||
COEFF_ORDER_LINEAR);
|
||||
}
|
||||
}
|
||||
u_bits += u_coeff_cost;
|
||||
v_bits += v_coeff_cost;
|
||||
if((depth == 4 || tree_type == UVG_CHROMA_T) && state->encoder_control->cfg.lfnst && 0) {
|
||||
if(uvg_is_lfnst_allowed(state, pred_cu, UVG_CHROMA_T, COLOR_UV, cu_loc, lcu)) {
|
||||
const int lfnst_idx = pred_cu->cr_lfnst_idx;
|
||||
|
@ -781,25 +838,35 @@ void uvg_chroma_transform_search(
|
|||
pred_cu->lfnst_last_scan_pos = false;
|
||||
pred_cu->violates_lfnst_constrained_chroma = false;
|
||||
}
|
||||
|
||||
if (!is_jccr) {
|
||||
double u_cost = UVG_CHROMA_MULT * ssd_u + u_bits * state->c_lambda;
|
||||
double v_cost = UVG_CHROMA_MULT * ssd_v + v_bits * state->c_lambda;
|
||||
double u_cost = UVG_CHROMA_MULT * ssd_u + u_bits * state->lambda;
|
||||
double v_cost = UVG_CHROMA_MULT * ssd_v + v_bits * state->lambda;
|
||||
if (u_cost < chorma_ts_out->best_u_cost) {
|
||||
chorma_ts_out->best_u_cost = u_cost;
|
||||
chorma_ts_out->best_u_index = u_has_coeffs ? transforms[i] : NO_RESIDUAL;
|
||||
chorma_ts_out->u_bits = u_bits;
|
||||
chorma_ts_out->u_distortion = ssd_u;
|
||||
}
|
||||
if (v_cost < chorma_ts_out->best_v_cost) {
|
||||
chorma_ts_out->best_v_cost = v_cost;
|
||||
chorma_ts_out->best_v_index = v_has_coeffs ? transforms[i] : NO_RESIDUAL;
|
||||
chorma_ts_out->v_bits = v_bits;
|
||||
chorma_ts_out->v_distortion = ssd_v;
|
||||
}
|
||||
}
|
||||
else {
|
||||
double cost = UVG_CHROMA_MULT * (ssd_u + ssd_v) + (u_bits + v_bits) * state->c_lambda;
|
||||
if (cost < chorma_ts_out->best_combined_cost) {
|
||||
double cost = UVG_CHROMA_MULT * (ssd_u + ssd_v) + (u_bits + v_bits) * state->lambda;
|
||||
if (cost < chorma_ts_out->best_combined_cost && cost < chorma_ts_out->best_u_cost + chorma_ts_out->best_v_cost) {
|
||||
chorma_ts_out->best_combined_cost = cost;
|
||||
chorma_ts_out->best_combined_index = transforms[i];
|
||||
chorma_ts_out->u_bits = u_bits;
|
||||
chorma_ts_out->u_distortion = ssd_u;
|
||||
chorma_ts_out->v_bits = v_bits;
|
||||
chorma_ts_out->v_distortion = ssd_v;
|
||||
}
|
||||
}
|
||||
reset_cabac:
|
||||
memcpy(&state->search_cabac, temp_cabac, sizeof(cabac_data_t));
|
||||
}
|
||||
}
|
||||
|
@ -1493,15 +1560,31 @@ void uvg_quantize_lcu_residual(
|
|||
if (luma) {
|
||||
quantize_tr_residual(state, COLOR_Y, &loc, cur_pu, lcu, early_skip, tree_type);
|
||||
}
|
||||
double c_lambda = state->c_lambda;
|
||||
state->c_lambda = uvg_calculate_chroma_lambda(state, state->encoder_control->cfg.jccr, cur_pu->joint_cb_cr);
|
||||
if (chroma) {
|
||||
if(state->encoder_control->cfg.dep_quant) {
|
||||
cabac_data_t temp_cabac;
|
||||
memcpy(&temp_cabac, &state->search_cabac, sizeof(cabac_data_t));
|
||||
state->search_cabac.update = 1;
|
||||
quantize_tr_residual(state, COLOR_U, &loc, cur_pu, lcu, early_skip, tree_type);
|
||||
cu_loc_t temp_chroma_loc;
|
||||
uvg_cu_loc_ctor(&temp_chroma_loc, (cu_loc->x >> 1) % LCU_WIDTH_C, (cu_loc->y >> 1) % LCU_WIDTH_C, cu_loc->width, cu_loc->height);
|
||||
uvg_get_coeff_cost(state, lcu->coeff.u, NULL, &temp_chroma_loc, COLOR_U, 0, (cur_pu->tr_skip & 2) >> 1, COEFF_ORDER_CU);
|
||||
quantize_tr_residual(state, COLOR_V, &loc, cur_pu, lcu, early_skip, tree_type);
|
||||
memcpy(&state->search_cabac, &temp_cabac, sizeof(cabac_data_t));
|
||||
}
|
||||
else {
|
||||
quantize_tr_residual(state, COLOR_U, &loc, cur_pu, lcu, early_skip, tree_type);
|
||||
quantize_tr_residual(state, COLOR_V, &loc, cur_pu, lcu, early_skip, tree_type);
|
||||
}
|
||||
}
|
||||
if (jccr && PU_IS_TU(cur_pu)) {
|
||||
quantize_tr_residual(state, COLOR_UV, &loc, cur_pu, lcu, early_skip, tree_type);
|
||||
}
|
||||
if(chroma && jccr && PU_IS_TU(cur_pu)) {
|
||||
assert( 0 && "Trying to quantize both jccr and regular at the same time.\n");
|
||||
}
|
||||
state->c_lambda = c_lambda;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -88,6 +88,10 @@ typedef struct {
|
|||
int best_u_index;
|
||||
int best_v_index;
|
||||
int best_combined_index;
|
||||
uint64_t u_distortion;
|
||||
uint64_t v_distortion;
|
||||
double u_bits;
|
||||
double v_bits;
|
||||
} uvg_chorma_ts_out_t;
|
||||
|
||||
void uvg_quantize_lcu_residual(
|
||||
|
|
Loading…
Reference in a new issue