This commit is contained in:
Joose Sainio 2022-11-16 12:27:28 +02:00 committed by Marko Viitanen
parent bbbd391b9e
commit f19084569d
16 changed files with 185 additions and 107 deletions

View file

@ -77,6 +77,8 @@ typedef struct
cabac_ctx_t mts_idx_model[4];
cabac_ctx_t split_flag_model[9]; //!< \brief split flag context models
cabac_ctx_t qt_split_flag_model[6]; //!< \brief qt split flag context models
cabac_ctx_t mtt_vertical_model[5];
cabac_ctx_t mtt_binary_model[4];
cabac_ctx_t intra_luma_mpm_flag_model; //!< \brief intra mode context models
cabac_ctx_t intra_subpart_model[2]; //!< \brief intra sub part context models
cabac_ctx_t chroma_pred_model;

View file

@ -222,6 +222,22 @@ int uvg_config_init(uvg_config *cfg)
cfg->cabac_debug_file_name = NULL;
cfg->dual_tree = 0;
cfg->min_qt_size[0] = 4;
cfg->min_qt_size[1] = 4;
cfg->min_qt_size[2] = 4;
cfg->max_btt_depth[0] = 1;
cfg->max_btt_depth[1] = 0;
cfg->max_btt_depth[2] = 0;
cfg->max_tt_size[0] = 64;
cfg->max_bt_size[0] = 64;
cfg->max_tt_size[1] = 64;
cfg->max_bt_size[1] = 64;
cfg->max_tt_size[2] = 64;
cfg->max_bt_size[2] = 64;
cfg->intra_rough_search_levels = 2;
cfg->ibc = 0;

View file

@ -50,6 +50,21 @@ static const uint8_t INIT_QT_SPLIT_FLAG[4][6] = {
{ 0, 8, 8, 12, 12, 8, },
};
// Initial CABAC context values for the five mtt_vertical_model contexts.
// The first index is the slice index passed to uvg_init_contexts(); row 3 is
// never selected by slice but is handed to uvg_ctx_init() as a separate
// per-context argument (presumably the adaptation rate -- confirm against
// uvg_ctx_init).
static const uint8_t INIT_VERTICAL_SPLIT_FLAG[4][5] = {
  [0] = { 43, 42, 37, 42, 44 },
  [1] = { 43, 35, 37, 34, 52 },
  [2] = { 43, 42, 29, 27, 44 },
  [3] = {  9,  8,  9,  8,  5 },
};
// Initial CABAC context values for the four mtt_binary_model contexts.
// The first index is the slice index passed to uvg_init_contexts(); row 3 is
// never selected by slice but is handed to uvg_ctx_init() as a separate
// per-context argument (presumably the adaptation rate -- confirm against
// uvg_ctx_init).
static const uint8_t INIT_BINARY_SPLIT_FLAG[4][4] = {
  [0] = { 28, 29, 28, 29 },
  [1] = { 43, 37, 21, 22 },
  [2] = { 36, 45, 36, 45 },
  [3] = { 12, 13, 12, 13 },
};
static const uint8_t INIT_SKIP_FLAG[4][3] = {
{ 57, 60, 46, },
{ 57, 59, 45, },
@ -574,6 +589,11 @@ void uvg_init_contexts(encoder_state_t *state, int8_t QP, int8_t slice)
uvg_ctx_init(&cabac->ctx.part_size_model[i], QP, INIT_PART_SIZE[slice][i], INIT_PART_SIZE[3][i]);
uvg_ctx_init(&cabac->ctx.bdpcm_mode[i], QP, BDPCM_MODE_INIT[slice][i], BDPCM_MODE_INIT[3][i]);
uvg_ctx_init(&cabac->ctx.qt_cbf_model_luma[i], QP, INIT_QT_CBF[slice][i], INIT_QT_CBF[3][i]);
uvg_ctx_init(&cabac->ctx.mtt_binary_model[i], QP, INIT_BINARY_SPLIT_FLAG[slice][i], INIT_BINARY_SPLIT_FLAG[3][i]);
}
for (i = 0; i < 5; i++) {
uvg_ctx_init(&cabac->ctx.mtt_vertical_model[i], QP, INIT_VERTICAL_SPLIT_FLAG[slice][i], INIT_VERTICAL_SPLIT_FLAG[3][i]);
}
for (i = 0; i < 6; i++) {

View file

@ -105,6 +105,7 @@ enum split_type {
// Describes where a CU sits in the split hierarchy. A zeroed instance is
// created at the LCU root and a new one is derived for each child on every
// split. Field order matters: callers use positional initializers.
typedef struct {
uint32_t split_tree; //!< \brief split modes packed 3 bits per level; level d is (split_tree >> (d * 3)) & 7
uint8_t current_depth; //!< \brief level of split_tree that applies to the current CU; +1 per split
uint8_t mtt_depth; //!< \brief number of splits on the path that were not QT_SPLIT
} split_tree_t;

View file

@ -1199,14 +1199,13 @@ void uvg_encode_intra_luma_coding_unit(
}
bool uvg_write_split_flag(
uint8_t uvg_write_split_flag(
const encoder_state_t* const state,
cabac_data_t* cabac,
const cu_info_t* left_cu,
const cu_info_t* above_cu,
const cu_loc_t* const cu_loc,
const uint32_t split_tree,
int depth,
split_tree_t split_tree,
enum uvg_tree_type tree_type,
double* bits_out)
{
@ -1217,15 +1216,15 @@ bool uvg_write_split_flag(
// Implicit split flag when on border
// Exception made in VVC with flag not being implicit if the BT can be used for
// horizontal or vertical split, then this flag tells if QT or BT is used
const int slice_type = state->frame->is_irap ? (tree_type == UVG_CHROMA_T ? 2 : 0) : 1;
bool no_split, allow_qt, bh_split, bv_split, th_split, tv_split;
no_split = allow_qt = bh_split = bv_split = th_split = tv_split = true;
if (depth > MAX_DEPTH) allow_qt = false;
// ToDo: update this when btt is actually used
bool allow_btt = true;// when mt_depth < MAX_BT_DEPTH
const int cu_width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width;
const int cu_height = tree_type != UVG_CHROMA_T ? cu_loc->height : cu_loc->chroma_height;
if (cu_width == state->encoder_control->cfg.min_qt_size[slice_type] || split_tree.mtt_depth > 0) allow_qt = false;
bool allow_btt = state->encoder_control->cfg.max_btt_depth[slice_type] > split_tree.mtt_depth && cu_width <= 64;
uint8_t implicit_split_mode = UVG_NO_SPLIT;
//bool implicit_split = border;
@ -1255,10 +1254,16 @@ bool uvg_write_split_flag(
if (!allow_btt) {
bh_split = bv_split = th_split = tv_split = false;
}
else {
bv_split &= cu_width <= state->encoder_control->cfg.max_bt_size[slice_type];
tv_split &= cu_width <= state->encoder_control->cfg.max_tt_size[slice_type];
bh_split &= cu_height <= state->encoder_control->cfg.max_bt_size[slice_type];
th_split &= cu_height <= state->encoder_control->cfg.max_tt_size[slice_type];
}
bool allow_split = allow_qt | bh_split | bv_split | th_split | tv_split;
int split_flag = (split_tree >> (depth * 3)) & 7;
int split_flag = (split_tree.split_tree >> (split_tree.current_depth * 3)) & 7;
split_flag = implicit_split_mode != UVG_NO_SPLIT ? implicit_split_mode : split_flag;
@ -1286,33 +1291,41 @@ bool uvg_write_split_flag(
cabac->cur_ctx = &(cabac->ctx.split_flag_model[split_model]);
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.split_flag_model[split_model]), split_flag != 0, bits, "split_flag");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.split_flag_model[split_model]), split_flag != NO_SPLIT, bits, "split_cu_flag");
}
if (implicit_split_mode == UVG_NO_SPLIT && allow_qt && (bh_split || bv_split || th_split || tv_split) && split_flag != NO_SPLIT) {
bool qt_split = split_flag == QT_SPLIT;
if (!(implicit_split_mode == UVG_NO_SPLIT) && (allow_qt && allow_btt)) {
split_model = (left_cu && GET_SPLITDATA(left_cu, depth)) + (above_cu && GET_SPLITDATA(above_cu, depth)) + (depth < 2 ? 0 : 3);
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_split_flag_model[split_model]), qt_split, bits, "QT_split_flag");
if((bv_split || bh_split || tv_split || th_split) && allow_qt) {
split_model = (left_cu && GET_SPLITDATA(left_cu, split_tree.current_depth)) + (above_cu && GET_SPLITDATA(above_cu, split_tree.current_depth)) + (split_tree.current_depth < 2 ? 0 : 3);
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_split_flag_model[split_model]), qt_split, bits, "qt_split_flag");
}
// Only signal split when it is not implicit, currently only Qt split supported
if (!(implicit_split_mode == UVG_NO_SPLIT) && !qt_split && (bh_split | bv_split | th_split | tv_split)) {
if (!qt_split) {
const bool is_vertical = split_flag == BT_VER_SPLIT || split_flag == TT_VER_SPLIT;
if((bh_split || th_split) && (bv_split || tv_split)) {
split_model = 0;
// TODO: These are incorrect
if (left_cu && (1 << left_cu->log2_height) > cu_height) {
split_model++;
if(bv_split + tv_split > bh_split + th_split) {
split_model = 4;
} else if(bv_split + tv_split < bh_split + th_split) {
split_model = 3;
} else {
const int d_a = cu_width / (above_cu ? (1 << above_cu->log2_width) : 1);
const int d_l = cu_height / (left_cu ? (1 << left_cu->log2_height) : 1);
if(d_a != d_l && above_cu && left_cu) {
split_model = d_a < d_l ? 1 : 2;
}
}
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.mtt_vertical_model[split_model]), is_vertical, bits, "mtt_vertical_flag");
}
// The binary/ternary choice is only signalled when both BT and TT are
// allowed in the direction that was chosen; otherwise it is inferred.
if ((bv_split && tv_split && is_vertical) || (bh_split && th_split && !is_vertical)) {
  // Context index is 2 * vertical + (mtt_depth <= 1). The comparison must be
  // parenthesised: `+` binds tighter than `<=`, so the unparenthesised form
  // evaluated (2 * is_vertical + mtt_depth) <= 1 and could only ever select
  // context 0 or 1 out of the four mtt_binary_model contexts.
  split_model = (2 * is_vertical) + (split_tree.mtt_depth <= 1);
  CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.mtt_binary_model[split_model]),
    split_flag == BT_VER_SPLIT || split_flag == BT_HOR_SPLIT, bits, "mtt_binary_flag");
}
}
}
if (above_cu && (1 << above_cu->log2_width) > cu_width) {
split_model++;
}
split_model += (depth > 2 ? 0 : 3);
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_split_flag_model[split_model]), qt_split, bits, "split_cu_mode");
}
if (bits_out) *bits_out += bits;
return split_flag;
}
@ -1322,7 +1335,7 @@ void uvg_encode_coding_tree(
lcu_coeff_t *coeff,
enum uvg_tree_type tree_type,
const cu_loc_t* const cu_loc,
const split_tree_t split_tree)
split_tree_t split_tree)
{
cabac_data_t * const cabac = &state->cabac;
const encoder_control_t * const ctrl = state->encoder_control;
@ -1332,7 +1345,6 @@ void uvg_encode_coding_tree(
const int cu_width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width;
const int cu_height = tree_type != UVG_CHROMA_T ? cu_loc->height : cu_loc->chroma_height;
const int half_cu = cu_width >> 1;
const int x = cu_loc->x;
const int y = cu_loc->y;
@ -1357,9 +1369,9 @@ void uvg_encode_coding_tree(
int32_t frame_height = tree_type != UVG_CHROMA_T ? ctrl->in.height : ctrl->in.height / 2;
// Check for slice border
bool border_x = frame_width < abs_x + cu_width;
bool border_y = frame_height < abs_y + cu_width;
bool border_split_x = frame_width >= abs_x + (LCU_WIDTH >> MAX_DEPTH) + half_cu;
bool border_split_y = frame_height >= abs_y + (LCU_WIDTH >> MAX_DEPTH) + half_cu;
bool border_y = frame_height < abs_y + cu_height;
bool border_split_x = frame_width >= abs_x + (LCU_WIDTH >> MAX_DEPTH) + cu_width / 2;
bool border_split_y = frame_height >= abs_y + (LCU_WIDTH >> MAX_DEPTH) + cu_height / 2;
bool border = border_x || border_y; /*!< are we in any border CU */
if (depth <= state->frame->max_qp_delta_depth) {
@ -1368,21 +1380,20 @@ void uvg_encode_coding_tree(
// When not in MAX_DEPTH, insert split flag and split the blocks if needed
if (cu_width + cu_height > 8) {
split_tree.split_tree = cur_cu->split_tree;
const int split_flag = uvg_write_split_flag(
state,
cabac,
left_cu,
above_cu,
cu_loc,
cur_cu->split_tree,
depth,
split_tree,
tree_type,
NULL);
if (split_flag || border) {
const int half_luma = cu_loc->width / 2;
split_tree_t new_split_tree = { cur_cu->split_tree, split_tree.current_depth + 1 };
const split_tree_t new_split_tree = { cur_cu->split_tree, split_tree.current_depth + 1, split_tree.mtt_depth + (split_flag != QT_SPLIT)};
cu_loc_t new_cu_loc[4];
const int splits = uvg_get_split_locs(cu_loc, split_flag, new_cu_loc);
@ -1650,7 +1661,8 @@ double uvg_mock_encode_coding_unit(
const cu_loc_t* const cu_loc,
lcu_t* lcu,
cu_info_t* cur_cu,
enum uvg_tree_type tree_type) {
enum uvg_tree_type tree_type,
const split_tree_t split_tree) {
double bits = 0;
const encoder_control_t* const ctrl = state->encoder_control;
@ -1692,8 +1704,7 @@ double uvg_mock_encode_coding_unit(
left_cu,
above_cu,
cu_loc,
cur_cu->split_tree,
depth,
split_tree,
tree_type,
&bits);
}

View file

@ -54,7 +54,7 @@ void uvg_encode_coding_tree(
lcu_coeff_t *coeff,
enum uvg_tree_type tree_type,
const cu_loc_t* const cu_loc,
const split_tree_t split_tree);
split_tree_t split_tree);
void uvg_encode_ts_residual(encoder_state_t* const state,
cabac_data_t* const cabac,
@ -77,7 +77,8 @@ double uvg_mock_encode_coding_unit(
const cu_loc_t* const cu_loc,
lcu_t* lcu,
cu_info_t* cur_cu,
enum uvg_tree_type tree_type);
enum uvg_tree_type tree_type,
const split_tree_t split_tree);
int uvg_encode_inter_prediction_unit(
encoder_state_t* const state,
@ -96,14 +97,13 @@ void uvg_encode_intra_luma_coding_unit(
double* bits_out);
bool uvg_write_split_flag(
uint8_t uvg_write_split_flag(
const encoder_state_t* const state,
cabac_data_t* cabac,
const cu_info_t* left_cu,
const cu_info_t* above_cu,
const cu_loc_t* const cu_loc,
const uint32_t split_tree,
int depth,
split_tree_t,
enum uvg_tree_type tree_type,
double* bits_out);

View file

@ -529,10 +529,10 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream,
// if(!no_partition_constraints_override_constraint_flag)
WRITE_U(stream, 0, 1, "partition_constraints_override_enabled_flag");
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[0]] - MIN_SIZE, "sps_log2_diff_min_qt_min_cb_intra_slice_luma");
WRITE_UE(stream, encoder->cfg.max_intra_slice_btt_depth, "sps_max_mtt_hierarchy_depth_intra_slice_luma");
if (encoder->cfg.max_intra_slice_btt_depth) {
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[0]] - uvg_g_convert_to_log2[encoder->cfg.max_bt_size[0]], "sps_log2_diff_max_bt_min_qt_intra_slice_luma");
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[0]] - uvg_g_convert_to_log2[encoder->cfg.max_tt_size[0]], "sps_log2_diff_max_tt_min_qt_intra_slice_luma");
WRITE_UE(stream, encoder->cfg.max_btt_depth[0], "sps_max_mtt_hierarchy_depth_intra_slice_luma");
if (encoder->cfg.max_btt_depth[0]) {
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.max_bt_size[0]] - uvg_g_convert_to_log2[encoder->cfg.min_qt_size[0]], "sps_log2_diff_max_bt_min_qt_intra_slice_luma");
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.max_tt_size[0]] - uvg_g_convert_to_log2[encoder->cfg.min_qt_size[0]], "sps_log2_diff_max_tt_min_qt_intra_slice_luma");
}
if (encoder->chroma_format != UVG_CSP_400)
@ -541,17 +541,17 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream,
}
if (encoder->cfg.dual_tree) {
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[2]] - MIN_SIZE, "sps_log2_diff_min_qt_min_cb_intra_slice_chroma");
WRITE_UE(stream, encoder->cfg.max_intra_slice_btt_depth_chroma, "sps_max_mtt_hierarchy_depth_intra_slice_chroma");
if (encoder->cfg.max_intra_slice_btt_depth_chroma) {
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[2]] - uvg_g_convert_to_log2[encoder->cfg.max_bt_size[2]], "sps_log2_diff_max_bt_min_qt_intra_slice_chroma");
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[2]] - uvg_g_convert_to_log2[encoder->cfg.max_tt_size[2]], "sps_log2_diff_max_tt_min_qt_intra_slice_chroma");
WRITE_UE(stream, encoder->cfg.max_btt_depth[2], "sps_max_mtt_hierarchy_depth_intra_slice_chroma");
if (encoder->cfg.max_btt_depth[2]) {
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.max_bt_size[2]] - uvg_g_convert_to_log2[encoder->cfg.min_qt_size[2]], "sps_log2_diff_max_bt_min_qt_intra_slice_chroma");
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.max_tt_size[2]] - uvg_g_convert_to_log2[encoder->cfg.min_qt_size[2]], "sps_log2_diff_max_tt_min_qt_intra_slice_chroma");
}
}
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[1]] - MIN_SIZE, "sps_log2_diff_min_qt_min_cb_inter_slice");
WRITE_UE(stream, encoder->cfg.max_inter_slice_btt_depth, "sps_max_mtt_hierarchy_depth_inter_slice");
if (encoder->cfg.max_inter_slice_btt_depth != 0) {
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[1]] - uvg_g_convert_to_log2[encoder->cfg.max_bt_size[1]], "sps_log2_diff_max_bt_min_qt_inter_tile_group");
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[1]] - uvg_g_convert_to_log2[encoder->cfg.max_tt_size[1]], "sps_log2_diff_max_tt_min_qt_inter_tile_group");
WRITE_UE(stream, encoder->cfg.max_btt_depth[1], "sps_max_mtt_hierarchy_depth_inter_slice");
if (encoder->cfg.max_btt_depth[1] != 0) {
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.max_bt_size[1]] - uvg_g_convert_to_log2[encoder->cfg.min_qt_size[1]], "sps_log2_diff_max_bt_min_qt_inter_tile_group");
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.max_tt_size[1]] - uvg_g_convert_to_log2[encoder->cfg.min_qt_size[1]], "sps_log2_diff_max_tt_min_qt_inter_tile_group");
}
if (LCU_WIDTH > 32)

View file

@ -883,7 +883,7 @@ static void encoder_state_worker_encode_lcu_bitstream(void * opaque)
//Encode coding tree
cu_loc_t start;
uvg_cu_loc_ctor(&start, lcu->position.x * LCU_WIDTH, lcu->position.y * LCU_WIDTH, LCU_WIDTH, LCU_WIDTH);
split_tree_t split_tree = { 0, 0 };
split_tree_t split_tree = { 0, 0, 0 };
uvg_encode_coding_tree(state, lcu->coeff, tree_type, &start, split_tree);

View file

@ -712,12 +712,13 @@ void uvg_rdoq_sign_hiding(
const int32_t last_pos,
const coeff_t *const coeffs,
coeff_t *const quant_coeffs,
const int8_t color)
const int8_t color,
const bool need_sqrt_adjust)
{
const encoder_control_t * const ctrl = state->encoder_control;
const double lambda = color ? state->c_lambda : state->lambda;
int inv_quant = uvg_g_inv_quant_scales[qp_scaled % 6];
int inv_quant = uvg_g_inv_quant_scales[need_sqrt_adjust][qp_scaled % 6];
// This somehow scales quant_delta into fractional bits. Instead of the bits
// being multiplied by lambda, the residual is divided by it, or something
// like that.
@ -1203,7 +1204,7 @@ int uvg_ts_rdoq(encoder_state_t* const state, coeff_t* src_coeff, coeff_t* dest_
const bool needs_sqrt2_scale = false; // from VTM: should always be false - transform-skipped blocks don't require sqrt(2) compensation.
const int q_bits = QUANT_SHIFT + qp_scaled / 6 + (needs_sqrt2_scale ? -1 : 0); // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
const int32_t quant_coeff = uvg_g_quant_scales[qp_scaled % 6];
const int32_t quant_coeff = uvg_g_quant_scales[needs_sqrt2_scale][qp_scaled % 6];
const double error_scale = (double)(1 << CTX_FRAC_BITS) / quant_coeff / quant_coeff;
@ -1416,8 +1417,10 @@ void uvg_rdoq(
cabac_data_t * const cabac = &state->cabac;
const uint32_t log2_block_width = uvg_g_convert_to_log2[width];
const uint32_t log2_block_height = uvg_g_convert_to_log2[height];
bool needs_block_size_trafo_scale = !false && ((log2_block_width + log2_block_height) % 2 == 1);
needs_block_size_trafo_scale |= 1; // Non log2 block size
int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_block_width + log2_block_height) >> 1); // Represents scaling through forward transform
int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_block_width + log2_block_height) >> 1) + needs_block_size_trafo_scale; // Represents scaling through forward transform
uint16_t go_rice_param = 0;
uint32_t reg_bins = (width * height * 28) >> 4;
@ -1789,7 +1792,7 @@ void uvg_rdoq(
}
if (encoder->cfg.signhide_enable && abs_sum >= 2) {
uvg_rdoq_sign_hiding(state, qp_scaled, scan, &sh_rates, best_last_idx_p1, coef, dest_coeff, color);
uvg_rdoq_sign_hiding(state, qp_scaled, scan, &sh_rates, best_last_idx_p1, coef, dest_coeff, color, needs_block_size_trafo_scale);
}
}

View file

@ -88,8 +88,14 @@ static const int32_t g_quant_inter_default_8x8[64] =
24, 25, 28, 33, 41, 54, 71, 91
};
const int16_t uvg_g_quant_scales[6] = {26214, 23302, 20560, 18396, 16384, 14564};
const int16_t uvg_g_inv_quant_scales[6] = {40, 45, 51, 57, 64, 72};
// Forward quantizer scale factors, indexed as [sqrt2_adjust][qp % 6].
// Row 0 is the regular table. Row 1 is the same sequence advanced by three
// QP steps (row 0 entries 3..5 followed by half of entries 0..2) and is
// selected by callers that pass a "needs sqrt(2) scale" flag as first index.
const int16_t uvg_g_quant_scales[2][6] = {
  [0] = { 26214, 23302, 20560, 18396, 16384, 14564 },
  [1] = { 18396, 16384, 14564, 13107, 11651, 10280 },
};
// Inverse quantizer scale factors, indexed as [sqrt2_adjust][qp % 6].
// Row 0 is the regular table. Row 1 is the same sequence advanced by three
// QP steps (row 0 entries 3..5 followed by double of entries 0..2) and is
// selected by callers that pass a "needs sqrt(2) scale" flag as first index.
const int16_t uvg_g_inv_quant_scales[2][6] = {
  [0] = { 40, 45, 51, 57, 64,  72 },
  [1] = { 57, 64, 72, 80, 90, 102 },
};
/**
@ -406,11 +412,11 @@ void uvg_scalinglist_set(scaling_list_t* const scaling_list, const int32_t* cons
int32_t* quantcoeff = (int32_t*)scaling_list->quant_coeff[size_id_x][size_id_y][listId][qp];
int32_t* dequantcoeff = (int32_t*)scaling_list->de_quant_coeff[size_id_x][size_id_y][listId][qp];
// Encoder list
uvg_scalinglist_process_enc(coeff, quantcoeff, uvg_g_quant_scales[qp] << 4, height, width, ratio,
// Encoder list TODO: the sqrt adjusted lists
uvg_scalinglist_process_enc(coeff, quantcoeff, uvg_g_quant_scales[0][qp] << 4, height, width, ratio,
MIN(8, g_scaling_list_size_x[size_id_x]), dc, !scaling_list->enable);
// Decoder list
scalinglist_process_dec(coeff, dequantcoeff, uvg_g_inv_quant_scales[qp], height, width, ratio,
scalinglist_process_dec(coeff, dequantcoeff, uvg_g_inv_quant_scales[0][qp], height, width, ratio,
MIN(8, g_scaling_list_size_x[size_id_x]), dc, !scaling_list->enable);

View file

@ -1294,7 +1294,8 @@ static double search_cu(
tree_type != UVG_CHROMA_T ? cu_loc : &chroma_loc,
lcu,
cur_cu,
tree_type);
tree_type,
split_tree);
cost = bits * state->lambda;
@ -1335,7 +1336,11 @@ static double search_cu(
// Recursively split all the way to max search depth.
if (can_split_cu) {
const int split_type = depth == 0 ? QT_SPLIT : BT_HOR_SPLIT;
const split_tree_t new_split = { split_tree.split_tree | split_type << (split_tree.current_depth * 3), split_tree.current_depth + 1 };
const split_tree_t new_split = {
split_tree.split_tree | split_type << (split_tree.current_depth * 3),
split_tree.current_depth + 1,
split_tree.mtt_depth + (split_type != QT_SPLIT),
};
double split_cost = 0.0;
int cbf = cbf_is_set_any(cur_cu->cbf);
@ -1374,8 +1379,7 @@ static double search_cu(
left_cu,
above_cu,
tree_type != UVG_CHROMA_T ? cu_loc : &chroma_loc,
new_split.split_tree,
depth,
split_tree,
tree_type,
&split_bits);
}
@ -1394,7 +1398,7 @@ static double search_cu(
const int splits = uvg_get_split_locs(cu_loc, split_type, new_cu_loc);
for (int split = 0; split < splits; ++split) {
split_cost += search_cu(state, &new_cu_loc[split], &split_lcu, tree_type, new_split);
if (split_cost < cost) {
if (split_cost > cost) {
break;
}
}
@ -1426,7 +1430,7 @@ static double search_cu(
double bits = 0;
uvg_write_split_flag(state, &state->search_cabac,
x > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x) - 1, SUB_SCU(y)) : NULL,
y > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y) - 1) : NULL, cu_loc, cur_cu->split_tree, depth, tree_type, &bits);
y > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y) - 1) : NULL, cu_loc, split_tree, tree_type, &bits);
cur_cu->intra = cu_d1->intra;
cur_cu->type = CU_INTRA;
@ -1715,7 +1719,7 @@ void uvg_search_lcu(encoder_state_t * const state, const int x, const int y, con
cu_loc_t start;
uvg_cu_loc_ctor(&start, x, y, LCU_WIDTH, LCU_WIDTH);
split_tree_t split_tree = { 0, 0 };
split_tree_t split_tree = { 0, 0, 0 };
// Start search from depth 0.
double cost = search_cu(
state,

View file

@ -2125,8 +2125,6 @@ void uvg_cu_cost_inter_rd2(
double* inter_bitcost,
const cu_loc_t* const cu_loc){
const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
const int x_px = SUB_SCU(cu_loc->x);
const int y_px = SUB_SCU(cu_loc->y);
const int width = cu_loc->width;
@ -2160,12 +2158,24 @@ void uvg_cu_cost_inter_rd2(
double no_cbf_bits;
double bits = 0;
const int skip_context = uvg_get_skip_context(cu_loc->x, cu_loc->y, lcu, NULL, NULL);
int8_t depth = 0;
int8_t mtt_depth = 0;
uint32_t splits = cur_cu->split_tree;
while (splits & 7) {
if ((splits & 7) != QT_SPLIT) {
mtt_depth++;
}
depth++;
splits >>= 3;
}
const split_tree_t splitt_tree = { cur_cu->split_tree, depth, mtt_depth };
if (cur_cu->merged) {
no_cbf_bits = CTX_ENTROPY_FBITS(&state->cabac.ctx.cu_skip_flag_model[skip_context], 1) + *inter_bitcost;
bits += uvg_mock_encode_coding_unit(state, cabac, cu_loc, lcu, cur_cu, UVG_BOTH_T);
bits += uvg_mock_encode_coding_unit(state, cabac, cu_loc, lcu, cur_cu, UVG_BOTH_T, splitt_tree);
}
else {
no_cbf_bits = uvg_mock_encode_coding_unit(state, cabac, cu_loc, lcu, cur_cu, UVG_BOTH_T);
no_cbf_bits = uvg_mock_encode_coding_unit(state, cabac, cu_loc, lcu, cur_cu, UVG_BOTH_T, splitt_tree);
bits += no_cbf_bits - CTX_ENTROPY_FBITS(&cabac->ctx.cu_qt_root_cbf_model, 0) + CTX_ENTROPY_FBITS(&cabac->ctx.cu_qt_root_cbf_model, 1);
}
double no_cbf_cost = ssd + no_cbf_bits * state->lambda;

View file

@ -386,11 +386,13 @@ void uvg_quant_avx2(const encoder_state_t * const state, const coeff_t * __restr
int32_t qp_scaled = uvg_get_scaled_qp(color, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]);
qp_scaled = transform_skip ? MAX(qp_scaled, 4 + 6 * MIN_QP_PRIME_TS) : qp_scaled;
bool needs_block_size_trafo_scale = !transform_skip && ((log2_tr_height + log2_tr_width) % 2 == 1);
needs_block_size_trafo_scale |= 1; // Non log2 block size
const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)color;
const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled % 6];
const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_width + log2_tr_height) >> 1); //!< Represents scaling through forward transform
const int64_t q_bits = QUANT_SHIFT + qp_scaled / 6 + (transform_skip ? 0 : transform_shift);
const int64_t q_bits = QUANT_SHIFT + qp_scaled / 6 + (transform_skip ? 0 : transform_shift - needs_block_size_trafo_scale);
const int32_t add = ((state->frame->slicetype == UVG_SLICE_I) ? 171 : 85) << (q_bits - 9);
const int32_t q_bits8 = q_bits - 8;
@ -792,13 +794,15 @@ void uvg_dequant_avx2(const encoder_state_t * const state, coeff_t *q_coef, coef
int32_t n;
const uint32_t log2_tr_width = uvg_g_convert_to_log2[width];
const uint32_t log2_tr_height = uvg_g_convert_to_log2[height];
int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_width + log2_tr_height) >> 1); // Represents scaling through forward transform
int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_width + log2_tr_height) >> 1);
bool needs_block_size_trafo_scale = !transform_skip && ((log2_tr_height + log2_tr_width) % 2 == 1);
needs_block_size_trafo_scale |= 1; // Non log2 block size// Represents scaling through forward transform
int32_t qp_scaled = uvg_get_scaled_qp(color, state->qp, (encoder->bitdepth-8)*6, encoder->qp_map[0]);
qp_scaled = transform_skip ? MAX(qp_scaled, 4 + 6 * MIN_QP_PRIME_TS) : qp_scaled;
shift = 20 - QUANT_SHIFT - (transform_skip ? 0 : transform_shift);
shift = 20 - QUANT_SHIFT - (transform_skip ? 0 : transform_shift - needs_block_size_trafo_scale);
if (encoder->scaling_list.enable)
{
@ -822,7 +826,7 @@ void uvg_dequant_avx2(const encoder_state_t * const state, coeff_t *q_coef, coef
}
}
} else {
int32_t scale = uvg_g_inv_quant_scales[qp_scaled%6] << (qp_scaled/6);
int32_t scale = uvg_g_inv_quant_scales[needs_block_size_trafo_scale][qp_scaled%6] << (qp_scaled/6);
add = 1 << (shift-1);
__m256i v_scale = _mm256_set1_epi32(scale);

View file

@ -68,11 +68,12 @@ void uvg_quant_generic(
int32_t qp_scaled = uvg_get_scaled_qp(color, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]);
qp_scaled = transform_skip ? MAX(qp_scaled, 4 + 6 * MIN_QP_PRIME_TS) : qp_scaled;
bool needs_block_size_trafo_scale = !transform_skip && ((log2_tr_height + log2_tr_width) % 2 == 1);
needs_block_size_trafo_scale |= 1; // Non log2 block size
const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)color;
const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled % 6];
const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_height + log2_tr_width) >> 1); //!< Represents scaling through forward transform
const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_height + log2_tr_width) >> 1) - needs_block_size_trafo_scale; //!< Represents scaling through forward transform
const int64_t q_bits = QUANT_SHIFT + qp_scaled / 6 + (transform_skip ? 0 : transform_shift );
const int32_t add = ((state->frame->slicetype == UVG_SLICE_I) ? 171 : 85) << (q_bits - 9);
const int32_t q_bits8 = q_bits - 8;
@ -592,11 +593,13 @@ void uvg_dequant_generic(const encoder_state_t * const state, coeff_t *q_coef, c
const uint32_t log2_tr_height = uvg_g_convert_to_log2[height];
int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_width + log2_tr_height) >> 1); // Represents scaling through forward transform
bool needs_block_size_trafo_scale = !transform_skip && ((log2_tr_height + log2_tr_width) % 2 == 1);
needs_block_size_trafo_scale |= 1; // Non log2 block size
int32_t qp_scaled = uvg_get_scaled_qp(color, state->qp, (encoder->bitdepth-8)*6, encoder->qp_map[0]);
qp_scaled = transform_skip ? MAX(qp_scaled, 4 + 6 * MIN_QP_PRIME_TS) : qp_scaled;
shift = 20 - QUANT_SHIFT - (transform_skip ? 0 : transform_shift);
shift = 20 - QUANT_SHIFT - (transform_skip ? 0 : transform_shift - needs_block_size_trafo_scale);
if (encoder->scaling_list.enable)
{
@ -620,7 +623,7 @@ void uvg_dequant_generic(const encoder_state_t * const state, coeff_t *q_coef, c
}
}
} else {
int32_t scale = uvg_g_inv_quant_scales[qp_scaled%6] << (qp_scaled/6);
int32_t scale = uvg_g_inv_quant_scales[needs_block_size_trafo_scale][qp_scaled%6] << (qp_scaled/6);
add = 1 << (shift-1);
for (n = 0; n < width * height; n++) {

View file

@ -44,8 +44,8 @@
#include "global.h" // IWYU pragma: keep
extern const uint8_t uvg_g_chroma_scale[58];
extern const int16_t uvg_g_inv_quant_scales[6];
extern const int16_t uvg_g_quant_scales[6];
extern const int16_t uvg_g_inv_quant_scales[2][6];
extern const int16_t uvg_g_quant_scales[2][6];
#define COEFF_ORDER_LINEAR 0
#define COEFF_ORDER_CU 1

View file

@ -543,13 +543,11 @@ typedef struct uvg_config
uint8_t dual_tree;
uint8_t min_qt_size[3];
uint8_t min_qt_size[3]; /* intra, inter, dual tree chroma*/
uint8_t max_bt_size[3];
uint8_t max_tt_size[3];
uint8_t max_intra_slice_btt_depth;
uint8_t max_intra_slice_btt_depth_chroma;
uint8_t max_inter_slice_btt_depth;
uint8_t max_btt_depth[3];
uint8_t intra_rough_search_levels;