diff --git a/src/cabac.h b/src/cabac.h index be249ba2..f38030a9 100644 --- a/src/cabac.h +++ b/src/cabac.h @@ -77,6 +77,8 @@ typedef struct cabac_ctx_t mts_idx_model[4]; cabac_ctx_t split_flag_model[9]; //!< \brief split flag context models cabac_ctx_t qt_split_flag_model[6]; //!< \brief qt split flag context models + cabac_ctx_t mtt_vertical_model[5]; + cabac_ctx_t mtt_binary_model[4]; cabac_ctx_t intra_luma_mpm_flag_model; //!< \brief intra mode context models cabac_ctx_t intra_subpart_model[2]; //!< \brief intra sub part context models cabac_ctx_t chroma_pred_model; diff --git a/src/cfg.c b/src/cfg.c index f2073da5..39643e9f 100644 --- a/src/cfg.c +++ b/src/cfg.c @@ -222,6 +222,22 @@ int uvg_config_init(uvg_config *cfg) cfg->cabac_debug_file_name = NULL; cfg->dual_tree = 0; + + cfg->min_qt_size[0] = 4; + cfg->min_qt_size[1] = 4; + cfg->min_qt_size[2] = 4; + + cfg->max_btt_depth[0] = 1; + cfg->max_btt_depth[1] = 0; + cfg->max_btt_depth[2] = 0; + + cfg->max_tt_size[0] = 64; + cfg->max_bt_size[0] = 64; + cfg->max_tt_size[1] = 64; + cfg->max_bt_size[1] = 64; + cfg->max_tt_size[2] = 64; + cfg->max_bt_size[2] = 64; + cfg->intra_rough_search_levels = 2; cfg->ibc = 0; diff --git a/src/context.c b/src/context.c index 708b9da4..30861849 100644 --- a/src/context.c +++ b/src/context.c @@ -50,6 +50,21 @@ static const uint8_t INIT_QT_SPLIT_FLAG[4][6] = { { 0, 8, 8, 12, 12, 8, }, }; + +static const uint8_t INIT_VERTICAL_SPLIT_FLAG[4][5] = { + { 43, 42, 37, 42, 44, }, + { 43, 35, 37, 34, 52, }, + { 43, 42, 29, 27, 44, }, + { 9, 8, 9, 8, 5, }, +}; + +static const uint8_t INIT_BINARY_SPLIT_FLAG[4][4] = { + { 28, 29, 28, 29, }, + { 43, 37, 21, 22, }, + { 36, 45, 36, 45, }, + { 12, 13, 12, 13, }, + }; + static const uint8_t INIT_SKIP_FLAG[4][3] = { { 57, 60, 46, }, { 57, 59, 45, }, @@ -574,6 +589,11 @@ void uvg_init_contexts(encoder_state_t *state, int8_t QP, int8_t slice) uvg_ctx_init(&cabac->ctx.part_size_model[i], QP, INIT_PART_SIZE[slice][i], INIT_PART_SIZE[3][i]); 
uvg_ctx_init(&cabac->ctx.bdpcm_mode[i], QP, BDPCM_MODE_INIT[slice][i], BDPCM_MODE_INIT[3][i]); uvg_ctx_init(&cabac->ctx.qt_cbf_model_luma[i], QP, INIT_QT_CBF[slice][i], INIT_QT_CBF[3][i]); + uvg_ctx_init(&cabac->ctx.mtt_binary_model[i], QP, INIT_BINARY_SPLIT_FLAG[slice][i], INIT_BINARY_SPLIT_FLAG[3][i]); + } + + for (i = 0; i < 5; i++) { + uvg_ctx_init(&cabac->ctx.mtt_vertical_model[i], QP, INIT_VERTICAL_SPLIT_FLAG[slice][i], INIT_VERTICAL_SPLIT_FLAG[3][i]); } for (i = 0; i < 6; i++) { diff --git a/src/cu.h b/src/cu.h index 7f1bd0e3..cc2f6925 100644 --- a/src/cu.h +++ b/src/cu.h @@ -105,6 +105,7 @@ enum split_type { typedef struct { uint32_t split_tree; uint8_t current_depth; + uint8_t mtt_depth; } split_tree_t; diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index 4468390c..ac8d206e 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -1199,14 +1199,13 @@ void uvg_encode_intra_luma_coding_unit( } -bool uvg_write_split_flag( - const encoder_state_t * const state, +uint8_t uvg_write_split_flag( + const encoder_state_t* const state, cabac_data_t* cabac, - const cu_info_t * left_cu, - const cu_info_t * above_cu, + const cu_info_t* left_cu, + const cu_info_t* above_cu, const cu_loc_t* const cu_loc, - const uint32_t split_tree, - int depth, + split_tree_t split_tree, enum uvg_tree_type tree_type, double* bits_out) { @@ -1217,15 +1216,15 @@ bool uvg_write_split_flag( // Implisit split flag when on border // Exception made in VVC with flag not being implicit if the BT can be used for // horizontal or vertical split, then this flag tells if QT or BT is used + const int slice_type = state->frame->is_irap ? (tree_type == UVG_CHROMA_T ? 
2 : 0) : 1; bool no_split, allow_qt, bh_split, bv_split, th_split, tv_split; no_split = allow_qt = bh_split = bv_split = th_split = tv_split = true; - if (depth > MAX_DEPTH) allow_qt = false; - // ToDo: update this when btt is actually used - bool allow_btt = true;// when mt_depth < MAX_BT_DEPTH const int cu_width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width; const int cu_height = tree_type != UVG_CHROMA_T ? cu_loc->height : cu_loc->chroma_height; + if (cu_width == state->encoder_control->cfg.min_qt_size[slice_type] || split_tree.mtt_depth > 0) allow_qt = false; + bool allow_btt = state->encoder_control->cfg.max_btt_depth[slice_type] > split_tree.mtt_depth && cu_width <= 64; uint8_t implicit_split_mode = UVG_NO_SPLIT; //bool implicit_split = border; @@ -1255,10 +1254,16 @@ bool uvg_write_split_flag( if (!allow_btt) { bh_split = bv_split = th_split = tv_split = false; } + else { + bv_split &= cu_width <= state->encoder_control->cfg.max_bt_size[slice_type]; + tv_split &= cu_width <= state->encoder_control->cfg.max_tt_size[slice_type]; + bh_split &= cu_height <= state->encoder_control->cfg.max_bt_size[slice_type]; + th_split &= cu_height <= state->encoder_control->cfg.max_tt_size[slice_type]; + } bool allow_split = allow_qt | bh_split | bv_split | th_split | tv_split; - int split_flag = (split_tree >> (depth * 3)) & 7; + int split_flag = (split_tree.split_tree >> (split_tree.current_depth * 3)) & 7; split_flag = implicit_split_mode != UVG_NO_SPLIT ? 
implicit_split_mode : split_flag; @@ -1286,33 +1291,41 @@ bool uvg_write_split_flag( cabac->cur_ctx = &(cabac->ctx.split_flag_model[split_model]); - CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.split_flag_model[split_model]), split_flag != 0, bits, "split_flag"); + CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.split_flag_model[split_model]), split_flag != NO_SPLIT, bits, "split_cu_flag"); } - bool qt_split = split_flag == QT_SPLIT; - if (!(implicit_split_mode == UVG_NO_SPLIT) && (allow_qt && allow_btt)) { - split_model = (left_cu && GET_SPLITDATA(left_cu, depth)) + (above_cu && GET_SPLITDATA(above_cu, depth)) + (depth < 2 ? 0 : 3); - CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_split_flag_model[split_model]), qt_split, bits, "QT_split_flag"); - } - - // Only signal split when it is not implicit, currently only Qt split supported - if (!(implicit_split_mode == UVG_NO_SPLIT) && !qt_split && (bh_split | bv_split | th_split | tv_split)) { - - split_model = 0; - - // TODO: These are incorrect - if (left_cu && (1 << left_cu->log2_height) > cu_height) { - split_model++; + if (implicit_split_mode == UVG_NO_SPLIT && allow_qt && (bh_split || bv_split || th_split || tv_split) && split_flag != NO_SPLIT) { + bool qt_split = split_flag == QT_SPLIT; + if((bv_split || bh_split || tv_split || th_split) && allow_qt) { + split_model = (left_cu && GET_SPLITDATA(left_cu, split_tree.current_depth)) + (above_cu && GET_SPLITDATA(above_cu, split_tree.current_depth)) + (split_tree.current_depth < 2 ? 
0 : 3); + CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_split_flag_model[split_model]), qt_split, bits, "qt_split_flag"); } - - if (above_cu && (1 << above_cu->log2_width) > cu_width) { - split_model++; + if (!qt_split) { + const bool is_vertical = split_flag == BT_VER_SPLIT || split_flag == TT_VER_SPLIT; + if((bh_split || th_split) && (bv_split || tv_split)) { + split_model = 0; + if(bv_split + tv_split > bh_split + th_split) { + split_model = 4; + } else if(bv_split + tv_split < bh_split + th_split) { + split_model = 3; + } else { + const int d_a = cu_width / (above_cu ? (1 << above_cu->log2_width) : 1); + const int d_l = cu_height / (left_cu ? (1 << left_cu->log2_height) : 1); + if(d_a != d_l && above_cu && left_cu) { + split_model = d_a < d_l ? 1 : 2; + } + } + CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.mtt_vertical_model[split_model]), is_vertical, bits, "mtt_vertical_flag"); + } + if ((bv_split && tv_split && is_vertical) || (bh_split && th_split && !is_vertical)) { + split_model = 2 * is_vertical + (split_tree.mtt_depth <= 1); /* context index 0..3: +2 for a vertical split, +1 while mtt_depth <= 1; the comparison must be parenthesised because '+' binds tighter than '<=' */ + CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.mtt_binary_model[split_model]), + split_flag == BT_VER_SPLIT || split_flag == BT_HOR_SPLIT, bits, "mtt_binary_flag"); + } } - - split_model += (depth > 2 ? 0 : 3); - CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_split_flag_model[split_model]), qt_split, bits, "split_cu_mode"); } + if (bits_out) *bits_out += bits; return split_flag; } @@ -1322,7 +1335,7 @@ void uvg_encode_coding_tree( lcu_coeff_t *coeff, enum uvg_tree_type tree_type, const cu_loc_t* const cu_loc, - const split_tree_t split_tree) + split_tree_t split_tree) { cabac_data_t * const cabac = &state->cabac; const encoder_control_t * const ctrl = state->encoder_control; @@ -1332,8 +1345,7 @@ void uvg_encode_coding_tree( const int cu_width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width; const int cu_height = tree_type != UVG_CHROMA_T ? 
cu_loc->height : cu_loc->chroma_height; - const int half_cu = cu_width >> 1; - + const int x = cu_loc->x; const int y = cu_loc->y; @@ -1357,9 +1369,9 @@ void uvg_encode_coding_tree( int32_t frame_height = tree_type != UVG_CHROMA_T ? ctrl->in.height : ctrl->in.height / 2; // Check for slice border bool border_x = frame_width < abs_x + cu_width; - bool border_y = frame_height < abs_y + cu_width; - bool border_split_x = frame_width >= abs_x + (LCU_WIDTH >> MAX_DEPTH) + half_cu; - bool border_split_y = frame_height >= abs_y + (LCU_WIDTH >> MAX_DEPTH) + half_cu; + bool border_y = frame_height < abs_y + cu_height; + bool border_split_x = frame_width >= abs_x + (LCU_WIDTH >> MAX_DEPTH) + cu_width / 2; + bool border_split_y = frame_height >= abs_y + (LCU_WIDTH >> MAX_DEPTH) + cu_height / 2; bool border = border_x || border_y; /*!< are we in any border CU */ if (depth <= state->frame->max_qp_delta_depth) { @@ -1368,21 +1380,20 @@ void uvg_encode_coding_tree( // When not in MAX_DEPTH, insert split flag and split the blocks if needed if (cu_width + cu_height > 8) { - + split_tree.split_tree = cur_cu->split_tree; const int split_flag = uvg_write_split_flag( state, cabac, left_cu, above_cu, cu_loc, - cur_cu->split_tree, - depth, + split_tree, tree_type, NULL); if (split_flag || border) { const int half_luma = cu_loc->width / 2; - split_tree_t new_split_tree = { cur_cu->split_tree, split_tree.current_depth + 1 }; + const split_tree_t new_split_tree = { cur_cu->split_tree, split_tree.current_depth + 1, split_tree.mtt_depth + (split_flag != QT_SPLIT)}; cu_loc_t new_cu_loc[4]; const int splits = uvg_get_split_locs(cu_loc, split_flag, new_cu_loc); @@ -1650,7 +1661,8 @@ double uvg_mock_encode_coding_unit( const cu_loc_t* const cu_loc, lcu_t* lcu, cu_info_t* cur_cu, - enum uvg_tree_type tree_type) { + enum uvg_tree_type tree_type, + const split_tree_t split_tree) { double bits = 0; const encoder_control_t* const ctrl = state->encoder_control; @@ -1692,8 +1704,7 @@ double 
uvg_mock_encode_coding_unit( left_cu, above_cu, cu_loc, - cur_cu->split_tree, - depth, + split_tree, tree_type, &bits); } diff --git a/src/encode_coding_tree.h b/src/encode_coding_tree.h index 86605e4d..357e059a 100644 --- a/src/encode_coding_tree.h +++ b/src/encode_coding_tree.h @@ -54,7 +54,7 @@ void uvg_encode_coding_tree( lcu_coeff_t *coeff, enum uvg_tree_type tree_type, const cu_loc_t* const cu_loc, - const split_tree_t split_tree); + split_tree_t split_tree); void uvg_encode_ts_residual(encoder_state_t* const state, cabac_data_t* const cabac, @@ -77,7 +77,8 @@ double uvg_mock_encode_coding_unit( const cu_loc_t* const cu_loc, lcu_t* lcu, cu_info_t* cur_cu, - enum uvg_tree_type tree_type); + enum uvg_tree_type tree_type, + const split_tree_t split_tree); int uvg_encode_inter_prediction_unit( encoder_state_t* const state, @@ -96,14 +97,13 @@ void uvg_encode_intra_luma_coding_unit( double* bits_out); -bool uvg_write_split_flag( +uint8_t uvg_write_split_flag( const encoder_state_t* const state, cabac_data_t* cabac, const cu_info_t* left_cu, const cu_info_t* above_cu, const cu_loc_t* const cu_loc, - const uint32_t split_tree, - int depth, + split_tree_t, enum uvg_tree_type tree_type, double* bits_out); diff --git a/src/encoder_state-bitstream.c b/src/encoder_state-bitstream.c index 1649d944..8e9f7c52 100644 --- a/src/encoder_state-bitstream.c +++ b/src/encoder_state-bitstream.c @@ -529,10 +529,10 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream, // if(!no_partition_constraints_override_constraint_flag) WRITE_U(stream, 0, 1, "partition_constraints_override_enabled_flag"); WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[0]] - MIN_SIZE, "sps_log2_diff_min_qt_min_cb_intra_slice_luma"); - WRITE_UE(stream, encoder->cfg.max_intra_slice_btt_depth, "sps_max_mtt_hierarchy_depth_intra_slice_luma"); - if (encoder->cfg.max_intra_slice_btt_depth) { - WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[0]] - 
uvg_g_convert_to_log2[encoder->cfg.max_bt_size[0]], "sps_log2_diff_max_bt_min_qt_intra_slice_luma"); - WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[0]] - uvg_g_convert_to_log2[encoder->cfg.max_tt_size[0]], "sps_log2_diff_max_tt_min_qt_intra_slice_luma"); + WRITE_UE(stream, encoder->cfg.max_btt_depth[0], "sps_max_mtt_hierarchy_depth_intra_slice_luma"); + if (encoder->cfg.max_btt_depth[0]) { + WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.max_bt_size[0]] - uvg_g_convert_to_log2[encoder->cfg.min_qt_size[0]], "sps_log2_diff_max_bt_min_qt_intra_slice_luma"); + WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.max_tt_size[0]] - uvg_g_convert_to_log2[encoder->cfg.min_qt_size[0]], "sps_log2_diff_max_tt_min_qt_intra_slice_luma"); } if (encoder->chroma_format != UVG_CSP_400) @@ -541,17 +541,17 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream, } if (encoder->cfg.dual_tree) { WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[2]] - MIN_SIZE, "sps_log2_diff_min_qt_min_cb_intra_slice_chroma"); - WRITE_UE(stream, encoder->cfg.max_intra_slice_btt_depth_chroma, "sps_max_mtt_hierarchy_depth_intra_slice_chroma"); - if (encoder->cfg.max_intra_slice_btt_depth_chroma) { - WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[2]] - uvg_g_convert_to_log2[encoder->cfg.max_bt_size[2]], "sps_log2_diff_max_bt_min_qt_intra_slice_chroma"); - WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[2]] - uvg_g_convert_to_log2[encoder->cfg.max_tt_size[2]], "sps_log2_diff_max_tt_min_qt_intra_slice_chroma"); + WRITE_UE(stream, encoder->cfg.max_btt_depth[2], "sps_max_mtt_hierarchy_depth_intra_slice_chroma"); + if (encoder->cfg.max_btt_depth[2]) { + WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.max_bt_size[2]] - uvg_g_convert_to_log2[encoder->cfg.min_qt_size[2]], "sps_log2_diff_max_bt_min_qt_intra_slice_chroma"); + WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.max_tt_size[2]] - 
uvg_g_convert_to_log2[encoder->cfg.min_qt_size[2]], "sps_log2_diff_max_tt_min_qt_intra_slice_chroma"); } } WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[1]] - MIN_SIZE, "sps_log2_diff_min_qt_min_cb_inter_slice"); - WRITE_UE(stream, encoder->cfg.max_inter_slice_btt_depth, "sps_max_mtt_hierarchy_depth_inter_slice"); - if (encoder->cfg.max_inter_slice_btt_depth != 0) { - WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[1]] - uvg_g_convert_to_log2[encoder->cfg.max_bt_size[1]], "sps_log2_diff_max_bt_min_qt_inter_tile_group"); - WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[1]] - uvg_g_convert_to_log2[encoder->cfg.max_tt_size[1]], "sps_log2_diff_max_tt_min_qt_inter_tile_group"); + WRITE_UE(stream, encoder->cfg.max_btt_depth[1], "sps_max_mtt_hierarchy_depth_inter_slice"); + if (encoder->cfg.max_btt_depth[1] != 0) { + WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.max_bt_size[1]] - uvg_g_convert_to_log2[encoder->cfg.min_qt_size[1]], "sps_log2_diff_max_bt_min_qt_inter_tile_group"); + WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.max_tt_size[1]] - uvg_g_convert_to_log2[encoder->cfg.min_qt_size[1]], "sps_log2_diff_max_tt_min_qt_inter_tile_group"); } if (LCU_WIDTH > 32) diff --git a/src/encoderstate.c b/src/encoderstate.c index eb529b2b..e8af6add 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -883,7 +883,7 @@ static void encoder_state_worker_encode_lcu_bitstream(void * opaque) //Encode coding tree cu_loc_t start; uvg_cu_loc_ctor(&start, lcu->position.x * LCU_WIDTH, lcu->position.y * LCU_WIDTH, LCU_WIDTH, LCU_WIDTH); - split_tree_t split_tree = { 0, 0 }; + split_tree_t split_tree = { 0, 0, 0 }; uvg_encode_coding_tree(state, lcu->coeff, tree_type, &start, split_tree); diff --git a/src/rdo.c b/src/rdo.c index 26f31634..5fef3b3c 100644 --- a/src/rdo.c +++ b/src/rdo.c @@ -705,19 +705,20 @@ static void calc_last_bits(encoder_state_t * const state, int32_t width, int32_t * tables generated during RDOQ to 
select the best coefficient to change. */ void uvg_rdoq_sign_hiding( - const encoder_state_t *const state, - const int32_t qp_scaled, - const uint32_t *const scan2raster, - const struct sh_rates_t *const sh_rates, - const int32_t last_pos, - const coeff_t *const coeffs, - coeff_t *const quant_coeffs, - const int8_t color) + const encoder_state_t *const state, + const int32_t qp_scaled, + const uint32_t *const scan2raster, + const struct sh_rates_t *const sh_rates, + const int32_t last_pos, + const coeff_t *const coeffs, + coeff_t *const quant_coeffs, + const int8_t color, + const bool need_sqrt_adjust) { const encoder_control_t * const ctrl = state->encoder_control; const double lambda = color ? state->c_lambda : state->lambda; - int inv_quant = uvg_g_inv_quant_scales[qp_scaled % 6]; + int inv_quant = uvg_g_inv_quant_scales[need_sqrt_adjust][qp_scaled % 6]; // This somehow scales quant_delta into fractional bits. Instead of the bits // being multiplied by lambda, the residual is divided by it, or something // like that. @@ -1203,7 +1204,7 @@ int uvg_ts_rdoq(encoder_state_t* const state, coeff_t* src_coeff, coeff_t* dest_ const bool needs_sqrt2_scale = false; // from VTM: should always be false - transform-skipped blocks don't require sqrt(2) compensation. const int q_bits = QUANT_SHIFT + qp_scaled / 6 + (needs_sqrt2_scale ? 
-1 : 0); // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits - const int32_t quant_coeff = uvg_g_quant_scales[qp_scaled % 6]; + const int32_t quant_coeff = uvg_g_quant_scales[needs_sqrt2_scale][qp_scaled % 6]; const double error_scale = (double)(1 << CTX_FRAC_BITS) / quant_coeff / quant_coeff; @@ -1416,8 +1417,10 @@ void uvg_rdoq( cabac_data_t * const cabac = &state->cabac; const uint32_t log2_block_width = uvg_g_convert_to_log2[width]; const uint32_t log2_block_height = uvg_g_convert_to_log2[height]; + bool needs_block_size_trafo_scale = !false && ((log2_block_width + log2_block_height) % 2 == 1); + needs_block_size_trafo_scale |= 1; // Non log2 block size - int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_block_width + log2_block_height) >> 1); // Represents scaling through forward transform + int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_block_width + log2_block_height) >> 1) + needs_block_size_trafo_scale; // Represents scaling through forward transform uint16_t go_rice_param = 0; uint32_t reg_bins = (width * height * 28) >> 4; @@ -1789,7 +1792,7 @@ void uvg_rdoq( } if (encoder->cfg.signhide_enable && abs_sum >= 2) { - uvg_rdoq_sign_hiding(state, qp_scaled, scan, &sh_rates, best_last_idx_p1, coef, dest_coeff, color); + uvg_rdoq_sign_hiding(state, qp_scaled, scan, &sh_rates, best_last_idx_p1, coef, dest_coeff, color, needs_block_size_trafo_scale); } } diff --git a/src/scalinglist.c b/src/scalinglist.c index 5c32ac4c..01edfa27 100644 --- a/src/scalinglist.c +++ b/src/scalinglist.c @@ -88,8 +88,14 @@ static const int32_t g_quant_inter_default_8x8[64] = 24, 25, 28, 33, 41, 54, 71, 91 }; -const int16_t uvg_g_quant_scales[6] = {26214, 23302, 20560, 18396, 16384, 14564}; -const int16_t uvg_g_inv_quant_scales[6] = {40, 45, 51, 57, 64, 72}; +const int16_t uvg_g_quant_scales[2][6] = { + {26214, 23302, 20560, 18396, 16384, 14564}, + { 18396,16384,14564,13107,11651,10280 } +}; +const int16_t 
uvg_g_inv_quant_scales[2][6] = { + {40, 45, 51, 57, 64, 72}, + { 57,64,72,80,90,102 } +}; /** @@ -406,11 +412,11 @@ void uvg_scalinglist_set(scaling_list_t* const scaling_list, const int32_t* cons int32_t* quantcoeff = (int32_t*)scaling_list->quant_coeff[size_id_x][size_id_y][listId][qp]; int32_t* dequantcoeff = (int32_t*)scaling_list->de_quant_coeff[size_id_x][size_id_y][listId][qp]; - // Encoder list - uvg_scalinglist_process_enc(coeff, quantcoeff, uvg_g_quant_scales[qp] << 4, height, width, ratio, + // Encoder list TODO: the sqrt adjusted lists + uvg_scalinglist_process_enc(coeff, quantcoeff, uvg_g_quant_scales[0][qp] << 4, height, width, ratio, MIN(8, g_scaling_list_size_x[size_id_x]), dc, !scaling_list->enable); // Decoder list - scalinglist_process_dec(coeff, dequantcoeff, uvg_g_inv_quant_scales[qp], height, width, ratio, + scalinglist_process_dec(coeff, dequantcoeff, uvg_g_inv_quant_scales[0][qp], height, width, ratio, MIN(8, g_scaling_list_size_x[size_id_x]), dc, !scaling_list->enable); diff --git a/src/search.c b/src/search.c index 56f8f566..f61ce721 100644 --- a/src/search.c +++ b/src/search.c @@ -1294,7 +1294,8 @@ static double search_cu( tree_type != UVG_CHROMA_T ? cu_loc : &chroma_loc, lcu, cur_cu, - tree_type); + tree_type, + split_tree); cost = bits * state->lambda; @@ -1335,7 +1336,11 @@ static double search_cu( // Recursively split all the way to max search depth. if (can_split_cu) { const int split_type = depth == 0 ? QT_SPLIT : BT_HOR_SPLIT; - const split_tree_t new_split = { split_tree.split_tree | split_type << (split_tree.current_depth * 3), split_tree.current_depth + 1 }; + const split_tree_t new_split = { + split_tree.split_tree | split_type << (split_tree.current_depth * 3), + split_tree.current_depth + 1, + split_tree.mtt_depth + (split_type != QT_SPLIT), + }; double split_cost = 0.0; int cbf = cbf_is_set_any(cur_cu->cbf); @@ -1374,8 +1379,7 @@ static double search_cu( left_cu, above_cu, tree_type != UVG_CHROMA_T ? 
cu_loc : &chroma_loc, - new_split.split_tree, - depth, + (split_tree_t){ new_split.split_tree, split_tree.current_depth, split_tree.mtt_depth }, /* price the split being evaluated: new_split's bits, read at this CU's own depth */ tree_type, &split_bits); } @@ -1394,7 +1398,7 @@ static double search_cu( const int splits = uvg_get_split_locs(cu_loc, split_type, new_cu_loc); for (int split = 0; split < splits; ++split) { split_cost += search_cu(state, &new_cu_loc[split], &split_lcu, tree_type, new_split); - if (split_cost < cost) { + if (split_cost > cost) { break; } } @@ -1426,7 +1430,7 @@ static double search_cu( double bits = 0; uvg_write_split_flag(state, &state->search_cabac, x > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x) - 1, SUB_SCU(y)) : NULL, - y > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y) - 1) : NULL, cu_loc, cur_cu->split_tree, depth, tree_type, &bits); + y > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y) - 1) : NULL, cu_loc, split_tree, tree_type, &bits); cur_cu->intra = cu_d1->intra; cur_cu->type = CU_INTRA; @@ -1715,7 +1719,7 @@ void uvg_search_lcu(encoder_state_t * const state, const int x, const int y, con cu_loc_t start; uvg_cu_loc_ctor(&start, x, y, LCU_WIDTH, LCU_WIDTH); - split_tree_t split_tree = { 0, 0 }; + split_tree_t split_tree = { 0, 0, 0 }; // Start search from depth 0. 
double cost = search_cu( state, diff --git a/src/search_inter.c b/src/search_inter.c index 37adaf27..4703152a 100644 --- a/src/search_inter.c +++ b/src/search_inter.c @@ -2124,9 +2124,7 @@ void uvg_cu_cost_inter_rd2( double *inter_cost, double* inter_bitcost, const cu_loc_t* const cu_loc){ - - const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width]; - + const int x_px = SUB_SCU(cu_loc->x); const int y_px = SUB_SCU(cu_loc->y); const int width = cu_loc->width; @@ -2160,12 +2158,24 @@ void uvg_cu_cost_inter_rd2( double no_cbf_bits; double bits = 0; const int skip_context = uvg_get_skip_context(cu_loc->x, cu_loc->y, lcu, NULL, NULL); + + int8_t depth = 0; + int8_t mtt_depth = 0; + uint32_t splits = cur_cu->split_tree; + while (splits & 7) { + if ((splits & 7) != QT_SPLIT) { + mtt_depth++; + } + depth++; + splits >>= 3; + } + const split_tree_t splitt_tree = { cur_cu->split_tree, depth, mtt_depth }; if (cur_cu->merged) { no_cbf_bits = CTX_ENTROPY_FBITS(&state->cabac.ctx.cu_skip_flag_model[skip_context], 1) + *inter_bitcost; - bits += uvg_mock_encode_coding_unit(state, cabac, cu_loc, lcu, cur_cu, UVG_BOTH_T); + bits += uvg_mock_encode_coding_unit(state, cabac, cu_loc, lcu, cur_cu, UVG_BOTH_T, splitt_tree); } else { - no_cbf_bits = uvg_mock_encode_coding_unit(state, cabac, cu_loc, lcu, cur_cu, UVG_BOTH_T); + no_cbf_bits = uvg_mock_encode_coding_unit(state, cabac, cu_loc, lcu, cur_cu, UVG_BOTH_T, splitt_tree); bits += no_cbf_bits - CTX_ENTROPY_FBITS(&cabac->ctx.cu_qt_root_cbf_model, 0) + CTX_ENTROPY_FBITS(&cabac->ctx.cu_qt_root_cbf_model, 1); } double no_cbf_cost = ssd + no_cbf_bits * state->lambda; diff --git a/src/strategies/avx2/quant-avx2.c b/src/strategies/avx2/quant-avx2.c index 2d45166c..8c967bdb 100644 --- a/src/strategies/avx2/quant-avx2.c +++ b/src/strategies/avx2/quant-avx2.c @@ -386,11 +386,13 @@ void uvg_quant_avx2(const encoder_state_t * const state, const coeff_t * __restr int32_t qp_scaled = uvg_get_scaled_qp(color, state->qp, (encoder->bitdepth - 8) 
* 6, encoder->qp_map[0]); qp_scaled = transform_skip ? MAX(qp_scaled, 4 + 6 * MIN_QP_PRIME_TS) : qp_scaled; + bool needs_block_size_trafo_scale = !transform_skip && ((log2_tr_height + log2_tr_width) % 2 == 1); + needs_block_size_trafo_scale |= 1; // Non log2 block size const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)color; const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled % 6]; const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_width + log2_tr_height) >> 1); //!< Represents scaling through forward transform - const int64_t q_bits = QUANT_SHIFT + qp_scaled / 6 + (transform_skip ? 0 : transform_shift); + const int64_t q_bits = QUANT_SHIFT + qp_scaled / 6 + (transform_skip ? 0 : transform_shift - needs_block_size_trafo_scale); const int32_t add = ((state->frame->slicetype == UVG_SLICE_I) ? 171 : 85) << (q_bits - 9); const int32_t q_bits8 = q_bits - 8; @@ -792,13 +794,15 @@ void uvg_dequant_avx2(const encoder_state_t * const state, coeff_t *q_coef, coef int32_t n; const uint32_t log2_tr_width = uvg_g_convert_to_log2[width]; const uint32_t log2_tr_height = uvg_g_convert_to_log2[height]; - int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_width + log2_tr_height) >> 1); // Represents scaling through forward transform + int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_width + log2_tr_height) >> 1); + bool needs_block_size_trafo_scale = !transform_skip && ((log2_tr_height + log2_tr_width) % 2 == 1); + needs_block_size_trafo_scale |= 1; // Non log2 block size// Represents scaling through forward transform int32_t qp_scaled = uvg_get_scaled_qp(color, state->qp, (encoder->bitdepth-8)*6, encoder->qp_map[0]); qp_scaled = transform_skip ? MAX(qp_scaled, 4 + 6 * MIN_QP_PRIME_TS) : qp_scaled; - shift = 20 - QUANT_SHIFT - (transform_skip ? 
0 : transform_shift); + shift = 20 - QUANT_SHIFT - (transform_skip ? 0 : transform_shift - needs_block_size_trafo_scale); if (encoder->scaling_list.enable) { @@ -822,7 +826,7 @@ void uvg_dequant_avx2(const encoder_state_t * const state, coeff_t *q_coef, coef } } } else { - int32_t scale = uvg_g_inv_quant_scales[qp_scaled%6] << (qp_scaled/6); + int32_t scale = uvg_g_inv_quant_scales[needs_block_size_trafo_scale][qp_scaled%6] << (qp_scaled/6); add = 1 << (shift-1); __m256i v_scale = _mm256_set1_epi32(scale); diff --git a/src/strategies/generic/quant-generic.c b/src/strategies/generic/quant-generic.c index eed95e59..13e08f3a 100644 --- a/src/strategies/generic/quant-generic.c +++ b/src/strategies/generic/quant-generic.c @@ -68,12 +68,13 @@ void uvg_quant_generic( int32_t qp_scaled = uvg_get_scaled_qp(color, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]); qp_scaled = transform_skip ? MAX(qp_scaled, 4 + 6 * MIN_QP_PRIME_TS) : qp_scaled; - - + bool needs_block_size_trafo_scale = !transform_skip && ((log2_tr_height + log2_tr_width) % 2 == 1); + needs_block_size_trafo_scale |= 1; // Non log2 block size + const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)color; const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled % 6]; - const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_height + log2_tr_width) >> 1); //!< Represents scaling through forward transform - const int64_t q_bits = QUANT_SHIFT + qp_scaled / 6 + (transform_skip ? 0 : transform_shift); + const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_height + log2_tr_width) >> 1) - needs_block_size_trafo_scale; //!< Represents scaling through forward transform + const int64_t q_bits = QUANT_SHIFT + qp_scaled / 6 + (transform_skip ? 0 : transform_shift ); const int32_t add = ((state->frame->slicetype == UVG_SLICE_I) ? 
171 : 85) << (q_bits - 9); const int32_t q_bits8 = q_bits - 8; @@ -592,11 +593,13 @@ void uvg_dequant_generic(const encoder_state_t * const state, coeff_t *q_coef, c const uint32_t log2_tr_height = uvg_g_convert_to_log2[height]; int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_width + log2_tr_height) >> 1); // Represents scaling through forward transform + bool needs_block_size_trafo_scale = !transform_skip && ((log2_tr_height + log2_tr_width) % 2 == 1); + needs_block_size_trafo_scale |= 1; // Non log2 block size int32_t qp_scaled = uvg_get_scaled_qp(color, state->qp, (encoder->bitdepth-8)*6, encoder->qp_map[0]); qp_scaled = transform_skip ? MAX(qp_scaled, 4 + 6 * MIN_QP_PRIME_TS) : qp_scaled; - shift = 20 - QUANT_SHIFT - (transform_skip ? 0 : transform_shift); + shift = 20 - QUANT_SHIFT - (transform_skip ? 0 : transform_shift - needs_block_size_trafo_scale); if (encoder->scaling_list.enable) { @@ -620,7 +623,7 @@ void uvg_dequant_generic(const encoder_state_t * const state, coeff_t *q_coef, c } } } else { - int32_t scale = uvg_g_inv_quant_scales[qp_scaled%6] << (qp_scaled/6); + int32_t scale = uvg_g_inv_quant_scales[needs_block_size_trafo_scale][qp_scaled%6] << (qp_scaled/6); add = 1 << (shift-1); for (n = 0; n < width * height; n++) { diff --git a/src/transform.h b/src/transform.h index ebe31109..e96a2893 100644 --- a/src/transform.h +++ b/src/transform.h @@ -44,8 +44,8 @@ #include "global.h" // IWYU pragma: keep extern const uint8_t uvg_g_chroma_scale[58]; -extern const int16_t uvg_g_inv_quant_scales[6]; -extern const int16_t uvg_g_quant_scales[6]; +extern const int16_t uvg_g_inv_quant_scales[2][6]; +extern const int16_t uvg_g_quant_scales[2][6]; #define COEFF_ORDER_LINEAR 0 #define COEFF_ORDER_CU 1 diff --git a/src/uvg266.h b/src/uvg266.h index d2726655..7d772780 100644 --- a/src/uvg266.h +++ b/src/uvg266.h @@ -543,13 +543,11 @@ typedef struct uvg_config uint8_t dual_tree; - uint8_t min_qt_size[3]; + uint8_t min_qt_size[3]; /* 
intra, inter, dual tree chroma*/ uint8_t max_bt_size[3]; uint8_t max_tt_size[3]; - uint8_t max_intra_slice_btt_depth; - uint8_t max_intra_slice_btt_depth_chroma; - uint8_t max_inter_slice_btt_depth; + uint8_t max_btt_depth[3]; uint8_t intra_rough_search_levels;