From 540cf1fa41b6a6911485849f257c30baec0745c9 Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Tue, 15 Nov 2022 15:31:44 +0200 Subject: [PATCH] [mtt] WIP --- src/cu.c | 16 +-- src/cu.h | 5 +- src/encode_coding_tree.c | 34 ++----- src/encoder_state-bitstream.c | 51 ++++------ src/global.h | 2 +- src/intra.c | 4 +- src/rdo.c | 7 +- src/search.c | 53 +++++----- src/search_inter.c | 4 +- src/search_intra.c | 97 ++++++++++-------- src/strategies/avx2/quant-avx2.c | 4 +- src/strategies/generic/quant-generic.c | 12 +-- src/strategies/strategies-picture.c | 133 ++++++++++++++----------- src/strategies/strategies-picture.h | 4 +- src/transform.c | 2 +- src/uvg266.h | 8 ++ tools/generate_tables.c | 2 +- 17 files changed, 224 insertions(+), 214 deletions(-) diff --git a/src/cu.c b/src/cu.c index 4ae74da0..0256bd3d 100644 --- a/src/cu.c +++ b/src/cu.c @@ -317,7 +317,10 @@ void uvg_cu_loc_ctor(cu_loc_t* loc, int x, int y, int width, int height) } -void uvg_get_split_locs(const cu_loc_t* const origin, enum split_type split, cu_loc_t out[4]) +int uvg_get_split_locs( + const cu_loc_t* const origin, + enum split_type split, + cu_loc_t out[4]) { const int half_width = origin->width >> 1; const int half_height = origin->height >> 1; @@ -333,24 +336,25 @@ void uvg_get_split_locs(const cu_loc_t* const origin, enum split_type split, cu_ uvg_cu_loc_ctor(&out[1], origin->x + half_width, origin->y, half_width, half_height); uvg_cu_loc_ctor(&out[2], origin->x, origin->y + half_height, half_width, half_height); uvg_cu_loc_ctor(&out[3], origin->x + half_width, origin->y + half_height, half_width, half_height); - break; + return 4; case BT_HOR_SPLIT: uvg_cu_loc_ctor(&out[0], origin->x, origin->y, origin->width, half_height); uvg_cu_loc_ctor(&out[1], origin->x, origin->y + half_height, origin->width, half_height); - break; + return 2; case BT_VER_SPLIT: uvg_cu_loc_ctor(&out[0], origin->x, origin->y, half_width, origin->height); uvg_cu_loc_ctor(&out[1], origin->x + half_width, origin->y, 
half_width, origin->height); - break; + return 2; case TT_HOR_SPLIT: uvg_cu_loc_ctor(&out[0], origin->x, origin->y, origin->width, quarter_height); uvg_cu_loc_ctor(&out[1], origin->x, origin->y + quarter_height, origin->width, half_height); uvg_cu_loc_ctor(&out[2], origin->x, origin->y + quarter_height + half_height, origin->width, quarter_height); - break; + return 3; case TT_VER_SPLIT: uvg_cu_loc_ctor(&out[0], origin->x, origin->y, quarter_width, origin->height); uvg_cu_loc_ctor(&out[1], origin->x + quarter_width, origin->y, half_width, origin->height); uvg_cu_loc_ctor(&out[2], origin->x + quarter_width + half_width, origin->y, quarter_width, origin->height); - break; + return 3; } + return 0; } \ No newline at end of file diff --git a/src/cu.h b/src/cu.h index b46d47fd..39fb96b5 100644 --- a/src/cu.h +++ b/src/cu.h @@ -184,7 +184,10 @@ typedef struct { void uvg_cu_loc_ctor(cu_loc_t *loc, int x, int y, int width, int height); -void uvg_get_split_locs(const cu_loc_t* const origin, enum split_type split, cu_loc_t out[4]); +int uvg_get_split_locs( + const cu_loc_t* const origin, + enum split_type split, + cu_loc_t out[4]); #define CU_GET_MV_CAND(cu_info_ptr, reflist) \ (((reflist) == 0) ? (cu_info_ptr)->inter.mv_cand0 : (cu_info_ptr)->inter.mv_cand1) diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index d503e3c0..191caab2 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -543,7 +543,7 @@ static void encode_transform_unit( if(state->encoder_control->cfg.trskip_enable && width <= (1 << state->encoder_control->cfg.trskip_max_size) && !(cur_pu->type == CU_INTRA && cur_pu->intra.isp_mode != ISP_MODE_NO_ISP)) { cabac->cur_ctx = &cabac->ctx.transform_skip_model_luma; CABAC_BIN(cabac, cur_pu->tr_idx == MTS_SKIP, "transform_skip_flag"); - DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_TR_SKIP, x, y, width, width, (cur_pu->tr_idx == MTS_SKIP) ? 
1 : 0); + DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_TR_SKIP, x, y, width, height, (cur_pu->tr_idx == MTS_SKIP) ? 1 : 0); } if(cur_pu->tr_idx == MTS_SKIP) { uvg_encode_ts_residual(state, cabac, coeff_y, width, height, 0, scan_idx, NULL); @@ -1040,7 +1040,7 @@ void uvg_encode_intra_luma_coding_unit( int multi_ref_idx = enable_mrl ? cur_cu->intra.multi_ref_idx : 0; #ifdef UVG_DEBUG_PRINT_YUVIEW_CSV - if(multi_ref_idx) DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_MRL, x, y, width, width, multi_ref_idx); + if(multi_ref_idx) DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_MRL, x, y, width, height, multi_ref_idx); #endif if (cur_cu->type == CU_INTRA && (y % LCU_WIDTH) != 0 && !cur_cu->bdpcmMode && enable_mrl && !mip_flag) { @@ -1222,7 +1222,7 @@ bool uvg_write_split_flag( no_split = allow_qt = bh_split = bv_split = th_split = tv_split = true; if (depth > MAX_DEPTH) allow_qt = false; // ToDo: update this when btt is actually used - bool allow_btt = false;// when mt_depth < MAX_BT_DEPTH + bool allow_btt = true;// when mt_depth < MAX_BT_DEPTH const int cu_width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width; const int cu_height = tree_type != UVG_CHROMA_T ? cu_loc->height : cu_loc->chroma_height; @@ -1289,7 +1289,7 @@ bool uvg_write_split_flag( CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.split_flag_model[split_model]), split_flag != 0, bits, "split_flag"); } - bool qt_split = split_flag == UVG_QUAD_SPLIT; + bool qt_split = split_flag == QT_SPLIT; if (!(implicit_split_mode == UVG_NO_SPLIT) && (allow_qt && allow_btt)) { split_model = (left_cu && GET_SPLITDATA(left_cu, depth)) + (above_cu && GET_SPLITDATA(above_cu, depth)) + (depth < 2 ? 
0 : 3); @@ -1384,28 +1384,16 @@ void uvg_encode_coding_tree( const int half_luma = cu_loc->width / 2; split_tree_t new_split_tree = { cur_cu->split_tree, split_tree.current_depth + 1 }; - cu_loc_t new_cu_loc; - uvg_cu_loc_ctor(&new_cu_loc, x, y, half_luma, half_luma); - // Split blocks and remember to change x and y block positions - uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc, new_split_tree); - - if (!border_x || border_split_x) { - uvg_cu_loc_ctor(&new_cu_loc, x + half_cu, y, half_luma, half_luma); - uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc, new_split_tree); - } - if (!border_y || border_split_y) { - uvg_cu_loc_ctor(&new_cu_loc, x, y + half_cu, half_luma, half_luma); - uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc, new_split_tree); - } - if (!border || (border_split_x && border_split_y)) { - uvg_cu_loc_ctor(&new_cu_loc, x + half_cu, y + half_cu, half_luma, half_luma); - uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc, new_split_tree); + cu_loc_t new_cu_loc[4]; + const int splits = uvg_get_split_locs(cu_loc, split_flag, new_cu_loc); + for (int split = 0; split frame->poc, DBG_YUVIEW_CU_TYPE, abs_x, abs_y, cu_width, cu_width, (cur_cu->type == CU_INTRA)?0:1); + DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_CU_TYPE, abs_x, abs_y, cu_width, cu_height, (cur_cu->type == CU_INTRA)?0:1); if (ctrl->cfg.lossless) { cabac->cur_ctx = &cabac->ctx.cu_transquant_bypass; @@ -1446,8 +1434,8 @@ void uvg_encode_coding_tree( } } #ifdef UVG_DEBUG_PRINT_YUVIEW_CSV - if (cur_cu->inter.mv_dir & 1) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVSKIP_L0, abs_x, abs_y, cu_width, cu_width, cur_cu->inter.mv[0][0], cur_cu->inter.mv[0][1]); - if (cur_cu->inter.mv_dir & 2) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVSKIP_L1, abs_x, abs_y, cu_width, cu_width, cur_cu->inter.mv[1][0], cur_cu->inter.mv[1][1]); + if (cur_cu->inter.mv_dir & 1) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVSKIP_L0, abs_x, abs_y, cu_width, cu_height, 
cur_cu->inter.mv[0][0], cur_cu->inter.mv[0][1]); + if (cur_cu->inter.mv_dir & 2) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVSKIP_L1, abs_x, abs_y, cu_width, cu_height, cur_cu->inter.mv[1][0], cur_cu->inter.mv[1][1]); #endif goto end; diff --git a/src/encoder_state-bitstream.c b/src/encoder_state-bitstream.c index 1cff5f05..abf7fd9b 100644 --- a/src/encoder_state-bitstream.c +++ b/src/encoder_state-bitstream.c @@ -529,48 +529,31 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream, WRITE_UE(stream, MIN_SIZE-2, "log2_min_luma_coding_block_size_minus2"); // Min size 2^3 = 8x8 // if(!no_partition_constraints_override_constraint_flag) WRITE_U(stream, 0, 1, "partition_constraints_override_enabled_flag"); - WRITE_UE(stream, 0, "sps_log2_diff_min_qt_min_cb_intra_slice_luma"); - WRITE_UE(stream, 0, "sps_max_mtt_hierarchy_depth_intra_slice_luma"); - + WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[0]] - MIN_SIZE, "sps_log2_diff_min_qt_min_cb_intra_slice_luma"); + WRITE_UE(stream, encoder->cfg.max_intra_slice_btt_depth, "sps_max_mtt_hierarchy_depth_intra_slice_luma"); + if (encoder->cfg.max_intra_slice_btt_depth) { /* the diff_max_*_min_qt elements are log2(max BT/TT size) - log2(min QT size) and must be >= 0 */ + WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.max_bt_size[0]] - uvg_g_convert_to_log2[encoder->cfg.min_qt_size[0]], "sps_log2_diff_max_bt_min_qt_intra_slice_luma"); + WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.max_tt_size[0]] - uvg_g_convert_to_log2[encoder->cfg.min_qt_size[0]], "sps_log2_diff_max_tt_min_qt_intra_slice_luma"); + } + if (encoder->chroma_format != UVG_CSP_400) { WRITE_U(stream, encoder->cfg.dual_tree, 1, "qtbtt_dual_tree_intra_flag"); } if (encoder->cfg.dual_tree) { - WRITE_UE(stream, 0, "sps_log2_diff_min_qt_min_cb_intra_slice_chroma"); - WRITE_UE(stream, 0, "sps_max_mtt_hierarchy_depth_intra_slice_chroma"); - if (0 /*sps_max_mtt_hierarchy_depth_intra_slice_chroma != 0*/) { - WRITE_UE(stream, 0, "sps_log2_diff_max_bt_min_qt_intra_slice_chroma"); - WRITE_UE(stream, 0, 
"sps_log2_diff_max_tt_min_qt_intra_slice_chroma"); + WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[2]] - MIN_SIZE, "sps_log2_diff_min_qt_min_cb_intra_slice_chroma"); + WRITE_UE(stream, encoder->cfg.max_intra_slice_btt_depth_chroma, "sps_max_mtt_hierarchy_depth_intra_slice_chroma"); + if (encoder->cfg.max_intra_slice_btt_depth_chroma) { /* same operand order as luma: log2(max BT/TT size) - log2(min QT size) */ + WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.max_bt_size[2]] - uvg_g_convert_to_log2[encoder->cfg.min_qt_size[2]], "sps_log2_diff_max_bt_min_qt_intra_slice_chroma"); + WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.max_tt_size[2]] - uvg_g_convert_to_log2[encoder->cfg.min_qt_size[2]], "sps_log2_diff_max_tt_min_qt_intra_slice_chroma"); } } - WRITE_UE(stream, 0, "sps_log2_diff_min_qt_min_cb_inter_slice"); - WRITE_UE(stream, 0, "sps_max_mtt_hierarchy_depth_inter_slice"); - - -#if 0 // mtt depth intra - if (max_mtt_depth_intra != 0) { - WRITE_UE(stream, 0, "sps_log2_diff_max_bt_min_qt_intra_tile_group_luma"); - WRITE_UE(stream, 0, "sps_log2_diff_max_tt_min_qt_intra_tile_group_luma"); + WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[1]] - MIN_SIZE, "sps_log2_diff_min_qt_min_cb_inter_slice"); + WRITE_UE(stream, encoder->cfg.max_inter_slice_btt_depth, "sps_max_mtt_hierarchy_depth_inter_slice"); + if (encoder->cfg.max_inter_slice_btt_depth != 0) { /* same operand order as intra: log2(max BT/TT size) - log2(min QT size) */ + WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.max_bt_size[1]] - uvg_g_convert_to_log2[encoder->cfg.min_qt_size[1]], "sps_log2_diff_max_bt_min_qt_inter_tile_group"); + WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.max_tt_size[1]] - uvg_g_convert_to_log2[encoder->cfg.min_qt_size[1]], "sps_log2_diff_max_tt_min_qt_inter_tile_group"); } -#endif -#if 0 // mtt depth inter - if (max_mtt_depth_inter != 0) { - WRITE_UE(stream, 0, "sps_log2_diff_max_bt_min_qt_inter_tile_group"); - WRITE_UE(stream, 0, "sps_log2_diff_max_tt_min_qt_inter_tile_group"); - } -#endif -#if 0 // Dual Tree - if (encoder->cfg.dual_i_tree) { - WRITE_UE(stream, 0, 
"sps_log2_diff_min_qt_min_cb_intra_tile_group_chroma"); - WRITE_UE(stream, 0, "sps_max_mtt_hierarchy_depth_intra_tile_group_chroma"); - - if (max_mtt_depth_intra != 0) { - WRITE_UE(stream, 0, "sps_log2_diff_max_bt_min_qt_intra_tile_group_chroma"); - WRITE_UE(stream, 0, "sps_log2_diff_max_tt_min_qt_intra_tile_group_chroma"); - } - } -#endif if (LCU_WIDTH > 32) WRITE_U(stream, (TR_MAX_LOG2_SIZE - 5) ? 1 : 0, 1, "sps_max_luma_transform_size_64_flag"); diff --git a/src/global.h b/src/global.h index cc691605..9c006fe6 100644 --- a/src/global.h +++ b/src/global.h @@ -129,7 +129,7 @@ typedef int16_t coeff_t; typedef int16_t mv_t; //#define VERBOSE 1 -//#define UVG_DEBUG_PRINT_CABAC 1 +#define UVG_DEBUG_PRINT_CABAC 1 //#define UVG_DEBUG 1 //#define UVG_DEBUG_PRINT_YUVIEW_CSV 1 diff --git a/src/intra.c b/src/intra.c index 9df9acf7..99150ef2 100644 --- a/src/intra.c +++ b/src/intra.c @@ -1552,7 +1552,7 @@ void uvg_intra_predict( } } else { - uvg_pixels_blit(&state->tile->frame->cclm_luma_rec[x / 2 + (y * stride) / 4], dst, width, width, stride / 2, width); + uvg_pixels_blit(&state->tile->frame->cclm_luma_rec[x / 2 + (y * stride) / 4], dst, width, height, stride / 2, width); if (!PU_IS_TU(&data->pred_cu) || data->cclm_parameters[color == COLOR_U ? 0 : 1].b <= 0) { predict_cclm( state, color, width, height, x, y, stride, intra_mode, lcu, refs, dst, @@ -1560,7 +1560,7 @@ void uvg_intra_predict( tree_type); } else { - linear_transform_cclm(&data->cclm_parameters[color == COLOR_U ? 0 : 1], dst, dst, width, width); + linear_transform_cclm(&data->cclm_parameters[color == COLOR_U ? 
0 : 1], dst, dst, width, height); } } } diff --git a/src/rdo.c b/src/rdo.c index 0ed561e1..94f19caf 100644 --- a/src/rdo.c +++ b/src/rdo.c @@ -447,7 +447,7 @@ double uvg_get_coeff_cost( } else { double ccc = get_coeff_cabac_cost(state, coeff_ptr, cu_loc, color, scan_mode, tr_skip, cur_tu); if (save_cccs) { - save_ccc(state->qp, coeff, width * width, ccc); + save_ccc(state->qp, coeff, width * height, ccc); } return ccc; } @@ -1474,10 +1474,13 @@ void uvg_rdoq( // Hope the compiler is able to utilize this information. switch (cg_num) { case 1: FILL_ARRAY(sig_coeffgroup_flag, 0, 1); break; + case 2: FILL_ARRAY(sig_coeffgroup_flag, 0, 2); break; case 4: FILL_ARRAY(sig_coeffgroup_flag, 0, 4); break; + case 8: FILL_ARRAY(sig_coeffgroup_flag, 0, 8); break; case 16: FILL_ARRAY(sig_coeffgroup_flag, 0, 16); break; + case 32: FILL_ARRAY(sig_coeffgroup_flag, 0, 32); break; case 64: FILL_ARRAY(sig_coeffgroup_flag, 0, 64); break; - default: assert(0 && "There should be 1, 4, 16 or 64 coefficient groups"); + default: assert(0 && "There should be 1, 2, 4, 8, 16, 32 or 64 coefficient groups"); } cabac_ctx_t *base_coeff_group_ctx = &(cabac->ctx.sig_coeff_group_model[color ? 2 : 0]); diff --git a/src/search.c b/src/search.c index 99eef24a..5aa4430f 100644 --- a/src/search.c +++ b/src/search.c @@ -252,15 +252,16 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in } -static void lcu_fill_cbf(lcu_t *lcu, int x_local, unsigned y_local, unsigned width, const cu_info_t *cur_cu) +static void lcu_fill_cbf(lcu_t *lcu, int x_local, unsigned y_local, unsigned width, unsigned height, const cu_info_t *cur_cu) { - const uint32_t mask = ~((MIN(width, TR_MAX_WIDTH))-1); + const uint32_t x_mask = ~((MIN(width, TR_MAX_WIDTH))-1); + const uint32_t y_mask = ~((MIN(height, TR_MAX_WIDTH))-1); // Set coeff flags in every CU covered by part_mode in this depth. 
- for (uint32_t y = y_local; y < y_local + width; y += SCU_WIDTH) { + for (uint32_t y = y_local; y < y_local + height; y += SCU_WIDTH) { for (uint32_t x = x_local; x < x_local + width; x += SCU_WIDTH) { // Use TU top-left CU to propagate coeff flags - cu_info_t *cu_from = LCU_GET_CU_AT_PX(lcu, x & mask, y & mask); + cu_info_t *cu_from = LCU_GET_CU_AT_PX(lcu, x & x_mask, y & y_mask); cu_info_t *cu_to = LCU_GET_CU_AT_PX(lcu, x, y); if (cu_from != cu_to) { // Chroma and luma coeff data is needed for deblocking @@ -942,6 +943,7 @@ static double search_cu( const int x = cu_loc->x; const int y = cu_loc->y; const int luma_width = cu_loc->width; + const int luma_height = cu_loc->height; assert(cu_width >= 4); double cost = MAX_DOUBLE; double inter_zero_coeff_cost = MAX_DOUBLE; @@ -1005,7 +1007,7 @@ static double search_cu( // If the CU is completely inside the frame at this depth, search for // prediction modes at this depth. - if ( x + luma_width <= frame_width && y + luma_width <= frame_height) + if ( x + luma_width <= frame_width && y + luma_height <= frame_height) { int cu_width_inter_min = LCU_WIDTH >> pu_depth_inter.max; bool can_use_inter = @@ -1018,7 +1020,7 @@ static double search_cu( // otherwise forbid it. 
(x & ~(cu_width_inter_min - 1)) + cu_width_inter_min > frame_width || (y & ~(cu_width_inter_min - 1)) + cu_width_inter_min > frame_height - ); + ) && cu_loc->width == cu_loc->height; // Don't allow non square inter CUs for now if (can_use_inter) { double mode_cost; @@ -1148,7 +1150,7 @@ static double search_cu( if ((cur_cu->log2_height + cur_cu->log2_width < 6) || state->encoder_control->chroma_format == UVG_CSP_400 || tree_type == UVG_LUMA_T) { recon_chroma = false; } - lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); + lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu); uvg_intra_recon_cu(state, &intra_search, cu_loc, NULL, lcu, @@ -1195,7 +1197,7 @@ static double search_cu( if(cbf_cr) cbf_set(&split_cu->cbf, COLOR_V); split_cu->joint_cb_cr = jccr; } - lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); + lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu); } else if (cur_cu->type == CU_INTER) { @@ -1238,7 +1240,7 @@ static double search_cu( } } lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu); - lcu_fill_cbf(lcu, x_local, y_local, cu_width, cur_cu); + lcu_fill_cbf(lcu, x_local, y_local, cu_width, cu_height, cur_cu); } } @@ -1276,7 +1278,7 @@ static double search_cu( // if (cur_cu->merged) { // cur_cu->merged = 0; // cur_cu->skipped = 1; - // lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); + // lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu); // } // cur_cu->cbf = 0; @@ -1300,9 +1302,9 @@ static double search_cu( // Recursively split all the way to max search depth. if (can_split_cu) { - const split_tree_t new_split = { split_tree.split_tree | QT_SPLIT << (split_tree.current_depth * 3), split_tree.current_depth + 1 }; - - int half_cu = cu_width >> (tree_type != UVG_CHROMA_T); + const int split_type = depth == 0 ? 
QT_SPLIT : BT_HOR_SPLIT; + const split_tree_t new_split = { split_tree.split_tree | split_type << (split_tree.current_depth * 3), split_tree.current_depth + 1 }; + double split_cost = 0.0; int cbf = cbf_is_set_any(cur_cu->cbf); cabac_data_t post_seach_cabac; @@ -1357,19 +1359,14 @@ static double search_cu( if (cur_cu->type == CU_NOTSET || cbf || state->encoder_control->cfg.cu_split_termination == UVG_CU_SPLIT_TERMINATION_OFF) { initialize_partial_work_tree(lcu, &split_lcu, cu_loc, tree_type); cu_loc_t new_cu_loc[4]; - uvg_get_split_locs(cu_loc, QT_SPLIT, new_cu_loc); - if (split_cost < cost) { - split_cost += search_cu(state, &new_cu_loc[0], &split_lcu, tree_type, new_split); - } - if (split_cost < cost) { - split_cost += search_cu(state, &new_cu_loc[1], &split_lcu, tree_type, new_split); - } - if (split_cost < cost) { - split_cost += search_cu(state, &new_cu_loc[2], &split_lcu, tree_type, new_split); - } - if (split_cost < cost) { - split_cost += search_cu(state, &new_cu_loc[3], &split_lcu, tree_type, new_split); + const int splits = uvg_get_split_locs(cu_loc, split_type, new_cu_loc); + for (int split = 0; split < splits; ++split) { + split_cost += search_cu(state, &new_cu_loc[split], &split_lcu, tree_type, new_split); + if (split_cost >= cost) { /* early out: the split already costs at least as much as the unsplit CU, so the remaining sub-blocks need not be searched */ + break; + } } + } else { split_cost = INT_MAX; } @@ -1410,7 +1407,7 @@ static double search_cu( cur_cu->lfnst_idx = 0; cur_cu->cr_lfnst_idx = 0; - lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); + lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu); intra_search_data_t proxy; FILL(proxy, 0); @@ -1453,7 +1450,7 @@ static double search_cu( // Reset HMVP to the beginning of this CU level search and add this CU as the mvp memcpy(&state->tile->frame->hmvp_lut[ctu_row_mul_five], hmvp_lut, sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS); state->tile->frame->hmvp_size[ctu_row] = hmvp_lut_size; - uvg_hmvp_add_mv(state, x, y, cu_width, cu_width, cur_cu); + uvg_hmvp_add_mv(state, x, y, cu_width, 
cu_height, cur_cu); } } else { @@ -1474,7 +1471,7 @@ static double search_cu( // Reset HMVP to the beginning of this CU level search and add this CU as the mvp memcpy(&state->tile->frame->hmvp_lut[ctu_row_mul_five], hmvp_lut, sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS); state->tile->frame->hmvp_size[ctu_row] = hmvp_lut_size; - uvg_hmvp_add_mv(state, x, y, cu_width, cu_width, cur_cu); + uvg_hmvp_add_mv(state, x, y, cu_width, cu_height, cur_cu); } } diff --git a/src/search_inter.c b/src/search_inter.c index e4124ee0..02432f59 100644 --- a/src/search_inter.c +++ b/src/search_inter.c @@ -2138,8 +2138,8 @@ void uvg_cu_cost_inter_rd2( UVG_BOTH_T); ALIGNED(64) uvg_pixel u_pred[LCU_WIDTH_C * LCU_WIDTH_C]; ALIGNED(64) uvg_pixel v_pred[LCU_WIDTH_C * LCU_WIDTH_C]; - uvg_pixels_blit(&lcu->ref.u[index], u_pred, width, width, LCU_WIDTH_C, width); - uvg_pixels_blit(&lcu->ref.v[index], v_pred, width, width, LCU_WIDTH_C, width); + uvg_pixels_blit(&lcu->ref.u[index], u_pred, width, height, LCU_WIDTH_C, width); + uvg_pixels_blit(&lcu->ref.v[index], v_pred, width, height, LCU_WIDTH_C, width); ALIGNED(64) int16_t u_resi[LCU_WIDTH_C * LCU_WIDTH_C]; ALIGNED(64) int16_t v_resi[LCU_WIDTH_C * LCU_WIDTH_C]; diff --git a/src/search_intra.c b/src/search_intra.c index 5c5ed32b..69237c67 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -130,17 +130,31 @@ static INLINE uint8_t select_best_mode_index(const int8_t *modes, const double * * * \return */ -static void get_cost_dual(encoder_state_t * const state, - const pred_buffer preds, const uvg_pixel *orig_block, - cost_pixel_nxn_multi_func *satd_twin_func, - cost_pixel_nxn_multi_func *sad_twin_func, - int width, double *costs_out) +static void get_cost_dual( + encoder_state_t * const state, + const pred_buffer preds, + const uvg_pixel *orig_block, + cost_pixel_nxn_multi_func *satd_twin_func, + cost_pixel_nxn_multi_func *sad_twin_func, + int width, + int height, + double *costs_out) { #define PARALLEL_BLKS 2 unsigned 
satd_costs[PARALLEL_BLKS] = { 0 }; - satd_twin_func(preds, orig_block, PARALLEL_BLKS, satd_costs); + if (satd_twin_func != NULL) { + satd_twin_func(preds, orig_block, PARALLEL_BLKS, satd_costs); + } else { + satd_costs[0] = uvg_satd_any_size(width, height, preds[0], width, orig_block, LCU_WIDTH); + satd_costs[1] = uvg_satd_any_size(width, height, preds[1], width, orig_block, LCU_WIDTH); + } unsigned unsigned_sad_costs[PARALLEL_BLKS] = { 0 }; - sad_twin_func(preds, orig_block, PARALLEL_BLKS, unsigned_sad_costs); + if (sad_twin_func != NULL) { + sad_twin_func(preds, orig_block, PARALLEL_BLKS, unsigned_sad_costs); + } else { + unsigned_sad_costs[0] = uvg_reg_sad(preds[0], orig_block, width, height, width, LCU_WIDTH); + unsigned_sad_costs[1] = uvg_reg_sad(preds[1], orig_block, width, height, width, LCU_WIDTH); + } costs_out[0] = (double)MIN(satd_costs[0], unsigned_sad_costs[0] * 2); costs_out[1] = (double)MIN(satd_costs[1], unsigned_sad_costs[1] * 2); @@ -651,7 +665,7 @@ static int search_intra_chroma_rough( uvg_pixel _orig_block[32 * 32 + SIMD_ALIGNMENT]; uvg_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); - uvg_pixels_blit(orig_u, orig_block, width, width, LCU_WIDTH_C, width); + uvg_pixels_blit(orig_u, orig_block, width, height, LCU_WIDTH_C, width); int modes_count = (state->encoder_control->cfg.cclm ? 
8 : 5); for (int i = 0; i < modes_count; ++i) { const int8_t mode_chroma = chroma_data[i].pred_cu.intra.mode_chroma; @@ -671,7 +685,7 @@ static int search_intra_chroma_rough( } } - uvg_pixels_blit(orig_v, orig_block, width, width, LCU_WIDTH_C, width); + uvg_pixels_blit(orig_v, orig_block, width, height, LCU_WIDTH_C, width); for (int i = 0; i < modes_count; ++i) { const int8_t mode_chroma = chroma_data[i].pred_cu.intra.mode_chroma; if (mode_chroma == luma_mode || mode_chroma == 0 || mode_chroma >= 81) continue; @@ -764,7 +778,7 @@ static int16_t search_intra_rough( uvg_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); // Store original block for SAD computation - uvg_pixels_blit(orig, orig_block, width, width, origstride, width); + uvg_pixels_blit(orig, orig_block, width, height, origstride, width); int8_t modes_selected = 0; // Note: get_cost and get_cost_dual may return negative costs. @@ -783,7 +797,7 @@ static int16_t search_intra_rough( // Calculate SAD for evenly spaced modes to select the starting point for // the recursive search. - cu_loc_t loc = { 0, 0, width, width, width, width }; + cu_loc_t loc = { 0, 0, width, height, width, height }; intra_search_data_t search_proxy; FILL(search_proxy, 0); search_proxy.pred_cu = *pred_cu; @@ -963,19 +977,19 @@ static uint8_t search_intra_rough( uvg_pixel *orig, int32_t origstride, uvg_intra_references *refs, - int log2_width, + int width, + int height, int8_t *intra_preds, intra_search_data_t* modes_out, cu_info_t* const pred_cu, uint8_t mip_ctx) { #define PARALLEL_BLKS 2 // TODO: use 4 for AVX-512 in the future? 
- assert(log2_width >= 2 && log2_width <= 5); - int_fast8_t width = 1 << log2_width; + assert(width >= 4 && width <= 32); // cost_pixel_nxn_func *satd_func = kvz_pixels_get_satd_func(width); // cost_pixel_nxn_func *sad_func = kvz_pixels_get_sad_func(width); - cost_pixel_nxn_multi_func *satd_dual_func = uvg_pixels_get_satd_dual_func(width); - cost_pixel_nxn_multi_func *sad_dual_func = uvg_pixels_get_sad_dual_func(width); + cost_pixel_nxn_multi_func *satd_dual_func = uvg_pixels_get_satd_dual_func(width, height); + cost_pixel_nxn_multi_func *sad_dual_func = uvg_pixels_get_sad_dual_func(width, height); bool mode_checked[UVG_NUM_INTRA_MODES] = {0}; double costs[UVG_NUM_INTRA_MODES]; @@ -990,7 +1004,7 @@ static uint8_t search_intra_rough( uvg_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); // Store original block for SAD computation - uvg_pixels_blit(orig, orig_block, width, width, origstride, width); + uvg_pixels_blit(orig, orig_block, width, height, origstride, width); int8_t modes_selected = 0; // Note: get_cost and get_cost_dual may return negative costs. @@ -1016,17 +1030,16 @@ static uint8_t search_intra_rough( // Calculate SAD for evenly spaced modes to select the starting point for // the recursive search. 
- cu_loc_t loc = { 0, 0, width, width, width, width }; intra_search_data_t search_proxy; FILL(search_proxy, 0); search_proxy.pred_cu = *pred_cu; int offset = 4; search_proxy.pred_cu.intra.mode = 0; - uvg_intra_predict(state, refs, &loc, COLOR_Y, preds[0], &search_proxy, NULL, UVG_LUMA_T); + uvg_intra_predict(state, refs, cu_loc, COLOR_Y, preds[0], &search_proxy, NULL, UVG_LUMA_T); search_proxy.pred_cu.intra.mode = 1; - uvg_intra_predict(state, refs, &loc, COLOR_Y, preds[1], &search_proxy, NULL, UVG_LUMA_T); - get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, costs); + uvg_intra_predict(state, refs, cu_loc, COLOR_Y, preds[1], &search_proxy, NULL, UVG_LUMA_T); + get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, height, costs); mode_checked[0] = true; mode_checked[1] = true; costs[0] += count_bits( @@ -1075,12 +1088,12 @@ static uint8_t search_intra_rough( for (int i = 0; i < PARALLEL_BLKS; ++i) { if (mode + i * offset <= 66) { search_proxy.pred_cu.intra.mode = mode + i*offset; - uvg_intra_predict(state, refs, &loc, COLOR_Y, preds[i], &search_proxy, NULL, UVG_LUMA_T); + uvg_intra_predict(state, refs, cu_loc, COLOR_Y, preds[i], &search_proxy, NULL, UVG_LUMA_T); } } //TODO: add generic version of get cost multi - get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, costs_out); + get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, height, costs_out); for (int i = 0; i < PARALLEL_BLKS; ++i) { if (mode + i * offset <= 66) { costs_out[i] += count_bits( @@ -1147,12 +1160,12 @@ static uint8_t search_intra_rough( for (int block = 0; block < PARALLEL_BLKS; ++block) { search_proxy.pred_cu.intra.mode = modes_to_check[block + i]; - uvg_intra_predict(state, refs, &loc, COLOR_Y, preds[block], &search_proxy, NULL, UVG_LUMA_T); + uvg_intra_predict(state, refs, cu_loc, COLOR_Y, preds[block], &search_proxy, NULL, UVG_LUMA_T); } //TODO: add generic version of get cost multi - 
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, costs_out); + get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, height, costs_out); for (int block = 0; block < PARALLEL_BLKS; ++block) { costs_out[block] += count_bits( state, @@ -1219,12 +1232,9 @@ static void get_rough_cost_for_2n_modes( const int height = cu_loc->height; cost_pixel_nxn_multi_func* satd_dual_func; cost_pixel_nxn_multi_func* sad_dual_func; - if (width == height) { - satd_dual_func = uvg_pixels_get_satd_dual_func(width); - sad_dual_func = uvg_pixels_get_sad_dual_func(width); - } else { - assert(false && "Joose promised to fix this."); - } + satd_dual_func = uvg_pixels_get_satd_dual_func(width, height); + sad_dual_func = uvg_pixels_get_sad_dual_func(width, height); + uvg_pixel _preds[PARALLEL_BLKS * MIN(LCU_WIDTH, 64)* MIN(LCU_WIDTH, 64)+ SIMD_ALIGNMENT]; pred_buffer preds = ALIGNED_POINTER(_preds, SIMD_ALIGNMENT); @@ -1232,7 +1242,7 @@ static void get_rough_cost_for_2n_modes( uvg_pixel _orig_block[MIN(LCU_WIDTH, 64) * MIN(LCU_WIDTH, 64) + SIMD_ALIGNMENT]; uvg_pixel* orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); - uvg_pixels_blit(orig, orig_block, width, width, orig_stride, width); + uvg_pixels_blit(orig, orig_block, width, height, orig_stride, width); const double mrl = state->encoder_control->cfg.mrl && (cu_loc->y % LCU_WIDTH) ? CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.multi_ref_line[0]), 1) : 0; const double not_mip = state->encoder_control->cfg.mip ? 
CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.mip_flag[mip_ctx]), 0) : 0; @@ -1243,7 +1253,7 @@ static void get_rough_cost_for_2n_modes( for (int i = 0; i < PARALLEL_BLKS; ++i) { uvg_intra_predict(state, &refs[search_data[mode + i].pred_cu.intra.multi_ref_idx], cu_loc, COLOR_Y, preds[i], &search_data[mode + i], NULL, UVG_LUMA_T); } - get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, costs_out); + get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, height, costs_out); for(int i = 0; i < PARALLEL_BLKS; ++i) { uint8_t multi_ref_idx = search_data[mode + i].pred_cu.intra.multi_ref_idx; @@ -1796,16 +1806,17 @@ void uvg_search_cu_intra( bool skip_rough_search = (is_large || state->encoder_control->cfg.rdo >= 4); if (!skip_rough_search) { num_regular_modes = number_of_modes = search_intra_rough( - state, - cu_loc, - ref_pixels, - LCU_WIDTH, - refs, - log2_width, - candidate_modes, - search_data, - &temp_pred_cu, - mip_ctx); + state, + cu_loc, + ref_pixels, + LCU_WIDTH, + refs, + cu_loc->width, + cu_loc->height, + candidate_modes, + search_data, + &temp_pred_cu, + mip_ctx); // if(lines == 1) sort_modes(search_data, number_of_modes); } else { diff --git a/src/strategies/avx2/quant-avx2.c b/src/strategies/avx2/quant-avx2.c index 8c7b1c36..2d45166c 100644 --- a/src/strategies/avx2/quant-avx2.c +++ b/src/strategies/avx2/quant-avx2.c @@ -720,7 +720,7 @@ int uvg_quantize_residual_avx2(encoder_state_t *const state, } // Check if there are any non-zero coefficients. - for (int i = 0; i < width * width; i += 8) { + for (int i = 0; i < width * height; i += 8) { __m128i v_quant_coeff = _mm_loadu_si128((__m128i*)&(coeff_out[i])); has_coeffs = !_mm_testz_si128(_mm_set1_epi8(0xFF), v_quant_coeff); if(has_coeffs) break; @@ -730,7 +730,7 @@ int uvg_quantize_residual_avx2(encoder_state_t *const state, // rec_out. if (has_coeffs && !early_skip) { // Get quantized residual. 
(coeff_out -> coeff -> residual) - uvg_dequant(state, coeff_out, coeff, width, width, color, + uvg_dequant(state, coeff_out, coeff, width, height, color, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && color == COLOR_Y); if (state->encoder_control->cfg.lfnst && cur_cu->type == CU_INTRA) { diff --git a/src/strategies/generic/quant-generic.c b/src/strategies/generic/quant-generic.c index 81927486..eed95e59 100644 --- a/src/strategies/generic/quant-generic.c +++ b/src/strategies/generic/quant-generic.c @@ -315,22 +315,22 @@ int uvg_quant_cbcr_residual_generic( if (state->encoder_control->cfg.rdoq_enable && (width > 4 || !state->encoder_control->cfg.rdoq_skip)) { - uvg_rdoq(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, + uvg_rdoq(state, coeff, coeff_out, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, scan_order, cur_cu->type, cur_cu->cbf, cur_cu->cr_lfnst_idx); } else if (state->encoder_control->cfg.rdoq_enable && false) { - uvg_ts_rdoq(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 2 ? COLOR_V : COLOR_U, + uvg_ts_rdoq(state, coeff, coeff_out, width, height, cur_cu->joint_cb_cr == 2 ? COLOR_V : COLOR_U, scan_order); } else { - uvg_quant(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, + uvg_quant(state, coeff, coeff_out, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false, cur_cu->lfnst_idx); } int8_t has_coeffs = 0; { int i; - for (i = 0; i < width * width; ++i) { + for (i = 0; i < width * height; ++i) { if (coeff_out[i] != 0) { has_coeffs = 1; break; @@ -341,10 +341,10 @@ int uvg_quant_cbcr_residual_generic( if (has_coeffs && !early_skip) { // Get quantized residual. (coeff_out -> coeff -> residual) - uvg_dequant(state, coeff_out, coeff, width, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, + uvg_dequant(state, coeff_out, coeff, width, height, cur_cu->joint_cb_cr == 1 ? 
COLOR_V : COLOR_U, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false); if (cur_cu->cr_lfnst_idx) { - uvg_inv_lfnst(cur_cu, width, width, COLOR_UV, cur_cu->cr_lfnst_idx, coeff, tree_type); + uvg_inv_lfnst(cur_cu, width, height, COLOR_UV, cur_cu->cr_lfnst_idx, coeff, tree_type); } uvg_itransform2d(state->encoder_control, combined_residual, coeff, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, cur_cu); diff --git a/src/strategies/strategies-picture.c b/src/strategies/strategies-picture.c index 8ff49246..5aae717d 100644 --- a/src/strategies/strategies-picture.c +++ b/src/strategies/strategies-picture.c @@ -108,101 +108,114 @@ int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth) { /** * \brief Get a function that calculates SATD for NxN block. * -* \param n Width of the region for which SATD is calculated. +* \param width Width of the region for which SATD is calculated. * * \returns Pointer to cost_16bit_nxn_func. */ -cost_pixel_nxn_func * uvg_pixels_get_satd_func(unsigned n) +cost_pixel_nxn_func * uvg_pixels_get_satd_func(unsigned width, unsigned height) { - switch (n) { - case 4: - return uvg_satd_4x4; - case 8: - return uvg_satd_8x8; - case 16: - return uvg_satd_16x16; - case 32: - return uvg_satd_32x32; - case 64: - return uvg_satd_64x64; - default: - return NULL; + if(width == height) { + switch (width) { + case 4: + return uvg_satd_4x4; + case 8: + return uvg_satd_8x8; + case 16: + return uvg_satd_16x16; + case 32: + return uvg_satd_32x32; + case 64: + return uvg_satd_64x64; + default: + return NULL; + } } + return NULL; } /** * \brief Get a function that calculates SAD for NxN block. * -* \param n Width of the region for which SAD is calculated. +* \param width Width of the region for which SAD is calculated. * * \returns Pointer to cost_16bit_nxn_func. 
*/ -cost_pixel_nxn_func * uvg_pixels_get_sad_func(unsigned n) +cost_pixel_nxn_func * uvg_pixels_get_sad_func(unsigned width, unsigned height) { - switch (n) { - case 4: - return uvg_sad_4x4; - case 8: - return uvg_sad_8x8; - case 16: - return uvg_sad_16x16; - case 32: - return uvg_sad_32x32; - case 64: - return uvg_sad_64x64; - default: - return NULL; + if(width == height) { + switch (width) { + case 4: + return uvg_sad_4x4; + case 8: + return uvg_sad_8x8; + case 16: + return uvg_sad_16x16; + case 32: + return uvg_sad_32x32; + case 64: + return uvg_sad_64x64; + default: + return NULL; + } } + return NULL; } /** * \brief Get a function that calculates SATDs for 2 NxN blocks. * -* \param n Width of the region for which SATD is calculated. +* \param width Width of the region for which SATD is calculated. +* \param height Height of the region for which SATD is calculated. * * \returns Pointer to cost_pixel_nxn_multi_func. */ -cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned n) +cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned width, unsigned height) { - switch (n) { - case 4: - return uvg_satd_4x4_dual; - case 8: - return uvg_satd_8x8_dual; - case 16: - return uvg_satd_16x16_dual; - case 32: - return uvg_satd_32x32_dual; - case 64: - return uvg_satd_64x64_dual; - default: - return NULL; + if(width == height) { + switch (width) { + case 4: + return uvg_satd_4x4_dual; + case 8: + return uvg_satd_8x8_dual; + case 16: + return uvg_satd_16x16_dual; + case 32: + return uvg_satd_32x32_dual; + case 64: + return uvg_satd_64x64_dual; + default: + return NULL; + } } + return NULL; } /** * \brief Get a function that calculates SADs for 2 NxN blocks. * -* \param n Width of the region for which SAD is calculated. +* \param width Width of the region for which SAD is calculated. * * \returns Pointer to cost_pixel_nxn_multi_func. 
*/ -cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n) +cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned width, unsigned height) { - switch (n) { - case 4: - return uvg_sad_4x4_dual; - case 8: - return uvg_sad_8x8_dual; - case 16: - return uvg_sad_16x16_dual; - case 32: - return uvg_sad_32x32_dual; - case 64: - return uvg_sad_64x64_dual; - default: - return NULL; + if(width == height) { + switch (width) { + case 4: + return uvg_sad_4x4_dual; + case 8: + return uvg_sad_8x8_dual; + case 16: + return uvg_sad_16x16_dual; + case 32: + return uvg_sad_32x32_dual; + case 64: + return uvg_sad_64x64_dual; + default: + return NULL; + } } + return NULL; } diff --git a/src/strategies/strategies-picture.h b/src/strategies/strategies-picture.h index 825ffe8e..29735cde 100644 --- a/src/strategies/strategies-picture.h +++ b/src/strategies/strategies-picture.h @@ -194,8 +194,8 @@ extern pixel_var_func *uvg_pixel_var; extern generate_residual_func* uvg_generate_residual; int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth); -cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned n); -cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n); +cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned width, unsigned height); +cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned width, unsigned height); #define STRATEGIES_PICTURE_EXPORTS \ {"reg_sad", (void**) &uvg_reg_sad}, \ diff --git a/src/transform.c b/src/transform.c index fbd6afeb..0169a0ff 100644 --- a/src/transform.c +++ b/src/transform.c @@ -584,7 +584,7 @@ void uvg_chroma_transform_search( if (is_jccr && !u_has_coeffs) continue; if (u_has_coeffs) { - uvg_dequant(state, u_quant_coeff, &u_coeff[i * trans_offset], width, width, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V, + uvg_dequant(state, u_quant_coeff, &u_coeff[i * trans_offset], width, height, transforms[i] != JCCR_1 ? 
COLOR_U : COLOR_V, pred_cu->type, transforms[i] == CHROMA_TS); if (transforms[i] != CHROMA_TS) { diff --git a/src/uvg266.h b/src/uvg266.h index 92e166dd..17782039 100644 --- a/src/uvg266.h +++ b/src/uvg266.h @@ -542,6 +542,14 @@ typedef struct uvg_config char* cabac_debug_file_name; uint8_t dual_tree; + + uint8_t min_qt_size[3]; + uint8_t max_bt_size[3]; + uint8_t max_tt_size[3]; + + uint8_t max_intra_slice_btt_depth; + uint8_t max_intra_slice_btt_depth_chroma; + uint8_t max_inter_slice_btt_depth; } uvg_config; /** diff --git a/tools/generate_tables.c b/tools/generate_tables.c index d50c889f..6bd2497e 100644 --- a/tools/generate_tables.c +++ b/tools/generate_tables.c @@ -51,7 +51,7 @@ static void init_sig_last_scan(uint32_t *buff_d, uint32_t *buff_h, uint32_t *buff_v, int32_t width, int32_t height) { - uint32_t num_scan_pos = width * width; + uint32_t num_scan_pos = width * height; uint32_t next_scan_pos = 0; int32_t xx, yy, x, y; uint32_t scan_line;