[mtt] WIP

This commit is contained in:
Joose Sainio 2022-11-15 15:31:44 +02:00
parent 8a86c8fe3c
commit 540cf1fa41
17 changed files with 224 additions and 214 deletions

View file

@ -317,7 +317,10 @@ void uvg_cu_loc_ctor(cu_loc_t* loc, int x, int y, int width, int height)
}
void uvg_get_split_locs(const cu_loc_t* const origin, enum split_type split, cu_loc_t out[4])
int uvg_get_split_locs(
const cu_loc_t* const origin,
enum split_type split,
cu_loc_t out[4])
{
const int half_width = origin->width >> 1;
const int half_height = origin->height >> 1;
@ -333,24 +336,25 @@ void uvg_get_split_locs(const cu_loc_t* const origin, enum split_type split, cu_
uvg_cu_loc_ctor(&out[1], origin->x + half_width, origin->y, half_width, half_height);
uvg_cu_loc_ctor(&out[2], origin->x, origin->y + half_height, half_width, half_height);
uvg_cu_loc_ctor(&out[3], origin->x + half_width, origin->y + half_height, half_width, half_height);
break;
return 4;
case BT_HOR_SPLIT:
uvg_cu_loc_ctor(&out[0], origin->x, origin->y, origin->width, half_height);
uvg_cu_loc_ctor(&out[1], origin->x, origin->y + half_height, origin->width, half_height);
break;
return 2;
case BT_VER_SPLIT:
uvg_cu_loc_ctor(&out[0], origin->x, origin->y, half_width, origin->height);
uvg_cu_loc_ctor(&out[1], origin->x + half_width, origin->y, half_width, origin->height);
break;
return 2;
case TT_HOR_SPLIT:
uvg_cu_loc_ctor(&out[0], origin->x, origin->y, origin->width, quarter_height);
uvg_cu_loc_ctor(&out[1], origin->x, origin->y + quarter_height, origin->width, half_height);
uvg_cu_loc_ctor(&out[2], origin->x, origin->y + quarter_height + half_height, origin->width, quarter_height);
break;
return 3;
case TT_VER_SPLIT:
uvg_cu_loc_ctor(&out[0], origin->x, origin->y, quarter_width, origin->height);
uvg_cu_loc_ctor(&out[1], origin->x + quarter_width, origin->y, half_width, origin->height);
uvg_cu_loc_ctor(&out[2], origin->x + quarter_width + half_width, origin->y, quarter_width, origin->height);
break;
return 3;
}
return 0;
}

View file

@ -184,7 +184,10 @@ typedef struct {
void uvg_cu_loc_ctor(cu_loc_t *loc, int x, int y, int width, int height);
void uvg_get_split_locs(const cu_loc_t* const origin, enum split_type split, cu_loc_t out[4]);
int uvg_get_split_locs(
const cu_loc_t* const origin,
enum split_type split,
cu_loc_t out[4]);
#define CU_GET_MV_CAND(cu_info_ptr, reflist) \
(((reflist) == 0) ? (cu_info_ptr)->inter.mv_cand0 : (cu_info_ptr)->inter.mv_cand1)

View file

@ -543,7 +543,7 @@ static void encode_transform_unit(
if(state->encoder_control->cfg.trskip_enable && width <= (1 << state->encoder_control->cfg.trskip_max_size) && !(cur_pu->type == CU_INTRA && cur_pu->intra.isp_mode != ISP_MODE_NO_ISP)) {
cabac->cur_ctx = &cabac->ctx.transform_skip_model_luma;
CABAC_BIN(cabac, cur_pu->tr_idx == MTS_SKIP, "transform_skip_flag");
DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_TR_SKIP, x, y, width, width, (cur_pu->tr_idx == MTS_SKIP) ? 1 : 0);
DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_TR_SKIP, x, y, width, height, (cur_pu->tr_idx == MTS_SKIP) ? 1 : 0);
}
if(cur_pu->tr_idx == MTS_SKIP) {
uvg_encode_ts_residual(state, cabac, coeff_y, width, height, 0, scan_idx, NULL);
@ -1040,7 +1040,7 @@ void uvg_encode_intra_luma_coding_unit(
int multi_ref_idx = enable_mrl ? cur_cu->intra.multi_ref_idx : 0;
#ifdef UVG_DEBUG_PRINT_YUVIEW_CSV
if(multi_ref_idx) DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_MRL, x, y, width, width, multi_ref_idx);
if(multi_ref_idx) DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_MRL, x, y, width, height, multi_ref_idx);
#endif
if (cur_cu->type == CU_INTRA && (y % LCU_WIDTH) != 0 && !cur_cu->bdpcmMode && enable_mrl && !mip_flag) {
@ -1222,7 +1222,7 @@ bool uvg_write_split_flag(
no_split = allow_qt = bh_split = bv_split = th_split = tv_split = true;
if (depth > MAX_DEPTH) allow_qt = false;
// ToDo: update this when btt is actually used
bool allow_btt = false;// when mt_depth < MAX_BT_DEPTH
bool allow_btt = true;// when mt_depth < MAX_BT_DEPTH
const int cu_width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width;
const int cu_height = tree_type != UVG_CHROMA_T ? cu_loc->height : cu_loc->chroma_height;
@ -1289,7 +1289,7 @@ bool uvg_write_split_flag(
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.split_flag_model[split_model]), split_flag != 0, bits, "split_flag");
}
bool qt_split = split_flag == UVG_QUAD_SPLIT;
bool qt_split = split_flag == QT_SPLIT;
if (!(implicit_split_mode == UVG_NO_SPLIT) && (allow_qt && allow_btt)) {
split_model = (left_cu && GET_SPLITDATA(left_cu, depth)) + (above_cu && GET_SPLITDATA(above_cu, depth)) + (depth < 2 ? 0 : 3);
@ -1384,28 +1384,16 @@ void uvg_encode_coding_tree(
const int half_luma = cu_loc->width / 2;
split_tree_t new_split_tree = { cur_cu->split_tree, split_tree.current_depth + 1 };
cu_loc_t new_cu_loc;
uvg_cu_loc_ctor(&new_cu_loc, x, y, half_luma, half_luma);
// Split blocks and remember to change x and y block positions
uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc, new_split_tree);
if (!border_x || border_split_x) {
uvg_cu_loc_ctor(&new_cu_loc, x + half_cu, y, half_luma, half_luma);
uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc, new_split_tree);
}
if (!border_y || border_split_y) {
uvg_cu_loc_ctor(&new_cu_loc, x, y + half_cu, half_luma, half_luma);
uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc, new_split_tree);
}
if (!border || (border_split_x && border_split_y)) {
uvg_cu_loc_ctor(&new_cu_loc, x + half_cu, y + half_cu, half_luma, half_luma);
uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc, new_split_tree);
cu_loc_t new_cu_loc[4];
const int splits = uvg_get_split_locs(cu_loc, split_flag, new_cu_loc);
for (int split = 0; split <splits; ++split) {
uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc[split], new_split_tree);
}
return;
}
}
DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_CU_TYPE, abs_x, abs_y, cu_width, cu_width, (cur_cu->type == CU_INTRA)?0:1);
DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_CU_TYPE, abs_x, abs_y, cu_width, cu_height, (cur_cu->type == CU_INTRA)?0:1);
if (ctrl->cfg.lossless) {
cabac->cur_ctx = &cabac->ctx.cu_transquant_bypass;
@ -1446,8 +1434,8 @@ void uvg_encode_coding_tree(
}
}
#ifdef UVG_DEBUG_PRINT_YUVIEW_CSV
if (cur_cu->inter.mv_dir & 1) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVSKIP_L0, abs_x, abs_y, cu_width, cu_width, cur_cu->inter.mv[0][0], cur_cu->inter.mv[0][1]);
if (cur_cu->inter.mv_dir & 2) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVSKIP_L1, abs_x, abs_y, cu_width, cu_width, cur_cu->inter.mv[1][0], cur_cu->inter.mv[1][1]);
if (cur_cu->inter.mv_dir & 1) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVSKIP_L0, abs_x, abs_y, cu_width, cu_height, cur_cu->inter.mv[0][0], cur_cu->inter.mv[0][1]);
if (cur_cu->inter.mv_dir & 2) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVSKIP_L1, abs_x, abs_y, cu_width, cu_height, cur_cu->inter.mv[1][0], cur_cu->inter.mv[1][1]);
#endif
goto end;

View file

@ -529,48 +529,31 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream,
WRITE_UE(stream, MIN_SIZE-2, "log2_min_luma_coding_block_size_minus2"); // Min size 2^3 = 8x8
// if(!no_partition_constraints_override_constraint_flag)
WRITE_U(stream, 0, 1, "partition_constraints_override_enabled_flag");
WRITE_UE(stream, 0, "sps_log2_diff_min_qt_min_cb_intra_slice_luma");
WRITE_UE(stream, 0, "sps_max_mtt_hierarchy_depth_intra_slice_luma");
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[0]] - MIN_SIZE, "sps_log2_diff_min_qt_min_cb_intra_slice_luma");
WRITE_UE(stream, encoder->cfg.max_intra_slice_btt_depth, "sps_max_mtt_hierarchy_depth_intra_slice_luma");
if (encoder->cfg.max_intra_slice_btt_depth) {
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[0]] - uvg_g_convert_to_log2[encoder->cfg.max_bt_size[0]], "sps_log2_diff_max_bt_min_qt_intra_slice_luma");
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[0]] - uvg_g_convert_to_log2[encoder->cfg.max_tt_size[0]], "sps_log2_diff_max_tt_min_qt_intra_slice_luma");
}
if (encoder->chroma_format != UVG_CSP_400)
{
WRITE_U(stream, encoder->cfg.dual_tree, 1, "qtbtt_dual_tree_intra_flag");
}
if (encoder->cfg.dual_tree) {
WRITE_UE(stream, 0, "sps_log2_diff_min_qt_min_cb_intra_slice_chroma");
WRITE_UE(stream, 0, "sps_max_mtt_hierarchy_depth_intra_slice_chroma");
if (0 /*sps_max_mtt_hierarchy_depth_intra_slice_chroma != 0*/) {
WRITE_UE(stream, 0, "sps_log2_diff_max_bt_min_qt_intra_slice_chroma");
WRITE_UE(stream, 0, "sps_log2_diff_max_tt_min_qt_intra_slice_chroma");
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[2]] - MIN_SIZE, "sps_log2_diff_min_qt_min_cb_intra_slice_chroma");
WRITE_UE(stream, encoder->cfg.max_intra_slice_btt_depth_chroma, "sps_max_mtt_hierarchy_depth_intra_slice_chroma");
if (encoder->cfg.max_intra_slice_btt_depth_chroma) {
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[2]] - uvg_g_convert_to_log2[encoder->cfg.max_bt_size[2]], "sps_log2_diff_max_bt_min_qt_intra_slice_chroma");
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[2]] - uvg_g_convert_to_log2[encoder->cfg.max_tt_size[2]], "sps_log2_diff_max_tt_min_qt_intra_slice_chroma");
}
}
WRITE_UE(stream, 0, "sps_log2_diff_min_qt_min_cb_inter_slice");
WRITE_UE(stream, 0, "sps_max_mtt_hierarchy_depth_inter_slice");
#if 0 // mtt depth intra
if (max_mtt_depth_intra != 0) {
WRITE_UE(stream, 0, "sps_log2_diff_max_bt_min_qt_intra_tile_group_luma");
WRITE_UE(stream, 0, "sps_log2_diff_max_tt_min_qt_intra_tile_group_luma");
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[1]] - MIN_SIZE, "sps_log2_diff_min_qt_min_cb_inter_slice");
WRITE_UE(stream, encoder->cfg.max_inter_slice_btt_depth, "sps_max_mtt_hierarchy_depth_inter_slice");
if (encoder->cfg.max_inter_slice_btt_depth != 0) {
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[1]] - uvg_g_convert_to_log2[encoder->cfg.max_bt_size[1]], "sps_log2_diff_max_bt_min_qt_inter_tile_group");
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[1]] - uvg_g_convert_to_log2[encoder->cfg.max_tt_size[1]], "sps_log2_diff_max_tt_min_qt_inter_tile_group");
}
#endif
#if 0 // mtt depth inter
if (max_mtt_depth_inter != 0) {
WRITE_UE(stream, 0, "sps_log2_diff_max_bt_min_qt_inter_tile_group");
WRITE_UE(stream, 0, "sps_log2_diff_max_tt_min_qt_inter_tile_group");
}
#endif
#if 0 // Dual Tree
if (encoder->cfg.dual_i_tree) {
WRITE_UE(stream, 0, "sps_log2_diff_min_qt_min_cb_intra_tile_group_chroma");
WRITE_UE(stream, 0, "sps_max_mtt_hierarchy_depth_intra_tile_group_chroma");
if (max_mtt_depth_intra != 0) {
WRITE_UE(stream, 0, "sps_log2_diff_max_bt_min_qt_intra_tile_group_chroma");
WRITE_UE(stream, 0, "sps_log2_diff_max_tt_min_qt_intra_tile_group_chroma");
}
}
#endif
if (LCU_WIDTH > 32)
WRITE_U(stream, (TR_MAX_LOG2_SIZE - 5) ? 1 : 0, 1, "sps_max_luma_transform_size_64_flag");

View file

@ -129,7 +129,7 @@ typedef int16_t coeff_t;
typedef int16_t mv_t;
//#define VERBOSE 1
//#define UVG_DEBUG_PRINT_CABAC 1
#define UVG_DEBUG_PRINT_CABAC 1
//#define UVG_DEBUG 1
//#define UVG_DEBUG_PRINT_YUVIEW_CSV 1

View file

@ -1552,7 +1552,7 @@ void uvg_intra_predict(
}
}
else {
uvg_pixels_blit(&state->tile->frame->cclm_luma_rec[x / 2 + (y * stride) / 4], dst, width, width, stride / 2, width);
uvg_pixels_blit(&state->tile->frame->cclm_luma_rec[x / 2 + (y * stride) / 4], dst, width, height, stride / 2, width);
if (!PU_IS_TU(&data->pred_cu) || data->cclm_parameters[color == COLOR_U ? 0 : 1].b <= 0) {
predict_cclm(
state, color, width, height, x, y, stride, intra_mode, lcu, refs, dst,
@ -1560,7 +1560,7 @@ void uvg_intra_predict(
tree_type);
}
else {
linear_transform_cclm(&data->cclm_parameters[color == COLOR_U ? 0 : 1], dst, dst, width, width);
linear_transform_cclm(&data->cclm_parameters[color == COLOR_U ? 0 : 1], dst, dst, width, height);
}
}
}

View file

@ -447,7 +447,7 @@ double uvg_get_coeff_cost(
} else {
double ccc = get_coeff_cabac_cost(state, coeff_ptr, cu_loc, color, scan_mode, tr_skip, cur_tu);
if (save_cccs) {
save_ccc(state->qp, coeff, width * width, ccc);
save_ccc(state->qp, coeff, width * height, ccc);
}
return ccc;
}
@ -1474,10 +1474,13 @@ void uvg_rdoq(
// Hope the compiler is able to utilize this information.
switch (cg_num) {
case 1: FILL_ARRAY(sig_coeffgroup_flag, 0, 1); break;
case 2: FILL_ARRAY(sig_coeffgroup_flag, 0, 2); break;
case 4: FILL_ARRAY(sig_coeffgroup_flag, 0, 4); break;
case 8: FILL_ARRAY(sig_coeffgroup_flag, 0, 8); break;
case 16: FILL_ARRAY(sig_coeffgroup_flag, 0, 16); break;
case 32: FILL_ARRAY(sig_coeffgroup_flag, 0, 32); break;
case 64: FILL_ARRAY(sig_coeffgroup_flag, 0, 64); break;
default: assert(0 && "There should be 1, 4, 16 or 64 coefficient groups");
default: assert(0 && "There should be 1, 2, 4, 8, 16, 32 or 64 coefficient groups");
}
cabac_ctx_t *base_coeff_group_ctx = &(cabac->ctx.sig_coeff_group_model[color ? 2 : 0]);

View file

@ -252,15 +252,16 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in
}
static void lcu_fill_cbf(lcu_t *lcu, int x_local, unsigned y_local, unsigned width, const cu_info_t *cur_cu)
static void lcu_fill_cbf(lcu_t *lcu, int x_local, unsigned y_local, unsigned width, unsigned height, const cu_info_t *cur_cu)
{
const uint32_t mask = ~((MIN(width, TR_MAX_WIDTH))-1);
const uint32_t x_mask = ~((MIN(width, TR_MAX_WIDTH))-1);
const uint32_t y_mask = ~((MIN(height, TR_MAX_WIDTH))-1);
// Set coeff flags in every CU covered by part_mode in this depth.
for (uint32_t y = y_local; y < y_local + width; y += SCU_WIDTH) {
for (uint32_t y = y_local; y < y_local + height; y += SCU_WIDTH) {
for (uint32_t x = x_local; x < x_local + width; x += SCU_WIDTH) {
// Use TU top-left CU to propagate coeff flags
cu_info_t *cu_from = LCU_GET_CU_AT_PX(lcu, x & mask, y & mask);
cu_info_t *cu_from = LCU_GET_CU_AT_PX(lcu, x & x_mask, y & y_mask);
cu_info_t *cu_to = LCU_GET_CU_AT_PX(lcu, x, y);
if (cu_from != cu_to) {
// Chroma and luma coeff data is needed for deblocking
@ -942,6 +943,7 @@ static double search_cu(
const int x = cu_loc->x;
const int y = cu_loc->y;
const int luma_width = cu_loc->width;
const int luma_height = cu_loc->height;
assert(cu_width >= 4);
double cost = MAX_DOUBLE;
double inter_zero_coeff_cost = MAX_DOUBLE;
@ -1005,7 +1007,7 @@ static double search_cu(
// If the CU is completely inside the frame at this depth, search for
// prediction modes at this depth.
if ( x + luma_width <= frame_width && y + luma_width <= frame_height)
if ( x + luma_width <= frame_width && y + luma_height <= frame_height)
{
int cu_width_inter_min = LCU_WIDTH >> pu_depth_inter.max;
bool can_use_inter =
@ -1018,7 +1020,7 @@ static double search_cu(
// otherwise forbid it.
(x & ~(cu_width_inter_min - 1)) + cu_width_inter_min > frame_width ||
(y & ~(cu_width_inter_min - 1)) + cu_width_inter_min > frame_height
);
) && cu_loc->width == cu_loc->height; // Don't allow non square inter CUs for now
if (can_use_inter) {
double mode_cost;
@ -1148,7 +1150,7 @@ static double search_cu(
if ((cur_cu->log2_height + cur_cu->log2_width < 6) || state->encoder_control->chroma_format == UVG_CSP_400 || tree_type == UVG_LUMA_T) {
recon_chroma = false;
}
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
uvg_intra_recon_cu(state,
&intra_search, cu_loc,
NULL, lcu,
@ -1195,7 +1197,7 @@ static double search_cu(
if(cbf_cr) cbf_set(&split_cu->cbf, COLOR_V);
split_cu->joint_cb_cr = jccr;
}
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
} else if (cur_cu->type == CU_INTER) {
@ -1238,7 +1240,7 @@ static double search_cu(
}
}
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
lcu_fill_cbf(lcu, x_local, y_local, cu_width, cur_cu);
lcu_fill_cbf(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
}
}
@ -1276,7 +1278,7 @@ static double search_cu(
// if (cur_cu->merged) {
// cur_cu->merged = 0;
// cur_cu->skipped = 1;
// lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
// lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
// }
// cur_cu->cbf = 0;
@ -1300,9 +1302,9 @@ static double search_cu(
// Recursively split all the way to max search depth.
if (can_split_cu) {
const split_tree_t new_split = { split_tree.split_tree | QT_SPLIT << (split_tree.current_depth * 3), split_tree.current_depth + 1 };
const int split_type = depth == 0 ? QT_SPLIT : BT_HOR_SPLIT;
const split_tree_t new_split = { split_tree.split_tree | split_type << (split_tree.current_depth * 3), split_tree.current_depth + 1 };
int half_cu = cu_width >> (tree_type != UVG_CHROMA_T);
double split_cost = 0.0;
int cbf = cbf_is_set_any(cur_cu->cbf);
cabac_data_t post_seach_cabac;
@ -1357,19 +1359,14 @@ static double search_cu(
if (cur_cu->type == CU_NOTSET || cbf || state->encoder_control->cfg.cu_split_termination == UVG_CU_SPLIT_TERMINATION_OFF) {
initialize_partial_work_tree(lcu, &split_lcu, cu_loc, tree_type);
cu_loc_t new_cu_loc[4];
uvg_get_split_locs(cu_loc, QT_SPLIT, new_cu_loc);
const int splits = uvg_get_split_locs(cu_loc, split_type, new_cu_loc);
for (int split = 0; split < splits; ++split) {
split_cost += search_cu(state, &new_cu_loc[split], &split_lcu, tree_type, new_split);
if (split_cost < cost) {
split_cost += search_cu(state, &new_cu_loc[0], &split_lcu, tree_type, new_split);
break;
}
if (split_cost < cost) {
split_cost += search_cu(state, &new_cu_loc[1], &split_lcu, tree_type, new_split);
}
if (split_cost < cost) {
split_cost += search_cu(state, &new_cu_loc[2], &split_lcu, tree_type, new_split);
}
if (split_cost < cost) {
split_cost += search_cu(state, &new_cu_loc[3], &split_lcu, tree_type, new_split);
}
} else {
split_cost = INT_MAX;
}
@ -1410,7 +1407,7 @@ static double search_cu(
cur_cu->lfnst_idx = 0;
cur_cu->cr_lfnst_idx = 0;
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
intra_search_data_t proxy;
FILL(proxy, 0);
@ -1453,7 +1450,7 @@ static double search_cu(
// Reset HMVP to the beginning of this CU level search and add this CU as the mvp
memcpy(&state->tile->frame->hmvp_lut[ctu_row_mul_five], hmvp_lut, sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
state->tile->frame->hmvp_size[ctu_row] = hmvp_lut_size;
uvg_hmvp_add_mv(state, x, y, cu_width, cu_width, cur_cu);
uvg_hmvp_add_mv(state, x, y, cu_width, cu_height, cur_cu);
}
}
else {
@ -1474,7 +1471,7 @@ static double search_cu(
// Reset HMVP to the beginning of this CU level search and add this CU as the mvp
memcpy(&state->tile->frame->hmvp_lut[ctu_row_mul_five], hmvp_lut, sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
state->tile->frame->hmvp_size[ctu_row] = hmvp_lut_size;
uvg_hmvp_add_mv(state, x, y, cu_width, cu_width, cur_cu);
uvg_hmvp_add_mv(state, x, y, cu_width, cu_height, cur_cu);
}
}

View file

@ -2138,8 +2138,8 @@ void uvg_cu_cost_inter_rd2(
UVG_BOTH_T);
ALIGNED(64) uvg_pixel u_pred[LCU_WIDTH_C * LCU_WIDTH_C];
ALIGNED(64) uvg_pixel v_pred[LCU_WIDTH_C * LCU_WIDTH_C];
uvg_pixels_blit(&lcu->ref.u[index], u_pred, width, width, LCU_WIDTH_C, width);
uvg_pixels_blit(&lcu->ref.v[index], v_pred, width, width, LCU_WIDTH_C, width);
uvg_pixels_blit(&lcu->ref.u[index], u_pred, width, height, LCU_WIDTH_C, width);
uvg_pixels_blit(&lcu->ref.v[index], v_pred, width, height, LCU_WIDTH_C, width);
ALIGNED(64) int16_t u_resi[LCU_WIDTH_C * LCU_WIDTH_C];
ALIGNED(64) int16_t v_resi[LCU_WIDTH_C * LCU_WIDTH_C];

View file

@ -130,17 +130,31 @@ static INLINE uint8_t select_best_mode_index(const int8_t *modes, const double *
*
* \return
*/
static void get_cost_dual(encoder_state_t * const state,
const pred_buffer preds, const uvg_pixel *orig_block,
static void get_cost_dual(
encoder_state_t * const state,
const pred_buffer preds,
const uvg_pixel *orig_block,
cost_pixel_nxn_multi_func *satd_twin_func,
cost_pixel_nxn_multi_func *sad_twin_func,
int width, double *costs_out)
int width,
int height,
double *costs_out)
{
#define PARALLEL_BLKS 2
unsigned satd_costs[PARALLEL_BLKS] = { 0 };
if (satd_twin_func != NULL) {
satd_twin_func(preds, orig_block, PARALLEL_BLKS, satd_costs);
} else {
satd_costs[0] = uvg_satd_any_size(width, height, preds[0], width, orig_block, LCU_WIDTH);
satd_costs[1] = uvg_satd_any_size(width, height, preds[1], width, orig_block, LCU_WIDTH);
}
unsigned unsigned_sad_costs[PARALLEL_BLKS] = { 0 };
if (sad_twin_func != NULL) {
sad_twin_func(preds, orig_block, PARALLEL_BLKS, unsigned_sad_costs);
} else {
unsigned_sad_costs[0] = uvg_reg_sad(preds[0], orig_block, width, height, width, LCU_WIDTH);
unsigned_sad_costs[1] = uvg_reg_sad(preds[1], orig_block, width, height, width, LCU_WIDTH);
}
costs_out[0] = (double)MIN(satd_costs[0], unsigned_sad_costs[0] * 2);
costs_out[1] = (double)MIN(satd_costs[1], unsigned_sad_costs[1] * 2);
@ -651,7 +665,7 @@ static int search_intra_chroma_rough(
uvg_pixel _orig_block[32 * 32 + SIMD_ALIGNMENT];
uvg_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT);
uvg_pixels_blit(orig_u, orig_block, width, width, LCU_WIDTH_C, width);
uvg_pixels_blit(orig_u, orig_block, width, height, LCU_WIDTH_C, width);
int modes_count = (state->encoder_control->cfg.cclm ? 8 : 5);
for (int i = 0; i < modes_count; ++i) {
const int8_t mode_chroma = chroma_data[i].pred_cu.intra.mode_chroma;
@ -671,7 +685,7 @@ static int search_intra_chroma_rough(
}
}
uvg_pixels_blit(orig_v, orig_block, width, width, LCU_WIDTH_C, width);
uvg_pixels_blit(orig_v, orig_block, width, height, LCU_WIDTH_C, width);
for (int i = 0; i < modes_count; ++i) {
const int8_t mode_chroma = chroma_data[i].pred_cu.intra.mode_chroma;
if (mode_chroma == luma_mode || mode_chroma == 0 || mode_chroma >= 81) continue;
@ -764,7 +778,7 @@ static int16_t search_intra_rough(
uvg_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT);
// Store original block for SAD computation
uvg_pixels_blit(orig, orig_block, width, width, origstride, width);
uvg_pixels_blit(orig, orig_block, width, height, origstride, width);
int8_t modes_selected = 0;
// Note: get_cost and get_cost_dual may return negative costs.
@ -783,7 +797,7 @@ static int16_t search_intra_rough(
// Calculate SAD for evenly spaced modes to select the starting point for
// the recursive search.
cu_loc_t loc = { 0, 0, width, width, width, width };
cu_loc_t loc = { 0, 0, width, height, width, height };
intra_search_data_t search_proxy;
FILL(search_proxy, 0);
search_proxy.pred_cu = *pred_cu;
@ -963,19 +977,19 @@ static uint8_t search_intra_rough(
uvg_pixel *orig,
int32_t origstride,
uvg_intra_references *refs,
int log2_width,
int width,
int height,
int8_t *intra_preds,
intra_search_data_t* modes_out,
cu_info_t* const pred_cu,
uint8_t mip_ctx)
{
#define PARALLEL_BLKS 2 // TODO: use 4 for AVX-512 in the future?
assert(log2_width >= 2 && log2_width <= 5);
int_fast8_t width = 1 << log2_width;
assert(width >= 4 && width <= 32);
// cost_pixel_nxn_func *satd_func = kvz_pixels_get_satd_func(width);
// cost_pixel_nxn_func *sad_func = kvz_pixels_get_sad_func(width);
cost_pixel_nxn_multi_func *satd_dual_func = uvg_pixels_get_satd_dual_func(width);
cost_pixel_nxn_multi_func *sad_dual_func = uvg_pixels_get_sad_dual_func(width);
cost_pixel_nxn_multi_func *satd_dual_func = uvg_pixels_get_satd_dual_func(width, height);
cost_pixel_nxn_multi_func *sad_dual_func = uvg_pixels_get_sad_dual_func(width, height);
bool mode_checked[UVG_NUM_INTRA_MODES] = {0};
double costs[UVG_NUM_INTRA_MODES];
@ -990,7 +1004,7 @@ static uint8_t search_intra_rough(
uvg_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT);
// Store original block for SAD computation
uvg_pixels_blit(orig, orig_block, width, width, origstride, width);
uvg_pixels_blit(orig, orig_block, width, height, origstride, width);
int8_t modes_selected = 0;
// Note: get_cost and get_cost_dual may return negative costs.
@ -1016,17 +1030,16 @@ static uint8_t search_intra_rough(
// Calculate SAD for evenly spaced modes to select the starting point for
// the recursive search.
cu_loc_t loc = { 0, 0, width, width, width, width };
intra_search_data_t search_proxy;
FILL(search_proxy, 0);
search_proxy.pred_cu = *pred_cu;
int offset = 4;
search_proxy.pred_cu.intra.mode = 0;
uvg_intra_predict(state, refs, &loc, COLOR_Y, preds[0], &search_proxy, NULL, UVG_LUMA_T);
uvg_intra_predict(state, refs, cu_loc, COLOR_Y, preds[0], &search_proxy, NULL, UVG_LUMA_T);
search_proxy.pred_cu.intra.mode = 1;
uvg_intra_predict(state, refs, &loc, COLOR_Y, preds[1], &search_proxy, NULL, UVG_LUMA_T);
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, costs);
uvg_intra_predict(state, refs, cu_loc, COLOR_Y, preds[1], &search_proxy, NULL, UVG_LUMA_T);
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, height, costs);
mode_checked[0] = true;
mode_checked[1] = true;
costs[0] += count_bits(
@ -1075,12 +1088,12 @@ static uint8_t search_intra_rough(
for (int i = 0; i < PARALLEL_BLKS; ++i) {
if (mode + i * offset <= 66) {
search_proxy.pred_cu.intra.mode = mode + i*offset;
uvg_intra_predict(state, refs, &loc, COLOR_Y, preds[i], &search_proxy, NULL, UVG_LUMA_T);
uvg_intra_predict(state, refs, cu_loc, COLOR_Y, preds[i], &search_proxy, NULL, UVG_LUMA_T);
}
}
//TODO: add generic version of get cost multi
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, costs_out);
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, height, costs_out);
for (int i = 0; i < PARALLEL_BLKS; ++i) {
if (mode + i * offset <= 66) {
costs_out[i] += count_bits(
@ -1147,12 +1160,12 @@ static uint8_t search_intra_rough(
for (int block = 0; block < PARALLEL_BLKS; ++block) {
search_proxy.pred_cu.intra.mode = modes_to_check[block + i];
uvg_intra_predict(state, refs, &loc, COLOR_Y, preds[block], &search_proxy, NULL, UVG_LUMA_T);
uvg_intra_predict(state, refs, cu_loc, COLOR_Y, preds[block], &search_proxy, NULL, UVG_LUMA_T);
}
//TODO: add generic version of get cost multi
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, costs_out);
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, height, costs_out);
for (int block = 0; block < PARALLEL_BLKS; ++block) {
costs_out[block] += count_bits(
state,
@ -1219,12 +1232,9 @@ static void get_rough_cost_for_2n_modes(
const int height = cu_loc->height;
cost_pixel_nxn_multi_func* satd_dual_func;
cost_pixel_nxn_multi_func* sad_dual_func;
if (width == height) {
satd_dual_func = uvg_pixels_get_satd_dual_func(width);
sad_dual_func = uvg_pixels_get_sad_dual_func(width);
} else {
assert(false && "Joose promised to fix this.");
}
satd_dual_func = uvg_pixels_get_satd_dual_func(width, height);
sad_dual_func = uvg_pixels_get_sad_dual_func(width, height);
uvg_pixel _preds[PARALLEL_BLKS * MIN(LCU_WIDTH, 64)* MIN(LCU_WIDTH, 64)+ SIMD_ALIGNMENT];
pred_buffer preds = ALIGNED_POINTER(_preds, SIMD_ALIGNMENT);
@ -1232,7 +1242,7 @@ static void get_rough_cost_for_2n_modes(
uvg_pixel _orig_block[MIN(LCU_WIDTH, 64) * MIN(LCU_WIDTH, 64) + SIMD_ALIGNMENT];
uvg_pixel* orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT);
uvg_pixels_blit(orig, orig_block, width, width, orig_stride, width);
uvg_pixels_blit(orig, orig_block, width, height, orig_stride, width);
const double mrl = state->encoder_control->cfg.mrl && (cu_loc->y % LCU_WIDTH) ? CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.multi_ref_line[0]), 1) : 0;
const double not_mip = state->encoder_control->cfg.mip ? CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.mip_flag[mip_ctx]), 0) : 0;
@ -1243,7 +1253,7 @@ static void get_rough_cost_for_2n_modes(
for (int i = 0; i < PARALLEL_BLKS; ++i) {
uvg_intra_predict(state, &refs[search_data[mode + i].pred_cu.intra.multi_ref_idx], cu_loc, COLOR_Y, preds[i], &search_data[mode + i], NULL, UVG_LUMA_T);
}
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, costs_out);
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, height, costs_out);
for(int i = 0; i < PARALLEL_BLKS; ++i) {
uint8_t multi_ref_idx = search_data[mode + i].pred_cu.intra.multi_ref_idx;
@ -1801,7 +1811,8 @@ void uvg_search_cu_intra(
ref_pixels,
LCU_WIDTH,
refs,
log2_width,
cu_loc->width,
cu_loc->height,
candidate_modes,
search_data,
&temp_pred_cu,

View file

@ -720,7 +720,7 @@ int uvg_quantize_residual_avx2(encoder_state_t *const state,
}
// Check if there are any non-zero coefficients.
for (int i = 0; i < width * width; i += 8) {
for (int i = 0; i < width * height; i += 8) {
__m128i v_quant_coeff = _mm_loadu_si128((__m128i*)&(coeff_out[i]));
has_coeffs = !_mm_testz_si128(_mm_set1_epi8(0xFF), v_quant_coeff);
if(has_coeffs) break;
@ -730,7 +730,7 @@ int uvg_quantize_residual_avx2(encoder_state_t *const state,
// rec_out.
if (has_coeffs && !early_skip) {
// Get quantized residual. (coeff_out -> coeff -> residual)
uvg_dequant(state, coeff_out, coeff, width, width, color,
uvg_dequant(state, coeff_out, coeff, width, height, color,
cur_cu->type, cur_cu->tr_idx == MTS_SKIP && color == COLOR_Y);
if (state->encoder_control->cfg.lfnst && cur_cu->type == CU_INTRA) {

View file

@ -315,22 +315,22 @@ int uvg_quant_cbcr_residual_generic(
if (state->encoder_control->cfg.rdoq_enable &&
(width > 4 || !state->encoder_control->cfg.rdoq_skip))
{
uvg_rdoq(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
uvg_rdoq(state, coeff, coeff_out, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
scan_order, cur_cu->type, cur_cu->cbf, cur_cu->cr_lfnst_idx);
}
else if (state->encoder_control->cfg.rdoq_enable && false) {
uvg_ts_rdoq(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 2 ? COLOR_V : COLOR_U,
uvg_ts_rdoq(state, coeff, coeff_out, width, height, cur_cu->joint_cb_cr == 2 ? COLOR_V : COLOR_U,
scan_order);
}
else {
uvg_quant(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
uvg_quant(state, coeff, coeff_out, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false, cur_cu->lfnst_idx);
}
int8_t has_coeffs = 0;
{
int i;
for (i = 0; i < width * width; ++i) {
for (i = 0; i < width * height; ++i) {
if (coeff_out[i] != 0) {
has_coeffs = 1;
break;
@ -341,10 +341,10 @@ int uvg_quant_cbcr_residual_generic(
if (has_coeffs && !early_skip) {
// Get quantized residual. (coeff_out -> coeff -> residual)
uvg_dequant(state, coeff_out, coeff, width, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
uvg_dequant(state, coeff_out, coeff, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false);
if (cur_cu->cr_lfnst_idx) {
uvg_inv_lfnst(cur_cu, width, width, COLOR_UV, cur_cu->cr_lfnst_idx, coeff, tree_type);
uvg_inv_lfnst(cur_cu, width, height, COLOR_UV, cur_cu->cr_lfnst_idx, coeff, tree_type);
}
uvg_itransform2d(state->encoder_control, combined_residual, coeff, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, cur_cu);

View file

@ -108,13 +108,14 @@ int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth) {
/**
* \brief Get a function that calculates SATD for NxN block.
*
* \param n Width of the region for which SATD is calculated.
* \param width Width of the region for which SATD is calculated.
* \param height Height of the region for which SATD is calculated.
*
* \returns Pointer to cost_pixel_nxn_func, or NULL if there is no function for the given block size.
*/
cost_pixel_nxn_func * uvg_pixels_get_satd_func(unsigned n)
cost_pixel_nxn_func * uvg_pixels_get_satd_func(unsigned width, unsigned height)
{
switch (n) {
if(width == height) {
switch (width) {
case 4:
return uvg_satd_4x4;
case 8:
@ -128,19 +129,22 @@ cost_pixel_nxn_func * uvg_pixels_get_satd_func(unsigned n)
default:
return NULL;
}
}
return NULL;
}
/**
* \brief Get a function that calculates SAD for NxN block.
*
* \param n Width of the region for which SAD is calculated.
* \param width Width of the region for which SAD is calculated.
* \param height Height of the region for which SAD is calculated.
*
* \returns Pointer to cost_pixel_nxn_func, or NULL if there is no function for the given block size.
*/
cost_pixel_nxn_func * uvg_pixels_get_sad_func(unsigned n)
cost_pixel_nxn_func * uvg_pixels_get_sad_func(unsigned width, unsigned height)
{
switch (n) {
if(width == height) {
switch (width) {
case 4:
return uvg_sad_4x4;
case 8:
@ -154,18 +158,22 @@ cost_pixel_nxn_func * uvg_pixels_get_sad_func(unsigned n)
default:
return NULL;
}
}
return NULL;
}
/**
* \brief Get a function that calculates SATDs for 2 NxN blocks.
*
* \param n Width of the region for which SATD is calculated.
* \param width Width of the region for which SATD is calculated.
* \param height Height of the region for which SATD is calculated.
*
* \returns Pointer to cost_pixel_nxn_multi_func.
*/
cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned n)
cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned width, unsigned height)
{
switch (n) {
if(width == height) {
switch (width) {
case 4:
return uvg_satd_4x4_dual;
case 8:
@ -179,19 +187,22 @@ cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned n)
default:
return NULL;
}
}
return NULL;
}
/**
* \brief Get a function that calculates SADs for 2 NxN blocks.
*
* \param n Width of the region for which SAD is calculated.
* \param width Width of the region for which SAD is calculated.
* \param height Height of the region for which SAD is calculated.
*
* \returns Pointer to cost_pixel_nxn_multi_func.
*/
cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n)
cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned width, unsigned height)
{
switch (n) {
if(width == height) {
switch (width) {
case 4:
return uvg_sad_4x4_dual;
case 8:
@ -205,4 +216,6 @@ cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n)
default:
return NULL;
}
}
return NULL;
}

View file

@ -194,8 +194,8 @@ extern pixel_var_func *uvg_pixel_var;
extern generate_residual_func* uvg_generate_residual;
int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth);
cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned n);
cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n);
cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned width, unsigned height);
cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned width, unsigned height);
#define STRATEGIES_PICTURE_EXPORTS \
{"reg_sad", (void**) &uvg_reg_sad}, \

View file

@ -584,7 +584,7 @@ void uvg_chroma_transform_search(
if (is_jccr && !u_has_coeffs) continue;
if (u_has_coeffs) {
uvg_dequant(state, u_quant_coeff, &u_coeff[i * trans_offset], width, width, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V,
uvg_dequant(state, u_quant_coeff, &u_coeff[i * trans_offset], width, height, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V,
pred_cu->type, transforms[i] == CHROMA_TS);
if (transforms[i] != CHROMA_TS) {

View file

@ -542,6 +542,14 @@ typedef struct uvg_config
char* cabac_debug_file_name;
uint8_t dual_tree;
uint8_t min_qt_size[3];
uint8_t max_bt_size[3];
uint8_t max_tt_size[3];
uint8_t max_intra_slice_btt_depth;
uint8_t max_intra_slice_btt_depth_chroma;
uint8_t max_inter_slice_btt_depth;
} uvg_config;
/**

View file

@ -51,7 +51,7 @@ static void init_sig_last_scan(uint32_t *buff_d, uint32_t *buff_h,
uint32_t *buff_v,
int32_t width, int32_t height)
{
uint32_t num_scan_pos = width * width;
uint32_t num_scan_pos = width * height;
uint32_t next_scan_pos = 0;
int32_t xx, yy, x, y;
uint32_t scan_line;