[mtt] WIP

This commit is contained in:
Joose Sainio 2022-11-15 15:31:44 +02:00 committed by Marko Viitanen
parent 02a5adf768
commit bbbd391b9e
17 changed files with 225 additions and 214 deletions

View file

@ -317,7 +317,10 @@ void uvg_cu_loc_ctor(cu_loc_t* loc, int x, int y, int width, int height)
} }
void uvg_get_split_locs(const cu_loc_t* const origin, enum split_type split, cu_loc_t out[4]) int uvg_get_split_locs(
const cu_loc_t* const origin,
enum split_type split,
cu_loc_t out[4])
{ {
const int half_width = origin->width >> 1; const int half_width = origin->width >> 1;
const int half_height = origin->height >> 1; const int half_height = origin->height >> 1;
@ -333,24 +336,25 @@ void uvg_get_split_locs(const cu_loc_t* const origin, enum split_type split, cu_
uvg_cu_loc_ctor(&out[1], origin->x + half_width, origin->y, half_width, half_height); uvg_cu_loc_ctor(&out[1], origin->x + half_width, origin->y, half_width, half_height);
uvg_cu_loc_ctor(&out[2], origin->x, origin->y + half_height, half_width, half_height); uvg_cu_loc_ctor(&out[2], origin->x, origin->y + half_height, half_width, half_height);
uvg_cu_loc_ctor(&out[3], origin->x + half_width, origin->y + half_height, half_width, half_height); uvg_cu_loc_ctor(&out[3], origin->x + half_width, origin->y + half_height, half_width, half_height);
break; return 4;
case BT_HOR_SPLIT: case BT_HOR_SPLIT:
uvg_cu_loc_ctor(&out[0], origin->x, origin->y, origin->width, half_height); uvg_cu_loc_ctor(&out[0], origin->x, origin->y, origin->width, half_height);
uvg_cu_loc_ctor(&out[1], origin->x, origin->y + half_height, origin->width, half_height); uvg_cu_loc_ctor(&out[1], origin->x, origin->y + half_height, origin->width, half_height);
break; return 2;
case BT_VER_SPLIT: case BT_VER_SPLIT:
uvg_cu_loc_ctor(&out[0], origin->x, origin->y, half_width, origin->height); uvg_cu_loc_ctor(&out[0], origin->x, origin->y, half_width, origin->height);
uvg_cu_loc_ctor(&out[1], origin->x + half_width, origin->y, half_width, origin->height); uvg_cu_loc_ctor(&out[1], origin->x + half_width, origin->y, half_width, origin->height);
break; return 2;
case TT_HOR_SPLIT: case TT_HOR_SPLIT:
uvg_cu_loc_ctor(&out[0], origin->x, origin->y, origin->width, quarter_height); uvg_cu_loc_ctor(&out[0], origin->x, origin->y, origin->width, quarter_height);
uvg_cu_loc_ctor(&out[1], origin->x, origin->y + quarter_height, origin->width, half_height); uvg_cu_loc_ctor(&out[1], origin->x, origin->y + quarter_height, origin->width, half_height);
uvg_cu_loc_ctor(&out[2], origin->x, origin->y + quarter_height + half_height, origin->width, quarter_height); uvg_cu_loc_ctor(&out[2], origin->x, origin->y + quarter_height + half_height, origin->width, quarter_height);
break; return 3;
case TT_VER_SPLIT: case TT_VER_SPLIT:
uvg_cu_loc_ctor(&out[0], origin->x, origin->y, quarter_width, origin->height); uvg_cu_loc_ctor(&out[0], origin->x, origin->y, quarter_width, origin->height);
uvg_cu_loc_ctor(&out[1], origin->x + quarter_width, origin->y, half_width, origin->height); uvg_cu_loc_ctor(&out[1], origin->x + quarter_width, origin->y, half_width, origin->height);
uvg_cu_loc_ctor(&out[2], origin->x + quarter_width + half_width, origin->y, quarter_width, origin->height); uvg_cu_loc_ctor(&out[2], origin->x + quarter_width + half_width, origin->y, quarter_width, origin->height);
break; return 3;
} }
return 0;
} }

View file

@ -185,7 +185,10 @@ typedef struct {
void uvg_cu_loc_ctor(cu_loc_t *loc, int x, int y, int width, int height); void uvg_cu_loc_ctor(cu_loc_t *loc, int x, int y, int width, int height);
void uvg_get_split_locs(const cu_loc_t* const origin, enum split_type split, cu_loc_t out[4]); int uvg_get_split_locs(
const cu_loc_t* const origin,
enum split_type split,
cu_loc_t out[4]);
#define CU_GET_MV_CAND(cu_info_ptr, reflist) \ #define CU_GET_MV_CAND(cu_info_ptr, reflist) \
(((reflist) == 0) ? (cu_info_ptr)->inter.mv_cand0 : (cu_info_ptr)->inter.mv_cand1) (((reflist) == 0) ? (cu_info_ptr)->inter.mv_cand0 : (cu_info_ptr)->inter.mv_cand1)

View file

@ -543,7 +543,7 @@ static void encode_transform_unit(
if(state->encoder_control->cfg.trskip_enable && width <= (1 << state->encoder_control->cfg.trskip_max_size) && !(cur_pu->type == CU_INTRA && cur_pu->intra.isp_mode != ISP_MODE_NO_ISP)) { if(state->encoder_control->cfg.trskip_enable && width <= (1 << state->encoder_control->cfg.trskip_max_size) && !(cur_pu->type == CU_INTRA && cur_pu->intra.isp_mode != ISP_MODE_NO_ISP)) {
cabac->cur_ctx = &cabac->ctx.transform_skip_model_luma; cabac->cur_ctx = &cabac->ctx.transform_skip_model_luma;
CABAC_BIN(cabac, cur_pu->tr_idx == MTS_SKIP, "transform_skip_flag"); CABAC_BIN(cabac, cur_pu->tr_idx == MTS_SKIP, "transform_skip_flag");
DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_TR_SKIP, x, y, width, width, (cur_pu->tr_idx == MTS_SKIP) ? 1 : 0); DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_TR_SKIP, x, y, width, height, (cur_pu->tr_idx == MTS_SKIP) ? 1 : 0);
} }
if(cur_pu->tr_idx == MTS_SKIP) { if(cur_pu->tr_idx == MTS_SKIP) {
uvg_encode_ts_residual(state, cabac, coeff_y, width, height, 0, scan_idx, NULL); uvg_encode_ts_residual(state, cabac, coeff_y, width, height, 0, scan_idx, NULL);
@ -1040,7 +1040,7 @@ void uvg_encode_intra_luma_coding_unit(
int multi_ref_idx = enable_mrl ? cur_cu->intra.multi_ref_idx : 0; int multi_ref_idx = enable_mrl ? cur_cu->intra.multi_ref_idx : 0;
#ifdef UVG_DEBUG_PRINT_YUVIEW_CSV #ifdef UVG_DEBUG_PRINT_YUVIEW_CSV
if(multi_ref_idx) DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_MRL, x, y, width, width, multi_ref_idx); if(multi_ref_idx) DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_MRL, x, y, width, height, multi_ref_idx);
#endif #endif
if (cur_cu->type == CU_INTRA && (y % LCU_WIDTH) != 0 && !cur_cu->bdpcmMode && enable_mrl && !mip_flag) { if (cur_cu->type == CU_INTRA && (y % LCU_WIDTH) != 0 && !cur_cu->bdpcmMode && enable_mrl && !mip_flag) {
@ -1222,7 +1222,7 @@ bool uvg_write_split_flag(
no_split = allow_qt = bh_split = bv_split = th_split = tv_split = true; no_split = allow_qt = bh_split = bv_split = th_split = tv_split = true;
if (depth > MAX_DEPTH) allow_qt = false; if (depth > MAX_DEPTH) allow_qt = false;
// ToDo: update this when btt is actually used // ToDo: update this when btt is actually used
bool allow_btt = false;// when mt_depth < MAX_BT_DEPTH bool allow_btt = true;// when mt_depth < MAX_BT_DEPTH
const int cu_width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width; const int cu_width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width;
const int cu_height = tree_type != UVG_CHROMA_T ? cu_loc->height : cu_loc->chroma_height; const int cu_height = tree_type != UVG_CHROMA_T ? cu_loc->height : cu_loc->chroma_height;
@ -1289,7 +1289,7 @@ bool uvg_write_split_flag(
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.split_flag_model[split_model]), split_flag != 0, bits, "split_flag"); CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.split_flag_model[split_model]), split_flag != 0, bits, "split_flag");
} }
bool qt_split = split_flag == UVG_QUAD_SPLIT; bool qt_split = split_flag == QT_SPLIT;
if (!(implicit_split_mode == UVG_NO_SPLIT) && (allow_qt && allow_btt)) { if (!(implicit_split_mode == UVG_NO_SPLIT) && (allow_qt && allow_btt)) {
split_model = (left_cu && GET_SPLITDATA(left_cu, depth)) + (above_cu && GET_SPLITDATA(above_cu, depth)) + (depth < 2 ? 0 : 3); split_model = (left_cu && GET_SPLITDATA(left_cu, depth)) + (above_cu && GET_SPLITDATA(above_cu, depth)) + (depth < 2 ? 0 : 3);
@ -1384,28 +1384,16 @@ void uvg_encode_coding_tree(
const int half_luma = cu_loc->width / 2; const int half_luma = cu_loc->width / 2;
split_tree_t new_split_tree = { cur_cu->split_tree, split_tree.current_depth + 1 }; split_tree_t new_split_tree = { cur_cu->split_tree, split_tree.current_depth + 1 };
cu_loc_t new_cu_loc; cu_loc_t new_cu_loc[4];
uvg_cu_loc_ctor(&new_cu_loc, x, y, half_luma, half_luma); const int splits = uvg_get_split_locs(cu_loc, split_flag, new_cu_loc);
// Split blocks and remember to change x and y block positions for (int split = 0; split <splits; ++split) {
uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc, new_split_tree); uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc[split], new_split_tree);
if (!border_x || border_split_x) {
uvg_cu_loc_ctor(&new_cu_loc, x + half_cu, y, half_luma, half_luma);
uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc, new_split_tree);
}
if (!border_y || border_split_y) {
uvg_cu_loc_ctor(&new_cu_loc, x, y + half_cu, half_luma, half_luma);
uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc, new_split_tree);
}
if (!border || (border_split_x && border_split_y)) {
uvg_cu_loc_ctor(&new_cu_loc, x + half_cu, y + half_cu, half_luma, half_luma);
uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc, new_split_tree);
} }
return; return;
} }
} }
DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_CU_TYPE, abs_x, abs_y, cu_width, cu_width, cur_cu->type-1); DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_CU_TYPE, abs_x, abs_y, cu_width, cu_height, cur_cu->type-1);
if (ctrl->cfg.lossless) { if (ctrl->cfg.lossless) {
cabac->cur_ctx = &cabac->ctx.cu_transquant_bypass; cabac->cur_ctx = &cabac->ctx.cu_transquant_bypass;
@ -1457,8 +1445,8 @@ void uvg_encode_coding_tree(
} }
} }
#ifdef UVG_DEBUG_PRINT_YUVIEW_CSV #ifdef UVG_DEBUG_PRINT_YUVIEW_CSV
if (cur_cu->inter.mv_dir & 1) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVSKIP_L0, abs_x, abs_y, cu_width, cu_width, cur_cu->inter.mv[0][0], cur_cu->inter.mv[0][1]); if (cur_cu->inter.mv_dir & 1) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVSKIP_L0, abs_x, abs_y, cu_width, cu_height, cur_cu->inter.mv[0][0], cur_cu->inter.mv[0][1]);
if (cur_cu->inter.mv_dir & 2) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVSKIP_L1, abs_x, abs_y, cu_width, cu_width, cur_cu->inter.mv[1][0], cur_cu->inter.mv[1][1]); if (cur_cu->inter.mv_dir & 2) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVSKIP_L1, abs_x, abs_y, cu_width, cu_height, cur_cu->inter.mv[1][0], cur_cu->inter.mv[1][1]);
#endif #endif
goto end; goto end;

View file

@ -528,48 +528,31 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream,
WRITE_UE(stream, MIN_SIZE-2, "log2_min_luma_coding_block_size_minus2"); // Min size 2^3 = 8x8 WRITE_UE(stream, MIN_SIZE-2, "log2_min_luma_coding_block_size_minus2"); // Min size 2^3 = 8x8
// if(!no_partition_constraints_override_constraint_flag) // if(!no_partition_constraints_override_constraint_flag)
WRITE_U(stream, 0, 1, "partition_constraints_override_enabled_flag"); WRITE_U(stream, 0, 1, "partition_constraints_override_enabled_flag");
WRITE_UE(stream, 0, "sps_log2_diff_min_qt_min_cb_intra_slice_luma"); WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[0]] - MIN_SIZE, "sps_log2_diff_min_qt_min_cb_intra_slice_luma");
WRITE_UE(stream, 0, "sps_max_mtt_hierarchy_depth_intra_slice_luma"); WRITE_UE(stream, encoder->cfg.max_intra_slice_btt_depth, "sps_max_mtt_hierarchy_depth_intra_slice_luma");
if (encoder->cfg.max_intra_slice_btt_depth) {
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[0]] - uvg_g_convert_to_log2[encoder->cfg.max_bt_size[0]], "sps_log2_diff_max_bt_min_qt_intra_slice_luma");
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[0]] - uvg_g_convert_to_log2[encoder->cfg.max_tt_size[0]], "sps_log2_diff_max_tt_min_qt_intra_slice_luma");
}
if (encoder->chroma_format != UVG_CSP_400) if (encoder->chroma_format != UVG_CSP_400)
{ {
WRITE_U(stream, encoder->cfg.dual_tree, 1, "qtbtt_dual_tree_intra_flag"); WRITE_U(stream, encoder->cfg.dual_tree, 1, "qtbtt_dual_tree_intra_flag");
} }
if (encoder->cfg.dual_tree) { if (encoder->cfg.dual_tree) {
WRITE_UE(stream, 0, "sps_log2_diff_min_qt_min_cb_intra_slice_chroma"); WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[2]] - MIN_SIZE, "sps_log2_diff_min_qt_min_cb_intra_slice_chroma");
WRITE_UE(stream, 0, "sps_max_mtt_hierarchy_depth_intra_slice_chroma"); WRITE_UE(stream, encoder->cfg.max_intra_slice_btt_depth_chroma, "sps_max_mtt_hierarchy_depth_intra_slice_chroma");
if (0 /*sps_max_mtt_hierarchy_depth_intra_slice_chroma != 0*/) { if (encoder->cfg.max_intra_slice_btt_depth_chroma) {
WRITE_UE(stream, 0, "sps_log2_diff_max_bt_min_qt_intra_slice_chroma"); WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[2]] - uvg_g_convert_to_log2[encoder->cfg.max_bt_size[2]], "sps_log2_diff_max_bt_min_qt_intra_slice_chroma");
WRITE_UE(stream, 0, "sps_log2_diff_max_tt_min_qt_intra_slice_chroma"); WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[2]] - uvg_g_convert_to_log2[encoder->cfg.max_tt_size[2]], "sps_log2_diff_max_tt_min_qt_intra_slice_chroma");
} }
} }
WRITE_UE(stream, 0, "sps_log2_diff_min_qt_min_cb_inter_slice"); WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[1]] - MIN_SIZE, "sps_log2_diff_min_qt_min_cb_inter_slice");
WRITE_UE(stream, 0, "sps_max_mtt_hierarchy_depth_inter_slice"); WRITE_UE(stream, encoder->cfg.max_inter_slice_btt_depth, "sps_max_mtt_hierarchy_depth_inter_slice");
if (encoder->cfg.max_inter_slice_btt_depth != 0) {
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[1]] - uvg_g_convert_to_log2[encoder->cfg.max_bt_size[1]], "sps_log2_diff_max_bt_min_qt_inter_tile_group");
#if 0 // mtt depth intra WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[1]] - uvg_g_convert_to_log2[encoder->cfg.max_tt_size[1]], "sps_log2_diff_max_tt_min_qt_inter_tile_group");
if (max_mtt_depth_intra != 0) {
WRITE_UE(stream, 0, "sps_log2_diff_max_bt_min_qt_intra_tile_group_luma");
WRITE_UE(stream, 0, "sps_log2_diff_max_tt_min_qt_intra_tile_group_luma");
} }
#endif
#if 0 // mtt depth inter
if (max_mtt_depth_inter != 0) {
WRITE_UE(stream, 0, "sps_log2_diff_max_bt_min_qt_inter_tile_group");
WRITE_UE(stream, 0, "sps_log2_diff_max_tt_min_qt_inter_tile_group");
}
#endif
#if 0 // Dual Tree
if (encoder->cfg.dual_i_tree) {
WRITE_UE(stream, 0, "sps_log2_diff_min_qt_min_cb_intra_tile_group_chroma");
WRITE_UE(stream, 0, "sps_max_mtt_hierarchy_depth_intra_tile_group_chroma");
if (max_mtt_depth_intra != 0) {
WRITE_UE(stream, 0, "sps_log2_diff_max_bt_min_qt_intra_tile_group_chroma");
WRITE_UE(stream, 0, "sps_log2_diff_max_tt_min_qt_intra_tile_group_chroma");
}
}
#endif
if (LCU_WIDTH > 32) if (LCU_WIDTH > 32)
WRITE_U(stream, (TR_MAX_LOG2_SIZE - 5) ? 1 : 0, 1, "sps_max_luma_transform_size_64_flag"); WRITE_U(stream, (TR_MAX_LOG2_SIZE - 5) ? 1 : 0, 1, "sps_max_luma_transform_size_64_flag");

View file

@ -129,7 +129,7 @@ typedef int16_t coeff_t;
typedef int32_t mv_t; typedef int32_t mv_t;
//#define VERBOSE 1 //#define VERBOSE 1
//#define UVG_DEBUG_PRINT_CABAC 1 #define UVG_DEBUG_PRINT_CABAC 1
//#define UVG_DEBUG 1 //#define UVG_DEBUG 1
//#define UVG_DEBUG_PRINT_YUVIEW_CSV 1 //#define UVG_DEBUG_PRINT_YUVIEW_CSV 1

View file

@ -1552,7 +1552,7 @@ void uvg_intra_predict(
} }
} }
else { else {
uvg_pixels_blit(&state->tile->frame->cclm_luma_rec[x / 2 + (y * stride) / 4], dst, width, width, stride / 2, width); uvg_pixels_blit(&state->tile->frame->cclm_luma_rec[x / 2 + (y * stride) / 4], dst, width, height, stride / 2, width);
if (!PU_IS_TU(&data->pred_cu) || data->cclm_parameters[color == COLOR_U ? 0 : 1].b <= 0) { if (!PU_IS_TU(&data->pred_cu) || data->cclm_parameters[color == COLOR_U ? 0 : 1].b <= 0) {
predict_cclm( predict_cclm(
state, color, width, height, x, y, stride, intra_mode, lcu, refs, dst, state, color, width, height, x, y, stride, intra_mode, lcu, refs, dst,
@ -1560,7 +1560,7 @@ void uvg_intra_predict(
tree_type); tree_type);
} }
else { else {
linear_transform_cclm(&data->cclm_parameters[color == COLOR_U ? 0 : 1], dst, dst, width, width); linear_transform_cclm(&data->cclm_parameters[color == COLOR_U ? 0 : 1], dst, dst, width, height);
} }
} }
} }

View file

@ -447,7 +447,7 @@ double uvg_get_coeff_cost(
} else { } else {
double ccc = get_coeff_cabac_cost(state, coeff_ptr, cu_loc, color, scan_mode, tr_skip, cur_tu); double ccc = get_coeff_cabac_cost(state, coeff_ptr, cu_loc, color, scan_mode, tr_skip, cur_tu);
if (save_cccs) { if (save_cccs) {
save_ccc(state->qp, coeff, width * width, ccc); save_ccc(state->qp, coeff, width * height, ccc);
} }
return ccc; return ccc;
} }
@ -1474,10 +1474,13 @@ void uvg_rdoq(
// Hope the compiler is able to utilize this information. // Hope the compiler is able to utilize this information.
switch (cg_num) { switch (cg_num) {
case 1: FILL_ARRAY(sig_coeffgroup_flag, 0, 1); break; case 1: FILL_ARRAY(sig_coeffgroup_flag, 0, 1); break;
case 2: FILL_ARRAY(sig_coeffgroup_flag, 0, 2); break;
case 4: FILL_ARRAY(sig_coeffgroup_flag, 0, 4); break; case 4: FILL_ARRAY(sig_coeffgroup_flag, 0, 4); break;
case 8: FILL_ARRAY(sig_coeffgroup_flag, 0, 8); break;
case 16: FILL_ARRAY(sig_coeffgroup_flag, 0, 16); break; case 16: FILL_ARRAY(sig_coeffgroup_flag, 0, 16); break;
case 32: FILL_ARRAY(sig_coeffgroup_flag, 0, 32); break;
case 64: FILL_ARRAY(sig_coeffgroup_flag, 0, 64); break; case 64: FILL_ARRAY(sig_coeffgroup_flag, 0, 64); break;
default: assert(0 && "There should be 1, 4, 16 or 64 coefficient groups"); default: assert(0 && "There should be 1, 2, 4, 8, 16, 32 or 64 coefficient groups");
} }
cabac_ctx_t *base_coeff_group_ctx = &(cabac->ctx.sig_coeff_group_model[color ? 2 : 0]); cabac_ctx_t *base_coeff_group_ctx = &(cabac->ctx.sig_coeff_group_model[color ? 2 : 0]);

View file

@ -253,15 +253,16 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in
} }
static void lcu_fill_cbf(lcu_t *lcu, int x_local, unsigned y_local, unsigned width, const cu_info_t *cur_cu) static void lcu_fill_cbf(lcu_t *lcu, int x_local, unsigned y_local, unsigned width, unsigned height, const cu_info_t *cur_cu)
{ {
const uint32_t mask = ~((MIN(width, TR_MAX_WIDTH))-1); const uint32_t x_mask = ~((MIN(width, TR_MAX_WIDTH))-1);
const uint32_t y_mask = ~((MIN(height, TR_MAX_WIDTH))-1);
// Set coeff flags in every CU covered by part_mode in this depth. // Set coeff flags in every CU covered by part_mode in this depth.
for (uint32_t y = y_local; y < y_local + width; y += SCU_WIDTH) { for (uint32_t y = y_local; y < y_local + height; y += SCU_WIDTH) {
for (uint32_t x = x_local; x < x_local + width; x += SCU_WIDTH) { for (uint32_t x = x_local; x < x_local + width; x += SCU_WIDTH) {
// Use TU top-left CU to propagate coeff flags // Use TU top-left CU to propagate coeff flags
cu_info_t *cu_from = LCU_GET_CU_AT_PX(lcu, x & mask, y & mask); cu_info_t *cu_from = LCU_GET_CU_AT_PX(lcu, x & x_mask, y & y_mask);
cu_info_t *cu_to = LCU_GET_CU_AT_PX(lcu, x, y); cu_info_t *cu_to = LCU_GET_CU_AT_PX(lcu, x, y);
if (cu_from != cu_to) { if (cu_from != cu_to) {
// Chroma and luma coeff data is needed for deblocking // Chroma and luma coeff data is needed for deblocking
@ -943,6 +944,7 @@ static double search_cu(
const int x = cu_loc->x; const int x = cu_loc->x;
const int y = cu_loc->y; const int y = cu_loc->y;
const int luma_width = cu_loc->width; const int luma_width = cu_loc->width;
const int luma_height = cu_loc->height;
assert(cu_width >= 4); assert(cu_width >= 4);
double cost = MAX_DOUBLE; double cost = MAX_DOUBLE;
double inter_zero_coeff_cost = MAX_DOUBLE; double inter_zero_coeff_cost = MAX_DOUBLE;
@ -1009,7 +1011,7 @@ static double search_cu(
// If the CU is completely inside the frame at this depth, search for // If the CU is completely inside the frame at this depth, search for
// prediction modes at this depth. // prediction modes at this depth.
if ( x + luma_width <= frame_width && y + luma_width <= frame_height) if ( x + luma_width <= frame_width && y + luma_height <= frame_height)
{ {
int cu_width_inter_min = LCU_WIDTH >> pu_depth_inter.max; int cu_width_inter_min = LCU_WIDTH >> pu_depth_inter.max;
bool can_use_inter = bool can_use_inter =
@ -1022,7 +1024,7 @@ static double search_cu(
// otherwise forbid it. // otherwise forbid it.
(x & ~(cu_width_inter_min - 1)) + cu_width_inter_min > frame_width || (x & ~(cu_width_inter_min - 1)) + cu_width_inter_min > frame_width ||
(y & ~(cu_width_inter_min - 1)) + cu_width_inter_min > frame_height (y & ~(cu_width_inter_min - 1)) + cu_width_inter_min > frame_height
); ) && cu_loc->width == cu_loc->height; // Don't allow non square inter CUs for now
if (can_use_inter) { if (can_use_inter) {
double mode_cost; double mode_cost;
@ -1179,7 +1181,7 @@ static double search_cu(
if ((cur_cu->log2_height + cur_cu->log2_width < 6) || state->encoder_control->chroma_format == UVG_CSP_400 || tree_type == UVG_LUMA_T) { if ((cur_cu->log2_height + cur_cu->log2_width < 6) || state->encoder_control->chroma_format == UVG_CSP_400 || tree_type == UVG_LUMA_T) {
recon_chroma = false; recon_chroma = false;
} }
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
uvg_intra_recon_cu(state, uvg_intra_recon_cu(state,
&intra_search, cu_loc, &intra_search, cu_loc,
NULL, lcu, NULL, lcu,
@ -1226,7 +1228,7 @@ static double search_cu(
if(cbf_cr) cbf_set(&split_cu->cbf, COLOR_V); if(cbf_cr) cbf_set(&split_cu->cbf, COLOR_V);
split_cu->joint_cb_cr = jccr; split_cu->joint_cb_cr = jccr;
} }
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
} else if (cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) { } else if (cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) {
@ -1269,7 +1271,7 @@ static double search_cu(
} }
} }
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu); lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
lcu_fill_cbf(lcu, x_local, y_local, cu_width, cur_cu); lcu_fill_cbf(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
} }
} }
@ -1308,7 +1310,7 @@ static double search_cu(
// if (cur_cu->merged) { // if (cur_cu->merged) {
// cur_cu->merged = 0; // cur_cu->merged = 0;
// cur_cu->skipped = 1; // cur_cu->skipped = 1;
// lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); // lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
// } // }
// cur_cu->cbf = 0; // cur_cu->cbf = 0;
@ -1332,9 +1334,9 @@ static double search_cu(
// Recursively split all the way to max search depth. // Recursively split all the way to max search depth.
if (can_split_cu) { if (can_split_cu) {
const split_tree_t new_split = { split_tree.split_tree | QT_SPLIT << (split_tree.current_depth * 3), split_tree.current_depth + 1 }; const int split_type = depth == 0 ? QT_SPLIT : BT_HOR_SPLIT;
const split_tree_t new_split = { split_tree.split_tree | split_type << (split_tree.current_depth * 3), split_tree.current_depth + 1 };
int half_cu = cu_width >> (tree_type != UVG_CHROMA_T);
double split_cost = 0.0; double split_cost = 0.0;
int cbf = cbf_is_set_any(cur_cu->cbf); int cbf = cbf_is_set_any(cur_cu->cbf);
cabac_data_t post_seach_cabac; cabac_data_t post_seach_cabac;
@ -1389,19 +1391,14 @@ static double search_cu(
if (cur_cu->type == CU_NOTSET || cbf || state->encoder_control->cfg.cu_split_termination == UVG_CU_SPLIT_TERMINATION_OFF) { if (cur_cu->type == CU_NOTSET || cbf || state->encoder_control->cfg.cu_split_termination == UVG_CU_SPLIT_TERMINATION_OFF) {
initialize_partial_work_tree(lcu, &split_lcu, cu_loc, tree_type); initialize_partial_work_tree(lcu, &split_lcu, cu_loc, tree_type);
cu_loc_t new_cu_loc[4]; cu_loc_t new_cu_loc[4];
uvg_get_split_locs(cu_loc, QT_SPLIT, new_cu_loc); const int splits = uvg_get_split_locs(cu_loc, split_type, new_cu_loc);
for (int split = 0; split < splits; ++split) {
split_cost += search_cu(state, &new_cu_loc[split], &split_lcu, tree_type, new_split);
if (split_cost < cost) { if (split_cost < cost) {
split_cost += search_cu(state, &new_cu_loc[0], &split_lcu, tree_type, new_split); break;
} }
if (split_cost < cost) {
split_cost += search_cu(state, &new_cu_loc[1], &split_lcu, tree_type, new_split);
}
if (split_cost < cost) {
split_cost += search_cu(state, &new_cu_loc[2], &split_lcu, tree_type, new_split);
}
if (split_cost < cost) {
split_cost += search_cu(state, &new_cu_loc[3], &split_lcu, tree_type, new_split);
} }
} else { } else {
split_cost = INT_MAX; split_cost = INT_MAX;
} }
@ -1442,7 +1439,7 @@ static double search_cu(
cur_cu->lfnst_idx = 0; cur_cu->lfnst_idx = 0;
cur_cu->cr_lfnst_idx = 0; cur_cu->cr_lfnst_idx = 0;
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
intra_search_data_t proxy; intra_search_data_t proxy;
FILL(proxy, 0); FILL(proxy, 0);
@ -1492,7 +1489,7 @@ static double search_cu(
} }
// Add candidate when in inter slice or ibc is enabled // Add candidate when in inter slice or ibc is enabled
if(state->frame->slicetype != UVG_SLICE_I || state->encoder_control->cfg.ibc) { if(state->frame->slicetype != UVG_SLICE_I || state->encoder_control->cfg.ibc) {
uvg_hmvp_add_mv(state, x, y, cu_width, cu_width, cur_cu); uvg_hmvp_add_mv(state, x, y, cu_width, cu_height, cur_cu);
} }
} }
else { else {
@ -1520,7 +1517,7 @@ static double search_cu(
} }
// Add candidate when in inter slice or ibc is enabled // Add candidate when in inter slice or ibc is enabled
if(state->frame->slicetype != UVG_SLICE_I || state->encoder_control->cfg.ibc) { if(state->frame->slicetype != UVG_SLICE_I || state->encoder_control->cfg.ibc) {
uvg_hmvp_add_mv(state, x, y, cu_width, cu_width, cur_cu); uvg_hmvp_add_mv(state, x, y, cu_width, cu_height, cur_cu);
} }
} }

View file

@ -2187,8 +2187,8 @@ void uvg_cu_cost_inter_rd2(
UVG_BOTH_T); UVG_BOTH_T);
ALIGNED(64) uvg_pixel u_pred[LCU_WIDTH_C * LCU_WIDTH_C]; ALIGNED(64) uvg_pixel u_pred[LCU_WIDTH_C * LCU_WIDTH_C];
ALIGNED(64) uvg_pixel v_pred[LCU_WIDTH_C * LCU_WIDTH_C]; ALIGNED(64) uvg_pixel v_pred[LCU_WIDTH_C * LCU_WIDTH_C];
uvg_pixels_blit(&lcu->ref.u[index], u_pred, width, width, LCU_WIDTH_C, width); uvg_pixels_blit(&lcu->ref.u[index], u_pred, width, height, LCU_WIDTH_C, width);
uvg_pixels_blit(&lcu->ref.v[index], v_pred, width, width, LCU_WIDTH_C, width); uvg_pixels_blit(&lcu->ref.v[index], v_pred, width, height, LCU_WIDTH_C, width);
ALIGNED(64) int16_t u_resi[LCU_WIDTH_C * LCU_WIDTH_C]; ALIGNED(64) int16_t u_resi[LCU_WIDTH_C * LCU_WIDTH_C];
ALIGNED(64) int16_t v_resi[LCU_WIDTH_C * LCU_WIDTH_C]; ALIGNED(64) int16_t v_resi[LCU_WIDTH_C * LCU_WIDTH_C];

View file

@ -130,17 +130,31 @@ static INLINE uint8_t select_best_mode_index(const int8_t *modes, const double *
* *
* \return * \return
*/ */
static void get_cost_dual(encoder_state_t * const state, static void get_cost_dual(
const pred_buffer preds, const uvg_pixel *orig_block, encoder_state_t * const state,
const pred_buffer preds,
const uvg_pixel *orig_block,
cost_pixel_nxn_multi_func *satd_twin_func, cost_pixel_nxn_multi_func *satd_twin_func,
cost_pixel_nxn_multi_func *sad_twin_func, cost_pixel_nxn_multi_func *sad_twin_func,
int width, double *costs_out) int width,
int height,
double *costs_out)
{ {
#define PARALLEL_BLKS 2 #define PARALLEL_BLKS 2
unsigned satd_costs[PARALLEL_BLKS] = { 0 }; unsigned satd_costs[PARALLEL_BLKS] = { 0 };
if (satd_twin_func != NULL) {
satd_twin_func(preds, orig_block, PARALLEL_BLKS, satd_costs); satd_twin_func(preds, orig_block, PARALLEL_BLKS, satd_costs);
} else {
satd_costs[0] = uvg_satd_any_size(width, height, preds[0], width, orig_block, LCU_WIDTH);
satd_costs[1] = uvg_satd_any_size(width, height, preds[1], width, orig_block, LCU_WIDTH);
}
unsigned unsigned_sad_costs[PARALLEL_BLKS] = { 0 }; unsigned unsigned_sad_costs[PARALLEL_BLKS] = { 0 };
if (sad_twin_func != NULL) {
sad_twin_func(preds, orig_block, PARALLEL_BLKS, unsigned_sad_costs); sad_twin_func(preds, orig_block, PARALLEL_BLKS, unsigned_sad_costs);
} else {
unsigned_sad_costs[0] = uvg_reg_sad(preds[0], orig_block, width, height, width, LCU_WIDTH);
unsigned_sad_costs[1] = uvg_reg_sad(preds[1], orig_block, width, height, width, LCU_WIDTH);
}
costs_out[0] = (double)MIN(satd_costs[0], unsigned_sad_costs[0] * 2); costs_out[0] = (double)MIN(satd_costs[0], unsigned_sad_costs[0] * 2);
costs_out[1] = (double)MIN(satd_costs[1], unsigned_sad_costs[1] * 2); costs_out[1] = (double)MIN(satd_costs[1], unsigned_sad_costs[1] * 2);
@ -651,7 +665,7 @@ static int search_intra_chroma_rough(
uvg_pixel _orig_block[32 * 32 + SIMD_ALIGNMENT]; uvg_pixel _orig_block[32 * 32 + SIMD_ALIGNMENT];
uvg_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); uvg_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT);
uvg_pixels_blit(orig_u, orig_block, width, width, LCU_WIDTH_C, width); uvg_pixels_blit(orig_u, orig_block, width, height, LCU_WIDTH_C, width);
int modes_count = (state->encoder_control->cfg.cclm ? 8 : 5); int modes_count = (state->encoder_control->cfg.cclm ? 8 : 5);
for (int i = 0; i < modes_count; ++i) { for (int i = 0; i < modes_count; ++i) {
const int8_t mode_chroma = chroma_data[i].pred_cu.intra.mode_chroma; const int8_t mode_chroma = chroma_data[i].pred_cu.intra.mode_chroma;
@ -671,7 +685,7 @@ static int search_intra_chroma_rough(
} }
} }
uvg_pixels_blit(orig_v, orig_block, width, width, LCU_WIDTH_C, width); uvg_pixels_blit(orig_v, orig_block, width, height, LCU_WIDTH_C, width);
for (int i = 0; i < modes_count; ++i) { for (int i = 0; i < modes_count; ++i) {
const int8_t mode_chroma = chroma_data[i].pred_cu.intra.mode_chroma; const int8_t mode_chroma = chroma_data[i].pred_cu.intra.mode_chroma;
if (mode_chroma == luma_mode || mode_chroma == 0 || mode_chroma >= 81) continue; if (mode_chroma == luma_mode || mode_chroma == 0 || mode_chroma >= 81) continue;
@ -764,7 +778,7 @@ static int16_t search_intra_rough(
uvg_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); uvg_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT);
// Store original block for SAD computation // Store original block for SAD computation
uvg_pixels_blit(orig, orig_block, width, width, origstride, width); uvg_pixels_blit(orig, orig_block, width, height, origstride, width);
int8_t modes_selected = 0; int8_t modes_selected = 0;
// Note: get_cost and get_cost_dual may return negative costs. // Note: get_cost and get_cost_dual may return negative costs.
@ -783,7 +797,7 @@ static int16_t search_intra_rough(
// Calculate SAD for evenly spaced modes to select the starting point for // Calculate SAD for evenly spaced modes to select the starting point for
// the recursive search. // the recursive search.
cu_loc_t loc = { 0, 0, width, width, width, width }; cu_loc_t loc = { 0, 0, width, height, width, height };
intra_search_data_t search_proxy; intra_search_data_t search_proxy;
FILL(search_proxy, 0); FILL(search_proxy, 0);
search_proxy.pred_cu = *pred_cu; search_proxy.pred_cu = *pred_cu;
@ -963,19 +977,19 @@ static uint8_t search_intra_rough(
uvg_pixel *orig, uvg_pixel *orig,
int32_t origstride, int32_t origstride,
uvg_intra_references *refs, uvg_intra_references *refs,
int log2_width, int width,
int height,
int8_t *intra_preds, int8_t *intra_preds,
intra_search_data_t* modes_out, intra_search_data_t* modes_out,
cu_info_t* const pred_cu, cu_info_t* const pred_cu,
uint8_t mip_ctx) uint8_t mip_ctx)
{ {
#define PARALLEL_BLKS 2 // TODO: use 4 for AVX-512 in the future? #define PARALLEL_BLKS 2 // TODO: use 4 for AVX-512 in the future?
assert(log2_width >= 2 && log2_width <= 5); assert(width >= 4 && width <= 32);
int_fast8_t width = 1 << log2_width;
// cost_pixel_nxn_func *satd_func = kvz_pixels_get_satd_func(width); // cost_pixel_nxn_func *satd_func = kvz_pixels_get_satd_func(width);
// cost_pixel_nxn_func *sad_func = kvz_pixels_get_sad_func(width); // cost_pixel_nxn_func *sad_func = kvz_pixels_get_sad_func(width);
cost_pixel_nxn_multi_func *satd_dual_func = uvg_pixels_get_satd_dual_func(width); cost_pixel_nxn_multi_func *satd_dual_func = uvg_pixels_get_satd_dual_func(width, height);
cost_pixel_nxn_multi_func *sad_dual_func = uvg_pixels_get_sad_dual_func(width); cost_pixel_nxn_multi_func *sad_dual_func = uvg_pixels_get_sad_dual_func(width, height);
bool mode_checked[UVG_NUM_INTRA_MODES] = {0}; bool mode_checked[UVG_NUM_INTRA_MODES] = {0};
double costs[UVG_NUM_INTRA_MODES]; double costs[UVG_NUM_INTRA_MODES];
@ -990,7 +1004,7 @@ static uint8_t search_intra_rough(
uvg_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); uvg_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT);
// Store original block for SAD computation // Store original block for SAD computation
uvg_pixels_blit(orig, orig_block, width, width, origstride, width); uvg_pixels_blit(orig, orig_block, width, height, origstride, width);
int8_t modes_selected = 0; int8_t modes_selected = 0;
// Note: get_cost and get_cost_dual may return negative costs. // Note: get_cost and get_cost_dual may return negative costs.
@ -1016,17 +1030,16 @@ static uint8_t search_intra_rough(
// Calculate SAD for evenly spaced modes to select the starting point for // Calculate SAD for evenly spaced modes to select the starting point for
// the recursive search. // the recursive search.
cu_loc_t loc = { 0, 0, width, width, width, width };
intra_search_data_t search_proxy; intra_search_data_t search_proxy;
FILL(search_proxy, 0); FILL(search_proxy, 0);
search_proxy.pred_cu = *pred_cu; search_proxy.pred_cu = *pred_cu;
int offset = 1 << state->encoder_control->cfg.intra_rough_search_levels; int offset = 1 << state->encoder_control->cfg.intra_rough_search_levels;
search_proxy.pred_cu.intra.mode = 0; search_proxy.pred_cu.intra.mode = 0;
uvg_intra_predict(state, refs, &loc, COLOR_Y, preds[0], &search_proxy, NULL, UVG_LUMA_T); uvg_intra_predict(state, refs, cu_loc, COLOR_Y, preds[0], &search_proxy, NULL, UVG_LUMA_T);
search_proxy.pred_cu.intra.mode = 1; search_proxy.pred_cu.intra.mode = 1;
uvg_intra_predict(state, refs, &loc, COLOR_Y, preds[1], &search_proxy, NULL, UVG_LUMA_T); uvg_intra_predict(state, refs, cu_loc, COLOR_Y, preds[1], &search_proxy, NULL, UVG_LUMA_T);
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, costs); get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, height, costs);
mode_checked[0] = true; mode_checked[0] = true;
mode_checked[1] = true; mode_checked[1] = true;
costs[0] += count_bits( costs[0] += count_bits(
@ -1075,12 +1088,12 @@ static uint8_t search_intra_rough(
for (int i = 0; i < PARALLEL_BLKS; ++i) { for (int i = 0; i < PARALLEL_BLKS; ++i) {
if (mode + i * offset <= 66) { if (mode + i * offset <= 66) {
search_proxy.pred_cu.intra.mode = mode + i*offset; search_proxy.pred_cu.intra.mode = mode + i*offset;
uvg_intra_predict(state, refs, &loc, COLOR_Y, preds[i], &search_proxy, NULL, UVG_LUMA_T); uvg_intra_predict(state, refs, cu_loc, COLOR_Y, preds[i], &search_proxy, NULL, UVG_LUMA_T);
} }
} }
//TODO: add generic version of get cost multi //TODO: add generic version of get cost multi
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, costs_out); get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, height, costs_out);
for (int i = 0; i < PARALLEL_BLKS; ++i) { for (int i = 0; i < PARALLEL_BLKS; ++i) {
if (mode + i * offset <= 66) { if (mode + i * offset <= 66) {
costs_out[i] += count_bits( costs_out[i] += count_bits(
@ -1147,12 +1160,12 @@ static uint8_t search_intra_rough(
for (int block = 0; block < PARALLEL_BLKS; ++block) { for (int block = 0; block < PARALLEL_BLKS; ++block) {
search_proxy.pred_cu.intra.mode = modes_to_check[block + i]; search_proxy.pred_cu.intra.mode = modes_to_check[block + i];
uvg_intra_predict(state, refs, &loc, COLOR_Y, preds[block], &search_proxy, NULL, UVG_LUMA_T); uvg_intra_predict(state, refs, cu_loc, COLOR_Y, preds[block], &search_proxy, NULL, UVG_LUMA_T);
} }
//TODO: add generic version of get cost multi //TODO: add generic version of get cost multi
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, costs_out); get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, height, costs_out);
for (int block = 0; block < PARALLEL_BLKS; ++block) { for (int block = 0; block < PARALLEL_BLKS; ++block) {
costs_out[block] += count_bits( costs_out[block] += count_bits(
state, state,
@ -1219,12 +1232,9 @@ static void get_rough_cost_for_2n_modes(
const int height = cu_loc->height; const int height = cu_loc->height;
cost_pixel_nxn_multi_func* satd_dual_func; cost_pixel_nxn_multi_func* satd_dual_func;
cost_pixel_nxn_multi_func* sad_dual_func; cost_pixel_nxn_multi_func* sad_dual_func;
if (width == height) { satd_dual_func = uvg_pixels_get_satd_dual_func(width, height);
satd_dual_func = uvg_pixels_get_satd_dual_func(width); sad_dual_func = uvg_pixels_get_sad_dual_func(width, height);
sad_dual_func = uvg_pixels_get_sad_dual_func(width);
} else {
assert(false && "Joose promised to fix this.");
}
uvg_pixel _preds[PARALLEL_BLKS * MIN(LCU_WIDTH, 64)* MIN(LCU_WIDTH, 64)+ SIMD_ALIGNMENT]; uvg_pixel _preds[PARALLEL_BLKS * MIN(LCU_WIDTH, 64)* MIN(LCU_WIDTH, 64)+ SIMD_ALIGNMENT];
pred_buffer preds = ALIGNED_POINTER(_preds, SIMD_ALIGNMENT); pred_buffer preds = ALIGNED_POINTER(_preds, SIMD_ALIGNMENT);
@ -1232,7 +1242,7 @@ static void get_rough_cost_for_2n_modes(
uvg_pixel _orig_block[MIN(LCU_WIDTH, 64) * MIN(LCU_WIDTH, 64) + SIMD_ALIGNMENT]; uvg_pixel _orig_block[MIN(LCU_WIDTH, 64) * MIN(LCU_WIDTH, 64) + SIMD_ALIGNMENT];
uvg_pixel* orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); uvg_pixel* orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT);
uvg_pixels_blit(orig, orig_block, width, width, orig_stride, width); uvg_pixels_blit(orig, orig_block, width, height, orig_stride, width);
const double mrl = state->encoder_control->cfg.mrl && (cu_loc->y % LCU_WIDTH) ? CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.multi_ref_line[0]), 1) : 0; const double mrl = state->encoder_control->cfg.mrl && (cu_loc->y % LCU_WIDTH) ? CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.multi_ref_line[0]), 1) : 0;
const double not_mip = state->encoder_control->cfg.mip ? CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.mip_flag[mip_ctx]), 0) : 0; const double not_mip = state->encoder_control->cfg.mip ? CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.mip_flag[mip_ctx]), 0) : 0;
@ -1243,7 +1253,7 @@ static void get_rough_cost_for_2n_modes(
for (int i = 0; i < PARALLEL_BLKS; ++i) { for (int i = 0; i < PARALLEL_BLKS; ++i) {
uvg_intra_predict(state, &refs[search_data[mode + i].pred_cu.intra.multi_ref_idx], cu_loc, COLOR_Y, preds[i], &search_data[mode + i], NULL, UVG_LUMA_T); uvg_intra_predict(state, &refs[search_data[mode + i].pred_cu.intra.multi_ref_idx], cu_loc, COLOR_Y, preds[i], &search_data[mode + i], NULL, UVG_LUMA_T);
} }
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, costs_out); get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, height, costs_out);
for(int i = 0; i < PARALLEL_BLKS; ++i) { for(int i = 0; i < PARALLEL_BLKS; ++i) {
uint8_t multi_ref_idx = search_data[mode + i].pred_cu.intra.multi_ref_idx; uint8_t multi_ref_idx = search_data[mode + i].pred_cu.intra.multi_ref_idx;
@ -1801,7 +1811,8 @@ void uvg_search_cu_intra(
ref_pixels, ref_pixels,
LCU_WIDTH, LCU_WIDTH,
refs, refs,
log2_width, cu_loc->width,
cu_loc->height,
candidate_modes, candidate_modes,
search_data, search_data,
&temp_pred_cu, &temp_pred_cu,

View file

@ -720,7 +720,7 @@ int uvg_quantize_residual_avx2(encoder_state_t *const state,
} }
// Check if there are any non-zero coefficients. // Check if there are any non-zero coefficients.
for (int i = 0; i < width * width; i += 8) { for (int i = 0; i < width * height; i += 8) {
__m128i v_quant_coeff = _mm_loadu_si128((__m128i*)&(coeff_out[i])); __m128i v_quant_coeff = _mm_loadu_si128((__m128i*)&(coeff_out[i]));
has_coeffs = !_mm_testz_si128(_mm_set1_epi8(0xFF), v_quant_coeff); has_coeffs = !_mm_testz_si128(_mm_set1_epi8(0xFF), v_quant_coeff);
if(has_coeffs) break; if(has_coeffs) break;
@ -730,7 +730,7 @@ int uvg_quantize_residual_avx2(encoder_state_t *const state,
// rec_out. // rec_out.
if (has_coeffs && !early_skip) { if (has_coeffs && !early_skip) {
// Get quantized residual. (coeff_out -> coeff -> residual) // Get quantized residual. (coeff_out -> coeff -> residual)
uvg_dequant(state, coeff_out, coeff, width, width, color, uvg_dequant(state, coeff_out, coeff, width, height, color,
cur_cu->type, cur_cu->tr_idx == MTS_SKIP && color == COLOR_Y); cur_cu->type, cur_cu->tr_idx == MTS_SKIP && color == COLOR_Y);
if (state->encoder_control->cfg.lfnst && cur_cu->type == CU_INTRA) { if (state->encoder_control->cfg.lfnst && cur_cu->type == CU_INTRA) {

View file

@ -315,22 +315,22 @@ int uvg_quant_cbcr_residual_generic(
if (state->encoder_control->cfg.rdoq_enable && if (state->encoder_control->cfg.rdoq_enable &&
(width > 4 || !state->encoder_control->cfg.rdoq_skip)) (width > 4 || !state->encoder_control->cfg.rdoq_skip))
{ {
uvg_rdoq(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, uvg_rdoq(state, coeff, coeff_out, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
scan_order, cur_cu->type, cur_cu->cbf, cur_cu->cr_lfnst_idx); scan_order, cur_cu->type, cur_cu->cbf, cur_cu->cr_lfnst_idx);
} }
else if (state->encoder_control->cfg.rdoq_enable && false) { else if (state->encoder_control->cfg.rdoq_enable && false) {
uvg_ts_rdoq(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 2 ? COLOR_V : COLOR_U, uvg_ts_rdoq(state, coeff, coeff_out, width, height, cur_cu->joint_cb_cr == 2 ? COLOR_V : COLOR_U,
scan_order); scan_order);
} }
else { else {
uvg_quant(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, uvg_quant(state, coeff, coeff_out, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false, cur_cu->lfnst_idx); scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false, cur_cu->lfnst_idx);
} }
int8_t has_coeffs = 0; int8_t has_coeffs = 0;
{ {
int i; int i;
for (i = 0; i < width * width; ++i) { for (i = 0; i < width * height; ++i) {
if (coeff_out[i] != 0) { if (coeff_out[i] != 0) {
has_coeffs = 1; has_coeffs = 1;
break; break;
@ -341,10 +341,10 @@ int uvg_quant_cbcr_residual_generic(
if (has_coeffs && !early_skip) { if (has_coeffs && !early_skip) {
// Get quantized residual. (coeff_out -> coeff -> residual) // Get quantized residual. (coeff_out -> coeff -> residual)
uvg_dequant(state, coeff_out, coeff, width, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, uvg_dequant(state, coeff_out, coeff, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false); cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false);
if (cur_cu->cr_lfnst_idx) { if (cur_cu->cr_lfnst_idx) {
uvg_inv_lfnst(cur_cu, width, width, COLOR_UV, cur_cu->cr_lfnst_idx, coeff, tree_type); uvg_inv_lfnst(cur_cu, width, height, COLOR_UV, cur_cu->cr_lfnst_idx, coeff, tree_type);
} }
uvg_itransform2d(state->encoder_control, combined_residual, coeff, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, cur_cu); uvg_itransform2d(state->encoder_control, combined_residual, coeff, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, cur_cu);

View file

@ -115,13 +115,14 @@ int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth) {
/** /**
* \brief Get a function that calculates SATD for NxN block. * \brief Get a function that calculates SATD for NxN block.
* *
* \param n Width of the region for which SATD is calculated. * \param width Width of the region for which SATD is calculated.
* *
* \returns Pointer to cost_16bit_nxn_func. * \returns Pointer to cost_16bit_nxn_func.
*/ */
cost_pixel_nxn_func * uvg_pixels_get_satd_func(unsigned n) cost_pixel_nxn_func * uvg_pixels_get_satd_func(unsigned width, unsigned height)
{ {
switch (n) { if(width == height) {
switch (width) {
case 4: case 4:
return uvg_satd_4x4; return uvg_satd_4x4;
case 8: case 8:
@ -136,18 +137,21 @@ cost_pixel_nxn_func * uvg_pixels_get_satd_func(unsigned n)
return NULL; return NULL;
} }
} }
return NULL;
}
/** /**
* \brief Get a function that calculates SAD for NxN block. * \brief Get a function that calculates SAD for NxN block.
* *
* \param n Width of the region for which SAD is calculated. * \param width Width of the region for which SAD is calculated.
* *
* \returns Pointer to cost_16bit_nxn_func. * \returns Pointer to cost_16bit_nxn_func.
*/ */
cost_pixel_nxn_func * uvg_pixels_get_sad_func(unsigned n) cost_pixel_nxn_func * uvg_pixels_get_sad_func(unsigned width, unsigned height)
{ {
switch (n) { if(width == height) {
switch (width) {
case 4: case 4:
return uvg_sad_4x4; return uvg_sad_4x4;
case 8: case 8:
@ -162,17 +166,21 @@ cost_pixel_nxn_func * uvg_pixels_get_sad_func(unsigned n)
return NULL; return NULL;
} }
} }
return NULL;
}
/** /**
* \brief Get a function that calculates SATDs for 2 NxN blocks. * \brief Get a function that calculates SATDs for 2 NxN blocks.
* *
* \param n Width of the region for which SATD is calculated. * \param width Width of the region for which SATD is calculated.
* \param height Height of the region for which SATD is calculated.
* *
* \returns Pointer to cost_pixel_nxn_multi_func. * \returns Pointer to cost_pixel_nxn_multi_func.
*/ */
cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned n) cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned width, unsigned height)
{ {
switch (n) { if(width == height) {
switch (width) {
case 4: case 4:
return uvg_satd_4x4_dual; return uvg_satd_4x4_dual;
case 8: case 8:
@ -187,18 +195,21 @@ cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned n)
return NULL; return NULL;
} }
} }
return NULL;
}
/** /**
* \brief Get a function that calculates SADs for 2 NxN blocks. * \brief Get a function that calculates SADs for 2 NxN blocks.
* *
* \param n Width of the region for which SAD is calculated. * \param width Width of the region for which SAD is calculated.
* *
* \returns Pointer to cost_pixel_nxn_multi_func. * \returns Pointer to cost_pixel_nxn_multi_func.
*/ */
cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n) cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned width, unsigned height)
{ {
switch (n) { if(width == height) {
switch (width) {
case 4: case 4:
return uvg_sad_4x4_dual; return uvg_sad_4x4_dual;
case 8: case 8:
@ -213,6 +224,8 @@ cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n)
return NULL; return NULL;
} }
} }
return NULL;
}
// Precomputed CRC32C lookup table for polynomial 0x04C11DB7 // Precomputed CRC32C lookup table for polynomial 0x04C11DB7
const uint32_t uvg_crc_table[256] = { const uint32_t uvg_crc_table[256] = {

View file

@ -203,8 +203,8 @@ extern pixel_var_func *uvg_pixel_var;
extern generate_residual_func* uvg_generate_residual; extern generate_residual_func* uvg_generate_residual;
int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth); int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth);
cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned n); cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned width, unsigned height);
cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n); cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned width, unsigned height);
#define STRATEGIES_PICTURE_EXPORTS \ #define STRATEGIES_PICTURE_EXPORTS \
{"crc32c_4x4", (void**) &uvg_crc32c_4x4}, \ {"crc32c_4x4", (void**) &uvg_crc32c_4x4}, \

View file

@ -584,7 +584,7 @@ void uvg_chroma_transform_search(
if (is_jccr && !u_has_coeffs) continue; if (is_jccr && !u_has_coeffs) continue;
if (u_has_coeffs) { if (u_has_coeffs) {
uvg_dequant(state, u_quant_coeff, &u_coeff[i * trans_offset], width, width, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V, uvg_dequant(state, u_quant_coeff, &u_coeff[i * trans_offset], width, height, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V,
pred_cu->type, transforms[i] == CHROMA_TS); pred_cu->type, transforms[i] == CHROMA_TS);
if (transforms[i] != CHROMA_TS) { if (transforms[i] != CHROMA_TS) {

View file

@ -543,9 +543,18 @@ typedef struct uvg_config
uint8_t dual_tree; uint8_t dual_tree;
uint8_t min_qt_size[3];
uint8_t max_bt_size[3];
uint8_t max_tt_size[3];
uint8_t max_intra_slice_btt_depth;
uint8_t max_intra_slice_btt_depth_chroma;
uint8_t max_inter_slice_btt_depth;
uint8_t intra_rough_search_levels; uint8_t intra_rough_search_levels;
uint8_t ibc; /* \brief Intra Block Copy parameter */ uint8_t ibc; /* \brief Intra Block Copy parameter */
} uvg_config; } uvg_config;
/** /**

View file

@ -51,7 +51,7 @@ static void init_sig_last_scan(uint32_t *buff_d, uint32_t *buff_h,
uint32_t *buff_v, uint32_t *buff_v,
int32_t width, int32_t height) int32_t width, int32_t height)
{ {
uint32_t num_scan_pos = width * width; uint32_t num_scan_pos = width * height;
uint32_t next_scan_pos = 0; uint32_t next_scan_pos = 0;
int32_t xx, yy, x, y; int32_t xx, yy, x, y;
uint32_t scan_line; uint32_t scan_line;