mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-23 18:14:06 +00:00
[mtt] WIP
This commit is contained in:
parent
02a5adf768
commit
bbbd391b9e
16
src/cu.c
16
src/cu.c
|
@ -317,7 +317,10 @@ void uvg_cu_loc_ctor(cu_loc_t* loc, int x, int y, int width, int height)
|
|||
}
|
||||
|
||||
|
||||
void uvg_get_split_locs(const cu_loc_t* const origin, enum split_type split, cu_loc_t out[4])
|
||||
int uvg_get_split_locs(
|
||||
const cu_loc_t* const origin,
|
||||
enum split_type split,
|
||||
cu_loc_t out[4])
|
||||
{
|
||||
const int half_width = origin->width >> 1;
|
||||
const int half_height = origin->height >> 1;
|
||||
|
@ -333,24 +336,25 @@ void uvg_get_split_locs(const cu_loc_t* const origin, enum split_type split, cu_
|
|||
uvg_cu_loc_ctor(&out[1], origin->x + half_width, origin->y, half_width, half_height);
|
||||
uvg_cu_loc_ctor(&out[2], origin->x, origin->y + half_height, half_width, half_height);
|
||||
uvg_cu_loc_ctor(&out[3], origin->x + half_width, origin->y + half_height, half_width, half_height);
|
||||
break;
|
||||
return 4;
|
||||
case BT_HOR_SPLIT:
|
||||
uvg_cu_loc_ctor(&out[0], origin->x, origin->y, origin->width, half_height);
|
||||
uvg_cu_loc_ctor(&out[1], origin->x, origin->y + half_height, origin->width, half_height);
|
||||
break;
|
||||
return 2;
|
||||
case BT_VER_SPLIT:
|
||||
uvg_cu_loc_ctor(&out[0], origin->x, origin->y, half_width, origin->height);
|
||||
uvg_cu_loc_ctor(&out[1], origin->x + half_width, origin->y, half_width, origin->height);
|
||||
break;
|
||||
return 2;
|
||||
case TT_HOR_SPLIT:
|
||||
uvg_cu_loc_ctor(&out[0], origin->x, origin->y, origin->width, quarter_height);
|
||||
uvg_cu_loc_ctor(&out[1], origin->x, origin->y + quarter_height, origin->width, half_height);
|
||||
uvg_cu_loc_ctor(&out[2], origin->x, origin->y + quarter_height + half_height, origin->width, quarter_height);
|
||||
break;
|
||||
return 3;
|
||||
case TT_VER_SPLIT:
|
||||
uvg_cu_loc_ctor(&out[0], origin->x, origin->y, quarter_width, origin->height);
|
||||
uvg_cu_loc_ctor(&out[1], origin->x + quarter_width, origin->y, half_width, origin->height);
|
||||
uvg_cu_loc_ctor(&out[2], origin->x + quarter_width + half_width, origin->y, quarter_width, origin->height);
|
||||
break;
|
||||
return 3;
|
||||
}
|
||||
return 0;
|
||||
}
|
5
src/cu.h
5
src/cu.h
|
@ -185,7 +185,10 @@ typedef struct {
|
|||
|
||||
void uvg_cu_loc_ctor(cu_loc_t *loc, int x, int y, int width, int height);
|
||||
|
||||
void uvg_get_split_locs(const cu_loc_t* const origin, enum split_type split, cu_loc_t out[4]);
|
||||
int uvg_get_split_locs(
|
||||
const cu_loc_t* const origin,
|
||||
enum split_type split,
|
||||
cu_loc_t out[4]);
|
||||
|
||||
#define CU_GET_MV_CAND(cu_info_ptr, reflist) \
|
||||
(((reflist) == 0) ? (cu_info_ptr)->inter.mv_cand0 : (cu_info_ptr)->inter.mv_cand1)
|
||||
|
|
|
@ -543,7 +543,7 @@ static void encode_transform_unit(
|
|||
if(state->encoder_control->cfg.trskip_enable && width <= (1 << state->encoder_control->cfg.trskip_max_size) && !(cur_pu->type == CU_INTRA && cur_pu->intra.isp_mode != ISP_MODE_NO_ISP)) {
|
||||
cabac->cur_ctx = &cabac->ctx.transform_skip_model_luma;
|
||||
CABAC_BIN(cabac, cur_pu->tr_idx == MTS_SKIP, "transform_skip_flag");
|
||||
DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_TR_SKIP, x, y, width, width, (cur_pu->tr_idx == MTS_SKIP) ? 1 : 0);
|
||||
DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_TR_SKIP, x, y, width, height, (cur_pu->tr_idx == MTS_SKIP) ? 1 : 0);
|
||||
}
|
||||
if(cur_pu->tr_idx == MTS_SKIP) {
|
||||
uvg_encode_ts_residual(state, cabac, coeff_y, width, height, 0, scan_idx, NULL);
|
||||
|
@ -1040,7 +1040,7 @@ void uvg_encode_intra_luma_coding_unit(
|
|||
int multi_ref_idx = enable_mrl ? cur_cu->intra.multi_ref_idx : 0;
|
||||
|
||||
#ifdef UVG_DEBUG_PRINT_YUVIEW_CSV
|
||||
if(multi_ref_idx) DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_MRL, x, y, width, width, multi_ref_idx);
|
||||
if(multi_ref_idx) DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_MRL, x, y, width, height, multi_ref_idx);
|
||||
#endif
|
||||
|
||||
if (cur_cu->type == CU_INTRA && (y % LCU_WIDTH) != 0 && !cur_cu->bdpcmMode && enable_mrl && !mip_flag) {
|
||||
|
@ -1222,7 +1222,7 @@ bool uvg_write_split_flag(
|
|||
no_split = allow_qt = bh_split = bv_split = th_split = tv_split = true;
|
||||
if (depth > MAX_DEPTH) allow_qt = false;
|
||||
// ToDo: update this when btt is actually used
|
||||
bool allow_btt = false;// when mt_depth < MAX_BT_DEPTH
|
||||
bool allow_btt = true;// when mt_depth < MAX_BT_DEPTH
|
||||
|
||||
const int cu_width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width;
|
||||
const int cu_height = tree_type != UVG_CHROMA_T ? cu_loc->height : cu_loc->chroma_height;
|
||||
|
@ -1289,7 +1289,7 @@ bool uvg_write_split_flag(
|
|||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.split_flag_model[split_model]), split_flag != 0, bits, "split_flag");
|
||||
}
|
||||
|
||||
bool qt_split = split_flag == UVG_QUAD_SPLIT;
|
||||
bool qt_split = split_flag == QT_SPLIT;
|
||||
|
||||
if (!(implicit_split_mode == UVG_NO_SPLIT) && (allow_qt && allow_btt)) {
|
||||
split_model = (left_cu && GET_SPLITDATA(left_cu, depth)) + (above_cu && GET_SPLITDATA(above_cu, depth)) + (depth < 2 ? 0 : 3);
|
||||
|
@ -1384,28 +1384,16 @@ void uvg_encode_coding_tree(
|
|||
const int half_luma = cu_loc->width / 2;
|
||||
split_tree_t new_split_tree = { cur_cu->split_tree, split_tree.current_depth + 1 };
|
||||
|
||||
cu_loc_t new_cu_loc;
|
||||
uvg_cu_loc_ctor(&new_cu_loc, x, y, half_luma, half_luma);
|
||||
// Split blocks and remember to change x and y block positions
|
||||
uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc, new_split_tree);
|
||||
|
||||
if (!border_x || border_split_x) {
|
||||
uvg_cu_loc_ctor(&new_cu_loc, x + half_cu, y, half_luma, half_luma);
|
||||
uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc, new_split_tree);
|
||||
}
|
||||
if (!border_y || border_split_y) {
|
||||
uvg_cu_loc_ctor(&new_cu_loc, x, y + half_cu, half_luma, half_luma);
|
||||
uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc, new_split_tree);
|
||||
}
|
||||
if (!border || (border_split_x && border_split_y)) {
|
||||
uvg_cu_loc_ctor(&new_cu_loc, x + half_cu, y + half_cu, half_luma, half_luma);
|
||||
uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc, new_split_tree);
|
||||
cu_loc_t new_cu_loc[4];
|
||||
const int splits = uvg_get_split_locs(cu_loc, split_flag, new_cu_loc);
|
||||
for (int split = 0; split <splits; ++split) {
|
||||
uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc[split], new_split_tree);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_CU_TYPE, abs_x, abs_y, cu_width, cu_width, cur_cu->type-1);
|
||||
DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_CU_TYPE, abs_x, abs_y, cu_width, cu_height, cur_cu->type-1);
|
||||
|
||||
if (ctrl->cfg.lossless) {
|
||||
cabac->cur_ctx = &cabac->ctx.cu_transquant_bypass;
|
||||
|
@ -1457,8 +1445,8 @@ void uvg_encode_coding_tree(
|
|||
}
|
||||
}
|
||||
#ifdef UVG_DEBUG_PRINT_YUVIEW_CSV
|
||||
if (cur_cu->inter.mv_dir & 1) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVSKIP_L0, abs_x, abs_y, cu_width, cu_width, cur_cu->inter.mv[0][0], cur_cu->inter.mv[0][1]);
|
||||
if (cur_cu->inter.mv_dir & 2) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVSKIP_L1, abs_x, abs_y, cu_width, cu_width, cur_cu->inter.mv[1][0], cur_cu->inter.mv[1][1]);
|
||||
if (cur_cu->inter.mv_dir & 1) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVSKIP_L0, abs_x, abs_y, cu_width, cu_height, cur_cu->inter.mv[0][0], cur_cu->inter.mv[0][1]);
|
||||
if (cur_cu->inter.mv_dir & 2) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVSKIP_L1, abs_x, abs_y, cu_width, cu_height, cur_cu->inter.mv[1][0], cur_cu->inter.mv[1][1]);
|
||||
#endif
|
||||
|
||||
goto end;
|
||||
|
|
|
@ -528,48 +528,31 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream,
|
|||
WRITE_UE(stream, MIN_SIZE-2, "log2_min_luma_coding_block_size_minus2"); // Min size 2^3 = 8x8
|
||||
// if(!no_partition_constraints_override_constraint_flag)
|
||||
WRITE_U(stream, 0, 1, "partition_constraints_override_enabled_flag");
|
||||
WRITE_UE(stream, 0, "sps_log2_diff_min_qt_min_cb_intra_slice_luma");
|
||||
WRITE_UE(stream, 0, "sps_max_mtt_hierarchy_depth_intra_slice_luma");
|
||||
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[0]] - MIN_SIZE, "sps_log2_diff_min_qt_min_cb_intra_slice_luma");
|
||||
WRITE_UE(stream, encoder->cfg.max_intra_slice_btt_depth, "sps_max_mtt_hierarchy_depth_intra_slice_luma");
|
||||
if (encoder->cfg.max_intra_slice_btt_depth) {
|
||||
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[0]] - uvg_g_convert_to_log2[encoder->cfg.max_bt_size[0]], "sps_log2_diff_max_bt_min_qt_intra_slice_luma");
|
||||
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[0]] - uvg_g_convert_to_log2[encoder->cfg.max_tt_size[0]], "sps_log2_diff_max_tt_min_qt_intra_slice_luma");
|
||||
}
|
||||
|
||||
if (encoder->chroma_format != UVG_CSP_400)
|
||||
{
|
||||
WRITE_U(stream, encoder->cfg.dual_tree, 1, "qtbtt_dual_tree_intra_flag");
|
||||
}
|
||||
if (encoder->cfg.dual_tree) {
|
||||
WRITE_UE(stream, 0, "sps_log2_diff_min_qt_min_cb_intra_slice_chroma");
|
||||
WRITE_UE(stream, 0, "sps_max_mtt_hierarchy_depth_intra_slice_chroma");
|
||||
if (0 /*sps_max_mtt_hierarchy_depth_intra_slice_chroma != 0*/) {
|
||||
WRITE_UE(stream, 0, "sps_log2_diff_max_bt_min_qt_intra_slice_chroma");
|
||||
WRITE_UE(stream, 0, "sps_log2_diff_max_tt_min_qt_intra_slice_chroma");
|
||||
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[2]] - MIN_SIZE, "sps_log2_diff_min_qt_min_cb_intra_slice_chroma");
|
||||
WRITE_UE(stream, encoder->cfg.max_intra_slice_btt_depth_chroma, "sps_max_mtt_hierarchy_depth_intra_slice_chroma");
|
||||
if (encoder->cfg.max_intra_slice_btt_depth_chroma) {
|
||||
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[2]] - uvg_g_convert_to_log2[encoder->cfg.max_bt_size[2]], "sps_log2_diff_max_bt_min_qt_intra_slice_chroma");
|
||||
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[2]] - uvg_g_convert_to_log2[encoder->cfg.max_tt_size[2]], "sps_log2_diff_max_tt_min_qt_intra_slice_chroma");
|
||||
}
|
||||
}
|
||||
WRITE_UE(stream, 0, "sps_log2_diff_min_qt_min_cb_inter_slice");
|
||||
WRITE_UE(stream, 0, "sps_max_mtt_hierarchy_depth_inter_slice");
|
||||
|
||||
|
||||
#if 0 // mtt depth intra
|
||||
if (max_mtt_depth_intra != 0) {
|
||||
WRITE_UE(stream, 0, "sps_log2_diff_max_bt_min_qt_intra_tile_group_luma");
|
||||
WRITE_UE(stream, 0, "sps_log2_diff_max_tt_min_qt_intra_tile_group_luma");
|
||||
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[1]] - MIN_SIZE, "sps_log2_diff_min_qt_min_cb_inter_slice");
|
||||
WRITE_UE(stream, encoder->cfg.max_inter_slice_btt_depth, "sps_max_mtt_hierarchy_depth_inter_slice");
|
||||
if (encoder->cfg.max_inter_slice_btt_depth != 0) {
|
||||
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[1]] - uvg_g_convert_to_log2[encoder->cfg.max_bt_size[1]], "sps_log2_diff_max_bt_min_qt_inter_tile_group");
|
||||
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[1]] - uvg_g_convert_to_log2[encoder->cfg.max_tt_size[1]], "sps_log2_diff_max_tt_min_qt_inter_tile_group");
|
||||
}
|
||||
#endif
|
||||
#if 0 // mtt depth inter
|
||||
if (max_mtt_depth_inter != 0) {
|
||||
WRITE_UE(stream, 0, "sps_log2_diff_max_bt_min_qt_inter_tile_group");
|
||||
WRITE_UE(stream, 0, "sps_log2_diff_max_tt_min_qt_inter_tile_group");
|
||||
}
|
||||
#endif
|
||||
#if 0 // Dual Tree
|
||||
if (encoder->cfg.dual_i_tree) {
|
||||
WRITE_UE(stream, 0, "sps_log2_diff_min_qt_min_cb_intra_tile_group_chroma");
|
||||
WRITE_UE(stream, 0, "sps_max_mtt_hierarchy_depth_intra_tile_group_chroma");
|
||||
|
||||
if (max_mtt_depth_intra != 0) {
|
||||
WRITE_UE(stream, 0, "sps_log2_diff_max_bt_min_qt_intra_tile_group_chroma");
|
||||
WRITE_UE(stream, 0, "sps_log2_diff_max_tt_min_qt_intra_tile_group_chroma");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (LCU_WIDTH > 32)
|
||||
WRITE_U(stream, (TR_MAX_LOG2_SIZE - 5) ? 1 : 0, 1, "sps_max_luma_transform_size_64_flag");
|
||||
|
|
|
@ -129,7 +129,7 @@ typedef int16_t coeff_t;
|
|||
typedef int32_t mv_t;
|
||||
|
||||
//#define VERBOSE 1
|
||||
//#define UVG_DEBUG_PRINT_CABAC 1
|
||||
#define UVG_DEBUG_PRINT_CABAC 1
|
||||
//#define UVG_DEBUG 1
|
||||
|
||||
//#define UVG_DEBUG_PRINT_YUVIEW_CSV 1
|
||||
|
|
|
@ -1552,7 +1552,7 @@ void uvg_intra_predict(
|
|||
}
|
||||
}
|
||||
else {
|
||||
uvg_pixels_blit(&state->tile->frame->cclm_luma_rec[x / 2 + (y * stride) / 4], dst, width, width, stride / 2, width);
|
||||
uvg_pixels_blit(&state->tile->frame->cclm_luma_rec[x / 2 + (y * stride) / 4], dst, width, height, stride / 2, width);
|
||||
if (!PU_IS_TU(&data->pred_cu) || data->cclm_parameters[color == COLOR_U ? 0 : 1].b <= 0) {
|
||||
predict_cclm(
|
||||
state, color, width, height, x, y, stride, intra_mode, lcu, refs, dst,
|
||||
|
@ -1560,7 +1560,7 @@ void uvg_intra_predict(
|
|||
tree_type);
|
||||
}
|
||||
else {
|
||||
linear_transform_cclm(&data->cclm_parameters[color == COLOR_U ? 0 : 1], dst, dst, width, width);
|
||||
linear_transform_cclm(&data->cclm_parameters[color == COLOR_U ? 0 : 1], dst, dst, width, height);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -447,7 +447,7 @@ double uvg_get_coeff_cost(
|
|||
} else {
|
||||
double ccc = get_coeff_cabac_cost(state, coeff_ptr, cu_loc, color, scan_mode, tr_skip, cur_tu);
|
||||
if (save_cccs) {
|
||||
save_ccc(state->qp, coeff, width * width, ccc);
|
||||
save_ccc(state->qp, coeff, width * height, ccc);
|
||||
}
|
||||
return ccc;
|
||||
}
|
||||
|
@ -1474,10 +1474,13 @@ void uvg_rdoq(
|
|||
// Hope the compiler is able to utilize this information.
|
||||
switch (cg_num) {
|
||||
case 1: FILL_ARRAY(sig_coeffgroup_flag, 0, 1); break;
|
||||
case 2: FILL_ARRAY(sig_coeffgroup_flag, 0, 2); break;
|
||||
case 4: FILL_ARRAY(sig_coeffgroup_flag, 0, 4); break;
|
||||
case 8: FILL_ARRAY(sig_coeffgroup_flag, 0, 8); break;
|
||||
case 16: FILL_ARRAY(sig_coeffgroup_flag, 0, 16); break;
|
||||
case 32: FILL_ARRAY(sig_coeffgroup_flag, 0, 32); break;
|
||||
case 64: FILL_ARRAY(sig_coeffgroup_flag, 0, 64); break;
|
||||
default: assert(0 && "There should be 1, 4, 16 or 64 coefficient groups");
|
||||
default: assert(0 && "There should be 1, 2, 4, 8, 16, 32 or 64 coefficient groups");
|
||||
}
|
||||
|
||||
cabac_ctx_t *base_coeff_group_ctx = &(cabac->ctx.sig_coeff_group_model[color ? 2 : 0]);
|
||||
|
|
51
src/search.c
51
src/search.c
|
@ -253,15 +253,16 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in
|
|||
}
|
||||
|
||||
|
||||
static void lcu_fill_cbf(lcu_t *lcu, int x_local, unsigned y_local, unsigned width, const cu_info_t *cur_cu)
|
||||
static void lcu_fill_cbf(lcu_t *lcu, int x_local, unsigned y_local, unsigned width, unsigned height, const cu_info_t *cur_cu)
|
||||
{
|
||||
const uint32_t mask = ~((MIN(width, TR_MAX_WIDTH))-1);
|
||||
const uint32_t x_mask = ~((MIN(width, TR_MAX_WIDTH))-1);
|
||||
const uint32_t y_mask = ~((MIN(height, TR_MAX_WIDTH))-1);
|
||||
|
||||
// Set coeff flags in every CU covered by part_mode in this depth.
|
||||
for (uint32_t y = y_local; y < y_local + width; y += SCU_WIDTH) {
|
||||
for (uint32_t y = y_local; y < y_local + height; y += SCU_WIDTH) {
|
||||
for (uint32_t x = x_local; x < x_local + width; x += SCU_WIDTH) {
|
||||
// Use TU top-left CU to propagate coeff flags
|
||||
cu_info_t *cu_from = LCU_GET_CU_AT_PX(lcu, x & mask, y & mask);
|
||||
cu_info_t *cu_from = LCU_GET_CU_AT_PX(lcu, x & x_mask, y & y_mask);
|
||||
cu_info_t *cu_to = LCU_GET_CU_AT_PX(lcu, x, y);
|
||||
if (cu_from != cu_to) {
|
||||
// Chroma and luma coeff data is needed for deblocking
|
||||
|
@ -943,6 +944,7 @@ static double search_cu(
|
|||
const int x = cu_loc->x;
|
||||
const int y = cu_loc->y;
|
||||
const int luma_width = cu_loc->width;
|
||||
const int luma_height = cu_loc->height;
|
||||
assert(cu_width >= 4);
|
||||
double cost = MAX_DOUBLE;
|
||||
double inter_zero_coeff_cost = MAX_DOUBLE;
|
||||
|
@ -1009,7 +1011,7 @@ static double search_cu(
|
|||
|
||||
// If the CU is completely inside the frame at this depth, search for
|
||||
// prediction modes at this depth.
|
||||
if ( x + luma_width <= frame_width && y + luma_width <= frame_height)
|
||||
if ( x + luma_width <= frame_width && y + luma_height <= frame_height)
|
||||
{
|
||||
int cu_width_inter_min = LCU_WIDTH >> pu_depth_inter.max;
|
||||
bool can_use_inter =
|
||||
|
@ -1022,7 +1024,7 @@ static double search_cu(
|
|||
// otherwise forbid it.
|
||||
(x & ~(cu_width_inter_min - 1)) + cu_width_inter_min > frame_width ||
|
||||
(y & ~(cu_width_inter_min - 1)) + cu_width_inter_min > frame_height
|
||||
);
|
||||
) && cu_loc->width == cu_loc->height; // Don't allow non square inter CUs for now
|
||||
|
||||
if (can_use_inter) {
|
||||
double mode_cost;
|
||||
|
@ -1179,7 +1181,7 @@ static double search_cu(
|
|||
if ((cur_cu->log2_height + cur_cu->log2_width < 6) || state->encoder_control->chroma_format == UVG_CSP_400 || tree_type == UVG_LUMA_T) {
|
||||
recon_chroma = false;
|
||||
}
|
||||
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
|
||||
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
|
||||
uvg_intra_recon_cu(state,
|
||||
&intra_search, cu_loc,
|
||||
NULL, lcu,
|
||||
|
@ -1226,7 +1228,7 @@ static double search_cu(
|
|||
if(cbf_cr) cbf_set(&split_cu->cbf, COLOR_V);
|
||||
split_cu->joint_cb_cr = jccr;
|
||||
}
|
||||
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
|
||||
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
|
||||
|
||||
|
||||
} else if (cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) {
|
||||
|
@ -1269,7 +1271,7 @@ static double search_cu(
|
|||
}
|
||||
}
|
||||
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
|
||||
lcu_fill_cbf(lcu, x_local, y_local, cu_width, cur_cu);
|
||||
lcu_fill_cbf(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1308,7 +1310,7 @@ static double search_cu(
|
|||
// if (cur_cu->merged) {
|
||||
// cur_cu->merged = 0;
|
||||
// cur_cu->skipped = 1;
|
||||
// lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
|
||||
// lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
|
||||
// }
|
||||
|
||||
// cur_cu->cbf = 0;
|
||||
|
@ -1332,9 +1334,9 @@ static double search_cu(
|
|||
|
||||
// Recursively split all the way to max search depth.
|
||||
if (can_split_cu) {
|
||||
const split_tree_t new_split = { split_tree.split_tree | QT_SPLIT << (split_tree.current_depth * 3), split_tree.current_depth + 1 };
|
||||
const int split_type = depth == 0 ? QT_SPLIT : BT_HOR_SPLIT;
|
||||
const split_tree_t new_split = { split_tree.split_tree | split_type << (split_tree.current_depth * 3), split_tree.current_depth + 1 };
|
||||
|
||||
int half_cu = cu_width >> (tree_type != UVG_CHROMA_T);
|
||||
double split_cost = 0.0;
|
||||
int cbf = cbf_is_set_any(cur_cu->cbf);
|
||||
cabac_data_t post_seach_cabac;
|
||||
|
@ -1389,19 +1391,14 @@ static double search_cu(
|
|||
if (cur_cu->type == CU_NOTSET || cbf || state->encoder_control->cfg.cu_split_termination == UVG_CU_SPLIT_TERMINATION_OFF) {
|
||||
initialize_partial_work_tree(lcu, &split_lcu, cu_loc, tree_type);
|
||||
cu_loc_t new_cu_loc[4];
|
||||
uvg_get_split_locs(cu_loc, QT_SPLIT, new_cu_loc);
|
||||
if (split_cost < cost) {
|
||||
split_cost += search_cu(state, &new_cu_loc[0], &split_lcu, tree_type, new_split);
|
||||
}
|
||||
if (split_cost < cost) {
|
||||
split_cost += search_cu(state, &new_cu_loc[1], &split_lcu, tree_type, new_split);
|
||||
}
|
||||
if (split_cost < cost) {
|
||||
split_cost += search_cu(state, &new_cu_loc[2], &split_lcu, tree_type, new_split);
|
||||
}
|
||||
if (split_cost < cost) {
|
||||
split_cost += search_cu(state, &new_cu_loc[3], &split_lcu, tree_type, new_split);
|
||||
const int splits = uvg_get_split_locs(cu_loc, split_type, new_cu_loc);
|
||||
for (int split = 0; split < splits; ++split) {
|
||||
split_cost += search_cu(state, &new_cu_loc[split], &split_lcu, tree_type, new_split);
|
||||
if (split_cost < cost) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
split_cost = INT_MAX;
|
||||
}
|
||||
|
@ -1442,7 +1439,7 @@ static double search_cu(
|
|||
cur_cu->lfnst_idx = 0;
|
||||
cur_cu->cr_lfnst_idx = 0;
|
||||
|
||||
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
|
||||
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
|
||||
|
||||
intra_search_data_t proxy;
|
||||
FILL(proxy, 0);
|
||||
|
@ -1492,7 +1489,7 @@ static double search_cu(
|
|||
}
|
||||
// Add candidate when in inter slice or ibc is enabled
|
||||
if(state->frame->slicetype != UVG_SLICE_I || state->encoder_control->cfg.ibc) {
|
||||
uvg_hmvp_add_mv(state, x, y, cu_width, cu_width, cur_cu);
|
||||
uvg_hmvp_add_mv(state, x, y, cu_width, cu_height, cur_cu);
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
@ -1520,7 +1517,7 @@ static double search_cu(
|
|||
}
|
||||
// Add candidate when in inter slice or ibc is enabled
|
||||
if(state->frame->slicetype != UVG_SLICE_I || state->encoder_control->cfg.ibc) {
|
||||
uvg_hmvp_add_mv(state, x, y, cu_width, cu_width, cur_cu);
|
||||
uvg_hmvp_add_mv(state, x, y, cu_width, cu_height, cur_cu);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -2187,8 +2187,8 @@ void uvg_cu_cost_inter_rd2(
|
|||
UVG_BOTH_T);
|
||||
ALIGNED(64) uvg_pixel u_pred[LCU_WIDTH_C * LCU_WIDTH_C];
|
||||
ALIGNED(64) uvg_pixel v_pred[LCU_WIDTH_C * LCU_WIDTH_C];
|
||||
uvg_pixels_blit(&lcu->ref.u[index], u_pred, width, width, LCU_WIDTH_C, width);
|
||||
uvg_pixels_blit(&lcu->ref.v[index], v_pred, width, width, LCU_WIDTH_C, width);
|
||||
uvg_pixels_blit(&lcu->ref.u[index], u_pred, width, height, LCU_WIDTH_C, width);
|
||||
uvg_pixels_blit(&lcu->ref.v[index], v_pred, width, height, LCU_WIDTH_C, width);
|
||||
ALIGNED(64) int16_t u_resi[LCU_WIDTH_C * LCU_WIDTH_C];
|
||||
ALIGNED(64) int16_t v_resi[LCU_WIDTH_C * LCU_WIDTH_C];
|
||||
|
||||
|
|
|
@ -130,17 +130,31 @@ static INLINE uint8_t select_best_mode_index(const int8_t *modes, const double *
|
|||
*
|
||||
* \return
|
||||
*/
|
||||
static void get_cost_dual(encoder_state_t * const state,
|
||||
const pred_buffer preds, const uvg_pixel *orig_block,
|
||||
cost_pixel_nxn_multi_func *satd_twin_func,
|
||||
cost_pixel_nxn_multi_func *sad_twin_func,
|
||||
int width, double *costs_out)
|
||||
static void get_cost_dual(
|
||||
encoder_state_t * const state,
|
||||
const pred_buffer preds,
|
||||
const uvg_pixel *orig_block,
|
||||
cost_pixel_nxn_multi_func *satd_twin_func,
|
||||
cost_pixel_nxn_multi_func *sad_twin_func,
|
||||
int width,
|
||||
int height,
|
||||
double *costs_out)
|
||||
{
|
||||
#define PARALLEL_BLKS 2
|
||||
unsigned satd_costs[PARALLEL_BLKS] = { 0 };
|
||||
satd_twin_func(preds, orig_block, PARALLEL_BLKS, satd_costs);
|
||||
if (satd_twin_func != NULL) {
|
||||
satd_twin_func(preds, orig_block, PARALLEL_BLKS, satd_costs);
|
||||
} else {
|
||||
satd_costs[0] = uvg_satd_any_size(width, height, preds[0], width, orig_block, LCU_WIDTH);
|
||||
satd_costs[1] = uvg_satd_any_size(width, height, preds[1], width, orig_block, LCU_WIDTH);
|
||||
}
|
||||
unsigned unsigned_sad_costs[PARALLEL_BLKS] = { 0 };
|
||||
sad_twin_func(preds, orig_block, PARALLEL_BLKS, unsigned_sad_costs);
|
||||
if (sad_twin_func != NULL) {
|
||||
sad_twin_func(preds, orig_block, PARALLEL_BLKS, unsigned_sad_costs);
|
||||
} else {
|
||||
unsigned_sad_costs[0] = uvg_reg_sad(preds[0], orig_block, width, height, width, LCU_WIDTH);
|
||||
unsigned_sad_costs[1] = uvg_reg_sad(preds[1], orig_block, width, height, width, LCU_WIDTH);
|
||||
}
|
||||
costs_out[0] = (double)MIN(satd_costs[0], unsigned_sad_costs[0] * 2);
|
||||
costs_out[1] = (double)MIN(satd_costs[1], unsigned_sad_costs[1] * 2);
|
||||
|
||||
|
@ -651,7 +665,7 @@ static int search_intra_chroma_rough(
|
|||
uvg_pixel _orig_block[32 * 32 + SIMD_ALIGNMENT];
|
||||
uvg_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT);
|
||||
|
||||
uvg_pixels_blit(orig_u, orig_block, width, width, LCU_WIDTH_C, width);
|
||||
uvg_pixels_blit(orig_u, orig_block, width, height, LCU_WIDTH_C, width);
|
||||
int modes_count = (state->encoder_control->cfg.cclm ? 8 : 5);
|
||||
for (int i = 0; i < modes_count; ++i) {
|
||||
const int8_t mode_chroma = chroma_data[i].pred_cu.intra.mode_chroma;
|
||||
|
@ -671,7 +685,7 @@ static int search_intra_chroma_rough(
|
|||
}
|
||||
}
|
||||
|
||||
uvg_pixels_blit(orig_v, orig_block, width, width, LCU_WIDTH_C, width);
|
||||
uvg_pixels_blit(orig_v, orig_block, width, height, LCU_WIDTH_C, width);
|
||||
for (int i = 0; i < modes_count; ++i) {
|
||||
const int8_t mode_chroma = chroma_data[i].pred_cu.intra.mode_chroma;
|
||||
if (mode_chroma == luma_mode || mode_chroma == 0 || mode_chroma >= 81) continue;
|
||||
|
@ -764,7 +778,7 @@ static int16_t search_intra_rough(
|
|||
uvg_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT);
|
||||
|
||||
// Store original block for SAD computation
|
||||
uvg_pixels_blit(orig, orig_block, width, width, origstride, width);
|
||||
uvg_pixels_blit(orig, orig_block, width, height, origstride, width);
|
||||
|
||||
int8_t modes_selected = 0;
|
||||
// Note: get_cost and get_cost_dual may return negative costs.
|
||||
|
@ -783,7 +797,7 @@ static int16_t search_intra_rough(
|
|||
|
||||
// Calculate SAD for evenly spaced modes to select the starting point for
|
||||
// the recursive search.
|
||||
cu_loc_t loc = { 0, 0, width, width, width, width };
|
||||
cu_loc_t loc = { 0, 0, width, height, width, height };
|
||||
intra_search_data_t search_proxy;
|
||||
FILL(search_proxy, 0);
|
||||
search_proxy.pred_cu = *pred_cu;
|
||||
|
@ -963,19 +977,19 @@ static uint8_t search_intra_rough(
|
|||
uvg_pixel *orig,
|
||||
int32_t origstride,
|
||||
uvg_intra_references *refs,
|
||||
int log2_width,
|
||||
int width,
|
||||
int height,
|
||||
int8_t *intra_preds,
|
||||
intra_search_data_t* modes_out,
|
||||
cu_info_t* const pred_cu,
|
||||
uint8_t mip_ctx)
|
||||
{
|
||||
#define PARALLEL_BLKS 2 // TODO: use 4 for AVX-512 in the future?
|
||||
assert(log2_width >= 2 && log2_width <= 5);
|
||||
int_fast8_t width = 1 << log2_width;
|
||||
assert(width >= 4 && width <= 32);
|
||||
// cost_pixel_nxn_func *satd_func = kvz_pixels_get_satd_func(width);
|
||||
// cost_pixel_nxn_func *sad_func = kvz_pixels_get_sad_func(width);
|
||||
cost_pixel_nxn_multi_func *satd_dual_func = uvg_pixels_get_satd_dual_func(width);
|
||||
cost_pixel_nxn_multi_func *sad_dual_func = uvg_pixels_get_sad_dual_func(width);
|
||||
cost_pixel_nxn_multi_func *satd_dual_func = uvg_pixels_get_satd_dual_func(width, height);
|
||||
cost_pixel_nxn_multi_func *sad_dual_func = uvg_pixels_get_sad_dual_func(width, height);
|
||||
bool mode_checked[UVG_NUM_INTRA_MODES] = {0};
|
||||
double costs[UVG_NUM_INTRA_MODES];
|
||||
|
||||
|
@ -990,7 +1004,7 @@ static uint8_t search_intra_rough(
|
|||
uvg_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT);
|
||||
|
||||
// Store original block for SAD computation
|
||||
uvg_pixels_blit(orig, orig_block, width, width, origstride, width);
|
||||
uvg_pixels_blit(orig, orig_block, width, height, origstride, width);
|
||||
|
||||
int8_t modes_selected = 0;
|
||||
// Note: get_cost and get_cost_dual may return negative costs.
|
||||
|
@ -1016,17 +1030,16 @@ static uint8_t search_intra_rough(
|
|||
|
||||
// Calculate SAD for evenly spaced modes to select the starting point for
|
||||
// the recursive search.
|
||||
cu_loc_t loc = { 0, 0, width, width, width, width };
|
||||
intra_search_data_t search_proxy;
|
||||
FILL(search_proxy, 0);
|
||||
search_proxy.pred_cu = *pred_cu;
|
||||
|
||||
int offset = 1 << state->encoder_control->cfg.intra_rough_search_levels;
|
||||
search_proxy.pred_cu.intra.mode = 0;
|
||||
uvg_intra_predict(state, refs, &loc, COLOR_Y, preds[0], &search_proxy, NULL, UVG_LUMA_T);
|
||||
uvg_intra_predict(state, refs, cu_loc, COLOR_Y, preds[0], &search_proxy, NULL, UVG_LUMA_T);
|
||||
search_proxy.pred_cu.intra.mode = 1;
|
||||
uvg_intra_predict(state, refs, &loc, COLOR_Y, preds[1], &search_proxy, NULL, UVG_LUMA_T);
|
||||
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, costs);
|
||||
uvg_intra_predict(state, refs, cu_loc, COLOR_Y, preds[1], &search_proxy, NULL, UVG_LUMA_T);
|
||||
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, height, costs);
|
||||
mode_checked[0] = true;
|
||||
mode_checked[1] = true;
|
||||
costs[0] += count_bits(
|
||||
|
@ -1075,12 +1088,12 @@ static uint8_t search_intra_rough(
|
|||
for (int i = 0; i < PARALLEL_BLKS; ++i) {
|
||||
if (mode + i * offset <= 66) {
|
||||
search_proxy.pred_cu.intra.mode = mode + i*offset;
|
||||
uvg_intra_predict(state, refs, &loc, COLOR_Y, preds[i], &search_proxy, NULL, UVG_LUMA_T);
|
||||
uvg_intra_predict(state, refs, cu_loc, COLOR_Y, preds[i], &search_proxy, NULL, UVG_LUMA_T);
|
||||
}
|
||||
}
|
||||
|
||||
//TODO: add generic version of get cost multi
|
||||
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, costs_out);
|
||||
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, height, costs_out);
|
||||
for (int i = 0; i < PARALLEL_BLKS; ++i) {
|
||||
if (mode + i * offset <= 66) {
|
||||
costs_out[i] += count_bits(
|
||||
|
@ -1147,12 +1160,12 @@ static uint8_t search_intra_rough(
|
|||
|
||||
for (int block = 0; block < PARALLEL_BLKS; ++block) {
|
||||
search_proxy.pred_cu.intra.mode = modes_to_check[block + i];
|
||||
uvg_intra_predict(state, refs, &loc, COLOR_Y, preds[block], &search_proxy, NULL, UVG_LUMA_T);
|
||||
uvg_intra_predict(state, refs, cu_loc, COLOR_Y, preds[block], &search_proxy, NULL, UVG_LUMA_T);
|
||||
|
||||
}
|
||||
|
||||
//TODO: add generic version of get cost multi
|
||||
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, costs_out);
|
||||
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, height, costs_out);
|
||||
for (int block = 0; block < PARALLEL_BLKS; ++block) {
|
||||
costs_out[block] += count_bits(
|
||||
state,
|
||||
|
@ -1219,12 +1232,9 @@ static void get_rough_cost_for_2n_modes(
|
|||
const int height = cu_loc->height;
|
||||
cost_pixel_nxn_multi_func* satd_dual_func;
|
||||
cost_pixel_nxn_multi_func* sad_dual_func;
|
||||
if (width == height) {
|
||||
satd_dual_func = uvg_pixels_get_satd_dual_func(width);
|
||||
sad_dual_func = uvg_pixels_get_sad_dual_func(width);
|
||||
} else {
|
||||
assert(false && "Joose promised to fix this.");
|
||||
}
|
||||
satd_dual_func = uvg_pixels_get_satd_dual_func(width, height);
|
||||
sad_dual_func = uvg_pixels_get_sad_dual_func(width, height);
|
||||
|
||||
|
||||
uvg_pixel _preds[PARALLEL_BLKS * MIN(LCU_WIDTH, 64)* MIN(LCU_WIDTH, 64)+ SIMD_ALIGNMENT];
|
||||
pred_buffer preds = ALIGNED_POINTER(_preds, SIMD_ALIGNMENT);
|
||||
|
@ -1232,7 +1242,7 @@ static void get_rough_cost_for_2n_modes(
|
|||
uvg_pixel _orig_block[MIN(LCU_WIDTH, 64) * MIN(LCU_WIDTH, 64) + SIMD_ALIGNMENT];
|
||||
uvg_pixel* orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT);
|
||||
|
||||
uvg_pixels_blit(orig, orig_block, width, width, orig_stride, width);
|
||||
uvg_pixels_blit(orig, orig_block, width, height, orig_stride, width);
|
||||
|
||||
const double mrl = state->encoder_control->cfg.mrl && (cu_loc->y % LCU_WIDTH) ? CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.multi_ref_line[0]), 1) : 0;
|
||||
const double not_mip = state->encoder_control->cfg.mip ? CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.mip_flag[mip_ctx]), 0) : 0;
|
||||
|
@ -1243,7 +1253,7 @@ static void get_rough_cost_for_2n_modes(
|
|||
for (int i = 0; i < PARALLEL_BLKS; ++i) {
|
||||
uvg_intra_predict(state, &refs[search_data[mode + i].pred_cu.intra.multi_ref_idx], cu_loc, COLOR_Y, preds[i], &search_data[mode + i], NULL, UVG_LUMA_T);
|
||||
}
|
||||
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, costs_out);
|
||||
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, height, costs_out);
|
||||
|
||||
for(int i = 0; i < PARALLEL_BLKS; ++i) {
|
||||
uint8_t multi_ref_idx = search_data[mode + i].pred_cu.intra.multi_ref_idx;
|
||||
|
@ -1796,16 +1806,17 @@ void uvg_search_cu_intra(
|
|||
bool skip_rough_search = (is_large || state->encoder_control->cfg.rdo >= 4);
|
||||
if (!skip_rough_search) {
|
||||
num_regular_modes = number_of_modes = search_intra_rough(
|
||||
state,
|
||||
cu_loc,
|
||||
ref_pixels,
|
||||
LCU_WIDTH,
|
||||
refs,
|
||||
log2_width,
|
||||
candidate_modes,
|
||||
search_data,
|
||||
&temp_pred_cu,
|
||||
mip_ctx);
|
||||
state,
|
||||
cu_loc,
|
||||
ref_pixels,
|
||||
LCU_WIDTH,
|
||||
refs,
|
||||
cu_loc->width,
|
||||
cu_loc->height,
|
||||
candidate_modes,
|
||||
search_data,
|
||||
&temp_pred_cu,
|
||||
mip_ctx);
|
||||
// if(lines == 1) sort_modes(search_data, number_of_modes);
|
||||
|
||||
} else {
|
||||
|
|
|
@ -720,7 +720,7 @@ int uvg_quantize_residual_avx2(encoder_state_t *const state,
|
|||
}
|
||||
|
||||
// Check if there are any non-zero coefficients.
|
||||
for (int i = 0; i < width * width; i += 8) {
|
||||
for (int i = 0; i < width * height; i += 8) {
|
||||
__m128i v_quant_coeff = _mm_loadu_si128((__m128i*)&(coeff_out[i]));
|
||||
has_coeffs = !_mm_testz_si128(_mm_set1_epi8(0xFF), v_quant_coeff);
|
||||
if(has_coeffs) break;
|
||||
|
@ -730,7 +730,7 @@ int uvg_quantize_residual_avx2(encoder_state_t *const state,
|
|||
// rec_out.
|
||||
if (has_coeffs && !early_skip) {
|
||||
// Get quantized residual. (coeff_out -> coeff -> residual)
|
||||
uvg_dequant(state, coeff_out, coeff, width, width, color,
|
||||
uvg_dequant(state, coeff_out, coeff, width, height, color,
|
||||
cur_cu->type, cur_cu->tr_idx == MTS_SKIP && color == COLOR_Y);
|
||||
|
||||
if (state->encoder_control->cfg.lfnst && cur_cu->type == CU_INTRA) {
|
||||
|
|
|
@ -315,22 +315,22 @@ int uvg_quant_cbcr_residual_generic(
|
|||
if (state->encoder_control->cfg.rdoq_enable &&
|
||||
(width > 4 || !state->encoder_control->cfg.rdoq_skip))
|
||||
{
|
||||
uvg_rdoq(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
|
||||
uvg_rdoq(state, coeff, coeff_out, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
|
||||
scan_order, cur_cu->type, cur_cu->cbf, cur_cu->cr_lfnst_idx);
|
||||
}
|
||||
else if (state->encoder_control->cfg.rdoq_enable && false) {
|
||||
uvg_ts_rdoq(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 2 ? COLOR_V : COLOR_U,
|
||||
uvg_ts_rdoq(state, coeff, coeff_out, width, height, cur_cu->joint_cb_cr == 2 ? COLOR_V : COLOR_U,
|
||||
scan_order);
|
||||
}
|
||||
else {
|
||||
uvg_quant(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
|
||||
uvg_quant(state, coeff, coeff_out, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
|
||||
scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false, cur_cu->lfnst_idx);
|
||||
}
|
||||
|
||||
int8_t has_coeffs = 0;
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < width * width; ++i) {
|
||||
for (i = 0; i < width * height; ++i) {
|
||||
if (coeff_out[i] != 0) {
|
||||
has_coeffs = 1;
|
||||
break;
|
||||
|
@ -341,10 +341,10 @@ int uvg_quant_cbcr_residual_generic(
|
|||
if (has_coeffs && !early_skip) {
|
||||
|
||||
// Get quantized residual. (coeff_out -> coeff -> residual)
|
||||
uvg_dequant(state, coeff_out, coeff, width, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
|
||||
uvg_dequant(state, coeff_out, coeff, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
|
||||
cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false);
|
||||
if (cur_cu->cr_lfnst_idx) {
|
||||
uvg_inv_lfnst(cur_cu, width, width, COLOR_UV, cur_cu->cr_lfnst_idx, coeff, tree_type);
|
||||
uvg_inv_lfnst(cur_cu, width, height, COLOR_UV, cur_cu->cr_lfnst_idx, coeff, tree_type);
|
||||
}
|
||||
|
||||
uvg_itransform2d(state->encoder_control, combined_residual, coeff, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, cur_cu);
|
||||
|
|
|
@ -115,103 +115,116 @@ int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth) {
|
|||
/**
|
||||
* \brief Get a function that calculates SATD for NxN block.
|
||||
*
|
||||
* \param n Width of the region for which SATD is calculated.
|
||||
* \param width Width of the region for which SATD is calculated.
|
||||
*
|
||||
* \returns Pointer to cost_16bit_nxn_func.
|
||||
*/
|
||||
cost_pixel_nxn_func * uvg_pixels_get_satd_func(unsigned n)
|
||||
cost_pixel_nxn_func * uvg_pixels_get_satd_func(unsigned width, unsigned height)
|
||||
{
|
||||
switch (n) {
|
||||
case 4:
|
||||
return uvg_satd_4x4;
|
||||
case 8:
|
||||
return uvg_satd_8x8;
|
||||
case 16:
|
||||
return uvg_satd_16x16;
|
||||
case 32:
|
||||
return uvg_satd_32x32;
|
||||
case 64:
|
||||
return uvg_satd_64x64;
|
||||
default:
|
||||
return NULL;
|
||||
if(width == height) {
|
||||
switch (width) {
|
||||
case 4:
|
||||
return uvg_satd_4x4;
|
||||
case 8:
|
||||
return uvg_satd_8x8;
|
||||
case 16:
|
||||
return uvg_satd_16x16;
|
||||
case 32:
|
||||
return uvg_satd_32x32;
|
||||
case 64:
|
||||
return uvg_satd_64x64;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Get a function that calculates SAD for NxN block.
|
||||
*
|
||||
* \param n Width of the region for which SAD is calculated.
|
||||
* \param width Width of the region for which SAD is calculated.
|
||||
*
|
||||
* \returns Pointer to cost_16bit_nxn_func.
|
||||
*/
|
||||
cost_pixel_nxn_func * uvg_pixels_get_sad_func(unsigned n)
|
||||
cost_pixel_nxn_func * uvg_pixels_get_sad_func(unsigned width, unsigned height)
|
||||
{
|
||||
switch (n) {
|
||||
case 4:
|
||||
return uvg_sad_4x4;
|
||||
case 8:
|
||||
return uvg_sad_8x8;
|
||||
case 16:
|
||||
return uvg_sad_16x16;
|
||||
case 32:
|
||||
return uvg_sad_32x32;
|
||||
case 64:
|
||||
return uvg_sad_64x64;
|
||||
default:
|
||||
return NULL;
|
||||
if(width == height) {
|
||||
switch (width) {
|
||||
case 4:
|
||||
return uvg_sad_4x4;
|
||||
case 8:
|
||||
return uvg_sad_8x8;
|
||||
case 16:
|
||||
return uvg_sad_16x16;
|
||||
case 32:
|
||||
return uvg_sad_32x32;
|
||||
case 64:
|
||||
return uvg_sad_64x64;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Get a function that calculates SATDs for 2 NxN blocks.
|
||||
*
|
||||
* \param n Width of the region for which SATD is calculated.
|
||||
* \param width Width of the region for which SATD is calculated.
|
||||
* \param height Height of the region for which SATD is calculated.
|
||||
*
|
||||
* \returns Pointer to cost_pixel_nxn_multi_func.
|
||||
*/
|
||||
cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned n)
|
||||
cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned width, unsigned height)
|
||||
{
|
||||
switch (n) {
|
||||
case 4:
|
||||
return uvg_satd_4x4_dual;
|
||||
case 8:
|
||||
return uvg_satd_8x8_dual;
|
||||
case 16:
|
||||
return uvg_satd_16x16_dual;
|
||||
case 32:
|
||||
return uvg_satd_32x32_dual;
|
||||
case 64:
|
||||
return uvg_satd_64x64_dual;
|
||||
default:
|
||||
return NULL;
|
||||
if(width == height) {
|
||||
switch (width) {
|
||||
case 4:
|
||||
return uvg_satd_4x4_dual;
|
||||
case 8:
|
||||
return uvg_satd_8x8_dual;
|
||||
case 16:
|
||||
return uvg_satd_16x16_dual;
|
||||
case 32:
|
||||
return uvg_satd_32x32_dual;
|
||||
case 64:
|
||||
return uvg_satd_64x64_dual;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Get a function that calculates SADs for 2 NxN blocks.
|
||||
*
|
||||
* \param n Width of the region for which SAD is calculated.
|
||||
* \param width Width of the region for which SAD is calculated.
|
||||
*
|
||||
* \returns Pointer to cost_pixel_nxn_multi_func.
|
||||
*/
|
||||
cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n)
|
||||
cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned width, unsigned height)
|
||||
{
|
||||
switch (n) {
|
||||
case 4:
|
||||
return uvg_sad_4x4_dual;
|
||||
case 8:
|
||||
return uvg_sad_8x8_dual;
|
||||
case 16:
|
||||
return uvg_sad_16x16_dual;
|
||||
case 32:
|
||||
return uvg_sad_32x32_dual;
|
||||
case 64:
|
||||
return uvg_sad_64x64_dual;
|
||||
default:
|
||||
return NULL;
|
||||
if(width == height) {
|
||||
switch (width) {
|
||||
case 4:
|
||||
return uvg_sad_4x4_dual;
|
||||
case 8:
|
||||
return uvg_sad_8x8_dual;
|
||||
case 16:
|
||||
return uvg_sad_16x16_dual;
|
||||
case 32:
|
||||
return uvg_sad_32x32_dual;
|
||||
case 64:
|
||||
return uvg_sad_64x64_dual;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Precomputed CRC32C lookup table for polynomial 0x04C11DB7
|
||||
|
|
|
@ -203,8 +203,8 @@ extern pixel_var_func *uvg_pixel_var;
|
|||
extern generate_residual_func* uvg_generate_residual;
|
||||
|
||||
int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth);
|
||||
cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned n);
|
||||
cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n);
|
||||
cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned width, unsigned height);
|
||||
cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned width, unsigned height);
|
||||
|
||||
#define STRATEGIES_PICTURE_EXPORTS \
|
||||
{"crc32c_4x4", (void**) &uvg_crc32c_4x4}, \
|
||||
|
|
|
@ -584,7 +584,7 @@ void uvg_chroma_transform_search(
|
|||
if (is_jccr && !u_has_coeffs) continue;
|
||||
|
||||
if (u_has_coeffs) {
|
||||
uvg_dequant(state, u_quant_coeff, &u_coeff[i * trans_offset], width, width, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V,
|
||||
uvg_dequant(state, u_quant_coeff, &u_coeff[i * trans_offset], width, height, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V,
|
||||
pred_cu->type, transforms[i] == CHROMA_TS);
|
||||
|
||||
if (transforms[i] != CHROMA_TS) {
|
||||
|
|
|
@ -543,9 +543,18 @@ typedef struct uvg_config
|
|||
|
||||
uint8_t dual_tree;
|
||||
|
||||
uint8_t min_qt_size[3];
|
||||
uint8_t max_bt_size[3];
|
||||
uint8_t max_tt_size[3];
|
||||
|
||||
uint8_t max_intra_slice_btt_depth;
|
||||
uint8_t max_intra_slice_btt_depth_chroma;
|
||||
uint8_t max_inter_slice_btt_depth;
|
||||
|
||||
uint8_t intra_rough_search_levels;
|
||||
|
||||
uint8_t ibc; /* \brief Intra Block Copy parameter */
|
||||
|
||||
} uvg_config;
|
||||
|
||||
/**
|
||||
|
|
|
@ -51,7 +51,7 @@ static void init_sig_last_scan(uint32_t *buff_d, uint32_t *buff_h,
|
|||
uint32_t *buff_v,
|
||||
int32_t width, int32_t height)
|
||||
{
|
||||
uint32_t num_scan_pos = width * width;
|
||||
uint32_t num_scan_pos = width * height;
|
||||
uint32_t next_scan_pos = 0;
|
||||
int32_t xx, yy, x, y;
|
||||
uint32_t scan_line;
|
||||
|
|
Loading…
Reference in a new issue