[mtt] WIP 16x16 TT split

This commit is contained in:
Joose Sainio 2022-11-24 15:04:57 +02:00 committed by Marko Viitanen
parent 43a710e104
commit 7b117f171f
7 changed files with 34 additions and 24 deletions

View file

@ -452,8 +452,8 @@ static void encode_chroma_tu(
{ {
int width_c = cu_loc->chroma_width; int width_c = cu_loc->chroma_width;
int height_c = cu_loc->chroma_height; int height_c = cu_loc->chroma_height;
int x_local = ((cu_loc->x >> (tree_type != UVG_CHROMA_T)) & ~3) % LCU_WIDTH_C; int x_local = (cu_loc->x >> (tree_type != UVG_CHROMA_T)) % LCU_WIDTH_C;
int y_local = ((cu_loc->y >> (tree_type != UVG_CHROMA_T)) & ~3) % LCU_WIDTH_C; int y_local = (cu_loc->y >> (tree_type != UVG_CHROMA_T)) % LCU_WIDTH_C;
cabac_data_t* const cabac = &state->cabac; cabac_data_t* const cabac = &state->cabac;
*scan_idx = SCAN_DIAG; *scan_idx = SCAN_DIAG;
if(!joint_chroma){ if(!joint_chroma){
@ -1668,6 +1668,7 @@ double uvg_mock_encode_coding_unit(
encoder_state_t* const state, encoder_state_t* const state,
cabac_data_t* cabac, cabac_data_t* cabac,
const cu_loc_t* const cu_loc, const cu_loc_t* const cu_loc,
const cu_loc_t* const chroma_loc,
lcu_t* lcu, lcu_t* lcu,
cu_info_t* cur_cu, cu_info_t* cur_cu,
enum uvg_tree_type tree_type, enum uvg_tree_type tree_type,
@ -1780,7 +1781,7 @@ double uvg_mock_encode_coding_unit(
if(tree_type != UVG_CHROMA_T) { if(tree_type != UVG_CHROMA_T) {
uvg_encode_intra_luma_coding_unit(state, cabac, cur_cu, cu_loc, lcu, &bits); uvg_encode_intra_luma_coding_unit(state, cabac, cur_cu, cu_loc, lcu, &bits);
} }
if((cur_cu->log2_height + cur_cu->log2_width >= 6 || (x % 8 != 0 && y % 8 != 0) || tree_type == UVG_CHROMA_T) && state->encoder_control->chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) { if((chroma_loc || tree_type == UVG_CHROMA_T) && state->encoder_control->chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) {
encode_chroma_intra_cu(cabac, cur_cu, state->encoder_control->cfg.cclm, &bits); encode_chroma_intra_cu(cabac, cur_cu, state->encoder_control->cfg.cclm, &bits);
} }
} }

View file

@ -77,6 +77,7 @@ double uvg_mock_encode_coding_unit(
encoder_state_t* const state, encoder_state_t* const state,
cabac_data_t* cabac, cabac_data_t* cabac,
const cu_loc_t* const cu_loc, const cu_loc_t* const cu_loc,
const cu_loc_t* const chroma_loc,
lcu_t* lcu, lcu_t* lcu,
cu_info_t* cur_cu, cu_info_t* cur_cu,
enum uvg_tree_type tree_type, enum uvg_tree_type tree_type,

View file

@ -1121,6 +1121,7 @@ void uvg_intra_build_reference_any(
else { else {
const int num_cus = uvg_count_available_edge_cus(cu_loc, lcu, true); const int num_cus = uvg_count_available_edge_cus(cu_loc, lcu, true);
px_available_left = is_dual_tree || !is_chroma ? num_cus * 4 : num_cus *2; px_available_left = is_dual_tree || !is_chroma ? num_cus * 4 : num_cus *2;
px_available_left -= px.x % 4;
} }
// Limit the number of available pixels based on block size and dimensions // Limit the number of available pixels based on block size and dimensions
@ -1440,7 +1441,7 @@ void uvg_intra_build_reference_inner(
int i = multi_ref_index; // Offset by multi_ref_index int i = multi_ref_index; // Offset by multi_ref_index
// Do different loop for heights smaller than 4 (possible for some ISP splits) // Do different loop for heights smaller than 4 (possible for some ISP splits)
if (lcu_px.y % 4 != 0) { if (px.y % 4 != 0) {
do { do {
out_left_ref[i + 1] = left_border[(i + 0 - multi_ref_index) * left_stride]; out_left_ref[i + 1] = left_border[(i + 0 - multi_ref_index) * left_stride];
out_left_ref[i + 2] = left_border[(i + 1 - multi_ref_index) * left_stride]; out_left_ref[i + 2] = left_border[(i + 1 - multi_ref_index) * left_stride];

View file

@ -188,8 +188,8 @@ static INLINE void copy_cu_coeffs(const cu_loc_t *cu_loc, lcu_t *from, lcu_t *to
if (from->rec.chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) { if (from->rec.chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) {
//const int chroma_z = xy_to_zorder(LCU_WIDTH_C, cu_loc->x >> (tree_type != UVG_CHROMA_T), cu_loc->y >> (tree_type != UVG_CHROMA_T)); //const int chroma_z = xy_to_zorder(LCU_WIDTH_C, cu_loc->x >> (tree_type != UVG_CHROMA_T), cu_loc->y >> (tree_type != UVG_CHROMA_T));
const int chroma_x = (cu_loc->x >> 1) & ~3; const int chroma_x = (cu_loc->x >> 1);
const int chroma_y = (cu_loc->y >> 1) & ~3; const int chroma_y = (cu_loc->y >> 1);
const int idx = (chroma_x % LCU_WIDTH_C) + ((chroma_y % LCU_WIDTH_C) * LCU_WIDTH_C); const int idx = (chroma_x % LCU_WIDTH_C) + ((chroma_y % LCU_WIDTH_C) * LCU_WIDTH_C);
copy_coeffs(&from->coeff.u[idx], &to->coeff.u[idx], cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C); copy_coeffs(&from->coeff.u[idx], &to->coeff.u[idx], cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C);
@ -209,11 +209,16 @@ static void work_tree_copy_up(
bool joint, bool joint,
enum enum
uvg_tree_type tree_type, uvg_tree_type tree_type,
const cu_loc_t* const cu_loc) const cu_loc_t* const cu_loc,
const cu_loc_t* const chroma_loc)
{ {
copy_cu_info (from, to, cu_loc, tree_type); copy_cu_info (from, to, cu_loc, tree_type);
copy_cu_pixels(from, to, cu_loc, tree_type); copy_cu_pixels(from, to, cu_loc, cu_loc != chroma_loc && tree_type == UVG_LUMA_T ? UVG_LUMA_T : tree_type);
copy_cu_coeffs(cu_loc, from, to, joint, tree_type); copy_cu_coeffs(cu_loc, from, to, joint, cu_loc != chroma_loc && tree_type == UVG_LUMA_T ? UVG_LUMA_T : tree_type);
if (cu_loc != chroma_loc && tree_type == UVG_LUMA_T) {
copy_cu_pixels(from, to, chroma_loc, UVG_CHROMA_T);
copy_cu_coeffs(chroma_loc, from, to, joint, UVG_CHROMA_T);
}
} }
@ -482,7 +487,7 @@ double uvg_cu_rd_cost_chroma(
lcu_t *const lcu, lcu_t *const lcu,
const cu_loc_t * const cu_loc) const cu_loc_t * const cu_loc)
{ {
const vector2d_t lcu_px = { (cu_loc->local_x & ~7) / 2, (cu_loc->local_y & ~7) / 2 }; const vector2d_t lcu_px = { (cu_loc->local_x) / 2, (cu_loc->local_y) / 2 };
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0); const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0);
@ -787,7 +792,7 @@ static double cu_rd_cost_tr_split_accurate(
if (chroma_can_use_tr_skip) { if (chroma_can_use_tr_skip) {
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 2, tr_tree_bits, "transform_skip_flag"); CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 2, tr_tree_bits, "transform_skip_flag");
} }
coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.joint_uv, tr_cu, &chroma_loc, COLOR_U, scan_order, 0, COEFF_ORDER_CU); coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.joint_uv, tr_cu, &temp_chroma_loc, COLOR_U, scan_order, 0, COEFF_ORDER_CU);
} }
} }
@ -949,6 +954,7 @@ static double search_cu(
const int y = cu_loc->y; const int y = cu_loc->y;
const int luma_width = cu_loc->width; const int luma_width = cu_loc->width;
const int luma_height = cu_loc->height; const int luma_height = cu_loc->height;
const bool is_separate_tree = chroma_loc == NULL || cu_loc->height != chroma_loc->height || cu_loc->width != chroma_loc->width;
assert(cu_width >= 4); assert(cu_width >= 4);
double cost = MAX_DOUBLE; double cost = MAX_DOUBLE;
double inter_zero_coeff_cost = MAX_DOUBLE; double inter_zero_coeff_cost = MAX_DOUBLE;
@ -1181,7 +1187,7 @@ static double search_cu(
bool recon_chroma = true; bool recon_chroma = true;
bool recon_luma = tree_type != UVG_CHROMA_T; bool recon_luma = tree_type != UVG_CHROMA_T;
if ((cur_cu->log2_height + cur_cu->log2_width < 6) || !has_chroma || state->encoder_control->chroma_format == UVG_CSP_400 || tree_type == UVG_LUMA_T) { if (is_separate_tree || !has_chroma || state->encoder_control->chroma_format == UVG_CSP_400 || tree_type == UVG_LUMA_T) {
recon_chroma = false; recon_chroma = false;
} }
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu); lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
@ -1192,7 +1198,7 @@ static double search_cu(
recon_luma, recon_chroma); recon_luma, recon_chroma);
if((cur_cu->log2_height + cur_cu->log2_width < 6 && has_chroma && tree_type != UVG_LUMA_T && state->encoder_control->chroma_format != UVG_CSP_400 ) if((is_separate_tree && has_chroma && tree_type != UVG_LUMA_T && state->encoder_control->chroma_format != UVG_CSP_400 )
|| tree_type == UVG_CHROMA_T) { || tree_type == UVG_CHROMA_T) {
intra_search.pred_cu.intra.mode_chroma = cur_cu->intra.mode_chroma; intra_search.pred_cu.intra.mode_chroma = cur_cu->intra.mode_chroma;
uvg_intra_recon_cu(state, uvg_intra_recon_cu(state,
@ -1295,6 +1301,7 @@ static double search_cu(
state, state,
cabac, cabac,
tree_type != UVG_CHROMA_T ? cu_loc : &separate_tree_chroma_loc, tree_type != UVG_CHROMA_T ? cu_loc : &separate_tree_chroma_loc,
is_separate_tree && !has_chroma ? NULL : chroma_loc,
lcu, lcu,
cur_cu, cur_cu,
tree_type, tree_type,
@ -1338,7 +1345,7 @@ static double search_cu(
// Recursively split all the way to max search depth. // Recursively split all the way to max search depth.
if (can_split_cu) { if (can_split_cu) {
const int split_type = depth == 2 ? BT_HOR_SPLIT : QT_SPLIT; const int split_type = depth == 2 ? TT_VER_SPLIT : QT_SPLIT;
const split_tree_t new_split = { const split_tree_t new_split = {
split_tree.split_tree | split_type << (split_tree.current_depth * 3), split_tree.split_tree | split_type << (split_tree.current_depth * 3),
split_tree.current_depth + 1, split_tree.current_depth + 1,
@ -1479,7 +1486,7 @@ static double search_cu(
if (split_cost < cost) { if (split_cost < cost) {
// Copy split modes to this depth. // Copy split modes to this depth.
cost = split_cost; cost = split_cost;
work_tree_copy_up(&split_lcu, lcu, state->encoder_control->cfg.jccr, tree_type, cu_loc); work_tree_copy_up(&split_lcu, lcu, state->encoder_control->cfg.jccr, tree_type, cu_loc, is_separate_tree && !has_chroma ? NULL : chroma_loc);
#if UVG_DEBUG #if UVG_DEBUG
//debug_split = 1; //debug_split = 1;
#endif #endif

View file

@ -2172,10 +2172,10 @@ void uvg_cu_cost_inter_rd2(
const split_tree_t splitt_tree = { cur_cu->split_tree, depth, mtt_depth }; const split_tree_t splitt_tree = { cur_cu->split_tree, depth, mtt_depth };
if (cur_cu->merged) { if (cur_cu->merged) {
no_cbf_bits = CTX_ENTROPY_FBITS(&state->cabac.ctx.cu_skip_flag_model[skip_context], 1) + *inter_bitcost; no_cbf_bits = CTX_ENTROPY_FBITS(&state->cabac.ctx.cu_skip_flag_model[skip_context], 1) + *inter_bitcost;
bits += uvg_mock_encode_coding_unit(state, cabac, cu_loc, lcu, cur_cu, UVG_BOTH_T, splitt_tree); bits += uvg_mock_encode_coding_unit(state, cabac, cu_loc, cu_loc, lcu, cur_cu, UVG_BOTH_T, splitt_tree);
} }
else { else {
no_cbf_bits = uvg_mock_encode_coding_unit(state, cabac, cu_loc, lcu, cur_cu, UVG_BOTH_T, splitt_tree); no_cbf_bits = uvg_mock_encode_coding_unit(state, cabac, cu_loc, cu_loc, lcu, cur_cu, UVG_BOTH_T, splitt_tree);
bits += no_cbf_bits - CTX_ENTROPY_FBITS(&cabac->ctx.cu_qt_root_cbf_model, 0) + CTX_ENTROPY_FBITS(&cabac->ctx.cu_qt_root_cbf_model, 1); bits += no_cbf_bits - CTX_ENTROPY_FBITS(&cabac->ctx.cu_qt_root_cbf_model, 0) + CTX_ENTROPY_FBITS(&cabac->ctx.cu_qt_root_cbf_model, 1);
} }
double no_cbf_cost = ssd + no_cbf_bits * state->lambda; double no_cbf_cost = ssd + no_cbf_bits * state->lambda;

View file

@ -630,7 +630,7 @@ static int search_intra_chroma_rough(
{ {
const int_fast8_t log2_width_c = uvg_g_convert_to_log2[cu_loc->chroma_width]; const int_fast8_t log2_width_c = uvg_g_convert_to_log2[cu_loc->chroma_width];
const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height }; const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height };
const vector2d_t luma_px = { cu_loc->x & ~7, cu_loc->y & ~7 }; const vector2d_t luma_px = { cu_loc->x, cu_loc->y};
const int width = 1 << log2_width_c; const int width = 1 << log2_width_c;
const int height = width; // TODO: height for non-square blocks const int height = width; // TODO: height for non-square blocks
@ -642,7 +642,7 @@ static int search_intra_chroma_rough(
uvg_intra_references refs_v; uvg_intra_references refs_v;
uvg_intra_build_reference(state, &loc, &loc, COLOR_V, &luma_px, &pic_px, lcu, &refs_v, state->encoder_control->cfg.wpp, NULL, 0, 0); uvg_intra_build_reference(state, &loc, &loc, COLOR_V, &luma_px, &pic_px, lcu, &refs_v, state->encoder_control->cfg.wpp, NULL, 0, 0);
vector2d_t lcu_cpx = { (cu_loc->local_x & ~7) / 2, (cu_loc->local_y & ~7) / 2 }; vector2d_t lcu_cpx = { (cu_loc->local_x) / 2, (cu_loc->local_y) / 2 };
uvg_pixel* orig_u = &lcu->ref.u[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C]; uvg_pixel* orig_u = &lcu->ref.u[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C];
uvg_pixel* orig_v = &lcu->ref.v[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C]; uvg_pixel* orig_v = &lcu->ref.v[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C];
@ -1429,7 +1429,7 @@ int8_t uvg_search_intra_chroma_rdo(
const int chroma_width = cu_loc->chroma_width; const int chroma_width = cu_loc->chroma_width;
const int chroma_height = cu_loc->chroma_height; const int chroma_height = cu_loc->chroma_height;
uvg_intra_references refs[2]; uvg_intra_references refs[2];
const vector2d_t luma_px = { cu_loc->x & ~7, cu_loc->y & ~7 }; const vector2d_t luma_px = { cu_loc->x, cu_loc->y };
const vector2d_t pic_px = { const vector2d_t pic_px = {
state->tile->frame->width, state->tile->frame->width,
state->tile->frame->height, state->tile->frame->height,
@ -1444,7 +1444,7 @@ int8_t uvg_search_intra_chroma_rdo(
cabac_data_t temp_cabac; cabac_data_t temp_cabac;
memcpy(&temp_cabac, &state->search_cabac, sizeof(cabac_data_t)); memcpy(&temp_cabac, &state->search_cabac, sizeof(cabac_data_t));
const int offset = ((cu_loc->local_x & ~7) >> 1) + ((cu_loc->local_y & ~7) >> 1)* LCU_WIDTH_C; const int offset = ((cu_loc->local_x) >> 1) + ((cu_loc->local_y) >> 1)* LCU_WIDTH_C;
int lfnst_modes_to_check[3]; int lfnst_modes_to_check[3];
if((is_separate || tree_type == UVG_CHROMA_T) && state->encoder_control->cfg.lfnst) { if((is_separate || tree_type == UVG_CHROMA_T) && state->encoder_control->cfg.lfnst) {

View file

@ -131,7 +131,7 @@ static void uvg_angular_pred_generic(
const int_fast8_t mode_disp = vertical_mode ? pred_mode - 50 : -((int32_t)pred_mode - 18); const int_fast8_t mode_disp = vertical_mode ? pred_mode - 50 : -((int32_t)pred_mode - 18);
// Sample displacement per column in fractions of 32. // Sample displacement per column in fractions of 32.
const int_fast8_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)]; const int16_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)];
const int side_size = vertical_mode ? log2_height : log2_width; const int side_size = vertical_mode ? log2_height : log2_width;
int scale = MIN(2, side_size - pre_scale[abs(mode_disp)]); int scale = MIN(2, side_size - pre_scale[abs(mode_disp)]);
@ -248,7 +248,7 @@ static void uvg_angular_pred_generic(
// PDPC // PDPC
bool PDPC_filter = ((width >= TR_MIN_WIDTH && height >= TR_MIN_WIDTH) || channel_type != 0) && multi_ref_index == 0; bool PDPC_filter = (width >= TR_MIN_WIDTH && height >= TR_MIN_WIDTH) && multi_ref_index == 0;
if (pred_mode > 1 && pred_mode < 67) { if (pred_mode > 1 && pred_mode < 67) {
if (mode_disp < 0 || multi_ref_index) { // Cannot be used with MRL. if (mode_disp < 0 || multi_ref_index) { // Cannot be used with MRL.
PDPC_filter = false; PDPC_filter = false;
@ -274,7 +274,7 @@ static void uvg_angular_pred_generic(
// Do not apply PDPC if multi ref line index is other than 0 // Do not apply PDPC if multi ref line index is other than 0
// TODO: do not do PDPC if block is in BDPCM mode // TODO: do not do PDPC if block is in BDPCM mode
bool do_pdpc = (((width >= 4 && height >= 4) || channel_type != 0) && sample_disp >= 0 && multi_ref_index == 0 /*&& !bdpcm*/); bool do_pdpc = ((width >= 4 && height >= 4) && sample_disp >= 0 && multi_ref_index == 0 /*&& !bdpcm*/);
if (do_pdpc) { if (do_pdpc) {
int scale = (log2_width + log2_height - 2) >> 2; int scale = (log2_width + log2_height - 2) >> 2;