[mtt] WIP

This commit is contained in:
Joose Sainio 2022-11-18 14:00:01 +02:00 committed by Marko Viitanen
parent 5ba8d45981
commit b893a9268c
8 changed files with 90 additions and 42 deletions

View file

@ -357,4 +357,25 @@ int uvg_get_split_locs(
return 3;
}
return 0;
}
int uvg_count_available_edge_cus(const cu_loc_t* const cu_loc, const lcu_t* const lcu, bool left)
{
if ((left && cu_loc->x == 0) || (!left && cu_loc->y == 0)) {
return 0;
}
if (left && cu_loc->local_x == 0) return (LCU_CU_WIDTH - cu_loc->local_y) / 4;
if (!left && cu_loc->local_y == 0) return (LCU_CU_WIDTH - cu_loc->local_x) / 4;
int amount = 0;
if(left) {
while (LCU_GET_CU_AT_PX(lcu, cu_loc->local_x - TR_MIN_WIDTH, cu_loc->local_y + amount * TR_MIN_WIDTH)->type != CU_NOTSET) {
amount++;
}
return amount;
}
while (LCU_GET_CU_AT_PX(lcu, cu_loc->local_x + amount * TR_MIN_WIDTH, cu_loc->local_y - TR_MIN_WIDTH)->type != CU_NOTSET) {
amount++;
}
return amount;
}

View file

@ -191,6 +191,7 @@ int uvg_get_split_locs(
enum split_type split,
cu_loc_t out[4]);
#define CU_GET_MV_CAND(cu_info_ptr, reflist) \
(((reflist) == 0) ? (cu_info_ptr)->inter.mv_cand0 : (cu_info_ptr)->inter.mv_cand1)
@ -370,6 +371,8 @@ typedef struct {
void uvg_cu_array_copy_from_lcu(cu_array_t* dst, int dst_x, int dst_y, const lcu_t *src, enum uvg_tree_type
tree_type);
int uvg_count_available_edge_cus(const cu_loc_t* const cu_loc, const lcu_t* const lcu, bool left);
/**
* \brief Return pointer to the top right reference CU.
*/

View file

@ -1263,7 +1263,7 @@ uint8_t uvg_write_split_flag(
bool allow_split = allow_qt | bh_split | bv_split | th_split | tv_split;
int split_flag = (split_tree.split_tree >> (split_tree.current_depth * 3)) & 7;
enum split_type split_flag = (split_tree.split_tree >> (split_tree.current_depth * 3)) & 7;
split_flag = implicit_split_mode != UVG_NO_SPLIT ? implicit_split_mode : split_flag;
@ -1298,7 +1298,19 @@ uint8_t uvg_write_split_flag(
if (implicit_split_mode == UVG_NO_SPLIT && allow_qt && (bh_split || bv_split || th_split || tv_split) && split_flag != NO_SPLIT) {
bool qt_split = split_flag == QT_SPLIT;
if((bv_split || bh_split || tv_split || th_split) && allow_qt) {
split_model = (left_cu && GET_SPLITDATA(left_cu, split_tree.current_depth)) + (above_cu && GET_SPLITDATA(above_cu, split_tree.current_depth)) + (split_tree.current_depth < 2 ? 0 : 3);
// Number of leading quad-tree splits of the left and above neighbours.
// The split tree is read 3 bits per depth level elsewhere in this function
// ((split_tree >> (depth * 3)) & 7), so the shift must advance by 3 bits for
// every level that is inspected, not by a single bit.
unsigned left_qt_depth = 0;
unsigned top_qt_depth = 0;
if (left_cu) {
  while (((left_cu->split_tree >> (left_qt_depth * 3)) & 7u) == QT_SPLIT) {
    left_qt_depth++;
  }
}
if (above_cu) {
  while (((above_cu->split_tree >> (top_qt_depth * 3)) & 7u) == QT_SPLIT) {
    top_qt_depth++;
  }
}
split_model = (left_cu && (left_qt_depth > split_tree.current_depth)) + (above_cu && (top_qt_depth > split_tree.current_depth)) + (split_tree.current_depth < 2 ? 0 : 3);
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_split_flag_model[split_model]), qt_split, bits, "qt_split_flag");
}
if (!qt_split) {
@ -1319,7 +1331,7 @@ uint8_t uvg_write_split_flag(
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.mtt_vertical_model[split_model]), is_vertical, bits, "mtt_vertical_flag");
}
if ((bv_split && tv_split && is_vertical) || (bh_split && th_split && !is_vertical)) {
split_model = 2 * is_vertical + split_tree.mtt_depth <= 1;
split_model = (2 * is_vertical) + (split_tree.mtt_depth <= 1);
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.mtt_binary_model[split_model]),
split_flag == BT_VER_SPLIT || split_flag == BT_HOR_SPLIT, bits, "mtt_binary_flag");
}

View file

@ -985,6 +985,7 @@ static void intra_predict_regular(
void uvg_intra_build_reference_any(
const encoder_state_t* const state,
const cu_loc_t* const pu_loc,
const cu_loc_t* const cu_loc,
const color_t color,
@ -1019,6 +1020,7 @@ void uvg_intra_build_reference_any(
const uvg_pixel dc_val = 1 << (UVG_BIT_DEPTH - 1); //TODO: add used bitdepth as a variable
const int is_chroma = color != COLOR_Y ? 1 : 0;
const int is_dual_tree = is_chroma && state->encoder_control->cfg.dual_tree && state->frame->is_irap;
// Get multi ref index from CU under prediction or reconstruction. Do not use MRL if not luma
const uint8_t multi_ref_index = !is_chroma ? multi_ref_idx : 0;
@ -1091,7 +1093,8 @@ void uvg_intra_build_reference_any(
}
}
else {
px_available_left = num_ref_pixels_left[lcu_px.y / 4][lcu_px.x / 4] >> is_chroma;
const int num_cus = uvg_count_available_edge_cus(cu_loc, lcu, true);
px_available_left = is_dual_tree || !is_chroma ? num_cus * 4 : num_cus *2;
}
// Limit the number of available pixels based on block size and dimensions
@ -1212,7 +1215,8 @@ void uvg_intra_build_reference_any(
}
}
else {
px_available_top = num_ref_pixels_top[lcu_px.y / 4][lcu_px.x / 4] >> is_chroma;
const int num_cus = uvg_count_available_edge_cus(cu_loc, lcu, false);
px_available_top = is_dual_tree || !is_chroma ? num_cus * 4 : num_cus * 2;
}
// Limit the number of available pixels based on block size and dimensions
@ -1245,6 +1249,7 @@ void uvg_intra_build_reference_any(
}
void uvg_intra_build_reference_inner(
const encoder_state_t* const state,
const cu_loc_t* const pu_loc,
const cu_loc_t* const cu_loc,
const color_t color,
@ -1280,6 +1285,7 @@ void uvg_intra_build_reference_inner(
uvg_pixel * __restrict out_top_ref = &refs->ref.top[0];
const int is_chroma = color != COLOR_Y ? 1 : 0;
const int is_dual_tree = is_chroma && state->encoder_control->cfg.dual_tree && state->frame->is_irap;
// Get multiRefIdx from CU under prediction. Do not use MRL if not luma
const uint8_t multi_ref_index = !is_chroma ? multi_ref_idx : 0;
@ -1395,7 +1401,8 @@ void uvg_intra_build_reference_inner(
}
else {
px_available_left = num_ref_pixels_left[lcu_px.y / 4][lcu_px.x / 4] >> is_chroma;
const int num_cus = uvg_count_available_edge_cus(cu_loc, lcu, true);
px_available_left = is_dual_tree || !is_chroma ? num_cus * 4 : num_cus * 2;
}
// Limit the number of available pixels based on block size and dimensions
@ -1456,7 +1463,8 @@ void uvg_intra_build_reference_inner(
}
}
else {
px_available_top = num_ref_pixels_top[lcu_px.y / 4][lcu_px.x / 4] >> is_chroma;
const int num_cus = uvg_count_available_edge_cus(cu_loc, lcu, true);
px_available_top = is_dual_tree || !is_chroma ? num_cus * 4 : num_cus * 2;
}
// Limit the number of available pixels based on block size and dimensions
@ -1488,6 +1496,7 @@ void uvg_intra_build_reference_inner(
void uvg_intra_build_reference(
const encoder_state_t* const state,
const cu_loc_t* const pu_loc,
const cu_loc_t* const cu_loc,
const color_t color,
@ -1507,9 +1516,9 @@ void uvg_intra_build_reference(
// Much logic can be discarded if not on the edge
if (luma_px->x > 0 && luma_px->y > 0) {
uvg_intra_build_reference_inner(pu_loc, cu_loc, color, luma_px, pic_px, lcu, refs, entropy_sync, multi_ref_idx, extra_ref_lines, isp_mode);
uvg_intra_build_reference_inner(state, pu_loc, cu_loc, color, luma_px, pic_px, lcu, refs, entropy_sync, multi_ref_idx, extra_ref_lines, isp_mode);
} else {
uvg_intra_build_reference_any(pu_loc, cu_loc, color, luma_px, pic_px, lcu, refs, multi_ref_idx, extra_ref_lines, isp_mode);
uvg_intra_build_reference_any(state, pu_loc, cu_loc, color, luma_px, pic_px, lcu, refs, multi_ref_idx, extra_ref_lines, isp_mode);
}
}
@ -1721,7 +1730,7 @@ static void intra_recon_tb_leaf(
}
}
uvg_intra_build_reference(pu_loc, cu_loc, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp, extra_refs, multi_ref_index, isp_mode);
uvg_intra_build_reference(state, pu_loc, cu_loc, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp, extra_refs, multi_ref_index, isp_mode);
uvg_pixel pred[32 * 32];
uvg_intra_predict(state, &refs, pu_loc, color, pred, search_data, lcu, tree_type);

View file

@ -108,6 +108,7 @@ int8_t uvg_intra_get_dir_luma_predictor(
* \param multi_ref_idx Multi reference line index for the prediction block.
*/
void uvg_intra_build_reference(
const encoder_state_t* const state,
const cu_loc_t* const pu_loc,
const cu_loc_t* const cu_loc,
const color_t color,

View file

@ -1335,7 +1335,7 @@ static double search_cu(
// Recursively split all the way to max search depth.
if (can_split_cu) {
const int split_type = depth == 0 ? QT_SPLIT : BT_HOR_SPLIT;
const int split_type = depth == 0 ? QT_SPLIT : BT_VER_SPLIT;
const split_tree_t new_split = {
split_tree.split_tree | split_type << (split_tree.current_depth * 3),
split_tree.current_depth + 1,

View file

@ -294,14 +294,6 @@ static double search_intra_trdepth(
const bool reconstruct_chroma = false;// (depth != 4 || (depth == 4 && (x_px & 4 && y_px & 4))) && state->encoder_control->chroma_format != UVG_CSP_400;
cu_info_t* pred_cu = &search_data->pred_cu;
cu_info_t* const tr_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
struct {
uvg_pixel y[TR_MAX_WIDTH*TR_MAX_WIDTH];
uvg_pixel u[TR_MAX_WIDTH*TR_MAX_WIDTH];
uvg_pixel v[TR_MAX_WIDTH*TR_MAX_WIDTH];
} nosplit_pixels;
uint16_t nosplit_cbf = 0;
double split_cost = INT32_MAX;
double nosplit_cost = INT32_MAX;
@ -574,8 +566,6 @@ static double search_intra_trdepth(
if (nosplit_cost >= cost_treshold) {
return nosplit_cost;
}
nosplit_cbf = pred_cu->cbf;
}
@ -648,10 +638,10 @@ static int search_intra_chroma_rough(
const cu_loc_t loc = { luma_px.x, luma_px.y, width, height, width, height };
uvg_intra_references refs_u;
uvg_intra_build_reference(&loc, &loc, COLOR_U, &luma_px, &pic_px, lcu, &refs_u, state->encoder_control->cfg.wpp, NULL, 0, 0);
uvg_intra_build_reference(state, &loc, &loc, COLOR_U, &luma_px, &pic_px, lcu, &refs_u, state->encoder_control->cfg.wpp, NULL, 0, 0);
uvg_intra_references refs_v;
uvg_intra_build_reference(&loc, &loc, COLOR_V, &luma_px, &pic_px, lcu, &refs_v, state->encoder_control->cfg.wpp, NULL, 0, 0);
uvg_intra_build_reference(state, &loc, &loc, COLOR_V, &luma_px, &pic_px, lcu, &refs_v, state->encoder_control->cfg.wpp, NULL, 0, 0);
vector2d_t lcu_cpx = { (cu_loc->local_x & ~7) / 2, (cu_loc->local_y & ~7) / 2 };
uvg_pixel* orig_u = &lcu->ref.u[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C];
@ -1447,8 +1437,8 @@ int8_t uvg_search_intra_chroma_rdo(
if (reconstruct_chroma) {
uvg_intra_build_reference(cu_loc, cu_loc, COLOR_U, &luma_px, &pic_px, lcu, &refs[0], state->encoder_control->cfg.wpp, NULL, 0, 0);
uvg_intra_build_reference(cu_loc, cu_loc, COLOR_V, &luma_px, &pic_px, lcu, &refs[1], state->encoder_control->cfg.wpp, NULL, 0, 0);
uvg_intra_build_reference(state, cu_loc, cu_loc, COLOR_U, &luma_px, &pic_px, lcu, &refs[0], state->encoder_control->cfg.wpp, NULL, 0, 0);
uvg_intra_build_reference(state, cu_loc, cu_loc, COLOR_V, &luma_px, &pic_px, lcu, &refs[1], state->encoder_control->cfg.wpp, NULL, 0, 0);
const vector2d_t lcu_px = { cu_loc->local_x, cu_loc->local_y };
cabac_data_t temp_cabac;
@ -1783,7 +1773,7 @@ void uvg_search_cu_intra(
bool is_large = cu_loc->width > TR_MAX_WIDTH || cu_loc->height > TR_MAX_WIDTH;
if (!is_large) {
uvg_intra_build_reference(cu_loc, cu_loc, COLOR_Y, &luma_px, &pic_px, lcu, refs, state->encoder_control->cfg.wpp, NULL, 0, 0);
uvg_intra_build_reference(state, cu_loc, cu_loc, COLOR_Y, &luma_px, &pic_px, lcu, refs, state->encoder_control->cfg.wpp, NULL, 0, 0);
}
// This is needed for bit cost calculation and requires too many parameters to be
@ -1848,7 +1838,7 @@ void uvg_search_cu_intra(
frame->rec->stride, 1);
}
}
uvg_intra_build_reference(cu_loc, cu_loc, COLOR_Y, &luma_px, &pic_px, lcu, &refs[line], state->encoder_control->cfg.wpp, extra_refs, line, 0);
uvg_intra_build_reference(state, cu_loc, cu_loc, COLOR_Y, &luma_px, &pic_px, lcu, &refs[line], state->encoder_control->cfg.wpp, extra_refs, line, 0);
for(int i = 1; i < INTRA_MPM_COUNT; i++) {
num_mrl_modes++;
const int index = (i - 1) + (INTRA_MPM_COUNT -1)*(line-1) + number_of_modes;

View file

@ -113,6 +113,9 @@ static void uvg_angular_pred_generic(
// Temporary buffer for modes 11-25.
// It only needs to be big enough to hold indices from -width to width-1.
uvg_pixel temp_dst[TR_MAX_WIDTH * TR_MAX_WIDTH];
// TODO: check the correct size for these arrays when MRL is used
//uvg_pixel tmp_ref[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX] = { 0 };
uvg_pixel temp_above[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX] = { 0 };
@ -138,6 +141,7 @@ static void uvg_angular_pred_generic(
uvg_pixel *ref_main;
// Pointer for the other reference.
const uvg_pixel *ref_side;
uvg_pixel* work = width == height || vertical_mode ? dst : temp_dst;
const int cu_dim = MAX(width, height);
const int top_ref_length = isp_mode ? width + cu_dim : width << 1;
@ -184,6 +188,7 @@ static void uvg_angular_pred_generic(
// compensate for line offset in reference line buffers
ref_main += multi_ref_index;
ref_side += multi_ref_index;
if (!vertical_mode) { SWAP(width, height, int) }
if (sample_disp != 0) {
// The mode is not horizontal or vertical, we have to do interpolation.
@ -221,7 +226,7 @@ static void uvg_angular_pred_generic(
p[2] = ref_main[ref_main_index + 2];
p[3] = ref_main[ref_main_index + 3];
dst_buf[y * tmp_width + x] = CLIP_TO_PIXEL(((int32_t)(f[0] * p[0]) + (int32_t)(f[1] * p[1]) + (int32_t)(f[2] * p[2]) + (int32_t)(f[3] * p[3]) + 32) >> 6);
work[y * width + x] = CLIP_TO_PIXEL(((int32_t)(f[0] * p[0]) + (int32_t)(f[1] * p[1]) + (int32_t)(f[2] * p[2]) + (int32_t)(f[3] * p[3]) + 32) >> 6);
}
}
@ -231,14 +236,14 @@ static void uvg_angular_pred_generic(
for (int_fast32_t x = 0; x < tmp_width; ++x) {
uvg_pixel ref1 = ref_main[x + delta_int + 1];
uvg_pixel ref2 = ref_main[x + delta_int + 2];
dst_buf[y * tmp_width + x] = ref1 + ((delta_fract * (ref2-ref1) + 16) >> 5);
work[y * width + x] = ref1 + ((delta_fract * (ref2-ref1) + 16) >> 5);
}
}
}
else {
// Just copy the integer samples
for (int_fast32_t x = 0; x < tmp_width; x++) {
dst_buf[y * tmp_width + x] = ref_main[x + delta_int + 1];
for (int_fast32_t x = 0; x < width; x++) {
work[y * width + x] = ref_main[x + delta_int + 1];
}
}
@ -260,7 +265,7 @@ static void uvg_angular_pred_generic(
int wL = 32 >> (2 * x >> scale);
const uvg_pixel left = ref_side[y + (inv_angle_sum >> 9) + 1];
dst_buf[y * tmp_width + x] = dst_buf[y * tmp_width + x] + ((wL * (left - dst_buf[y * tmp_width + x]) + 32) >> 6);
work[y * width + x] = work[y * width + x] + ((wL * (left - work[y * width + x]) + 32) >> 6);
}
}
}
@ -273,30 +278,37 @@ static void uvg_angular_pred_generic(
bool do_pdpc = (((tmp_width >= 4 && tmp_height >= 4) || channel_type != 0) && sample_disp >= 0 && multi_ref_index == 0 /*&& !bdpcm*/);
if (do_pdpc) {
if (!vertical_mode) {SWAP(width, height, int)}
int scale = (log2_width + log2_height - 2) >> 2;
const uvg_pixel top_left = ref_main[0];
for (int_fast32_t y = 0; y < tmp_height; ++y) {
memcpy(&dst_buf[y * tmp_width], &ref_main[1], tmp_width * sizeof(uvg_pixel));
for (int_fast32_t y = 0; y < height; ++y) {
memcpy(&work[y * width], &ref_main[1], width * sizeof(uvg_pixel));
const uvg_pixel left = ref_side[1 + y];
for (int_fast32_t x = 0; x < MIN(3 << scale, tmp_width); ++x) {
const int wL = 32 >> (2 * x >> scale);
const uvg_pixel val = dst_buf[y * tmp_width + x];
dst_buf[y * tmp_width + x] = CLIP_TO_PIXEL(val + ((wL * (left - top_left) + 32) >> 6));
const uvg_pixel val = work[y * width + x];
work[y * width + x] = CLIP_TO_PIXEL(val + ((wL * (left - top_left) + 32) >> 6));
}
}
} else {
for (int_fast32_t y = 0; y < tmp_height; ++y) {
memcpy(&dst_buf[y * tmp_width], &ref_main[1], tmp_width * sizeof(uvg_pixel));
for (int_fast32_t y = 0; y < height; ++y) {
memcpy(&work[y * width], &ref_main[1], width * sizeof(uvg_pixel));
}
}
}
// Flip the block if this was a horizontal mode.
if (!vertical_mode) {
for (int_fast32_t y = 0; y < height - 1; ++y) {
for (int_fast32_t x = y + 1; x < width; ++x) {
SWAP(dst[y * height + x], dst[x * width + y], uvg_pixel);
if(width == height) {
for (int_fast32_t y = 0; y < height - 1; ++y) {
for (int_fast32_t x = y + 1; x < width; ++x) {
SWAP(work[y * height + x], work[x * width + y], uvg_pixel);
}
}
} else {
for(int y = 0; y < width; ++y) {
for(int x = 0; x < height; ++x) {
dst[x + y * height] = work[y + x * width];
}
}
}
}