From b893a9268c094383486e7b279e482fb7704a39e8 Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Fri, 18 Nov 2022 14:00:01 +0200 Subject: [PATCH] [mtt] WIP --- src/cu.c | 21 +++++++++++++ src/cu.h | 3 ++ src/encode_coding_tree.c | 18 +++++++++-- src/intra.c | 23 +++++++++----- src/intra.h | 1 + src/search.c | 2 +- src/search_intra.c | 22 ++++---------- src/strategies/generic/intra-generic.c | 42 +++++++++++++++++--------- 8 files changed, 90 insertions(+), 42 deletions(-) diff --git a/src/cu.c b/src/cu.c index 0256bd3d..8998dafd 100644 --- a/src/cu.c +++ b/src/cu.c @@ -357,4 +357,25 @@ int uvg_get_split_locs( return 3; } return 0; +} + +int uvg_count_available_edge_cus(const cu_loc_t* const cu_loc, const lcu_t* const lcu, bool left) +{ + if ((left && cu_loc->x == 0) || (!left && cu_loc->y == 0)) { + return 0; + } + if (left && cu_loc->local_x == 0) return (LCU_CU_WIDTH - cu_loc->local_y) / 4; + if (!left && cu_loc->local_y == 0) return (LCU_CU_WIDTH - cu_loc->local_x) / 4; + + int amount = 0; + if(left) { + while (LCU_GET_CU_AT_PX(lcu, cu_loc->local_x - TR_MIN_WIDTH, cu_loc->local_y + amount * TR_MIN_WIDTH)->type != CU_NOTSET) { + amount++; + } + return amount; + } + while (LCU_GET_CU_AT_PX(lcu, cu_loc->local_x + amount * TR_MIN_WIDTH, cu_loc->local_y - TR_MIN_WIDTH)->type != CU_NOTSET) { + amount++; + } + return amount; } \ No newline at end of file diff --git a/src/cu.h b/src/cu.h index cc2f6925..6440f6f2 100644 --- a/src/cu.h +++ b/src/cu.h @@ -191,6 +191,7 @@ int uvg_get_split_locs( enum split_type split, cu_loc_t out[4]); + #define CU_GET_MV_CAND(cu_info_ptr, reflist) \ (((reflist) == 0) ? (cu_info_ptr)->inter.mv_cand0 : (cu_info_ptr)->inter.mv_cand1) @@ -370,6 +371,8 @@ typedef struct { void uvg_cu_array_copy_from_lcu(cu_array_t* dst, int dst_x, int dst_y, const lcu_t *src, enum uvg_tree_type tree_type); +int uvg_count_available_edge_cus(const cu_loc_t* const cu_loc, const lcu_t* const lcu, bool left); + /** * \brief Return pointer to the top right reference CU. */ diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index ac8d206e..bacaf38b 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -1263,7 +1263,7 @@ uint8_t uvg_write_split_flag( bool allow_split = allow_qt | bh_split | bv_split | th_split | tv_split; - int split_flag = (split_tree.split_tree >> (split_tree.current_depth * 3)) & 7; + enum split_type split_flag = (split_tree.split_tree >> (split_tree.current_depth * 3)) & 7; split_flag = implicit_split_mode != UVG_NO_SPLIT ? implicit_split_mode : split_flag; @@ -1298,7 +1298,19 @@ uint8_t uvg_write_split_flag( if (implicit_split_mode == UVG_NO_SPLIT && allow_qt && (bh_split || bv_split || th_split || tv_split) && split_flag != NO_SPLIT) { bool qt_split = split_flag == QT_SPLIT; if((bv_split || bh_split || tv_split || th_split) && allow_qt) { - split_model = (left_cu && GET_SPLITDATA(left_cu, split_tree.current_depth)) + (above_cu && GET_SPLITDATA(above_cu, split_tree.current_depth)) + (split_tree.current_depth < 2 ? 0 : 3); + unsigned left_qt_depth = 0; + unsigned top_qt_depth = 0; + if(left_cu) { + while (((left_cu->split_tree >> left_qt_depth) & 7u) == QT_SPLIT) { + left_qt_depth++; + } + } + if(above_cu) { + while (((above_cu->split_tree >> top_qt_depth) & 7u) == QT_SPLIT) { + top_qt_depth++; + } + } + split_model = (left_cu && (left_qt_depth > split_tree.current_depth)) + (above_cu && (top_qt_depth > split_tree.current_depth)) + (split_tree.current_depth < 2 ? 0 : 3); CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_split_flag_model[split_model]), qt_split, bits, "qt_split_flag"); } if (!qt_split) { @@ -1319,7 +1331,7 @@ uint8_t uvg_write_split_flag( CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.mtt_vertical_model[split_model]), is_vertical, bits, "mtt_vertical_flag"); } if ((bv_split && tv_split && is_vertical) || (bh_split && th_split && !is_vertical)) { - split_model = 2 * is_vertical + split_tree.mtt_depth <= 1; + split_model = (2 * is_vertical) + (split_tree.mtt_depth <= 1); CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.mtt_binary_model[split_model]), split_flag == BT_VER_SPLIT || split_flag == BT_HOR_SPLIT, bits, "mtt_binary_flag"); } diff --git a/src/intra.c b/src/intra.c index d6f9ad3f..aacd238f 100644 --- a/src/intra.c +++ b/src/intra.c @@ -985,6 +985,7 @@ static void intra_predict_regular( void uvg_intra_build_reference_any( + const encoder_state_t* const state, const cu_loc_t* const pu_loc, const cu_loc_t* const cu_loc, const color_t color, @@ -1019,6 +1020,7 @@ void uvg_intra_build_reference_any( const uvg_pixel dc_val = 1 << (UVG_BIT_DEPTH - 1); //TODO: add used bitdepth as a variable const int is_chroma = color != COLOR_Y ? 1 : 0; + const int is_dual_tree = is_chroma && state->encoder_control->cfg.dual_tree && state->frame->is_irap; // Get multi ref index from CU under prediction or reconstrcution. Do not use MRL if not luma const uint8_t multi_ref_index = !is_chroma ? multi_ref_idx : 0; @@ -1091,7 +1093,8 @@ void uvg_intra_build_reference_any( } } else { - px_available_left = num_ref_pixels_left[lcu_px.y / 4][lcu_px.x / 4] >> is_chroma; + const int num_cus = uvg_count_available_edge_cus(cu_loc, lcu, true); + px_available_left = is_dual_tree || !is_chroma ? num_cus * 4 : num_cus *2; } // Limit the number of available pixels based on block size and dimensions @@ -1212,7 +1215,8 @@ void uvg_intra_build_reference_any( } } else { - px_available_top = num_ref_pixels_top[lcu_px.y / 4][lcu_px.x / 4] >> is_chroma; + const int num_cus = uvg_count_available_edge_cus(cu_loc, lcu, false); + px_available_top = is_dual_tree || !is_chroma ? num_cus * 4 : num_cus * 2; } // Limit the number of available pixels based on block size and dimensions @@ -1245,6 +1249,7 @@ void uvg_intra_build_reference_any( } void uvg_intra_build_reference_inner( + const encoder_state_t* const state, const cu_loc_t* const pu_loc, const cu_loc_t* const cu_loc, const color_t color, @@ -1280,6 +1285,7 @@ void uvg_intra_build_reference_inner( uvg_pixel * __restrict out_top_ref = &refs->ref.top[0]; const int is_chroma = color != COLOR_Y ? 1 : 0; + const int is_dual_tree = is_chroma && state->encoder_control->cfg.dual_tree && state->frame->is_irap; // Get multiRefIdx from CU under prediction. Do not use MRL if not luma const uint8_t multi_ref_index = !is_chroma ? multi_ref_idx : 0; @@ -1395,7 +1401,8 @@ void uvg_intra_build_reference_inner( } else { - px_available_left = num_ref_pixels_left[lcu_px.y / 4][lcu_px.x / 4] >> is_chroma; + const int num_cus = uvg_count_available_edge_cus(cu_loc, lcu, true); + px_available_left = is_dual_tree || !is_chroma ? num_cus * 4 : num_cus * 2; } // Limit the number of available pixels based on block size and dimensions @@ -1456,7 +1463,8 @@ void uvg_intra_build_reference_inner( } } else { - px_available_top = num_ref_pixels_top[lcu_px.y / 4][lcu_px.x / 4] >> is_chroma; + const int num_cus = uvg_count_available_edge_cus(cu_loc, lcu, true); + px_available_top = is_dual_tree || !is_chroma ? num_cus * 4 : num_cus * 2; } // Limit the number of available pixels based on block size and dimensions @@ -1488,6 +1496,7 @@ void uvg_intra_build_reference_inner( void uvg_intra_build_reference( + const encoder_state_t* const state, const cu_loc_t* const pu_loc, const cu_loc_t* const cu_loc, const color_t color, @@ -1507,9 +1516,9 @@ void uvg_intra_build_reference( // Much logic can be discarded if not on the edge if (luma_px->x > 0 && luma_px->y > 0) { - uvg_intra_build_reference_inner(pu_loc, cu_loc, color, luma_px, pic_px, lcu, refs, entropy_sync, multi_ref_idx, extra_ref_lines, isp_mode); + uvg_intra_build_reference_inner(state, pu_loc, cu_loc, color, luma_px, pic_px, lcu, refs, entropy_sync, multi_ref_idx, extra_ref_lines, isp_mode); } else { - uvg_intra_build_reference_any(pu_loc, cu_loc, color, luma_px, pic_px, lcu, refs, multi_ref_idx, extra_ref_lines, isp_mode); + uvg_intra_build_reference_any(state, pu_loc, cu_loc, color, luma_px, pic_px, lcu, refs, multi_ref_idx, extra_ref_lines, isp_mode); } } @@ -1721,7 +1730,7 @@ static void intra_recon_tb_leaf( } } - uvg_intra_build_reference(pu_loc, cu_loc, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp, extra_refs, multi_ref_index, isp_mode); + uvg_intra_build_reference(state, pu_loc, cu_loc, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp, extra_refs, multi_ref_index, isp_mode); uvg_pixel pred[32 * 32]; uvg_intra_predict(state, &refs, pu_loc, color, pred, search_data, lcu, tree_type); diff --git a/src/intra.h b/src/intra.h index deeb173d..7ef5357b 100644 --- a/src/intra.h +++ b/src/intra.h @@ -108,6 +108,7 @@ int8_t uvg_intra_get_dir_luma_predictor( * \param multi_ref_idx Multi reference line index for the prediction block. */ void uvg_intra_build_reference( + const encoder_state_t* const state, const cu_loc_t* const pu_loc, const cu_loc_t* const cu_loc, const color_t color, diff --git a/src/search.c b/src/search.c index 25a2ea1c..090f5f16 100644 --- a/src/search.c +++ b/src/search.c @@ -1335,7 +1335,7 @@ static double search_cu( // Recursively split all the way to max search depth. if (can_split_cu) { - const int split_type = depth == 0 ? QT_SPLIT : BT_HOR_SPLIT; + const int split_type = depth == 0 ? QT_SPLIT : BT_VER_SPLIT; const split_tree_t new_split = { split_tree.split_tree | split_type << (split_tree.current_depth * 3), split_tree.current_depth + 1, diff --git a/src/search_intra.c b/src/search_intra.c index d08b9d64..792bc1fc 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -294,14 +294,6 @@ static double search_intra_trdepth( const bool reconstruct_chroma = false;// (depth != 4 || (depth == 4 && (x_px & 4 && y_px & 4))) && state->encoder_control->chroma_format != UVG_CSP_400; cu_info_t* pred_cu = &search_data->pred_cu; - cu_info_t* const tr_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); - - struct { - uvg_pixel y[TR_MAX_WIDTH*TR_MAX_WIDTH]; - uvg_pixel u[TR_MAX_WIDTH*TR_MAX_WIDTH]; - uvg_pixel v[TR_MAX_WIDTH*TR_MAX_WIDTH]; - } nosplit_pixels; - uint16_t nosplit_cbf = 0; double split_cost = INT32_MAX; double nosplit_cost = INT32_MAX; @@ -574,8 +566,6 @@ static double search_intra_trdepth( if (nosplit_cost >= cost_treshold) { return nosplit_cost; } - - nosplit_cbf = pred_cu->cbf; } @@ -648,10 +638,10 @@ static int search_intra_chroma_rough( const cu_loc_t loc = { luma_px.x, luma_px.y, width, height, width, height }; uvg_intra_references refs_u; - uvg_intra_build_reference(&loc, &loc, COLOR_U, &luma_px, &pic_px, lcu, &refs_u, state->encoder_control->cfg.wpp, NULL, 0, 0); + uvg_intra_build_reference(state, &loc, &loc, COLOR_U, &luma_px, &pic_px, lcu, &refs_u, state->encoder_control->cfg.wpp, NULL, 0, 0); uvg_intra_references refs_v; - uvg_intra_build_reference(&loc, &loc, COLOR_V, &luma_px, &pic_px, lcu, &refs_v, state->encoder_control->cfg.wpp, NULL, 0, 0); + uvg_intra_build_reference(state, &loc, &loc, COLOR_V, &luma_px, &pic_px, lcu, &refs_v, state->encoder_control->cfg.wpp, NULL, 0, 0); vector2d_t lcu_cpx = { (cu_loc->local_x & ~7) / 2, (cu_loc->local_y & ~7) / 2 }; uvg_pixel* orig_u = &lcu->ref.u[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C]; @@ -1447,8 +1437,8 @@ int8_t uvg_search_intra_chroma_rdo( if (reconstruct_chroma) { - uvg_intra_build_reference(cu_loc, cu_loc, COLOR_U, &luma_px, &pic_px, lcu, &refs[0], state->encoder_control->cfg.wpp, NULL, 0, 0); - uvg_intra_build_reference(cu_loc, cu_loc, COLOR_V, &luma_px, &pic_px, lcu, &refs[1], state->encoder_control->cfg.wpp, NULL, 0, 0); + uvg_intra_build_reference(state, cu_loc, cu_loc, COLOR_U, &luma_px, &pic_px, lcu, &refs[0], state->encoder_control->cfg.wpp, NULL, 0, 0); + uvg_intra_build_reference(state, cu_loc, cu_loc, COLOR_V, &luma_px, &pic_px, lcu, &refs[1], state->encoder_control->cfg.wpp, NULL, 0, 0); const vector2d_t lcu_px = { cu_loc->local_x, cu_loc->local_y }; cabac_data_t temp_cabac; @@ -1783,7 +1773,7 @@ void uvg_search_cu_intra( bool is_large = cu_loc->width > TR_MAX_WIDTH || cu_loc->height > TR_MAX_WIDTH; if (!is_large) { - uvg_intra_build_reference(cu_loc, cu_loc, COLOR_Y, &luma_px, &pic_px, lcu, refs, state->encoder_control->cfg.wpp, NULL, 0, 0); + uvg_intra_build_reference(state, cu_loc, cu_loc, COLOR_Y, &luma_px, &pic_px, lcu, refs, state->encoder_control->cfg.wpp, NULL, 0, 0); } // This is needed for bit cost calculation and requires too many parameters to be @@ -1848,7 +1838,7 @@ void uvg_search_cu_intra( frame->rec->stride, 1); } } - uvg_intra_build_reference(cu_loc, cu_loc, COLOR_Y, &luma_px, &pic_px, lcu, &refs[line], state->encoder_control->cfg.wpp, extra_refs, line, 0); + uvg_intra_build_reference(state, cu_loc, cu_loc, COLOR_Y, &luma_px, &pic_px, lcu, &refs[line], state->encoder_control->cfg.wpp, extra_refs, line, 0); for(int i = 1; i < INTRA_MPM_COUNT; i++) { num_mrl_modes++; const int index = (i - 1) + (INTRA_MPM_COUNT -1)*(line-1) + number_of_modes; diff --git a/src/strategies/generic/intra-generic.c b/src/strategies/generic/intra-generic.c index f07fa020..6e712bf5 100644 --- a/src/strategies/generic/intra-generic.c +++ b/src/strategies/generic/intra-generic.c @@ -113,6 +113,9 @@ static void uvg_angular_pred_generic( // Temporary buffer for modes 11-25. // It only needs to be big enough to hold indices from -width to width-1. + uvg_pixel temp_dst[TR_MAX_WIDTH * TR_MAX_WIDTH]; + + // TODO: check the correct size for these arrays when MRL is used //uvg_pixel tmp_ref[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX] = { 0 }; uvg_pixel temp_above[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX] = { 0 }; @@ -138,6 +141,7 @@ static void uvg_angular_pred_generic( uvg_pixel *ref_main; // Pointer for the other reference. const uvg_pixel *ref_side; + uvg_pixel* work = width == height || vertical_mode ? dst : temp_dst; const int cu_dim = MAX(width, height); const int top_ref_length = isp_mode ? width + cu_dim : width << 1; @@ -184,6 +188,7 @@ static void uvg_angular_pred_generic( // compensate for line offset in reference line buffers ref_main += multi_ref_index; ref_side += multi_ref_index; + if (!vertical_mode) { SWAP(width, height, int) } if (sample_disp != 0) { // The mode is not horizontal or vertical, we have to do interpolation. @@ -221,7 +226,7 @@ static void uvg_angular_pred_generic( p[2] = ref_main[ref_main_index + 2]; p[3] = ref_main[ref_main_index + 3]; - dst_buf[y * tmp_width + x] = CLIP_TO_PIXEL(((int32_t)(f[0] * p[0]) + (int32_t)(f[1] * p[1]) + (int32_t)(f[2] * p[2]) + (int32_t)(f[3] * p[3]) + 32) >> 6); + work[y * width + x] = CLIP_TO_PIXEL(((int32_t)(f[0] * p[0]) + (int32_t)(f[1] * p[1]) + (int32_t)(f[2] * p[2]) + (int32_t)(f[3] * p[3]) + 32) >> 6); } } @@ -231,14 +236,14 @@ static void uvg_angular_pred_generic( for (int_fast32_t x = 0; x < tmp_width; ++x) { uvg_pixel ref1 = ref_main[x + delta_int + 1]; uvg_pixel ref2 = ref_main[x + delta_int + 2]; - dst_buf[y * tmp_width + x] = ref1 + ((delta_fract * (ref2-ref1) + 16) >> 5); + work[y * width + x] = ref1 + ((delta_fract * (ref2-ref1) + 16) >> 5); } } } else { // Just copy the integer samples - for (int_fast32_t x = 0; x < tmp_width; x++) { - dst_buf[y * tmp_width + x] = ref_main[x + delta_int + 1]; + for (int_fast32_t x = 0; x < width; x++) { + work[y * width + x] = ref_main[x + delta_int + 1]; } } @@ -260,7 +265,7 @@ static void uvg_angular_pred_generic( int wL = 32 >> (2 * x >> scale); const uvg_pixel left = ref_side[y + (inv_angle_sum >> 9) + 1]; - dst_buf[y * tmp_width + x] = dst_buf[y * tmp_width + x] + ((wL * (left - dst_buf[y * tmp_width + x]) + 32) >> 6); + work[y * width + x] = work[y * width + x] + ((wL * (left - work[y * width + x]) + 32) >> 6); } } } @@ -273,30 +278,37 @@ static void uvg_angular_pred_generic( bool do_pdpc = (((tmp_width >= 4 && tmp_height >= 4) || channel_type != 0) && sample_disp >= 0 && multi_ref_index == 0 /*&& !bdpcm*/); if (do_pdpc) { - if (!vertical_mode) {SWAP(width, height, int)} int scale = (log2_width + log2_height - 2) >> 2; const uvg_pixel top_left = ref_main[0]; - for (int_fast32_t y = 0; y < tmp_height; ++y) { - memcpy(&dst_buf[y * tmp_width], &ref_main[1], tmp_width * sizeof(uvg_pixel)); + for (int_fast32_t y = 0; y < height; ++y) { + memcpy(&work[y * width], &ref_main[1], width * sizeof(uvg_pixel)); const uvg_pixel left = ref_side[1 + y]; for (int_fast32_t x = 0; x < MIN(3 << scale, tmp_width); ++x) { const int wL = 32 >> (2 * x >> scale); - const uvg_pixel val = dst_buf[y * tmp_width + x]; - dst_buf[y * tmp_width + x] = CLIP_TO_PIXEL(val + ((wL * (left - top_left) + 32) >> 6)); + const uvg_pixel val = work[y * width + x]; + work[y * width + x] = CLIP_TO_PIXEL(val + ((wL * (left - top_left) + 32) >> 6)); } } } else { - for (int_fast32_t y = 0; y < tmp_height; ++y) { - memcpy(&dst_buf[y * tmp_width], &ref_main[1], tmp_width * sizeof(uvg_pixel)); + for (int_fast32_t y = 0; y < height; ++y) { + memcpy(&work[y * width], &ref_main[1], width * sizeof(uvg_pixel)); } } } // Flip the block if this is was a horizontal mode. if (!vertical_mode) { - for (int_fast32_t y = 0; y < height - 1; ++y) { - for (int_fast32_t x = y + 1; x < width; ++x) { - SWAP(dst[y * height + x], dst[x * width + y], uvg_pixel); + if(width == height) { + for (int_fast32_t y = 0; y < height - 1; ++y) { + for (int_fast32_t x = y + 1; x < width; ++x) { + SWAP(work[y * height + x], work[x * width + y], uvg_pixel); + } + } + } else { + for(int y = 0; y < width; ++y) { + for(int x = 0; x < height; ++x) { + dst[x + y * height] = work[y + x * width]; + } } } }