From 80945c985a6072fbcf0aa01be49d3f7538408454 Mon Sep 17 00:00:00 2001 From: siivonek Date: Tue, 20 Sep 2022 12:35:16 +0300 Subject: [PATCH] [isp] Fix cabac issues. There are always four transform blocks even if there are only two ISP splits. Fix prediction issues. PDPC filter was applied when it should be disabled. Fix reference building issues. Left reference was built incorrectly for blocks with height 2. --- src/encode_coding_tree.c | 4 +- src/intra.c | 74 ++++++++++++++++++-------- src/intra.h | 6 +-- src/search.c | 19 ++++--- src/strategies/avx2/intra-avx2.c | 2 +- src/strategies/generic/intra-generic.c | 2 +- src/transform.c | 4 +- 7 files changed, 74 insertions(+), 37 deletions(-) diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index e0886bec..a8ba706c 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -1717,13 +1717,13 @@ void uvg_encode_coding_tree( // ISP split is done horizontally or vertically depending on ISP mode, 2 or 4 times depending on block dimensions. // Small blocks are split only twice. int split_type = cur_cu->intra.isp_mode; - int split_limit = split_type == ISP_MODE_NO_ISP ? 1 : uvg_get_isp_split_num(cu_width, cu_height, split_type); + int split_limit = split_type == ISP_MODE_NO_ISP ? 1 : uvg_get_isp_split_num(cu_width, cu_height, split_type, true); luma_cbf_ctx = split_limit != 1 ? 2 : 0; // If all first three splits have luma cbf 0, the last one must be one. Since the value ca be derived, no need to write it bool can_skip_last_cbf = true; for (int i = 0; i < split_limit; ++i) { cu_loc_t split_loc; - uvg_get_isp_split_loc(&split_loc, x, y, cu_width, cu_height, i, split_type); + uvg_get_isp_split_loc(&split_loc, x, y, cu_width, cu_height, i, split_type, true); // Check if last split to write chroma bool last_split = (i + 1) == split_limit; diff --git a/src/intra.c b/src/intra.c index 2239eeac..e6ccb77a 100644 --- a/src/intra.c +++ b/src/intra.c @@ -1077,6 +1077,10 @@ void uvg_intra_build_reference_any( } else { px_available_left = num_ref_pixels_left[lcu_px.y / 4][lcu_px.x / 4]; + // This table does not have values for dimensions less than 4 + if (lcu_px.y % 4 != 0) { + px_available_left -= 2; + } } } else { @@ -1085,7 +1089,6 @@ void uvg_intra_build_reference_any( // Limit the number of available pixels based on block size and dimensions // of the picture. - // TODO: height for non-square blocks px_available_left = MIN(px_available_left, cu_height * 2 + multi_ref_index); px_available_left = MIN(px_available_left, (pic_px->y - luma_px->y) >> is_chroma); @@ -1364,6 +1367,10 @@ void uvg_intra_build_reference_inner( } else { px_available_left = num_ref_pixels_left[lcu_px.y / 4][lcu_px.x / 4]; + // This table does not have values for dimensions less than 4 + if (lcu_px.y % 4 != 0) { + px_available_left -= 2; + } } } @@ -1378,13 +1385,24 @@ void uvg_intra_build_reference_inner( // Copy pixels from coded CUs. int i = multi_ref_index; // Offset by multi_ref_index - do { - out_left_ref[i + 1] = left_border[(i + 0 - multi_ref_index) * left_stride]; - out_left_ref[i + 2] = left_border[(i + 1 - multi_ref_index) * left_stride]; - out_left_ref[i + 3] = left_border[(i + 2 - multi_ref_index) * left_stride]; - out_left_ref[i + 4] = left_border[(i + 3 - multi_ref_index) * left_stride]; - i += 4; - } while (i < px_available_left); + + // Do different loop for heights smaller than 4 (possible for some ISP splits) + if (lcu_px.y % 4 != 0) { + do { + out_left_ref[i + 1] = left_border[(i + 0 - multi_ref_index) * left_stride]; + out_left_ref[i + 2] = left_border[(i + 1 - multi_ref_index) * left_stride]; + i += 2; + } while (i < px_available_left); + } + else { + do { + out_left_ref[i + 1] = left_border[(i + 0 - multi_ref_index) * left_stride]; + out_left_ref[i + 2] = left_border[(i + 1 - multi_ref_index) * left_stride]; + out_left_ref[i + 3] = left_border[(i + 2 - multi_ref_index) * left_stride]; + out_left_ref[i + 4] = left_border[(i + 3 - multi_ref_index) * left_stride]; + i += 4; + } while (i < px_available_left); + } // Extend the last pixel for the rest of the reference values. uvg_pixel nearest_pixel = out_left_ref[i]; @@ -1556,7 +1574,7 @@ const cu_info_t* uvg_get_co_located_luma_cu( * \param height Block height. * \param split_type Horizontal or vertical split. */ -int uvg_get_isp_split_dim(const int width, const int height, const int split_type) +int uvg_get_isp_split_dim(const int width, const int height, const int split_type, const bool is_transform_split) { assert(split_type != ISP_MODE_NO_ISP && "Cannot calculate split dimension if no split type is set. Make sure this function is not called in this case."); @@ -1576,35 +1594,45 @@ int uvg_get_isp_split_dim(const int width, const int height, const int split_typ const int factor_to_min_samples = non_split_dim_size < min_num_samples ? min_num_samples >> uvg_math_floor_log2(non_split_dim_size) : 1; partition_size = (split_dim_size >> div_shift) < factor_to_min_samples ? factor_to_min_samples : (split_dim_size >> div_shift); + // Minimum width for ISP splits are 4. (JVET-T2001 chapter 8.4.5.1 equation 246: nPbW = Max(4, nW)) + // Except this does not apply for transform blocks for some reason. VTM does seem to expect 4 transform blocks even if only two pred blocks were used + // Height can be 2. + if (!divide_in_rows && !is_transform_split) { + partition_size = MAX(4, partition_size); + } + assert((uvg_math_floor_log2(partition_size) + uvg_math_floor_log2(non_split_dim_size) >= uvg_math_floor_log2(min_num_samples)) && "Partition has less than allowed minimum number of samples."); return partition_size; } -int uvg_get_isp_split_num(const int width, const int height, const int split_type) +int uvg_get_isp_split_num(const int width, const int height, const int split_type, const bool is_transform_split) { assert((split_type != ISP_MODE_NO_ISP) && "This function cannot be called if ISP mode is 0."); - int split_dim = uvg_get_isp_split_dim(width, height, split_type); + int split_dim = uvg_get_isp_split_dim(width, height, split_type, is_transform_split); int num = split_type == ISP_MODE_HOR ? height / split_dim : width / split_dim; return num; } -void uvg_get_isp_split_loc(cu_loc_t *loc, const int x, const int y, const int block_w, const int block_h, const int split_idx, const int split_type) +void uvg_get_isp_split_loc(cu_loc_t *loc, const int x, const int y, const int block_w, const int block_h, int split_idx, const int split_type, const bool is_transform_split) { assert((split_idx >= 0 && split_idx <= 3) && "ISP split index must be in [0, 3]."); assert((split_type != ISP_MODE_NO_ISP || split_idx == 0) && "Trying to ISP split when split type = NO_ISP."); int part_dim = block_w; if (split_type != ISP_MODE_NO_ISP) { - part_dim = uvg_get_isp_split_dim(block_w, block_h, split_type); + part_dim = uvg_get_isp_split_dim(block_w, block_h, split_type, is_transform_split); + } + if(split_type == ISP_MODE_VER && block_w < 16 && !is_transform_split) { + split_idx /= 2; } const int offset = part_dim * split_idx; const int part_x = split_type == ISP_MODE_HOR ? x : x + offset; const int part_y = split_type == ISP_MODE_HOR ? y + offset : y; - const int part_w = split_type == ISP_MODE_HOR ? block_w : part_dim; + const int part_w = split_type == ISP_MODE_HOR ? block_w : part_dim; const int part_h = split_type == ISP_MODE_HOR ? part_dim : block_h; uvg_cu_loc_ctor(loc, part_x, part_y, part_w, part_h); @@ -1773,17 +1801,21 @@ void uvg_intra_recon_cu( // ISP split is done horizontally or vertically depending on ISP mode, 2 or 4 times depending on block dimensions. // Small blocks are split only twice. int split_type = search_data->pred_cu.intra.isp_mode; - int split_limit = uvg_get_isp_split_num(width, height, split_type); + int split_limit = uvg_get_isp_split_num(width, height, split_type, true); cu_loc_t origin_cu; uvg_cu_loc_ctor(&origin_cu, x, y, width, height); for (int i = 0; i < split_limit; ++i) { - cu_loc_t split_loc; - uvg_get_isp_split_loc(&split_loc, x, y, width, height, i, split_type); + cu_loc_t tu_loc; + uvg_get_isp_split_loc(&tu_loc, x, y, width, height, i, split_type, true); + cu_loc_t pu_loc; + uvg_get_isp_split_loc(&pu_loc, x, y, width, height, i, split_type, false); - intra_recon_tb_leaf(state, &split_loc, &origin_cu, lcu, COLOR_Y, search_data, tree_type); + if(tu_loc.x % 4 == 0) { + intra_recon_tb_leaf(state, &pu_loc, &origin_cu, lcu, COLOR_Y, search_data, tree_type); + } uvg_quantize_lcu_residual(state, true, false, false, - &split_loc, depth, cur_cu, lcu, + &tu_loc, depth, cur_cu, lcu, false, tree_type); search_data->best_isp_cbfs |= cbf_is_set(cur_cu->cbf, depth, COLOR_Y) << i; } @@ -1855,8 +1887,8 @@ bool uvg_can_use_isp_with_lfnst(const int width, const int height, const int isp return false; } - const int tu_width = (isp_split_type == ISP_MODE_HOR) ? width : uvg_get_isp_split_dim(width, height, SPLIT_TYPE_VER); - const int tu_height = (isp_split_type == ISP_MODE_HOR) ? uvg_get_isp_split_dim(width, height, SPLIT_TYPE_HOR) : height; + const int tu_width = (isp_split_type == ISP_MODE_HOR) ? width : uvg_get_isp_split_dim(width, height, SPLIT_TYPE_VER, true); + const int tu_height = (isp_split_type == ISP_MODE_HOR) ? uvg_get_isp_split_dim(width, height, SPLIT_TYPE_HOR, true) : height; if (!(tu_width >= TR_MIN_WIDTH && tu_height >= TR_MIN_WIDTH)) { diff --git a/src/intra.h b/src/intra.h index f324c4fa..ac076327 100644 --- a/src/intra.h +++ b/src/intra.h @@ -171,8 +171,8 @@ int uvg_get_mip_flag_context(int x, int y, int width, int height, const lcu_t* l #define SPLIT_TYPE_HOR 1 #define SPLIT_TYPE_VER 2 -int uvg_get_isp_split_dim(const int width, const int height, const int split_type); -int uvg_get_isp_split_num(const int width, const int height, const int split_type); -void uvg_get_isp_split_loc(cu_loc_t *loc, const int x, const int y, const int block_w, const int block_h, const int split_idx, const int split_type); +int uvg_get_isp_split_dim(const int width, const int height, const int split_type, const bool is_transform_block); +int uvg_get_isp_split_num(const int width, const int height, const int split_type, const bool is_transform_block); +void uvg_get_isp_split_loc(cu_loc_t *loc, const int x, const int y, const int block_w, const int block_h, int split_idx, const int split_type, const bool is_transform_block); bool uvg_can_use_isp(const int width, const int height, const int max_tr_size); bool uvg_can_use_isp_with_lfnst(const int width, const int height, const int isp_mode, const enum uvg_tree_type tree_type); diff --git a/src/search.c b/src/search.c index 9e22cdcf..9e90d5b1 100644 --- a/src/search.c +++ b/src/search.c @@ -369,7 +369,8 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state, } else { // TODO: 8x4 CUs - for (int i = 0; i < 4; i++) { + const int split_limit = uvg_get_isp_split_num(width, height, pred_cu->intra.isp_mode, true); + for (int i = 0; i < split_limit; i++) { int luma_ctx = 2; if (i != 3 && isp_cbf != 0x8) { const int flag = (isp_cbf >> i) & 1; @@ -399,11 +400,11 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state, } else { int split_type = pred_cu->intra.isp_mode; - int split_limit = uvg_get_isp_split_num(width, height, split_type); + int split_limit = uvg_get_isp_split_num(width, height, split_type, true); for (int i = 0; i < split_limit; ++i) { cu_loc_t split_loc; - uvg_get_isp_split_loc(&split_loc, x_px, y_px, width, height, i, split_type); + uvg_get_isp_split_loc(&split_loc, x_px, y_px, width, height, i, split_type, true); const int part_x = split_loc.x; const int part_y = split_loc.y; @@ -602,7 +603,8 @@ static double cu_rd_cost_tr_split_accurate( } else { // TODO: 8x4 CUs - for (int i = 0; i < 4; i++) { + const int split_limit = uvg_get_isp_split_num(width, height, pred_cu->intra.isp_mode, true); + for (int i = 0; i < split_limit; i++) { int luma_ctx = 2; if (i != 3 && isp_cbf != 0x8) { const int flag = (isp_cbf >> i) & 1; @@ -646,11 +648,11 @@ static double cu_rd_cost_tr_split_accurate( } else { int split_type = pred_cu->intra.isp_mode; - int split_limit = uvg_get_isp_split_num(width, height, split_type); + int split_limit = uvg_get_isp_split_num(width, height, split_type, true); for (int i = 0; i < split_limit; ++i) { cu_loc_t split_loc; - uvg_get_isp_split_loc(&split_loc, x_px, y_px, width, height, i, split_type); + uvg_get_isp_split_loc(&split_loc, x_px, y_px, width, height, i, split_type, true); const int part_x = split_loc.x; const int part_y = split_loc.y; @@ -1131,14 +1133,14 @@ static double search_cu( // Set isp split cbfs here const int split_type = intra_search.pred_cu.intra.isp_mode; - const int split_num = split_type == ISP_MODE_NO_ISP ? 0 : uvg_get_isp_split_num(cu_width, cu_height, split_type); + const int split_num = split_type == ISP_MODE_NO_ISP ? 0 : uvg_get_isp_split_num(cu_width, cu_height, split_type, true); const int cbf_cb = cbf_is_set(cur_cu->cbf, depth, COLOR_U); const int cbf_cr = cbf_is_set(cur_cu->cbf, depth, COLOR_V); const int jccr = cur_cu->joint_cb_cr; for (int i = 0; i < split_num; ++i) { cu_loc_t isp_loc; - uvg_get_isp_split_loc(&isp_loc, x, y, cu_width, cu_height, i, split_type); + uvg_get_isp_split_loc(&isp_loc, x, y, cu_width, cu_height, i, split_type, true); // Fetching from CU array does not work for dimensions less than 4 // Fetch proper x, y coords for isp blocks int tmp_x = isp_loc.x; @@ -1146,6 +1148,7 @@ static double search_cu( uvg_get_isp_cu_arr_coords(&tmp_x, &tmp_y); cu_info_t* split_cu = LCU_GET_CU_AT_PX(lcu, tmp_x % LCU_WIDTH, tmp_y % LCU_WIDTH); bool cur_cbf = (intra_search.best_isp_cbfs >> i) & 1; + // ISP_TODO: here, cbfs are also set for chroma for all ISP splits, is this behavior wanted? cbf_clear(&split_cu->cbf, depth, COLOR_Y); cbf_clear(&split_cu->cbf, depth, COLOR_U); cbf_clear(&split_cu->cbf, depth, COLOR_V); diff --git a/src/strategies/avx2/intra-avx2.c b/src/strategies/avx2/intra-avx2.c index 63ff924d..f94b3a72 100644 --- a/src/strategies/avx2/intra-avx2.c +++ b/src/strategies/avx2/intra-avx2.c @@ -357,7 +357,7 @@ static void uvg_angular_pred_avx2( PDPC_filter = false; } else if (mode_disp > 0) { - PDPC_filter = (scale >= 0); + PDPC_filter &= (scale >= 0); } } if(PDPC_filter) { diff --git a/src/strategies/generic/intra-generic.c b/src/strategies/generic/intra-generic.c index eff47941..98911e5f 100644 --- a/src/strategies/generic/intra-generic.c +++ b/src/strategies/generic/intra-generic.c @@ -308,7 +308,7 @@ static void uvg_angular_pred_generic( PDPC_filter = false; } else if (mode_disp > 0) { - PDPC_filter = (scale >= 0); + PDPC_filter &= (scale >= 0); } } if(PDPC_filter) { diff --git a/src/transform.c b/src/transform.c index 526e71c2..ee9e79ec 100644 --- a/src/transform.c +++ b/src/transform.c @@ -1297,6 +1297,7 @@ static void quantize_tr_residual( } + // ISP_TODO: does this cu point to correct cbf when ISP is used for small blocks? cbf_clear(&cur_pu->cbf, depth, color); if (has_coeffs) { for (int j = 0; j < tr_height; ++j) { @@ -1353,7 +1354,7 @@ void uvg_quantize_lcu_residual( // Tell clang-analyzer what is up. For some reason it can't figure out from // asserting just depth. - // Width 2 is possible with ISP blocks + // Width 2 is possible with ISP blocks // ISP_TODO: no, they actually are not assert(width == 2 || width == 4 || width == 8 || @@ -1363,6 +1364,7 @@ void uvg_quantize_lcu_residual( // Reset CBFs because CBFs might have been set // for depth earlier + // ISP_TODO: does this cur_cu point to the correct place when ISP is used for small blocks? if (luma) { cbf_clear(&cur_pu->cbf, depth, COLOR_Y); }