diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index e0886bec..a8ba706c 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -1717,13 +1717,13 @@ void uvg_encode_coding_tree( // ISP split is done horizontally or vertically depending on ISP mode, 2 or 4 times depending on block dimensions. // Small blocks are split only twice. int split_type = cur_cu->intra.isp_mode; - int split_limit = split_type == ISP_MODE_NO_ISP ? 1 : uvg_get_isp_split_num(cu_width, cu_height, split_type); + int split_limit = split_type == ISP_MODE_NO_ISP ? 1 : uvg_get_isp_split_num(cu_width, cu_height, split_type, true); luma_cbf_ctx = split_limit != 1 ? 2 : 0; // If all first three splits have luma cbf 0, the last one must be one. Since the value ca be derived, no need to write it bool can_skip_last_cbf = true; for (int i = 0; i < split_limit; ++i) { cu_loc_t split_loc; - uvg_get_isp_split_loc(&split_loc, x, y, cu_width, cu_height, i, split_type); + uvg_get_isp_split_loc(&split_loc, x, y, cu_width, cu_height, i, split_type, true); // Check if last split to write chroma bool last_split = (i + 1) == split_limit; diff --git a/src/intra.c b/src/intra.c index 2239eeac..e6ccb77a 100644 --- a/src/intra.c +++ b/src/intra.c @@ -1077,6 +1077,10 @@ void uvg_intra_build_reference_any( } else { px_available_left = num_ref_pixels_left[lcu_px.y / 4][lcu_px.x / 4]; + // This table does not have values for dimensions less than 4 + if (lcu_px.y % 4 != 0) { + px_available_left -= 2; + } } } else { @@ -1085,7 +1089,6 @@ void uvg_intra_build_reference_any( // Limit the number of available pixels based on block size and dimensions // of the picture. - // TODO: height for non-square blocks px_available_left = MIN(px_available_left, cu_height * 2 + multi_ref_index); px_available_left = MIN(px_available_left, (pic_px->y - luma_px->y) >> is_chroma); @@ -1364,6 +1367,10 @@ void uvg_intra_build_reference_inner( } else { px_available_left = num_ref_pixels_left[lcu_px.y / 4][lcu_px.x / 4]; + // This table does not have values for dimensions less than 4 + if (lcu_px.y % 4 != 0) { + px_available_left -= 2; + } } } @@ -1378,13 +1385,24 @@ void uvg_intra_build_reference_inner( // Copy pixels from coded CUs. int i = multi_ref_index; // Offset by multi_ref_index - do { - out_left_ref[i + 1] = left_border[(i + 0 - multi_ref_index) * left_stride]; - out_left_ref[i + 2] = left_border[(i + 1 - multi_ref_index) * left_stride]; - out_left_ref[i + 3] = left_border[(i + 2 - multi_ref_index) * left_stride]; - out_left_ref[i + 4] = left_border[(i + 3 - multi_ref_index) * left_stride]; - i += 4; - } while (i < px_available_left); + + // Do different loop for heights smaller than 4 (possible for some ISP splits) + if (lcu_px.y % 4 != 0) { + do { + out_left_ref[i + 1] = left_border[(i + 0 - multi_ref_index) * left_stride]; + out_left_ref[i + 2] = left_border[(i + 1 - multi_ref_index) * left_stride]; + i += 2; + } while (i < px_available_left); + } + else { + do { + out_left_ref[i + 1] = left_border[(i + 0 - multi_ref_index) * left_stride]; + out_left_ref[i + 2] = left_border[(i + 1 - multi_ref_index) * left_stride]; + out_left_ref[i + 3] = left_border[(i + 2 - multi_ref_index) * left_stride]; + out_left_ref[i + 4] = left_border[(i + 3 - multi_ref_index) * left_stride]; + i += 4; + } while (i < px_available_left); + } // Extend the last pixel for the rest of the reference values. uvg_pixel nearest_pixel = out_left_ref[i]; @@ -1556,7 +1574,7 @@ const cu_info_t* uvg_get_co_located_luma_cu( * \param height Block height. * \param split_type Horizontal or vertical split. */ -int uvg_get_isp_split_dim(const int width, const int height, const int split_type) +int uvg_get_isp_split_dim(const int width, const int height, const int split_type, const bool is_transform_split) { assert(split_type != ISP_MODE_NO_ISP && "Cannot calculate split dimension if no split type is set. Make sure this function is not called in this case."); @@ -1576,35 +1594,45 @@ int uvg_get_isp_split_dim(const int width, const int height, const int split_typ const int factor_to_min_samples = non_split_dim_size < min_num_samples ? min_num_samples >> uvg_math_floor_log2(non_split_dim_size) : 1; partition_size = (split_dim_size >> div_shift) < factor_to_min_samples ? factor_to_min_samples : (split_dim_size >> div_shift); + // Minimum width for ISP splits are 4. (JVET-T2001 chapter 8.4.5.1 equation 246: nPbW = Max(4, nW)) + // Except this does not apply for transform blocks for some reason. VTM does seem to expect 4 transform blocks even if only two pred blocks were used + // Height can be 2. + if (!divide_in_rows && !is_transform_split) { + partition_size = MAX(4, partition_size); + } + assert((uvg_math_floor_log2(partition_size) + uvg_math_floor_log2(non_split_dim_size) >= uvg_math_floor_log2(min_num_samples)) && "Partition has less than allowed minimum number of samples."); return partition_size; } -int uvg_get_isp_split_num(const int width, const int height, const int split_type) +int uvg_get_isp_split_num(const int width, const int height, const int split_type, const bool is_transform_split) { assert((split_type != ISP_MODE_NO_ISP) && "This function cannot be called if ISP mode is 0."); - int split_dim = uvg_get_isp_split_dim(width, height, split_type); + int split_dim = uvg_get_isp_split_dim(width, height, split_type, is_transform_split); int num = split_type == ISP_MODE_HOR ? height / split_dim : width / split_dim; return num; } -void uvg_get_isp_split_loc(cu_loc_t *loc, const int x, const int y, const int block_w, const int block_h, const int split_idx, const int split_type) +void uvg_get_isp_split_loc(cu_loc_t *loc, const int x, const int y, const int block_w, const int block_h, int split_idx, const int split_type, const bool is_transform_split) { assert((split_idx >= 0 && split_idx <= 3) && "ISP split index must be in [0, 3]."); assert((split_type != ISP_MODE_NO_ISP || split_idx == 0) && "Trying to ISP split when split type = NO_ISP."); int part_dim = block_w; if (split_type != ISP_MODE_NO_ISP) { - part_dim = uvg_get_isp_split_dim(block_w, block_h, split_type); + part_dim = uvg_get_isp_split_dim(block_w, block_h, split_type, is_transform_split); + } + if(split_type == ISP_MODE_VER && block_w < 16 && !is_transform_split) { + split_idx /= 2; } const int offset = part_dim * split_idx; const int part_x = split_type == ISP_MODE_HOR ? x : x + offset; const int part_y = split_type == ISP_MODE_HOR ? y + offset : y; - const int part_w = split_type == ISP_MODE_HOR ? block_w : part_dim; + const int part_w = split_type == ISP_MODE_HOR ? block_w : part_dim; const int part_h = split_type == ISP_MODE_HOR ? part_dim : block_h; uvg_cu_loc_ctor(loc, part_x, part_y, part_w, part_h); @@ -1773,17 +1801,21 @@ void uvg_intra_recon_cu( // ISP split is done horizontally or vertically depending on ISP mode, 2 or 4 times depending on block dimensions. // Small blocks are split only twice. int split_type = search_data->pred_cu.intra.isp_mode; - int split_limit = uvg_get_isp_split_num(width, height, split_type); + int split_limit = uvg_get_isp_split_num(width, height, split_type, true); cu_loc_t origin_cu; uvg_cu_loc_ctor(&origin_cu, x, y, width, height); for (int i = 0; i < split_limit; ++i) { - cu_loc_t split_loc; - uvg_get_isp_split_loc(&split_loc, x, y, width, height, i, split_type); + cu_loc_t tu_loc; + uvg_get_isp_split_loc(&tu_loc, x, y, width, height, i, split_type, true); + cu_loc_t pu_loc; + uvg_get_isp_split_loc(&pu_loc, x, y, width, height, i, split_type, false); - intra_recon_tb_leaf(state, &split_loc, &origin_cu, lcu, COLOR_Y, search_data, tree_type); + if(tu_loc.x % 4 == 0) { + intra_recon_tb_leaf(state, &pu_loc, &origin_cu, lcu, COLOR_Y, search_data, tree_type); + } uvg_quantize_lcu_residual(state, true, false, false, - &split_loc, depth, cur_cu, lcu, + &tu_loc, depth, cur_cu, lcu, false, tree_type); search_data->best_isp_cbfs |= cbf_is_set(cur_cu->cbf, depth, COLOR_Y) << i; } @@ -1855,8 +1887,8 @@ bool uvg_can_use_isp_with_lfnst(const int width, const int height, const int isp return false; } - const int tu_width = (isp_split_type == ISP_MODE_HOR) ? width : uvg_get_isp_split_dim(width, height, SPLIT_TYPE_VER); - const int tu_height = (isp_split_type == ISP_MODE_HOR) ? uvg_get_isp_split_dim(width, height, SPLIT_TYPE_HOR) : height; + const int tu_width = (isp_split_type == ISP_MODE_HOR) ? width : uvg_get_isp_split_dim(width, height, SPLIT_TYPE_VER, true); + const int tu_height = (isp_split_type == ISP_MODE_HOR) ? uvg_get_isp_split_dim(width, height, SPLIT_TYPE_HOR, true) : height; if (!(tu_width >= TR_MIN_WIDTH && tu_height >= TR_MIN_WIDTH)) { diff --git a/src/intra.h b/src/intra.h index f324c4fa..ac076327 100644 --- a/src/intra.h +++ b/src/intra.h @@ -171,8 +171,8 @@ int uvg_get_mip_flag_context(int x, int y, int width, int height, const lcu_t* l #define SPLIT_TYPE_HOR 1 #define SPLIT_TYPE_VER 2 -int uvg_get_isp_split_dim(const int width, const int height, const int split_type); -int uvg_get_isp_split_num(const int width, const int height, const int split_type); -void uvg_get_isp_split_loc(cu_loc_t *loc, const int x, const int y, const int block_w, const int block_h, const int split_idx, const int split_type); +int uvg_get_isp_split_dim(const int width, const int height, const int split_type, const bool is_transform_block); +int uvg_get_isp_split_num(const int width, const int height, const int split_type, const bool is_transform_block); +void uvg_get_isp_split_loc(cu_loc_t *loc, const int x, const int y, const int block_w, const int block_h, int split_idx, const int split_type, const bool is_transform_block); bool uvg_can_use_isp(const int width, const int height, const int max_tr_size); bool uvg_can_use_isp_with_lfnst(const int width, const int height, const int isp_mode, const enum uvg_tree_type tree_type); diff --git a/src/search.c b/src/search.c index 9e22cdcf..9e90d5b1 100644 --- a/src/search.c +++ b/src/search.c @@ -369,7 +369,8 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state, } else { // TODO: 8x4 CUs - for (int i = 0; i < 4; i++) { + const int split_limit = uvg_get_isp_split_num(width, height, pred_cu->intra.isp_mode, true); + for (int i = 0; i < split_limit; i++) { int luma_ctx = 2; if (i != 3 && isp_cbf != 0x8) { const int flag = (isp_cbf >> i) & 1; @@ -399,11 +400,11 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state, } else { int split_type = pred_cu->intra.isp_mode; - int split_limit = uvg_get_isp_split_num(width, height, split_type); + int split_limit = uvg_get_isp_split_num(width, height, split_type, true); for (int i = 0; i < split_limit; ++i) { cu_loc_t split_loc; - uvg_get_isp_split_loc(&split_loc, x_px, y_px, width, height, i, split_type); + uvg_get_isp_split_loc(&split_loc, x_px, y_px, width, height, i, split_type, true); const int part_x = split_loc.x; const int part_y = split_loc.y; @@ -602,7 +603,8 @@ static double cu_rd_cost_tr_split_accurate( } else { // TODO: 8x4 CUs - for (int i = 0; i < 4; i++) { + const int split_limit = uvg_get_isp_split_num(width, height, pred_cu->intra.isp_mode, true); + for (int i = 0; i < split_limit; i++) { int luma_ctx = 2; if (i != 3 && isp_cbf != 0x8) { const int flag = (isp_cbf >> i) & 1; @@ -646,11 +648,11 @@ static double cu_rd_cost_tr_split_accurate( } else { int split_type = pred_cu->intra.isp_mode; - int split_limit = uvg_get_isp_split_num(width, height, split_type); + int split_limit = uvg_get_isp_split_num(width, height, split_type, true); for (int i = 0; i < split_limit; ++i) { cu_loc_t split_loc; - uvg_get_isp_split_loc(&split_loc, x_px, y_px, width, height, i, split_type); + uvg_get_isp_split_loc(&split_loc, x_px, y_px, width, height, i, split_type, true); const int part_x = split_loc.x; const int part_y = split_loc.y; @@ -1131,14 +1133,14 @@ static double search_cu( // Set isp split cbfs here const int split_type = intra_search.pred_cu.intra.isp_mode; - const int split_num = split_type == ISP_MODE_NO_ISP ? 0 : uvg_get_isp_split_num(cu_width, cu_height, split_type); + const int split_num = split_type == ISP_MODE_NO_ISP ? 0 : uvg_get_isp_split_num(cu_width, cu_height, split_type, true); const int cbf_cb = cbf_is_set(cur_cu->cbf, depth, COLOR_U); const int cbf_cr = cbf_is_set(cur_cu->cbf, depth, COLOR_V); const int jccr = cur_cu->joint_cb_cr; for (int i = 0; i < split_num; ++i) { cu_loc_t isp_loc; - uvg_get_isp_split_loc(&isp_loc, x, y, cu_width, cu_height, i, split_type); + uvg_get_isp_split_loc(&isp_loc, x, y, cu_width, cu_height, i, split_type, true); // Fetching from CU array does not work for dimensions less than 4 // Fetch proper x, y coords for isp blocks int tmp_x = isp_loc.x; @@ -1146,6 +1148,7 @@ static double search_cu( uvg_get_isp_cu_arr_coords(&tmp_x, &tmp_y); cu_info_t* split_cu = LCU_GET_CU_AT_PX(lcu, tmp_x % LCU_WIDTH, tmp_y % LCU_WIDTH); bool cur_cbf = (intra_search.best_isp_cbfs >> i) & 1; + // ISP_TODO: here, cbfs are also set for chroma for all ISP splits, is this behavior wanted? cbf_clear(&split_cu->cbf, depth, COLOR_Y); cbf_clear(&split_cu->cbf, depth, COLOR_U); cbf_clear(&split_cu->cbf, depth, COLOR_V); diff --git a/src/strategies/avx2/intra-avx2.c b/src/strategies/avx2/intra-avx2.c index 63ff924d..f94b3a72 100644 --- a/src/strategies/avx2/intra-avx2.c +++ b/src/strategies/avx2/intra-avx2.c @@ -357,7 +357,7 @@ static void uvg_angular_pred_avx2( PDPC_filter = false; } else if (mode_disp > 0) { - PDPC_filter = (scale >= 0); + PDPC_filter &= (scale >= 0); } } if(PDPC_filter) { diff --git a/src/strategies/generic/intra-generic.c b/src/strategies/generic/intra-generic.c index eff47941..98911e5f 100644 --- a/src/strategies/generic/intra-generic.c +++ b/src/strategies/generic/intra-generic.c @@ -308,7 +308,7 @@ static void uvg_angular_pred_generic( PDPC_filter = false; } else if (mode_disp > 0) { - PDPC_filter = (scale >= 0); + PDPC_filter &= (scale >= 0); } } if(PDPC_filter) { diff --git a/src/transform.c b/src/transform.c index 526e71c2..ee9e79ec 100644 --- a/src/transform.c +++ b/src/transform.c @@ -1297,6 +1297,7 @@ static void quantize_tr_residual( } + // ISP_TODO: does this cu point to correct cbf when ISP is used for small blocks? cbf_clear(&cur_pu->cbf, depth, color); if (has_coeffs) { for (int j = 0; j < tr_height; ++j) { @@ -1353,7 +1354,7 @@ void uvg_quantize_lcu_residual( // Tell clang-analyzer what is up. For some reason it can't figure out from // asserting just depth. - // Width 2 is possible with ISP blocks + // Width 2 is possible with ISP blocks // ISP_TODO: no, they actually are not assert(width == 2 || width == 4 || width == 8 || @@ -1363,6 +1364,7 @@ void uvg_quantize_lcu_residual( // Reset CBFs because CBFs might have been set // for depth earlier + // ISP_TODO: does this cur_cu point to the correct place when ISP is used for small blocks? if (luma) { cbf_clear(&cur_pu->cbf, depth, COLOR_Y); }