From 2da1a34ff36a52d7c6f32df7f04fb9685a23f641 Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Tue, 6 Dec 2022 11:23:30 +0200 Subject: [PATCH] [mtt] Fix isp for MTT --- src/encode_coding_tree.c | 44 ++++++++++-- src/encode_coding_tree.h | 2 +- src/global.h | 4 +- src/intra.c | 95 +++++++++++++++----------- src/intra.h | 2 +- src/search.c | 8 +-- src/search_intra.c | 16 +++-- src/strategies/avx2/intra-avx2.c | 3 +- src/strategies/generic/dct-generic.c | 21 ++++-- src/strategies/generic/intra-generic.c | 12 ++-- src/strategies/generic/quant-generic.c | 15 ++-- src/strategies/strategies-intra.h | 3 +- src/transform.c | 25 +++++-- 13 files changed, 160 insertions(+), 90 deletions(-) diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index ab79ee92..d3f4de29 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -111,7 +111,7 @@ bool uvg_is_lfnst_allowed( const cu_info_t* const pred_cu, enum uvg_tree_type tree_type, const color_t color, - const cu_loc_t* const cu_loc) + const cu_loc_t* const cu_loc, const lcu_t* const lcu) { if (state->encoder_control->cfg.lfnst && pred_cu->type == CU_INTRA && PU_IS_TU(pred_cu)) { const int isp_mode = pred_cu->intra.isp_mode; @@ -121,22 +121,51 @@ bool uvg_is_lfnst_allowed( bool is_sep_tree = tree_type != UVG_BOTH_T; bool mip_flag = pred_cu->type == CU_INTRA && color == COLOR_Y ? pred_cu->intra.mip_flag : false; - if ((isp_mode && !uvg_can_use_isp_with_lfnst(cu_width, cu_height, isp_mode, tree_type)) || - (pred_cu->type == CU_INTRA && mip_flag && !can_use_lfnst_with_mip) || + if ((isp_mode && !uvg_can_use_isp_with_lfnst(cu_width, cu_height, isp_mode, tree_type) && color == COLOR_Y) || + (pred_cu->type == CU_INTRA && mip_flag && !can_use_lfnst_with_mip && color == COLOR_Y) || (is_sep_tree && MIN(cu_width, cu_height) < 4) || (cu_width > (TR_MAX_WIDTH >> (tree_type == UVG_CHROMA_T)) || cu_height > (TR_MAX_WIDTH >> (tree_type == UVG_CHROMA_T)))) { return false; } bool luma_flag = tree_type != UVG_CHROMA_T; bool chroma_flag = tree_type != UVG_LUMA_T; - bool non_zero_coeff_non_ts_corner_8x8 = (luma_flag && pred_cu->violates_lfnst_constrained_luma) || (chroma_flag && pred_cu->violates_lfnst_constrained_chroma); + bool non_zero_coeff_non_ts_corner_8x8 = false; + bool last_scan_pos = false; bool is_tr_skip = false; + int split_num = color == COLOR_Y && isp_mode ? uvg_get_isp_split_num(cu_width, cu_height, isp_mode, false) : 0; + const videoframe_t* const frame = state->tile->frame; + + if (split_num) { + // Constraints for ISP split blocks + for (int i = 0; i < split_num; ++i) { + cu_loc_t split_loc; + uvg_get_isp_split_loc(&split_loc, cu_loc->x, cu_loc->y, cu_width, cu_height, i, isp_mode, false); + int local_split_x = split_loc.x; + int local_split_y = split_loc.y; + uvg_get_isp_cu_arr_coords(&local_split_x, &local_split_y); + const cu_info_t* split_cu = lcu ? LCU_GET_CU_AT_PX(lcu, local_split_x, local_split_y) : + uvg_cu_array_at_const(frame->cu_array, local_split_x, local_split_y); + + //if (cbf_is_set(split_cu->cbf, depth, COLOR_Y)) { + // ISP_TODO: remove this if clause altogether if it seems it is not needed + if (true) { + non_zero_coeff_non_ts_corner_8x8 |= (luma_flag && split_cu->violates_lfnst_constrained_luma) || (chroma_flag && split_cu->violates_lfnst_constrained_chroma); + //last_scan_pos |= split_cu->lfnst_last_scan_pos; + last_scan_pos |= true; + } + } + } + else { + non_zero_coeff_non_ts_corner_8x8 |= (luma_flag && pred_cu->violates_lfnst_constrained_luma) || (chroma_flag && pred_cu->violates_lfnst_constrained_chroma); + last_scan_pos |= pred_cu->lfnst_last_scan_pos; + } + if (color == COLOR_Y && pred_cu->tr_idx == MTS_SKIP) { is_tr_skip = true; } - if ((!pred_cu->lfnst_last_scan_pos && !isp_mode) || non_zero_coeff_non_ts_corner_8x8 || is_tr_skip) { + if ((!last_scan_pos) || non_zero_coeff_non_ts_corner_8x8 || is_tr_skip) { return false; } return true; @@ -155,7 +184,7 @@ static bool encode_lfnst_idx( const cu_loc_t* const cu_loc) { - if (uvg_is_lfnst_allowed(state, pred_cu, tree_type, color, cu_loc)) { + if (uvg_is_lfnst_allowed(state, pred_cu, tree_type, color, cu_loc, NULL)) { // Getting separate tree bool from block size is a temporary fix until a proper dual tree check is possible (there is no dual tree structure at time of writing this). // VTM seems to force explicit dual tree structure for small 4x4 blocks bool is_separate_tree = tree_type != UVG_BOTH_T; @@ -1399,7 +1428,7 @@ void uvg_encode_coding_tree( DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_CU_TYPE, abs_x, abs_y, cu_width, cu_height, cur_cu->type-1); - // fprintf(stderr, "%4d %4d %2d %2d %d\n", x, y, cu_width, cu_height, has_chroma); + //fprintf(stderr, "%4d %4d %2d %2d %d\n", x, y, cu_width, cu_height, has_chroma); if (ctrl->cfg.lossless) { cabac->cur_ctx = &cabac->ctx.cu_transquant_bypass; @@ -1611,6 +1640,7 @@ void uvg_encode_coding_tree( encode_transform_coeff(state, &split_loc, 0, coeff, NULL, tree_type, last_split, can_skip_last_cbf, &luma_cbf_ctx, cu_loc, is_local_dual_tree ? NULL : chroma_loc); + can_skip_last_cbf &= luma_cbf_ctx == 2; } } diff --git a/src/encode_coding_tree.h b/src/encode_coding_tree.h index 96e0cfb7..0e72369e 100644 --- a/src/encode_coding_tree.h +++ b/src/encode_coding_tree.h @@ -47,7 +47,7 @@ bool uvg_is_lfnst_allowed( const cu_info_t* const pred_cu, enum uvg_tree_type tree_type, const color_t color, - const cu_loc_t* const cu_loc); + const cu_loc_t* const cu_loc, const lcu_t* const lcu); void uvg_encode_coding_tree( encoder_state_t * const state, diff --git a/src/global.h b/src/global.h index c5b73c93..27058463 100644 --- a/src/global.h +++ b/src/global.h @@ -128,9 +128,9 @@ typedef int16_t coeff_t; typedef int32_t mv_t; -#define VERBOSE 1 +//#define VERBOSE 1 #define UVG_DEBUG_PRINT_CABAC 1 -#define UVG_DEBUG 1 +//#define UVG_DEBUG 1 //#define UVG_DEBUG_PRINT_YUVIEW_CSV 1 //#define UVG_DEBUG_PRINT_MV_INFO 1 diff --git a/src/intra.c b/src/intra.c index 439910f5..429254c1 100644 --- a/src/intra.c +++ b/src/intra.c @@ -300,13 +300,13 @@ bool uvg_cclm_is_allowed(const encoder_state_t* const state, const cu_loc_t * co } const cu_info_t* const luma_cu = uvg_cu_array_at_const(state->tile->frame->cu_array, luma_loc->x, luma_loc->y); uint32_t split = GET_SPLITDATA(luma_cu, 0); - if (split != QT_SPLIT && split != NO_SPLIT) { - return false; + if (split != NO_SPLIT) { + allow = split == QT_SPLIT; } - if (split != NO_SPLIT && luma_cu->intra.isp_mode != ISP_MODE_NO_ISP) { - return false; + else if (split != NO_SPLIT && luma_cu->intra.isp_mode != ISP_MODE_NO_ISP) { + allow = false; } - return true; + return allow; } @@ -943,11 +943,15 @@ static void mip_predict( } -int8_t uvg_wide_angle_correction(int_fast8_t mode, const bool is_isp, const int log2_width, const int log2_height, const - bool account_for_dc_planar) +int8_t uvg_wide_angle_correction( + int_fast8_t mode, + const int log2_width, + const int log2_height, + const + bool account_for_dc_planar) { int8_t pred_mode = mode; - if (!is_isp && log2_width != log2_height) { + if (log2_width != log2_height) { if (mode > 1 && mode <= 66) { const int modeShift[] = { 0, 6, 10, 12, 14, 15 }; const int deltaSize = abs(log2_width - log2_height); @@ -965,15 +969,17 @@ int8_t uvg_wide_angle_correction(int_fast8_t mode, const bool is_isp, const int static void intra_predict_regular( const encoder_state_t* const state, uvg_intra_references *refs, + const cu_info_t* const cur_cu, const cu_loc_t* const cu_loc, + const cu_loc_t* const pu_loc, int_fast8_t mode, color_t color, uvg_pixel *dst, const uint8_t multi_ref_idx, const uint8_t isp_mode) { - const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width; - const int height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height; + const int width = color == COLOR_Y ? pu_loc->width : pu_loc->chroma_width; + const int height = color == COLOR_Y ? pu_loc->height : pu_loc->chroma_height; const int log2_width = uvg_g_convert_to_log2[width]; const int log2_height = uvg_g_convert_to_log2[height]; const uvg_config *cfg = &state->encoder_control->cfg; @@ -983,11 +989,12 @@ static void intra_predict_regular( uint8_t isp = color == COLOR_Y ? isp_mode : 0; // Wide angle correction - int8_t pred_mode = uvg_wide_angle_correction(mode, - isp_mode, - log2_width, - log2_height, - false); + int8_t pred_mode = uvg_wide_angle_correction( + mode, + color == COLOR_Y ? cur_cu->log2_width : log2_width, + color == COLOR_Y ? cur_cu->log2_height : log2_height, + false + ); const uvg_intra_ref *used_ref = &refs->ref; if (cfg->intra_smoothing_disabled || color != COLOR_Y || mode == 1 || (width == 4 && height == 4) || multi_ref_index || isp_mode /*ISP_TODO: replace this fake ISP check*/) { @@ -1019,11 +1026,20 @@ static void intra_predict_regular( } if (mode == 0) { - uvg_intra_pred_planar(cu_loc, color, used_ref->top, used_ref->left, dst); + uvg_intra_pred_planar(pu_loc, color, used_ref->top, used_ref->left, dst); } else if (mode == 1) { - intra_pred_dc(cu_loc, color, used_ref->top, used_ref->left, dst, multi_ref_index); + intra_pred_dc(pu_loc, color, used_ref->top, used_ref->left, dst, multi_ref_index); } else { - uvg_angular_pred(cu_loc, pred_mode, color, used_ref->top, used_ref->left, dst, multi_ref_index, isp); + uvg_angular_pred( + pu_loc, + pred_mode, + color, + used_ref->top, + used_ref->left, + dst, + multi_ref_index, + isp, + isp_mode == ISP_MODE_HOR ? cu_loc->height : cu_loc->width); } // pdpc @@ -1032,7 +1048,7 @@ static void intra_predict_regular( pdpcCondition &= width >= TR_MIN_WIDTH && height >= TR_MIN_WIDTH; if (pdpcCondition && multi_ref_index == 0) // Cannot be used with MRL. { - uvg_pdpc_planar_dc(mode, cu_loc, color, used_ref, dst); + uvg_pdpc_planar_dc(mode, pu_loc, color, used_ref, dst); } } @@ -1065,7 +1081,7 @@ void uvg_intra_build_reference_any( bool is_first_isp_block = isp_mode ? pu_x == cu_x && pu_y == cu_y : false; - assert((log2_width >= 1 && log2_width <= 5) && (log2_height >= 1 && log2_height <= 5)); + assert((log2_width >= 2 && log2_width <= 5) && log2_height <= 5); refs->filtered_initialized = false; uvg_pixel *out_left_ref = &refs->ref.left[0]; @@ -1138,11 +1154,8 @@ void uvg_intra_build_reference_any( px_available_left = height; } else { - px_available_left = num_ref_pixels_left[lcu_px.y / 4][lcu_px.x / 4]; - // This table does not have values for dimensions less than 4 - if (lcu_px.y % 4 != 0) { - px_available_left -= 2; - } + px_available_left = uvg_count_available_edge_cus(cu_loc, lcu, true) * 4; + px_available_left -= pu_loc->y - cu_loc->y; } } else { @@ -1270,7 +1283,8 @@ void uvg_intra_build_reference_any( px_available_top = width; } else { - px_available_top = num_ref_pixels_top[lcu_px.y / 4][lcu_px.x / 4]; + px_available_top = uvg_count_available_edge_cus(cu_loc, lcu, false) * 4; + px_available_top -= pu_loc->x - cu_loc->x; } } else { @@ -1343,7 +1357,7 @@ void uvg_intra_build_reference_inner( bool is_first_isp_block = isp_mode ? pu_x == cu_x && pu_y == cu_y : false; // Log2_dim 1 is possible with ISP blocks - assert((log2_width >= 1 && log2_width <= 5) && (log2_height >= 1 && log2_height <= 5)); + assert((log2_width >= 2 && log2_width <= 5) && log2_height <= 5); refs->filtered_initialized = false; uvg_pixel * __restrict out_left_ref = &refs->ref.left[0]; @@ -1457,11 +1471,8 @@ void uvg_intra_build_reference_inner( px_available_left = height; } else { - px_available_left = num_ref_pixels_left[lcu_px.y / 4][lcu_px.x / 4]; - // This table does not have values for dimensions less than 4 - if (lcu_px.y % 4 != 0) { - px_available_left -= 2; - } + px_available_left = uvg_count_available_edge_cus(cu_loc, lcu, true) * 4; + px_available_left -= pu_loc->y - cu_loc->y; } } @@ -1477,7 +1488,7 @@ void uvg_intra_build_reference_inner( // Limit the number of available pixels based on block size and dimensions // of the picture. - px_available_left = MIN(px_available_left, height * 2); + px_available_left = MIN(px_available_left, cu_height * 2); px_available_left = MIN(px_available_left, (pic_px->y - luma_px->y) >> is_chroma); // Copy pixels from coded CUs. @@ -1529,7 +1540,8 @@ void uvg_intra_build_reference_inner( px_available_top = width; } else { - px_available_top = num_ref_pixels_top[lcu_px.y / 4][lcu_px.x / 4]; + px_available_top = uvg_count_available_edge_cus(cu_loc, lcu, false) * 4; + px_available_top -= pu_loc->x - cu_loc->x; } } else { @@ -1603,6 +1615,7 @@ void uvg_intra_predict( const encoder_state_t* const state, uvg_intra_references* const refs, const cu_loc_t* const cu_loc, + const cu_loc_t* const pu_loc, const color_t color, uvg_pixel* dst, const intra_search_data_t* data, @@ -1614,10 +1627,10 @@ void uvg_intra_predict( // TODO: what is this used for? // const bool filter_boundary = color == COLOR_Y && !(cfg->lossless && cfg->implicit_rdpcm); bool use_mip = false; - const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width; - const int height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height; - const int x = cu_loc->x; - const int y = cu_loc->y; + const int width = color == COLOR_Y ? pu_loc->width : pu_loc->chroma_width; + const int height = color == COLOR_Y ? pu_loc->height : pu_loc->chroma_height; + const int x = pu_loc->x; + const int y = pu_loc->y; int8_t intra_mode = color == COLOR_Y ? data->pred_cu.intra.mode : data->pred_cu.intra.mode_chroma; if (data->pred_cu.intra.mip_flag) { if (color == COLOR_Y) { @@ -1633,7 +1646,7 @@ void uvg_intra_predict( mip_predict(state, refs, width, height, dst, intra_mode, data->pred_cu.intra.mip_is_transposed); } else { - intra_predict_regular(state, refs, cu_loc, intra_mode, color, dst, data->pred_cu.intra.multi_ref_idx, data->pred_cu.intra.isp_mode); + intra_predict_regular(state, refs, &data->pred_cu, cu_loc, pu_loc, intra_mode, color, dst, data->pred_cu.intra.multi_ref_idx, data->pred_cu.intra.isp_mode); } } else { @@ -1748,7 +1761,7 @@ void uvg_get_isp_split_loc(cu_loc_t *loc, const int x, const int y, const int bl if (split_type != ISP_MODE_NO_ISP) { part_dim = uvg_get_isp_split_dim(block_w, block_h, split_type, is_transform_split); } - if(split_type == ISP_MODE_VER && block_w < 16 && !is_transform_split) { + if(split_type == ISP_MODE_VER && block_w < 16 && block_h != 4 && !is_transform_split) { split_idx /= 2; } const int offset = part_dim * split_idx; @@ -1818,7 +1831,7 @@ static void intra_recon_tb_leaf( uvg_intra_build_reference(state, pu_loc, cu_loc, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp, extra_refs, multi_ref_index, isp_mode); uvg_pixel pred[32 * 32]; - uvg_intra_predict(state, &refs, pu_loc, color, pred, search_data, lcu, tree_type); + uvg_intra_predict(state, &refs, cu_loc, pu_loc, color, pred, search_data, lcu, tree_type); const int index = lcu_px.x + lcu_px.y * lcu_width; uvg_pixel *block = NULL; diff --git a/src/intra.h b/src/intra.h index 5d7d84e7..515abc85 100644 --- a/src/intra.h +++ b/src/intra.h @@ -134,6 +134,7 @@ void uvg_intra_predict( const encoder_state_t* const state, uvg_intra_references* const refs, const cu_loc_t* const cu_loc, + const cu_loc_t* const pu_loc, const color_t color, uvg_pixel* dst, const intra_search_data_t* data, @@ -168,7 +169,6 @@ uint8_t uvg_get_mip_flag_context( int8_t uvg_wide_angle_correction( int_fast8_t mode, - const bool is_isp, const int log2_width, const int log2_height, const bool account_for_dc_planar); diff --git a/src/search.c b/src/search.c index c5480a2e..9c26d160 100644 --- a/src/search.c +++ b/src/search.c @@ -741,7 +741,7 @@ static double cu_rd_cost_tr_split_accurate( if(is_local_sep_tree || tree_type == UVG_LUMA_T) { - if (uvg_is_lfnst_allowed(state, tr_cu, is_local_sep_tree ? UVG_LUMA_T : tree_type, COLOR_Y, cu_loc)) { + if (uvg_is_lfnst_allowed(state, tr_cu, is_local_sep_tree ? UVG_LUMA_T : tree_type, COLOR_Y, cu_loc, lcu)) { const int lfnst_idx = tr_cu->lfnst_idx; CABAC_FBITS_UPDATE( cabac, @@ -814,7 +814,7 @@ static double cu_rd_cost_tr_split_accurate( } const bool is_chroma_tree = is_local_sep_tree || tree_type == UVG_CHROMA_T; - if (uvg_is_lfnst_allowed(state, tr_cu, is_local_sep_tree ? UVG_CHROMA_T : tree_type, is_chroma_tree ? COLOR_UV : COLOR_Y, is_chroma_tree ? cu_loc : chroma_loc)) { + if (uvg_is_lfnst_allowed(state, tr_cu, is_local_sep_tree ? UVG_CHROMA_T : tree_type, is_chroma_tree ? COLOR_UV : COLOR_Y, is_chroma_tree ? cu_loc : chroma_loc, lcu)) { const int lfnst_idx = is_chroma_tree ? tr_cu->cr_lfnst_idx : tr_cu->lfnst_idx; CABAC_FBITS_UPDATE( cabac, @@ -1151,7 +1151,7 @@ static double search_cu( uvg_intra_recon_cu(state, &intra_search, chroma_loc, &intra_search.pred_cu, lcu, - tree_type, + is_separate_tree ? UVG_CHROMA_T : tree_type, false, true); if(tree_type != UVG_CHROMA_T) { @@ -1224,7 +1224,7 @@ static double search_cu( uvg_intra_recon_cu(state, &intra_search, chroma_loc, cur_cu, lcu, - tree_type, + UVG_CHROMA_T, false, true); } else { diff --git a/src/search_intra.c b/src/search_intra.c index 2856a7d4..557dff4e 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -660,7 +660,7 @@ static int search_intra_chroma_rough( for (int i = 0; i < modes_count; ++i) { const int8_t mode_chroma = chroma_data[i].pred_cu.intra.mode_chroma; if (mode_chroma == luma_mode || mode_chroma == 0 || mode_chroma >= 81) continue; - uvg_intra_predict(state, &refs_u, &loc, COLOR_U, pred, &chroma_data[i], lcu, tree_type); + uvg_intra_predict(state, &refs_u, cu_loc, &loc, COLOR_U, pred, &chroma_data[i], lcu, tree_type); //costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width); switch (width) { case 4: chroma_data[i].cost += uvg_satd_4x4(pred, orig_block); @@ -679,7 +679,7 @@ static int search_intra_chroma_rough( for (int i = 0; i < modes_count; ++i) { const int8_t mode_chroma = chroma_data[i].pred_cu.intra.mode_chroma; if (mode_chroma == luma_mode || mode_chroma == 0 || mode_chroma >= 81) continue; - uvg_intra_predict(state, &refs_v, &loc, COLOR_V, pred, &chroma_data[i], lcu, tree_type); + uvg_intra_predict(state, &refs_v, cu_loc, &loc, COLOR_V, pred, &chroma_data[i], lcu, tree_type); //costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width); switch (width) { case 4: chroma_data[i].cost += uvg_satd_4x4(pred, orig_block); @@ -1026,9 +1026,9 @@ static uint8_t search_intra_rough( int offset = 1 << state->encoder_control->cfg.intra_rough_search_levels; search_proxy.pred_cu.intra.mode = 0; - uvg_intra_predict(state, refs, cu_loc, COLOR_Y, preds[0], &search_proxy, NULL, UVG_LUMA_T); + uvg_intra_predict(state, refs, cu_loc, cu_loc, COLOR_Y, preds[0], &search_proxy, NULL, UVG_LUMA_T); search_proxy.pred_cu.intra.mode = 1; - uvg_intra_predict(state, refs, cu_loc, COLOR_Y, preds[1], &search_proxy, NULL, UVG_LUMA_T); + uvg_intra_predict(state, refs, cu_loc, cu_loc, COLOR_Y, preds[1], &search_proxy, NULL, UVG_LUMA_T); get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, height, costs); mode_checked[0] = true; mode_checked[1] = true; @@ -1078,7 +1078,7 @@ static uint8_t search_intra_rough( for (int i = 0; i < PARALLEL_BLKS; ++i) { if (mode + i * offset <= 66) { search_proxy.pred_cu.intra.mode = mode + i*offset; - uvg_intra_predict(state, refs, cu_loc, COLOR_Y, preds[i], &search_proxy, NULL, UVG_LUMA_T); + uvg_intra_predict(state, refs, cu_loc, cu_loc, COLOR_Y, preds[i], &search_proxy, NULL, UVG_LUMA_T); } } @@ -1150,7 +1150,7 @@ static uint8_t search_intra_rough( for (int block = 0; block < PARALLEL_BLKS; ++block) { search_proxy.pred_cu.intra.mode = modes_to_check[block + i]; - uvg_intra_predict(state, refs, cu_loc, COLOR_Y, preds[block], &search_proxy, NULL, UVG_LUMA_T); + uvg_intra_predict(state, refs, cu_loc, cu_loc, COLOR_Y, preds[block], &search_proxy, NULL, UVG_LUMA_T); } @@ -1241,7 +1241,7 @@ static void get_rough_cost_for_2n_modes( double bits[PARALLEL_BLKS] = { 0 }; for(int mode = 0; mode < num_modes; mode += PARALLEL_BLKS) { for (int i = 0; i < PARALLEL_BLKS; ++i) { - uvg_intra_predict(state, &refs[search_data[mode + i].pred_cu.intra.multi_ref_idx], cu_loc, COLOR_Y, preds[i], &search_data[mode + i], NULL, UVG_LUMA_T); + uvg_intra_predict(state, &refs[search_data[mode + i].pred_cu.intra.multi_ref_idx], cu_loc, cu_loc, COLOR_Y, preds[i], &search_data[mode + i], NULL, UVG_LUMA_T); } get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, height, costs_out); @@ -1482,6 +1482,7 @@ int8_t uvg_search_intra_chroma_rdo( state, &refs[COLOR_U - 1], cu_loc, + cu_loc, COLOR_U, u_pred, &chroma_data[mode_i], @@ -1491,6 +1492,7 @@ int8_t uvg_search_intra_chroma_rdo( state, &refs[COLOR_V - 1], cu_loc, + cu_loc, COLOR_V, v_pred, &chroma_data[mode_i], diff --git a/src/strategies/avx2/intra-avx2.c b/src/strategies/avx2/intra-avx2.c index 1d3c117f..838bad91 100644 --- a/src/strategies/avx2/intra-avx2.c +++ b/src/strategies/avx2/intra-avx2.c @@ -60,7 +60,8 @@ static void uvg_angular_pred_avx2( const uvg_pixel *const in_ref_left, uvg_pixel *const dst, const uint8_t multi_ref_idx, - const uint8_t isp_mode) + const uint8_t isp_mode, + const int cu_dim) { // ISP_TODO: non-square block implementation, height is passed but not used const int width = channel_type == COLOR_Y ? cu_loc->width : cu_loc->chroma_width; diff --git a/src/strategies/generic/dct-generic.c b/src/strategies/generic/dct-generic.c index d5fdb88e..fec783b6 100644 --- a/src/strategies/generic/dct-generic.c +++ b/src/strategies/generic/dct-generic.c @@ -2605,9 +2605,14 @@ static void mts_dct_generic( int16_t tmp[32 * 32]; const int32_t shift_1st = log2_width_minus1 + bitdepth - 8; const int32_t shift_2nd = log2_height_minus1 + 7; - - dct_hor(input, tmp, shift_1st, height, 0, skip_width); - dct_ver(tmp, output, shift_2nd, width, skip_width, skip_height); + if (height == 1) { + dct_hor(input, output, shift_1st, height, 0, skip_width); + } else if (width == 1) { + dct_ver(input, output, shift_2nd, width, 0, skip_height); + } else { + dct_hor(input, tmp, shift_1st, height, 0, skip_width); + dct_ver(tmp, output, shift_2nd, width, skip_width, skip_height); + } } } @@ -2660,8 +2665,14 @@ static void mts_idct_generic( const int32_t shift_1st = transform_matrix_shift + 1; const int32_t shift_2nd = (transform_matrix_shift + max_log2_tr_dynamic_range - 1) - bitdepth; - idct_ver(input, tmp, shift_1st, width, skip_width, skip_height); - idct_hor(tmp, output, shift_2nd, height, 0, skip_width); + if (height == 1) { + idct_hor(input, output, shift_1st, height, 0, skip_width); + } else if (width == 1) { + idct_ver(input, output, shift_2nd, width, 0, skip_height); + } else { + idct_ver(input, tmp, shift_1st, width, skip_width, skip_height); + idct_hor(tmp, output, shift_2nd, height, 0, skip_width); + } } } diff --git a/src/strategies/generic/intra-generic.c b/src/strategies/generic/intra-generic.c index b7ab7e94..e00ac48a 100644 --- a/src/strategies/generic/intra-generic.c +++ b/src/strategies/generic/intra-generic.c @@ -59,7 +59,8 @@ static void uvg_angular_pred_generic( const uvg_pixel *const in_ref_left, uvg_pixel *const dst, const uint8_t multi_ref_idx, - const uint8_t isp_mode) + const uint8_t isp_mode, + const int cu_dim) { int width = channel_type == COLOR_Y ? cu_loc->width : cu_loc->chroma_width; int height = channel_type == COLOR_Y ? cu_loc->height : cu_loc->chroma_height; @@ -141,10 +142,9 @@ static void uvg_angular_pred_generic( // Pointer for the other reference. const uvg_pixel *ref_side; uvg_pixel* work = width == height || vertical_mode ? dst : temp_dst; - - const int cu_dim = MAX(width, height); - const int top_ref_length = isp_mode ? width + cu_dim : width << 1; - const int left_ref_length = isp_mode ? height + cu_dim : height << 1; + + const int top_ref_length = isp_mode == ISP_MODE_VER ? width + cu_dim : width << 1; + const int left_ref_length = isp_mode == ISP_MODE_HOR ? height + cu_dim : height << 1; // Set ref_main and ref_side such that, when indexed with 0, they point to // index 0 in block coordinates. @@ -338,7 +338,7 @@ static void uvg_intra_pred_planar_generic( const int final_shift = 1 + log2_width + log2_height; // If ISP is enabled log_dim 1 is possible (limit was previously 2) - assert((log2_width >= 1 && log2_width <= 5) && (log2_height >= 1 && log2_height <= 5)); + assert((log2_width >= 2 && log2_width <= 5) && log2_height <= 5); const uvg_pixel top_right = ref_top[width + 1]; const uvg_pixel bottom_left = ref_left[height + 1]; diff --git a/src/strategies/generic/quant-generic.c b/src/strategies/generic/quant-generic.c index 8c5649dc..8d2a85da 100644 --- a/src/strategies/generic/quant-generic.c +++ b/src/strategies/generic/quant-generic.c @@ -313,15 +313,16 @@ int uvg_quant_cbcr_residual_generic( uvg_transform2d(state->encoder_control, combined_residual, coeff, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, cur_cu); - if(cur_cu->cr_lfnst_idx) { - uvg_fwd_lfnst(cur_cu, width, height, COLOR_UV, cur_cu->cr_lfnst_idx, coeff, tree_type, state->collocated_luma_mode); + uint8_t lfnst_idx = tree_type == UVG_CHROMA_T ? cur_cu->cr_lfnst_idx : cur_cu->lfnst_idx; + if(lfnst_idx) { + uvg_fwd_lfnst(cur_cu, width, height, COLOR_UV, lfnst_idx, coeff, tree_type, state->collocated_luma_mode); } if (state->encoder_control->cfg.rdoq_enable && (width > 4 || !state->encoder_control->cfg.rdoq_skip)) { uvg_rdoq(state, coeff, coeff_out, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, - scan_order, cur_cu->type, cur_cu->cbf, cur_cu->cr_lfnst_idx); + scan_order, cur_cu->type, cur_cu->cbf, lfnst_idx); } else if (state->encoder_control->cfg.rdoq_enable && false) { uvg_ts_rdoq(state, coeff, coeff_out, width, height, cur_cu->joint_cb_cr == 2 ? COLOR_V : COLOR_U, @@ -329,7 +330,7 @@ int uvg_quant_cbcr_residual_generic( } else { uvg_quant(state, coeff, coeff_out, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, - scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false, cur_cu->cr_lfnst_idx); + scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false, lfnst_idx); } int8_t has_coeffs = 0; @@ -348,8 +349,8 @@ int uvg_quant_cbcr_residual_generic( // Get quantized residual. (coeff_out -> coeff -> residual) uvg_dequant(state, coeff_out, coeff, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false); - if (cur_cu->cr_lfnst_idx) { - uvg_inv_lfnst(cur_cu, width, height, COLOR_UV, cur_cu->cr_lfnst_idx, coeff, tree_type, state->collocated_luma_mode); + if (lfnst_idx) { + uvg_inv_lfnst(cur_cu, width, height, COLOR_UV, lfnst_idx, coeff, tree_type, state->collocated_luma_mode); } uvg_itransform2d(state->encoder_control, combined_residual, coeff, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, cur_cu); @@ -487,7 +488,7 @@ int uvg_quantize_residual_generic(encoder_state_t *const state, uvg_transform2d(state->encoder_control, residual, coeff, width, height, color, cur_cu); } - const uint8_t lfnst_index = color == COLOR_Y ? cur_cu->lfnst_idx : cur_cu->cr_lfnst_idx; + const uint8_t lfnst_index = tree_type != UVG_CHROMA_T || color == COLOR_Y ? cur_cu->lfnst_idx : cur_cu->cr_lfnst_idx; if (state->encoder_control->cfg.lfnst && cur_cu->type == CU_INTRA) { // Forward low frequency non-separable transform diff --git a/src/strategies/strategies-intra.h b/src/strategies/strategies-intra.h index ce008d01..52f5e519 100644 --- a/src/strategies/strategies-intra.h +++ b/src/strategies/strategies-intra.h @@ -52,7 +52,8 @@ typedef void (angular_pred_func)( const uvg_pixel *const in_ref_left, uvg_pixel *const dst, const uint8_t multi_ref_idx, - const uint8_t isp_mode); + const uint8_t isp_mode, + const int cu_dim); typedef void (intra_pred_planar_func)( const cu_loc_t* const cu_loc, diff --git a/src/transform.c b/src/transform.c index 54ec2ecd..4d953454 100644 --- a/src/transform.c +++ b/src/transform.c @@ -571,7 +571,7 @@ void uvg_chroma_transform_search( SCAN_DIAG, &u_has_coeffs, &v_has_coeffs, - pred_cu->cr_lfnst_idx); + tree_type == UVG_CHROMA_T ? pred_cu->cr_lfnst_idx : pred_cu->lfnst_idx); if(pred_cu->cr_lfnst_idx !=0 && !u_has_coeffs && !v_has_coeffs) continue; if(pred_cu->type == CU_INTRA && transforms[i] != CHROMA_TS && tree_type == UVG_CHROMA_T) { @@ -720,7 +720,7 @@ void uvg_chroma_transform_search( COEFF_ORDER_LINEAR); } if((depth == 4 || tree_type == UVG_CHROMA_T) && state->encoder_control->cfg.lfnst && 0) { - if(uvg_is_lfnst_allowed(state, pred_cu, UVG_CHROMA_T, COLOR_UV, cu_loc)) { + if(uvg_is_lfnst_allowed(state, pred_cu, UVG_CHROMA_T, COLOR_UV, cu_loc, lcu)) { const int lfnst_idx = pred_cu->cr_lfnst_idx; CABAC_FBITS_UPDATE( &state->search_cabac, @@ -873,7 +873,7 @@ void uvg_fwd_lfnst( const int scan_order = SCAN_DIAG; - if (lfnst_index && !mts_skip) + if (lfnst_index && !mts_skip && (color == COLOR_Y || is_separate_tree)) { assert(log2_width != -1 && "LFNST: invalid block width."); const bool whge3 = width >= 8 && height >= 8; @@ -887,7 +887,12 @@ void uvg_fwd_lfnst( } assert(intra_mode < NUM_INTRA_MODE && "LFNST: Invalid intra mode."); assert(lfnst_index < 3 && "LFNST: Invalid LFNST index. Must be in [0, 2]"); - int32_t wide_adjusted_mode = uvg_wide_angle_correction(intra_mode, cur_cu->intra.isp_mode != 0, log2_width, log2_height, true); + int32_t wide_adjusted_mode = uvg_wide_angle_correction( + intra_mode, + color == COLOR_Y ? cur_cu->log2_width : log2_width, + color == COLOR_Y ? cur_cu->log2_height : log2_height, + true + ); // Transform wide angle mode to intra mode intra_mode = get_lfnst_intra_mode(wide_adjusted_mode); @@ -1007,7 +1012,7 @@ void uvg_inv_lfnst( bool is_mip = block_is_mip(cur_cu, color, is_separate_tree); const int scan_order = SCAN_DIAG; - if (lfnst_index && !mts_skip) { + if (lfnst_index && !mts_skip && (color == COLOR_Y || is_separate_tree)) { const bool whge3 = width >= 8 && height >= 8; const uint32_t* scan = whge3 ? uvg_coef_top_left_diag_scan_8x8[log2_width] : uvg_g_sig_last_scan[scan_order][log2_width - 1]; @@ -1019,7 +1024,12 @@ void uvg_inv_lfnst( } assert(intra_mode < NUM_INTRA_MODE && "LFNST: Invalid intra mode."); assert(lfnst_index < 3 && "LFNST: Invalid LFNST index. Must be in [0, 2]"); - int32_t wide_adjusted_mode = uvg_wide_angle_correction(intra_mode, cur_cu->intra.isp_mode != 0, log2_width, log2_height, true); + int32_t wide_adjusted_mode = uvg_wide_angle_correction( + intra_mode, + color == COLOR_Y ? cur_cu->log2_width : log2_width, + color == COLOR_Y ? cur_cu->log2_height : log2_height, + true + ); intra_mode = get_lfnst_intra_mode(wide_adjusted_mode); @@ -1386,7 +1396,8 @@ void uvg_quantize_lcu_residual( // Tell clang-analyzer what is up. For some reason it can't figure out from // asserting just depth. // Width 2 is possible with ISP blocks // ISP_TODO: no, they actually are not - assert(width == 2 || + assert(width == 1 || + width == 2 || width == 4 || width == 8 || width == 16 ||