diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c
index a1b8a075..d5a44728 100644
--- a/src/encode_coding_tree.c
+++ b/src/encode_coding_tree.c
@@ -101,7 +101,7 @@ static void encode_mts_idx(encoder_state_t * const state,
   }
 }
 
-// TODO: move these defines to a proper place when ISP is implemented
+// ISP_TODO: move these defines to a proper place when ISP is implemented
 // As of now, these are only needed in lfnst checks
 #define NOT_INTRA_SUBPARTITIONS 0
 #define HOR_INTRA_SUBPARTITIONS 1
@@ -111,12 +111,9 @@ static void encode_mts_idx(encoder_state_t * const state,
 #define TU_1D_HOR_SPLIT 8
 #define TU_1D_VER_SPLIT 9
 
-// TODO: check if these are defined somewhere else
 #define MIN_TB_SIZE_X 4
 #define MIN_TB_SIZE_Y 4
 
-#define MAX_TB_SIZE 32
-
 static int get_isp_split_dim(const int width, const int height, const int isp_split_type)
 {
   bool divide_tu_in_rows = isp_split_type == TU_1D_HOR_SPLIT;
@@ -164,20 +161,21 @@ static bool is_lfnst_allowed(encoder_state_t* const state, const cu_info_t* cons
   const int width, const int height) {
   if (state->encoder_control->cfg.lfnst && pred_cu->type == CU_INTRA) {
-    const int isp_mode = 0; // LFNST_TODO: assign proper ISP mode when ISP is implemented
+    const int isp_mode = 0; // ISP_TODO: assign proper ISP mode when ISP is implemented
     const int isp_split_type = 0;
     const int chroma_width = width >> 1;
     const int chroma_height = height >> 1;
     const int cu_width = color == COLOR_Y ? width : chroma_width;
     const int cu_height = color == COLOR_Y ? height : chroma_height;
     bool can_use_lfnst_with_mip = (width >= 16 && height >= 16);
-    bool is_sep_tree = false; // LFNST_TODO: if/when separate tree structure is implemented, add proper boolean here
+    const int depth = pred_cu->depth;
+    bool is_sep_tree = depth == 4; // TODO: if/when separate tree structure is implemented, add proper boolean here
     bool mip_flag = pred_cu->type == CU_INTRA ? pred_cu->intra.mip_flag : false;
 
     if ((isp_mode && !can_use_lfnst_with_isp(width, height, isp_split_type, color)) ||
         (pred_cu->type == CU_INTRA && mip_flag && !can_use_lfnst_with_mip) ||
         (is_sep_tree && color != COLOR_Y && MIN(chroma_width, chroma_height) < 4) ||
-        (cu_width > MAX_TB_SIZE || cu_height > MAX_TB_SIZE)) {
+        (cu_width > TR_MAX_WIDTH || cu_height > TR_MAX_WIDTH)) {
       return false;
     }
 
@@ -196,7 +194,7 @@ static bool encode_lfnst_idx(encoder_state_t * const state, cabac_data_t * const
   if (is_lfnst_allowed(state, pred_cu, color, width, height)) {
     // Getting separate tree bool from block size is a temporary fix until a proper dual tree check is possible (there is no dual tree structure at time of writing this).
     // VTM seems to force explicit dual tree structure for small 4x4 blocks
-    bool is_separate_tree = depth == 4; // LFNST_TODO: if/when separate/dual tree structure is implemented, get proper value for this
+    bool is_separate_tree = depth == 4; // TODO: if/when separate/dual tree structure is implemented, get proper value for this
     bool luma_flag = is_separate_tree ? (color == COLOR_Y ? true: false) : true;
     bool chroma_flag = is_separate_tree ? (color != COLOR_Y ? true : false) : true;
     bool non_zero_coeff_non_ts_corner_8x8 = (luma_flag && pred_cu->violates_lfnst_constrained[0]) || (chroma_flag && pred_cu->violates_lfnst_constrained[1]);
@@ -211,7 +209,7 @@ static bool encode_lfnst_idx(encoder_state_t * const state, cabac_data_t * const
     const int tu_row_length = 1 << (tr_depth - depth);
     const int tu_width = cu_width >> (tr_depth - depth);
     const int tu_height = tu_width; // TODO: height for non-square blocks
-    const int isp_mode = 0; // LFNST_TODO:get isp_mode from cu when ISP is implemented
+    const int isp_mode = 0; // ISP_TODO:get isp_mode from cu when ISP is implemented
 
     // TODO: chroma transform skip
     if (color == COLOR_Y) {
diff --git a/src/search_intra.c b/src/search_intra.c
index 8702d24d..adaba818 100644
--- a/src/search_intra.c
+++ b/src/search_intra.c
@@ -383,10 +383,8 @@ static double search_intra_trdepth(
   pred_cu->intra.mode_chroma = -1;
   pred_cu->joint_cb_cr = 4;
 
-  const int max_tb_size = 32; // LFNST_TODO: use define instead for max transform block size
+  const int max_tb_size = TR_MAX_WIDTH;
   // LFNST search params
-  // bool is_separate_tree = (width == 4 && height == 4) ? true : false; // LFNST_TODO: if/when separate/dual tree structure is implemented, get proper value for this
-  // const int max_lfnst_idx = (is_separate_tree /*&& color != COLOR_Y*/ && (width < 8 || height < 8)) || (width > max_tb_size || height > max_tb_size) ? 0 : 2;
   const int max_lfnst_idx = width > max_tb_size || height > max_tb_size ? 0 : 2;
 
   int start_idx = 0;
@@ -449,8 +447,9 @@ static double search_intra_trdepth(
           best_rd_cost = rd_cost;
           best_lfnst_idx = pred_cu->lfnst_idx;
           best_tr_idx = pred_cu->tr_idx;
+          if (best_tr_idx == MTS_SKIP) break; // Very unlikely that further search is necessary if skip seems best option
         }
-      }
+      } // end mts index loop (tr_idx)
       if (reconstruct_chroma) {
         int8_t luma_mode = pred_cu->intra.mode;
         pred_cu->intra.mode = -1;
@@ -481,7 +480,8 @@ static double search_intra_trdepth(
           }
         }
       }
-    }
+      if (best_tr_idx == MTS_SKIP) break; // Very unlikely that further search is necessary if skip seems best option
+    } // end lfnst_index loop
 
     pred_cu->tr_skip = best_tr_idx == MTS_SKIP;
     pred_cu->tr_idx = best_tr_idx;
diff --git a/src/transform.c b/src/transform.c
index a2dbbae2..befa4aee 100644
--- a/src/transform.c
+++ b/src/transform.c
@@ -253,8 +253,22 @@ void kvz_fwd_lfnst_NxN(coeff_t *src, coeff_t *dst, const int8_t mode, const int8
     tr_mat += tr_size;
   }
 
-  // LFNST_TODO: implement fill. Use fill macros present in Kvazaar. Use FILL with switch case if there are only few possible fill cases, it's faster
-  FILL_ARRAY(out, 0, tr_size - zero_out_size);
+  // Possible tr_size values 16, 48. Possible zero_out_size values 8, 16
+  switch (tr_size - zero_out_size) {
+    case 0:
+      break;
+    case 8:
+      FILL_ARRAY(out, 0, 8);
+      break;
+    case 32:
+      FILL_ARRAY(out, 0, 32);
+      break;
+    case 40:
+      FILL_ARRAY(out, 0, 40);
+      break;
+    default:
+      assert(false && "LFNST: This should never trip.");
+  }
 }
 
 static inline bool get_transpose_flag(const int8_t intra_mode)
@@ -272,21 +286,23 @@ void kvz_fwd_lfnst(const cu_info_t* const cur_cu,
   const uint16_t lfnst_index = lfnst_idx;
   int8_t intra_mode = (color == COLOR_Y) ? cur_cu->intra.mode : cur_cu->intra.mode_chroma;
   bool mts_skip = cur_cu->tr_skip;
-  // this should probably never trigger
-  bool is_separate_tree = color == COLOR_Y ? width == 4 && height == 4 : width == 2 && height == 2; // LFNST_TODO: proper dual tree check when that structure is implemented
+  const int depth = cur_cu->depth;
+  bool is_separate_tree = depth == 4; // TODO: proper dual tree check when that structure is implemented
   bool is_cclm_mode = (intra_mode >= 81 && intra_mode <= 83); // CCLM modes are in [81, 83]
   bool is_mip = cur_cu->type == CU_INTRA ? cur_cu->intra.mip_flag : false;
   bool is_wide_angle = false; // TODO: get wide angle mode when implemented
 
-  // LFNST_TODO: use kvz_get_scan_order to get scan mode instead of using SCAN_DIAG define.
+  const int cu_type = cur_cu->type;
+
+  const int scan_order = kvz_get_scan_order(cu_type, intra_mode, depth);
 
   if (lfnst_index && !mts_skip && (is_separate_tree || color == COLOR_Y)) {
     const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2;
     assert(log2_block_size != -1 && "LFNST: invalid block width.");
     const bool whge3 = width >= 8 && height >= 8;
-    const uint32_t* scan = whge3 ? kvz_coef_top_left_diag_scan_8x8[log2_block_size] : kvz_g_sig_last_scan[SCAN_DIAG][log2_block_size - 1];
+    const uint32_t* scan = whge3 ? kvz_coef_top_left_diag_scan_8x8[log2_block_size] : kvz_g_sig_last_scan[scan_order][log2_block_size - 1];
 
     if (is_cclm_mode) {
       intra_mode = cur_cu->intra.mode;
@@ -295,76 +311,75 @@ void kvz_fwd_lfnst(const cu_info_t* const cur_cu,
       intra_mode = 0; // Set to planar mode
     }
     assert(intra_mode < NUM_INTRA_MODE && "LFNST: Invalid intra mode.");
+    assert(lfnst_index < 3 && lfnst_index >= 0 && "LFNST: Invalid LFNST index. Must be in [0, 2]");
 
-    if (lfnst_index < 3) {
-      if (is_wide_angle) {
-        // Transform wide angle mode to intra mode
-        intra_mode = intra_mode; // TODO: wide angle modes not implemented yet. Do nothing.
-      }
+    if (is_wide_angle) {
+      // Transform wide angle mode to intra mode
+      intra_mode = intra_mode; // TODO: wide angle modes not implemented yet. Do nothing.
+    }
 
-      bool transpose = get_transpose_flag(intra_mode);
-      const int sb_size = whge3 ? 8 : 4;
-      bool tu_4x4 = (width == 4 && height == 4);
-      bool tu_8x8 = (width == 8 && height == 8);
+    bool transpose = get_transpose_flag(intra_mode);
+    const int sb_size = whge3 ? 8 : 4;
+    bool tu_4x4 = (width == 4 && height == 4);
+    bool tu_8x8 = (width == 8 && height == 8);
 
-      coeff_t tmp_in_matrix[48];
-      coeff_t tmp_out_matrix[48];
-      coeff_t *lfnst_tmp = tmp_in_matrix; // forward low frequency non-separable transform
+    coeff_t tmp_in_matrix[48];
+    coeff_t tmp_out_matrix[48];
+    coeff_t *lfnst_tmp = tmp_in_matrix; // forward low frequency non-separable transform
 
-      coeff_t *coeff_tmp = coeffs;
+    coeff_t *coeff_tmp = coeffs;
 
-      int y;
-      if (transpose) {
-        if (sb_size == 4) {
-          for (y = 0; y < 4; y++) {
-            lfnst_tmp[0] = coeff_tmp[0];
-            lfnst_tmp[4] = coeff_tmp[1];
-            lfnst_tmp[8] = coeff_tmp[2];
-            lfnst_tmp[12] = coeff_tmp[3];
-            lfnst_tmp++;
-            coeff_tmp += width;
-          }
-        }
-        else { // ( sb_size == 8 )
-          for (y = 0; y < 8; y++) {
-            lfnst_tmp[0] = coeff_tmp[0];
-            lfnst_tmp[8] = coeff_tmp[1];
-            lfnst_tmp[16] = coeff_tmp[2];
-            lfnst_tmp[24] = coeff_tmp[3];
-            if (y < 4) {
-              lfnst_tmp[32] = coeff_tmp[4];
-              lfnst_tmp[36] = coeff_tmp[5];
-              lfnst_tmp[40] = coeff_tmp[6];
-              lfnst_tmp[44] = coeff_tmp[7];
-            }
-            lfnst_tmp++;
-            coeff_tmp += width;
-          }
-        }
-      }
-      else {
-        for (y = 0; y < sb_size; y++) {
-          uint32_t stride = (y < 4) ? sb_size : 4;
-          memcpy(lfnst_tmp, coeff_tmp, stride * sizeof(coeff_t));
-          lfnst_tmp += stride;
+    int y;
+    if (transpose) {
+      if (sb_size == 4) {
+        for (y = 0; y < 4; y++) {
+          lfnst_tmp[0] = coeff_tmp[0];
+          lfnst_tmp[4] = coeff_tmp[1];
+          lfnst_tmp[8] = coeff_tmp[2];
+          lfnst_tmp[12] = coeff_tmp[3];
+          lfnst_tmp++;
           coeff_tmp += width;
         }
       }
-
-      kvz_fwd_lfnst_NxN(tmp_in_matrix, tmp_out_matrix, kvz_lfnst_lut[intra_mode], lfnst_index - 1, sb_size,
-        (tu_4x4 || tu_8x8) ? 8 : 16);
-
-      lfnst_tmp = tmp_out_matrix; // forward spectral rearrangement
-      coeff_tmp = coeffs;
-      int lfnst_coeff_num = (sb_size == 4) ? sb_size * sb_size : 48;
-
-      const uint32_t *scan_ptr = scan;
-
-      for (y = 0; y < lfnst_coeff_num; y++) {
-        coeff_tmp[*scan_ptr] = *lfnst_tmp++;
-        scan_ptr++;
+      else { // ( sb_size == 8 )
+        for (y = 0; y < 8; y++) {
+          lfnst_tmp[0] = coeff_tmp[0];
+          lfnst_tmp[8] = coeff_tmp[1];
+          lfnst_tmp[16] = coeff_tmp[2];
+          lfnst_tmp[24] = coeff_tmp[3];
+          if (y < 4) {
+            lfnst_tmp[32] = coeff_tmp[4];
+            lfnst_tmp[36] = coeff_tmp[5];
+            lfnst_tmp[40] = coeff_tmp[6];
+            lfnst_tmp[44] = coeff_tmp[7];
+          }
+          lfnst_tmp++;
+          coeff_tmp += width;
+        }
       }
     }
+    else {
+      for (y = 0; y < sb_size; y++) {
+        uint32_t stride = (y < 4) ? sb_size : 4;
+        memcpy(lfnst_tmp, coeff_tmp, stride * sizeof(coeff_t));
+        lfnst_tmp += stride;
+        coeff_tmp += width;
+      }
+    }
+
+    kvz_fwd_lfnst_NxN(tmp_in_matrix, tmp_out_matrix, kvz_lfnst_lut[intra_mode], lfnst_index - 1, sb_size,
+      (tu_4x4 || tu_8x8) ? 8 : 16);
+
+    lfnst_tmp = tmp_out_matrix; // forward spectral rearrangement
+    coeff_tmp = coeffs;
+    int lfnst_coeff_num = (sb_size == 4) ? sb_size * sb_size : 48;
+
+    const uint32_t *scan_ptr = scan;
+
+    for (y = 0; y < lfnst_coeff_num; y++) {
+      coeff_tmp[*scan_ptr] = *lfnst_tmp++;
+      scan_ptr++;
+    }
   }
 }
@@ -405,19 +420,21 @@ void kvz_inv_lfnst(const cu_info_t *cur_cu,
   const uint32_t lfnst_index = lfnst_idx;
   int8_t intra_mode = (color == COLOR_Y) ? cur_cu->intra.mode : cur_cu->intra.mode_chroma;
   bool mts_skip = cur_cu->tr_skip;
-  // this should probably never trigger
-  bool is_separate_tree = color == COLOR_Y ? width == 4 && height == 4 : width == 2 && height == 2; // LFNST_TODO: proper dual tree check when that structure is implemented
+  const int depth = cur_cu->depth;
+  bool is_separate_tree = depth == 4; // TODO: proper dual tree check when that structure is implemented
   bool is_cclm_mode = (intra_mode >= 81 && intra_mode <= 83); // CCLM modes are in [81, 83]
   bool is_mip = cur_cu->type == CU_INTRA ? cur_cu->intra.mip_flag : false;
   bool is_wide_angle = false; // TODO: get wide angle mode when implemented
 
-  // LFNST_TODO: use kvz_get_scan_order to get scan mode instead of using SCAN_DIAG define.
+  const int cu_type = cur_cu->type;
+  const int scan_order = kvz_get_scan_order(cu_type, intra_mode, depth);
+
   if (lfnst_index && !mts_skip && (is_separate_tree || color == COLOR_Y)) {
     const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2;
     const bool whge3 = width >= 8 && height >= 8;
-    const uint32_t* scan = whge3 ? kvz_coef_top_left_diag_scan_8x8[log2_block_size] : kvz_g_sig_last_scan[SCAN_DIAG][log2_block_size - 1];
+    const uint32_t* scan = whge3 ? kvz_coef_top_left_diag_scan_8x8[log2_block_size] : kvz_g_sig_last_scan[scan_order][log2_block_size - 1];
 
     if (is_cclm_mode) {
       intra_mode = cur_cu->intra.mode;
@@ -426,72 +443,71 @@ void kvz_inv_lfnst(const cu_info_t *cur_cu,
       intra_mode = 0; // Set to planar mode
     }
     assert(intra_mode < NUM_INTRA_MODE && "LFNST: Invalid intra mode.");
+    assert(lfnst_index < 3 && lfnst_index >= 0 && "LFNST: Invalid LFNST index. Must be in [0, 2]");
 
-    if (lfnst_index < 3) {
-      if (is_wide_angle) {
-        // Transform wide angle mode to intra mode
-        intra_mode = intra_mode; // LFNST_TODO: wide angle modes not implemented yet. Do nothing.
-      }
+    if (is_wide_angle) {
+      // Transform wide angle mode to intra mode
+      intra_mode = intra_mode; // TODO: wide angle modes not implemented yet. Do nothing.
+    }
 
-      bool transpose_flag = get_transpose_flag(intra_mode);
-      const int sb_size = whge3 ? 8 : 4;
-      bool tu_4x4_flag = (width == 4 && height == 4);
-      bool tu_8x8_flag = (width == 8 && height == 8);
-      coeff_t tmp_in_matrix[48];
-      coeff_t tmp_out_matrix[48];
-      coeff_t *lfnst_tmp;
-      coeff_t *coeff_tmp;
-      int y;
-      lfnst_tmp = tmp_in_matrix; // inverse spectral rearrangement
-      coeff_tmp = coeffs;
-      coeff_t *dst = lfnst_tmp;
+    bool transpose_flag = get_transpose_flag(intra_mode);
+    const int sb_size = whge3 ? 8 : 4;
+    bool tu_4x4_flag = (width == 4 && height == 4);
+    bool tu_8x8_flag = (width == 8 && height == 8);
+    coeff_t tmp_in_matrix[48];
+    coeff_t tmp_out_matrix[48];
+    coeff_t *lfnst_tmp;
+    coeff_t *coeff_tmp;
+    int y;
+    lfnst_tmp = tmp_in_matrix; // inverse spectral rearrangement
+    coeff_tmp = coeffs;
+    coeff_t *dst = lfnst_tmp;
 
-      const uint32_t *scan_ptr = scan;
-      for (y = 0; y < 16; y++) {
-        *dst++ = coeff_tmp[*scan_ptr];
-        scan_ptr++;
-      }
+    const uint32_t *scan_ptr = scan;
+    for (y = 0; y < 16; y++) {
+      *dst++ = coeff_tmp[*scan_ptr];
+      scan_ptr++;
+    }
 
-      kvz_inv_lfnst_NxN(tmp_in_matrix, tmp_out_matrix, kvz_lfnst_lut[intra_mode], lfnst_index - 1, sb_size,
-        (tu_4x4_flag || tu_8x8_flag) ? 8 : 16, max_log2_dyn_range);
-      lfnst_tmp = tmp_out_matrix; // inverse low frequency non-separale transform
+    kvz_inv_lfnst_NxN(tmp_in_matrix, tmp_out_matrix, kvz_lfnst_lut[intra_mode], lfnst_index - 1, sb_size,
+      (tu_4x4_flag || tu_8x8_flag) ? 8 : 16, max_log2_dyn_range);
+    lfnst_tmp = tmp_out_matrix; // inverse low frequency non-separale transform
 
-      if (transpose_flag) {
-        if (sb_size == 4) {
-          for (y = 0; y < 4; y++) {
-            coeff_tmp[0] = lfnst_tmp[0];
-            coeff_tmp[1] = lfnst_tmp[4];
-            coeff_tmp[2] = lfnst_tmp[8];
-            coeff_tmp[3] = lfnst_tmp[12];
-            lfnst_tmp++;
-            coeff_tmp += width;
-          }
-        }
-        else { // ( sb_size == 8 )
-          for (y = 0; y < 8; y++) {
-            coeff_tmp[0] = lfnst_tmp[0];
-            coeff_tmp[1] = lfnst_tmp[8];
-            coeff_tmp[2] = lfnst_tmp[16];
-            coeff_tmp[3] = lfnst_tmp[24];
-            if (y < 4) {
-              coeff_tmp[4] = lfnst_tmp[32];
-              coeff_tmp[5] = lfnst_tmp[36];
-              coeff_tmp[6] = lfnst_tmp[40];
-              coeff_tmp[7] = lfnst_tmp[44];
-            }
-            lfnst_tmp++;
-            coeff_tmp += width;
-          }
-        }
-      }
-      else {
-        for (y = 0; y < sb_size; y++) {
-          uint32_t uiStride = (y < 4) ? sb_size : 4;
-          memcpy(coeff_tmp, lfnst_tmp, uiStride * sizeof(coeff_t));
-          lfnst_tmp += uiStride;
+    if (transpose_flag) {
+      if (sb_size == 4) {
+        for (y = 0; y < 4; y++) {
+          coeff_tmp[0] = lfnst_tmp[0];
+          coeff_tmp[1] = lfnst_tmp[4];
+          coeff_tmp[2] = lfnst_tmp[8];
+          coeff_tmp[3] = lfnst_tmp[12];
+          lfnst_tmp++;
           coeff_tmp += width;
         }
       }
+      else { // ( sb_size == 8 )
+        for (y = 0; y < 8; y++) {
+          coeff_tmp[0] = lfnst_tmp[0];
+          coeff_tmp[1] = lfnst_tmp[8];
+          coeff_tmp[2] = lfnst_tmp[16];
+          coeff_tmp[3] = lfnst_tmp[24];
+          if (y < 4) {
+            coeff_tmp[4] = lfnst_tmp[32];
+            coeff_tmp[5] = lfnst_tmp[36];
+            coeff_tmp[6] = lfnst_tmp[40];
+            coeff_tmp[7] = lfnst_tmp[44];
+          }
+          lfnst_tmp++;
+          coeff_tmp += width;
+        }
+      }
+    }
+    else {
+      for (y = 0; y < sb_size; y++) {
+        uint32_t uiStride = (y < 4) ? sb_size : 4;
+        memcpy(coeff_tmp, lfnst_tmp, uiStride * sizeof(coeff_t));
+        lfnst_tmp += uiStride;
+        coeff_tmp += width;
+      }
     }
   }
 }