diff --git a/src/transform.c b/src/transform.c
index 6a9655a4..79164b71 100644
--- a/src/transform.c
+++ b/src/transform.c
@@ -274,7 +274,7 @@ void kvz_fwd_lfnst(const cu_info_t* const cur_cu,
   bool mts_skip = false; // LFNST_TODO: get proper mts skip value
   bool is_cclm_mode = (intra_mode >= 67 && intra_mode <= 69);
 
-  bool is_mip = false; // LFNST_TODO: get mip flag after mip is meged to master
+  bool is_mip = false; // LFNST_TODO: get mip flag after mip is merged to master
   bool is_wide_angle = false; // TODO: get wide angle mode when implemented
 
   // TODO: add check if separate tree structure. Original vtm check: (tu.cu->isSepTree() ? true : isLuma(compID))
@@ -367,6 +367,130 @@ void kvz_fwd_lfnst(const cu_info_t* const cur_cu,
   }
 }
 
+void kvz_inv_lfnst_NxN(coeff_t *src, coeff_t *dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zero_out_size, const int max_log2_tr_dyn_range)
+{
+  assert(index < 3); // Must hold before index is used to select the LFNST kernel below
+  const coeff_t output_min = -(1 << max_log2_tr_dyn_range);
+  const coeff_t output_max = (1 << max_log2_tr_dyn_range) - 1;
+  const int8_t *tr_mat = (size > 4) ? lfnst_8x8[mode][index][0] : lfnst_4x4[mode][index][0];
+  const int tr_size = (size > 4) ? 48 : 16;
+  int32_t resi; // Full-width accumulator: a 16-bit coeff_t would overflow when summing the products
+  coeff_t *out = dst;
+
+  for (int j = 0; j < tr_size; j++)
+  {
+    resi = 0;
+    const int8_t* tr_mat_tmp = tr_mat;
+    coeff_t *src_ptr = src;
+    for (int i = 0; i < zero_out_size; i++)
+    {
+      resi += *src_ptr++ * *tr_mat_tmp;
+      tr_mat_tmp += tr_size;
+    }
+    *out++ = CLIP(output_min, output_max, (resi + 64) >> 7);
+    tr_mat++;
+  }
+}
+
+void kvz_inv_lfnst(const cu_info_t *cur_cu,
+                   const int width, const int height,
+                   const uint8_t color,
+                   const uint16_t lfnst_idx,
+                   coeff_t *coeffs)
+{
+  // In VTM, max log2 dynamic range is something in range [15, 20] depending on whether extended precision processing is enabled
+  // Such is not yet present in uvg266 so use 15 for now
+  const int max_log2_dyn_range = 15;
+  const uint32_t lfnst_index = lfnst_idx;
+  int8_t intra_mode = (color == COLOR_Y) ? cur_cu->intra.mode : cur_cu->intra.mode_chroma;
+  bool mts_skip = false; // LFNST_TODO: get proper mts skip value
+  bool is_cclm_mode = (intra_mode >= 67 && intra_mode <= 69);
+
+  bool is_mip = false; // LFNST_TODO: get mip flag after mip is merged to master
+  bool is_wide_angle = false; // TODO: get wide angle mode when implemented
+
+  if (lfnst_index && !mts_skip && color == COLOR_Y) {
+    const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2;
+    const bool whge3 = width >= 8 && height >= 8;
+    const uint32_t* scan = whge3 ? coef_top_left_diag_scan_8x8[log2_block_size - 1] : g_sig_last_scan_cg[log2_block_size - 1][SCAN_DIAG];
+
+    if (is_cclm_mode) {
+      intra_mode = cur_cu->intra.mode;
+    }
+    if (is_mip) {
+      intra_mode = 0; // Set to planar mode
+    }
+    assert(intra_mode < NUM_INTRA_MODE && "LFNST: Invalid intra mode.");
+
+    if (lfnst_index < 3) {
+      if (is_wide_angle) {
+        // Transform wide angle mode to intra mode
+        intra_mode = intra_mode; // TODO: wide angle modes not implemented yet. Do nothing.
+      }
+
+      bool transpose_flag = get_transpose_flag(intra_mode);
+      const int sb_size = whge3 ? 8 : 4;
+      bool tu_4x4_flag = (width == 4 && height == 4);
+      bool tu_8x8_flag = (width == 8 && height == 8);
+      coeff_t tmp_in_matrix[48];
+      coeff_t tmp_out_matrix[48];
+      coeff_t *lfnst_tmp;
+      coeff_t *coeff_tmp;
+      int y;
+      lfnst_tmp = tmp_in_matrix; // inverse spectral rearrangement
+      coeff_tmp = coeffs;
+      coeff_t *dst = lfnst_tmp;
+
+      const uint32_t *scan_ptr = scan;
+      for (y = 0; y < 16; y++) {
+        *dst++ = coeff_tmp[*scan_ptr];
+        scan_ptr++;
+      }
+
+      kvz_inv_lfnst_NxN(tmp_in_matrix, tmp_out_matrix, lfnst_lut[intra_mode], lfnst_index - 1, sb_size,
+                        (tu_4x4_flag || tu_8x8_flag) ? 8 : 16, max_log2_dyn_range);
+      lfnst_tmp = tmp_out_matrix; // inverse low frequency non-separable transform
+
+      if (transpose_flag) {
+        if (sb_size == 4) {
+          for (y = 0; y < 4; y++) {
+            coeff_tmp[0] = lfnst_tmp[0];
+            coeff_tmp[1] = lfnst_tmp[4];
+            coeff_tmp[2] = lfnst_tmp[8];
+            coeff_tmp[3] = lfnst_tmp[12];
+            lfnst_tmp++;
+            coeff_tmp += width;
+          }
+        }
+        else { // ( sb_size == 8 )
+          for (y = 0; y < 8; y++) {
+            coeff_tmp[0] = lfnst_tmp[0];
+            coeff_tmp[1] = lfnst_tmp[8];
+            coeff_tmp[2] = lfnst_tmp[16];
+            coeff_tmp[3] = lfnst_tmp[24];
+            if (y < 4) {
+              coeff_tmp[4] = lfnst_tmp[32];
+              coeff_tmp[5] = lfnst_tmp[36];
+              coeff_tmp[6] = lfnst_tmp[40];
+              coeff_tmp[7] = lfnst_tmp[44];
+            }
+            lfnst_tmp++;
+            coeff_tmp += width;
+          }
+        }
+      }
+      else {
+        for (y = 0; y < sb_size; y++) {
+          uint32_t uiStride = (y < 4) ? sb_size : 4;
+          memcpy(coeff_tmp, lfnst_tmp, uiStride * sizeof(coeff_t));
+          lfnst_tmp += uiStride;
+          coeff_tmp += width;
+        }
+      }
+    }
+  }
+}
+
 /**
  * \brief Like uvg_quantize_residual except that this uses trskip if that is better.
  *
diff --git a/src/transform.h b/src/transform.h
index 1d1c0045..92cefc41 100644
--- a/src/transform.h
+++ b/src/transform.h
@@ -85,4 +85,10 @@ void kvz_fwd_lfnst(const cu_info_t* const cur_cu,
                    const uint16_t lfnst_idx,
                    coeff_t *coeffs);
 
+void kvz_inv_lfnst(const cu_info_t* cur_cu,
+                   const int width, const int height,
+                   const uint8_t color,
+                   const uint16_t lfnst_idx,
+                   coeff_t* coeffs);
+
 #endif