[mtt] Fix sqrt adjustment, cclm calculation on edges of CTU and waip for lfnst

This commit is contained in:
Joose Sainio 2022-11-21 12:37:55 +02:00 committed by Marko Viitanen
parent 5875dc1ef4
commit ab21c7e1d7
7 changed files with 77 additions and 42 deletions

View file

@ -532,7 +532,7 @@ static void predict_cclm(
const int ctu_size = tree_type == UVG_CHROMA_T ? LCU_WIDTH_C : LCU_WIDTH;
if (y0) {
if (y_scu == 0) available_above_right = MIN(MIN(width / 2, (64-x_scu - width * 2) / 2), (state->tile->frame->width - x0 - width* 2) / 2);
if (y_scu == 0) available_above_right = MIN(MIN(width / 2, (64-x_scu - width * 2) / 4), (state->tile->frame->width - x0 - width* 2) / 4);
for (; available_above_right < width / 2; available_above_right++) {
int x_extension = x_scu + width * 2 + 4 * available_above_right;
x_extension >>= tree_type == UVG_CHROMA_T;
@ -559,7 +559,7 @@ static void predict_cclm(
}
if(x0) {
if (x_scu == 0) available_left_below = MIN(MIN(height / 2, (64 - y_scu - height * 2) / 2), (state->tile->frame->height - y0 - height * 2) / 2);
if (x_scu == 0) available_left_below = MIN(MIN(height / 2, (64 - y_scu - height * 2) / 4), (state->tile->frame->height - y0 - height * 2) / 4);
for (; available_left_below < height / 2; available_left_below++) {
int y_extension = y_scu + height * 2 + 4 * available_left_below;
y_extension >>= tree_type == UVG_CHROMA_T;
@ -916,6 +916,24 @@ static void mip_predict(
}
/**
 * \brief Remap an angular intra mode to its wide-angle counterpart for
 *        non-square blocks.
 *
 * The mode is returned unchanged when the block uses ISP, when the block is
 * square (log2_width == log2_height), or when the mode is outside [2, 66].
 * Otherwise the log2 aspect ratio selects a threshold from a small table:
 *  - wider-than-tall blocks: modes below (2 + threshold) are raised by 65;
 *  - taller-than-wide blocks: modes above (66 - threshold) are lowered by 65,
 *    which can produce a negative mode number.
 *
 * \param mode         Intra prediction mode to correct.
 * \param is_isp       True when the block uses ISP; disables the correction.
 * \param log2_width   Base-2 logarithm of the block width.
 * \param log2_height  Base-2 logarithm of the block height.
 * \return The corrected mode, or \p mode itself when no correction applies.
 *
 * \note The threshold table has six entries, so |log2_width - log2_height|
 *       must not exceed 5.
 */
int8_t uvg_wide_angle_correction(int_fast8_t mode, const bool is_isp, const int log2_width, const int log2_height)
{
  // Number of modes replaced at each end of the range, indexed by the
  // absolute difference of the log2 dimensions.
  static const int mode_shift_by_ratio[] = { 0, 6, 10, 12, 14, 15 };

  // Nothing to do for ISP blocks or square blocks.
  if (is_isp || log2_width == log2_height) {
    return (int8_t)mode;
  }

  // Only modes in [2, 66] are subject to the remapping.
  if (mode < 2 || mode > 66) {
    return (int8_t)mode;
  }

  const int log2_ratio = abs(log2_width - log2_height);
  const int shift      = mode_shift_by_ratio[log2_ratio];

  if (log2_width > log2_height) {
    // Wide block: the lowest modes wrap around past mode 66.
    return (mode < 2 + shift) ? (int8_t)(mode + (66 - 1)) : (int8_t)mode;
  }

  // Tall block: the highest modes wrap around below mode 2.
  return (mode > 66 - shift) ? (int8_t)(mode - (66 - 1)) : (int8_t)mode;
}
static void intra_predict_regular(
const encoder_state_t* const state,
uvg_intra_references *refs,
@ -937,19 +955,10 @@ static void intra_predict_regular(
uint8_t isp = color == COLOR_Y ? isp_mode : 0;
// Wide angle correction
int8_t pred_mode = mode;
if (!is_isp && width != height) {
if (mode > 1 && mode <= 66) {
const int modeShift[] = { 0, 6, 10, 12, 14, 15 };
const int deltaSize = abs(log2_width - log2_height);
if (width > height && mode < 2 + modeShift[deltaSize]) {
pred_mode += (66 - 1);
}
else if (height > width && mode > 66 - modeShift[deltaSize]) {
pred_mode -= (66 - 1);
}
}
}
int8_t pred_mode = uvg_wide_angle_correction(mode,
is_isp,
log2_width,
log2_height);
const uvg_intra_ref *used_ref = &refs->ref;
if (cfg->intra_smoothing_disabled || color != COLOR_Y || mode == 1 || (width == 4 && height == 4) || multi_ref_index || isp_mode /*ISP_TODO: replace this fake ISP check*/) {

View file

@ -165,6 +165,12 @@ uint8_t uvg_get_mip_flag_context(
const lcu_t* lcu,
cu_array_t* const cu_a);
/**
 * \brief Apply wide-angle correction to an intra prediction mode.
 *
 * Returns \p mode unchanged when \p is_isp is set, when the two log2
 * dimensions are equal, or when \p mode is outside [2, 66]. Otherwise the
 * mode is shifted by +65 (wider-than-tall blocks, low modes) or by -65
 * (taller-than-wide blocks, high modes), depending on the aspect ratio.
 *
 * \param mode         Intra prediction mode to correct.
 * \param is_isp       True when the block uses ISP; disables the correction.
 * \param log2_width   Base-2 logarithm of the block width.
 * \param log2_height  Base-2 logarithm of the block height.
 * \return The corrected mode; may be negative or greater than 66.
 */
int8_t uvg_wide_angle_correction(
int_fast8_t mode,
const bool is_isp,
const int log2_width,
const int log2_height);
// ISP related defines
#define NUM_ISP_MODES 3
#define ISP_MODE_NO_ISP 0

View file

@ -1418,7 +1418,7 @@ void uvg_rdoq(
const uint32_t log2_block_width = uvg_g_convert_to_log2[width];
const uint32_t log2_block_height = uvg_g_convert_to_log2[height];
bool needs_block_size_trafo_scale = !false && ((log2_block_width + log2_block_height) % 2 == 1);
needs_block_size_trafo_scale |= 1; // Non log2 block size
needs_block_size_trafo_scale |= 0; // Non log2 block size
int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_block_width + log2_block_height) >> 1) + needs_block_size_trafo_scale; // Represents scaling through forward transform
uint16_t go_rice_param = 0;

View file

@ -1335,7 +1335,7 @@ static double search_cu(
// Recursively split all the way to max search depth.
if (can_split_cu) {
const int split_type = depth == 0 ? QT_SPLIT : BT_VER_SPLIT;
const int split_type = depth == 0 ? QT_SPLIT : TT_HOR_SPLIT;
const split_tree_t new_split = {
split_tree.split_tree | split_type << (split_tree.current_depth * 3),
split_tree.current_depth + 1,

View file

@ -387,7 +387,7 @@ void uvg_quant_avx2(const encoder_state_t * const state, const coeff_t * __restr
int32_t qp_scaled = uvg_get_scaled_qp(color, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]);
qp_scaled = transform_skip ? MAX(qp_scaled, 4 + 6 * MIN_QP_PRIME_TS) : qp_scaled;
bool needs_block_size_trafo_scale = !transform_skip && ((log2_tr_height + log2_tr_width) % 2 == 1);
needs_block_size_trafo_scale |= 1; // Non log2 block size
needs_block_size_trafo_scale |= 0; // Non log2 block size
const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)color;
const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled % 6];
@ -798,7 +798,7 @@ void uvg_dequant_avx2(const encoder_state_t * const state, coeff_t *q_coef, coef
const uint32_t log2_tr_height = uvg_g_convert_to_log2[height];
int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_width + log2_tr_height) >> 1);
bool needs_block_size_trafo_scale = !transform_skip && ((log2_tr_height + log2_tr_width) % 2 == 1);
needs_block_size_trafo_scale |= 1; // Non log2 block size// Represents scaling through forward transform
needs_block_size_trafo_scale |= 0; // Non log2 block size// Represents scaling through forward transform
int32_t qp_scaled = uvg_get_scaled_qp(color, state->qp, (encoder->bitdepth-8)*6, encoder->qp_map[0]);

View file

@ -69,7 +69,7 @@ void uvg_quant_generic(
int32_t qp_scaled = uvg_get_scaled_qp(color, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]);
qp_scaled = transform_skip ? MAX(qp_scaled, 4 + 6 * MIN_QP_PRIME_TS) : qp_scaled;
bool needs_block_size_trafo_scale = !transform_skip && ((log2_tr_height + log2_tr_width) % 2 == 1);
needs_block_size_trafo_scale |= 1; // Non log2 block size
needs_block_size_trafo_scale |= 0; // Non log2 block size
const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)color;
const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled % 6];
@ -598,7 +598,7 @@ void uvg_dequant_generic(const encoder_state_t * const state, coeff_t *q_coef, c
int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_width + log2_tr_height) >> 1); // Represents scaling through forward transform
bool needs_block_size_trafo_scale = !transform_skip && ((log2_tr_height + log2_tr_width) % 2 == 1);
needs_block_size_trafo_scale |= 1; // Non log2 block size
needs_block_size_trafo_scale |= 0; // Non log2 block size
int32_t qp_scaled = uvg_get_scaled_qp(color, state->qp, (encoder->bitdepth-8)*6, encoder->qp_map[0]);
qp_scaled = transform_skip ? MAX(qp_scaled, 4 + 6 * MIN_QP_PRIME_TS) : qp_scaled;

View file

@ -34,6 +34,7 @@
#include "encode_coding_tree.h"
#include "image.h"
#include "intra.h"
#include "uvg266.h"
#include "lfnst_tables.h"
#include "rdo.h"
@ -184,8 +185,9 @@ void uvg_derive_lfnst_constraints(
coeff_scan_order_t scan_idx = SCAN_DIAG;
// ToDo: large block support in VVC?
const uint32_t log2_block_size = uvg_g_convert_to_log2[width];
const uint32_t* scan = uvg_g_sig_last_scan[scan_idx][log2_block_size - 1];
const uint32_t log2_tr_width = uvg_g_convert_to_log2[width];
const uint32_t log2_tr_height = uvg_g_convert_to_log2[height];
const uint32_t* scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_idx, log2_tr_width, log2_tr_height);
signed scan_pos_last = -1;
coeff_t temp[TR_MAX_WIDTH * TR_MAX_WIDTH];
@ -801,7 +803,27 @@ void uvg_fwd_lfnst_NxN(coeff_t *src, coeff_t *dst, const int8_t mode, const int8
}
}
static inline bool get_transpose_flag(const int8_t intra_mode)
/**
 * \brief Convert a possibly out-of-range intra mode into a non-negative
 *        mode index for LFNST.
 *
 * - Negative modes are offset by (NUM_EXT_LUMA_MODE >> 1) + NUM_LUMA_MODE.
 * - Modes at or above NUM_LUMA_MODE are offset by (NUM_EXT_LUMA_MODE >> 1).
 * - All other modes are returned as they are.
 *
 * NOTE(review): the input is presumably the wide-angle corrected mode from
 * uvg_wide_angle_correction(); confirm against the callers.
 *
 * \param mode  Intra mode, may be negative or >= NUM_LUMA_MODE.
 * \return The remapped mode as an unsigned value.
 */
static uint32_t get_lfnst_intra_mode(int mode)
{
  if (mode < 0) {
    // Below the regular range: lift past both the regular and extended modes.
    return (uint32_t)(mode + (NUM_EXT_LUMA_MODE >> 1) + NUM_LUMA_MODE);
  }

  if (mode >= NUM_LUMA_MODE) {
    // Above the regular range: skip half of the extended modes.
    return (uint32_t)(mode + (NUM_EXT_LUMA_MODE >> 1));
  }

  // Regular mode, nothing to adjust.
  return (uint32_t)mode;
}
static bool get_transpose_flag(const int8_t intra_mode)
{
return ((intra_mode >= NUM_LUMA_MODE) && (intra_mode >= (NUM_LUMA_MODE + (NUM_EXT_LUMA_MODE >> 1)))) ||
((intra_mode < NUM_LUMA_MODE) && (intra_mode > DIA_IDX));
@ -837,22 +859,22 @@ void uvg_fwd_lfnst(
enum uvg_tree_type tree_type)
{
const uint16_t lfnst_index = lfnst_idx;
const uint32_t log2_width = uvg_g_convert_to_log2[width];
const uint32_t log2_height = uvg_g_convert_to_log2[height];
int8_t intra_mode = (color == COLOR_Y) ? cur_cu->intra.mode : cur_cu->intra.mode_chroma;
bool mts_skip = cur_cu->tr_idx == MTS_SKIP;
bool is_separate_tree = cur_cu->log2_height + cur_cu->log2_width < 6 || tree_type != UVG_BOTH_T;
bool is_cclm_mode = (intra_mode >= 81 && intra_mode <= 83); // CCLM modes are in [81, 83]
bool is_mip = block_is_mip(cur_cu, color, is_separate_tree);
bool is_wide_angle = false; // TODO: get wide angle mode when implemented
const int scan_order = SCAN_DIAG;
if (lfnst_index && !mts_skip && (is_separate_tree || color == COLOR_Y))
{
const uint32_t log2_block_size = uvg_g_convert_to_log2[width];
assert(log2_block_size != -1 && "LFNST: invalid block width.");
assert(log2_width != -1 && "LFNST: invalid block width.");
const bool whge3 = width >= 8 && height >= 8;
const uint32_t* scan = whge3 ? uvg_coef_top_left_diag_scan_8x8[log2_block_size] : uvg_g_sig_last_scan[scan_order][log2_block_size - 1];
const uint32_t* scan = whge3 ? uvg_coef_top_left_diag_scan_8x8[log2_width] : uvg_g_sig_last_scan[scan_order][log2_width - 1];
if (is_cclm_mode) {
intra_mode = cur_cu->intra.mode;
@ -862,11 +884,11 @@ void uvg_fwd_lfnst(
}
assert(intra_mode < NUM_INTRA_MODE && "LFNST: Invalid intra mode.");
assert(lfnst_index < 3 && "LFNST: Invalid LFNST index. Must be in [0, 2]");
int32_t wide_adjusted_mode = uvg_wide_angle_correction(intra_mode, cur_cu->intra.isp_mode != 0, log2_width, log2_height);
// Transform wide angle mode to intra mode
intra_mode = get_lfnst_intra_mode(wide_adjusted_mode);
if (is_wide_angle) {
// Transform wide angle mode to intra mode
intra_mode = intra_mode; // TODO: wide angle modes not implemented yet. Do nothing.
}
bool transpose = get_transpose_flag(intra_mode);
const int sb_size = whge3 ? 8 : 4;
@ -971,20 +993,19 @@ void uvg_inv_lfnst(
// Such is not yet present in uvg266 so use 15 for now
const int max_log2_dyn_range = 15;
const uint32_t lfnst_index = lfnst_idx;
const uint32_t log2_width = uvg_g_convert_to_log2[width];
const uint32_t log2_height = uvg_g_convert_to_log2[height];
int8_t intra_mode = (color == COLOR_Y) ? cur_cu->intra.mode : cur_cu->intra.mode_chroma;
bool mts_skip = cur_cu->tr_idx == MTS_SKIP;
bool is_separate_tree = cur_cu->log2_height + cur_cu->log2_width < 6 || tree_type != UVG_BOTH_T;
bool is_cclm_mode = (intra_mode >= 81 && intra_mode <= 83); // CCLM modes are in [81, 83]
bool is_mip = block_is_mip(cur_cu, color, is_separate_tree);
bool is_wide_angle = false; // TODO: get wide angle mode when implemented
const int scan_order = SCAN_DIAG;
if (lfnst_index && !mts_skip && (is_separate_tree || color == COLOR_Y)) {
const uint32_t log2_block_size = uvg_g_convert_to_log2[width];
const bool whge3 = width >= 8 && height >= 8;
const uint32_t* scan = whge3 ? uvg_coef_top_left_diag_scan_8x8[log2_block_size] : uvg_g_sig_last_scan[scan_order][log2_block_size - 1];
const uint32_t* scan = whge3 ? uvg_coef_top_left_diag_scan_8x8[log2_width] : uvg_g_sig_last_scan[scan_order][log2_width - 1];
if (is_cclm_mode) {
intra_mode = cur_cu->intra.mip_flag ? 0 : cur_cu->intra.mode;
@ -994,11 +1015,10 @@ void uvg_inv_lfnst(
}
assert(intra_mode < NUM_INTRA_MODE && "LFNST: Invalid intra mode.");
assert(lfnst_index < 3 && "LFNST: Invalid LFNST index. Must be in [0, 2]");
int32_t wide_adjusted_mode = uvg_wide_angle_correction(intra_mode, cur_cu->intra.isp_mode != 0, log2_width, log2_height);
if (is_wide_angle) {
// Transform wide angle mode to intra mode
intra_mode = intra_mode; // TODO: wide angle modes not implemented yet. Do nothing.
}
intra_mode = get_lfnst_intra_mode(wide_adjusted_mode);
bool transpose_flag = get_transpose_flag(intra_mode);
const int sb_size = whge3 ? 8 : 4;