[mtt] square root adjustment for quantization

This commit is contained in:
Joose Sainio 2022-11-17 07:38:26 +02:00
parent 6fe09411c0
commit 902ea84187
3 changed files with 11 additions and 3 deletions

View file

@ -1431,6 +1431,8 @@ void uvg_rdoq(
int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift; int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift;
const double lambda = color ? state->c_lambda : state->lambda; const double lambda = color ? state->c_lambda : state->lambda;
const int32_t default_quant_coeff = uvg_g_quant_scales[needs_block_size_trafo_scale][qp_scaled % 6];
const bool use_scaling_list = state->encoder_control->cfg.scaling_list != UVG_SCALING_LIST_OFF;
const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_block_width][log2_block_height][scalinglist_type][qp_scaled%6]; const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_block_width][log2_block_height][scalinglist_type][qp_scaled%6];
const double *err_scale = encoder->scaling_list.error_scale[log2_block_width][log2_block_height][scalinglist_type][qp_scaled%6]; const double *err_scale = encoder->scaling_list.error_scale[log2_block_width][log2_block_height][scalinglist_type][qp_scaled%6];
@ -1509,7 +1511,7 @@ void uvg_rdoq(
if (lfnst_idx > 0 && scanpos > max_lfnst_pos) break; if (lfnst_idx > 0 && scanpos > max_lfnst_pos) break;
uint32_t blkpos = scan[scanpos]; uint32_t blkpos = scan[scanpos];
int32_t q = quant_coeff[blkpos]; int32_t q = use_scaling_list ? quant_coeff[blkpos] : default_quant_coeff;
int32_t level_double = coef[blkpos]; int32_t level_double = coef[blkpos];
level_double = MIN(abs(level_double) * q, MAX_INT - (1 << (q_bits - 1))); level_double = MIN(abs(level_double) * q, MAX_INT - (1 << (q_bits - 1)));
uint32_t max_abs_level = (level_double + (1 << (q_bits - 1))) >> q_bits; uint32_t max_abs_level = (level_double + (1 << (q_bits - 1))) >> q_bits;

View file

@ -396,6 +396,8 @@ void uvg_quant_avx2(const encoder_state_t * const state, const coeff_t * __restr
const int32_t add = ((state->frame->slicetype == UVG_SLICE_I) ? 171 : 85) << (q_bits - 9); const int32_t add = ((state->frame->slicetype == UVG_SLICE_I) ? 171 : 85) << (q_bits - 9);
const int32_t q_bits8 = q_bits - 8; const int32_t q_bits8 = q_bits - 8;
const int32_t default_quant_coeff = uvg_g_quant_scales[needs_block_size_trafo_scale][qp_scaled % 6];
uint32_t ac_sum = 0; uint32_t ac_sum = 0;
int32_t last_cg = -1; int32_t last_cg = -1;
@ -404,7 +406,7 @@ void uvg_quant_avx2(const encoder_state_t * const state, const coeff_t * __restr
// Loading once is enough if scaling lists are off // Loading once is enough if scaling lists are off
__m256i low_b = _mm256_setzero_si256(), high_b = _mm256_setzero_si256(); __m256i low_b = _mm256_setzero_si256(), high_b = _mm256_setzero_si256();
if (!(state->encoder_control->scaling_list.enable)) { if (!(state->encoder_control->scaling_list.enable)) {
low_b = _mm256_set1_epi32(quant_coeff[0]); low_b = _mm256_set1_epi32(default_quant_coeff);
high_b = low_b; high_b = low_b;
} }

View file

@ -78,8 +78,12 @@ void uvg_quant_generic(
const int32_t add = ((state->frame->slicetype == UVG_SLICE_I) ? 171 : 85) << (q_bits - 9); const int32_t add = ((state->frame->slicetype == UVG_SLICE_I) ? 171 : 85) << (q_bits - 9);
const int32_t q_bits8 = q_bits - 8; const int32_t q_bits8 = q_bits - 8;
const int32_t default_quant_coeff = uvg_g_quant_scales[needs_block_size_trafo_scale][qp_scaled % 6];
uint32_t ac_sum = 0; uint32_t ac_sum = 0;
const bool use_scaling_list = state->encoder_control->cfg.scaling_list != UVG_SCALING_LIST_OFF;
if(lfnst_idx == 0){ if(lfnst_idx == 0){
for (int32_t n = 0; n < width * height; n++) { for (int32_t n = 0; n < width * height; n++) {
int32_t level = coef[n]; int32_t level = coef[n];
@ -88,7 +92,7 @@ void uvg_quant_generic(
sign = (level < 0 ? -1 : 1); sign = (level < 0 ? -1 : 1);
int32_t curr_quant_coeff = quant_coeff[n]; int32_t curr_quant_coeff = use_scaling_list ? quant_coeff[n] : default_quant_coeff;
level = (int32_t)((abs_level * curr_quant_coeff + add) >> q_bits); level = (int32_t)((abs_level * curr_quant_coeff + add) >> q_bits);
ac_sum += level; ac_sum += level;