From 00f838306f416d099b8527607331fc880aa8ae37 Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Mon, 17 Apr 2023 14:18:57 +0300 Subject: [PATCH] [depquant] Initialize quant_block only when necessary --- src/dep_quant.c | 44 +++++++++++++++++++------------------------- src/dep_quant.h | 16 ++++++++++++++++ src/encoderstate.h | 2 ++ src/intra.c | 1 + src/search_intra.c | 5 +++-- 5 files changed, 41 insertions(+), 27 deletions(-) diff --git a/src/dep_quant.c b/src/dep_quant.c index 6ea82fef..96002664 100644 --- a/src/dep_quant.c +++ b/src/dep_quant.c @@ -65,20 +65,7 @@ static const uint32_t g_goRiceParsCoeff[32] = { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, enum ScanPosType { SCAN_ISCSBB = 0, SCAN_SOCSBB = 1, SCAN_EOCSBB = 2 }; -typedef struct -{ - int m_QShift; - int64_t m_QAdd; - int64_t m_QScale; - int64_t m_maxQIdx; - int64_t m_thresLast; - int64_t m_thresSSbb; - // distortion normalization - int m_DistShift; - int64_t m_DistAdd; - int64_t m_DistStepAdd; - int64_t m_DistOrgFact; -} quant_block; + typedef struct @@ -172,13 +159,13 @@ typedef struct typedef struct { - common_context m_common_context; + common_context m_common_context; all_depquant_states m_allStates; int m_curr_state_offset; int m_prev_state_offset; int m_skip_state_offset; depquant_state m_startState; - quant_block m_quant; + quant_block* m_quant; Decision m_trellis[TR_MAX_WIDTH * TR_MAX_WIDTH]; } context_store; @@ -443,6 +430,7 @@ static void init_quant_block( qp->m_DistAdd = ((int64_t)(1) << qp->m_DistShift) >> 1; qp->m_DistStepAdd = (int64_t)(nomDistFactor * (double)((int64_t)(1) << (qp->m_DistShift + qp->m_QShift)) + .5); qp->m_DistOrgFact = (int64_t)(nomDistFactor * (double)((int64_t)(1) << (qp->m_DistShift + 1)) + .5); + qp->needs_init = false; } static void reset_common_context(common_context* ctx, const rate_estimator * rate_estimator, int numSbb, int num_coeff) @@ -2241,7 +2229,7 @@ static void xDecideAndUpdate( spt = SCAN_EOCSBB; } - xDecide(&ctxs->m_allStates, &ctxs->m_startState, &ctxs->m_quant, 
spt, absCoeff, re->m_lastBitsX[scan_info->pos_x] + re->m_lastBitsY[scan_info->pos_y], decisions, zeroOut, quantCoeff,ctxs->m_skip_state_offset, ctxs->m_prev_state_offset); + xDecide(&ctxs->m_allStates, &ctxs->m_startState, ctxs->m_quant, spt, absCoeff, re->m_lastBitsX[scan_info->pos_x] + re->m_lastBitsY[scan_info->pos_y], decisions, zeroOut, quantCoeff,ctxs->m_skip_state_offset, ctxs->m_prev_state_offset); if (scan_pos) { if (!(scan_pos & 15)) { @@ -2313,11 +2301,17 @@ int uvg_dep_quant( const int32_t scalinglist_type = (cur_tu->type == CU_INTRA ? 0 : 3) + (int8_t)compID; const int32_t *q_coeff = encoder->scaling_list.quant_coeff[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled % 6]; - const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_height + log2_tr_width) >> 1) - needs_block_size_trafo_scale; //!< Represents scaling through forward transform - const int64_t q_bits = QUANT_SHIFT + qp_scaled / 6 + (is_ts ? 0 : transform_shift ); - const int32_t add = ((state->frame->slicetype == UVG_SLICE_I) ? 171 : 85) << (q_bits - 9); - - init_quant_block(state, &dep_quant_context.m_quant, cur_tu, log2_tr_width, log2_tr_height, compID, needs_block_size_trafo_scale, -1); + + if (compID != COLOR_Y) { + dep_quant_context.m_quant = (quant_block*)& state->quant_blocks[2]; + } else if (cur_tu->type == CU_INTRA && cur_tu->intra.isp_mode != ISP_MODE_NO_ISP) { + dep_quant_context.m_quant = (quant_block*)&state->quant_blocks[1]; + } else { + dep_quant_context.m_quant = (quant_block*)&state->quant_blocks[0]; + } + if (dep_quant_context.m_quant->needs_init || cur_tu->type != CU_INTRA) { /* needs_init is only raised by the intra search paths in this patch, so a non-intra TU must always re-init instead of reusing a cached block built for another TU */ + init_quant_block(state, dep_quant_context.m_quant, cur_tu, log2_tr_width, log2_tr_height, compID, needs_block_size_trafo_scale, -1); + } //===== scaling matrix ==== //const int qpDQ = cQP.Qp + 1; @@ -2345,8 +2339,8 @@ int uvg_dep_quant( height >= 4) { firstTestPos =((width == 4 && height == 4) || (width == 8 && height == 8)) ? 
7 : 15; } - const int32_t default_quant_coeff = dep_quant_context.m_quant.m_QScale; - const int32_t thres = dep_quant_context.m_quant.m_thresLast; + const int32_t default_quant_coeff = dep_quant_context.m_quant->m_QScale; + const int32_t thres = dep_quant_context.m_quant->m_thresLast; for (; firstTestPos >= 0; firstTestPos--) { coeff_t thresTmp = (enableScalingLists) ? (thres / (4 * q_coeff[scan[firstTestPos]])) : (thres / (4 * default_quant_coeff)); if (abs(srcCoeff[scan[firstTestPos]]) > thresTmp) { @@ -2419,7 +2413,7 @@ int uvg_dep_quant( context_store* ctxs = &dep_quant_context; if (enableScalingLists) { - init_quant_block(state, &dep_quant_context.m_quant, cur_tu, log2_tr_width, log2_tr_height, compID, needs_block_size_trafo_scale, q_coeff[blkpos]); + init_quant_block(state, dep_quant_context.m_quant, cur_tu, log2_tr_width, log2_tr_height, compID, needs_block_size_trafo_scale, q_coeff[blkpos]); xDecideAndUpdate( &rate_estimator, diff --git a/src/dep_quant.h b/src/dep_quant.h index a8483e40..1f059119 100644 --- a/src/dep_quant.h +++ b/src/dep_quant.h @@ -49,6 +49,22 @@ struct dep_quant_scan_info uint8_t next_sbb_below; }; +typedef struct +{ + int m_QShift; + int64_t m_QAdd; + int64_t m_QScale; + int64_t m_maxQIdx; + int64_t m_thresLast; + int64_t m_thresSSbb; + // distortion normalization + int m_DistShift; + int64_t m_DistAdd; + int64_t m_DistStepAdd; + int64_t m_DistOrgFact; + bool needs_init; +} quant_block; + typedef struct { uint8_t num; diff --git a/src/encoderstate.h b/src/encoderstate.h index 7afa78ab..f9d7d0a8 100644 --- a/src/encoderstate.h +++ b/src/encoderstate.h @@ -366,6 +366,8 @@ typedef struct encoder_state_t { // luma mode in the lfnst functions, instead store the current // collocated luma mode in the state. 
int8_t collocated_luma_mode; + + quant_block quant_blocks[3]; // luma, ISP, chroma } encoder_state_t; void uvg_encode_one_frame(encoder_state_t * const state, uvg_picture* frame); diff --git a/src/intra.c b/src/intra.c index 1b7026e5..a1d0cf42 100644 --- a/src/intra.c +++ b/src/intra.c @@ -2019,6 +2019,7 @@ double uvg_recon_and_estimate_cost_isp(encoder_state_t* const state, int split_limit = uvg_get_isp_split_num(width, height, split_type, true); int cbf_context = 2; + state->quant_blocks[1].needs_init = true; for (int i = 0; i < split_limit; ++i) { search_data->pred_cu.intra.isp_index = i; diff --git a/src/search_intra.c b/src/search_intra.c index 2e507f95..17ec6747 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -361,7 +361,6 @@ static double search_intra_trdepth( search_data->lfnst_costs[i] = MAX_DOUBLE; } - for (trafo = mts_start; trafo < num_transforms; trafo++) { for (int lfnst_idx = start_idx; lfnst_idx <= end_lfnst_idx; lfnst_idx++) { // Initialize lfnst variables @@ -1492,6 +1491,7 @@ int8_t uvg_search_intra_chroma_rdo( ALIGNED(64) int16_t v_resi[LCU_WIDTH_C * LCU_WIDTH_C]; double original_c_lambda = state->c_lambda; + state->quant_blocks[2].needs_init = true; for (int8_t mode_i = 0; mode_i < num_modes; ++mode_i) { const uint8_t mode = chroma_data[mode_i].pred_cu.intra.mode_chroma; @@ -1968,7 +1968,8 @@ void uvg_search_cu_intra( number_of_modes_to_search++; } } - + + state->quant_blocks[0].needs_init = true; /* force the cached luma quant_block to be re-initialised before the RDO search */ search_intra_rdo( state, number_of_modes_to_search,