[depquant] Initialize quant_block only when necessary

This commit is contained in:
Joose Sainio 2023-04-17 14:18:57 +03:00
parent b8878f029f
commit e746db22ed
5 changed files with 41 additions and 27 deletions

View file

@ -65,20 +65,7 @@ static const uint32_t g_goRiceParsCoeff[32] = { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
enum ScanPosType { SCAN_ISCSBB = 0, SCAN_SOCSBB = 1, SCAN_EOCSBB = 2 };
// Quantization parameters for one transform block, used by dependent
// quantization. The fields are filled in by init_quant_block().
typedef struct
{
int m_QShift;
int64_t m_QAdd;
// Read by uvg_dep_quant as the default quantization coefficient when
// scaling lists are not enabled.
int64_t m_QScale;
int64_t m_maxQIdx;
// Threshold that uvg_dep_quant divides by (4 * quantization coefficient) and
// compares against absolute source coefficients while searching for the
// first scan position to test.
int64_t m_thresLast;
int64_t m_thresSSbb; // NOTE(review): presumably a per-sub-block threshold; confirm against init_quant_block
// distortion normalization
int m_DistShift;
// Set to half of (1 << m_DistShift) by init_quant_block.
int64_t m_DistAdd;
// Set to nomDistFactor * 2^(m_DistShift + m_QShift), rounded to nearest.
int64_t m_DistStepAdd;
// Set to nomDistFactor * 2^(m_DistShift + 1), rounded to nearest.
int64_t m_DistOrgFact;
} quant_block;
typedef struct
@ -172,13 +159,13 @@ typedef struct
// Working state for one dependent quantization run. uvg_dep_quant accesses
// it as dep_quant_context, and xDecideAndUpdate forwards several of its
// members to xDecide.
// NOTE(review): m_common_context and m_quant each appear twice below; this
// looks like the before/after lines of a diff shown together. Confirm which
// declaration of each is the current one.
typedef struct
{
common_context m_common_context;
common_context m_common_context;
// State set passed to xDecide together with m_startState.
all_depquant_states m_allStates;
int m_curr_state_offset;
// Offsets passed to xDecide as its last two arguments (skip, then prev).
int m_prev_state_offset;
int m_skip_state_offset;
depquant_state m_startState;
// Quantization parameters, stored by value in this declaration.
quant_block m_quant;
// Quantization parameters, by pointer: uvg_dep_quant points this at one of
// the entries in state->quant_blocks.
quant_block* m_quant;
Decision m_trellis[TR_MAX_WIDTH * TR_MAX_WIDTH];
} context_store;
@ -443,6 +430,7 @@ static void init_quant_block(
qp->m_DistAdd = ((int64_t)(1) << qp->m_DistShift) >> 1;
qp->m_DistStepAdd = (int64_t)(nomDistFactor * (double)((int64_t)(1) << (qp->m_DistShift + qp->m_QShift)) + .5);
qp->m_DistOrgFact = (int64_t)(nomDistFactor * (double)((int64_t)(1) << (qp->m_DistShift + 1)) + .5);
qp->needs_init = false;
}
static void reset_common_context(common_context* ctx, const rate_estimator * rate_estimator, int numSbb, int num_coeff)
@ -2241,7 +2229,7 @@ static void xDecideAndUpdate(
spt = SCAN_EOCSBB;
}
xDecide(&ctxs->m_allStates, &ctxs->m_startState, &ctxs->m_quant, spt, absCoeff, re->m_lastBitsX[scan_info->pos_x] + re->m_lastBitsY[scan_info->pos_y], decisions, zeroOut, quantCoeff,ctxs->m_skip_state_offset, ctxs->m_prev_state_offset);
xDecide(&ctxs->m_allStates, &ctxs->m_startState, ctxs->m_quant, spt, absCoeff, re->m_lastBitsX[scan_info->pos_x] + re->m_lastBitsY[scan_info->pos_y], decisions, zeroOut, quantCoeff,ctxs->m_skip_state_offset, ctxs->m_prev_state_offset);
if (scan_pos) {
if (!(scan_pos & 15)) {
@ -2313,11 +2301,17 @@ int uvg_dep_quant(
const int32_t scalinglist_type = (cur_tu->type == CU_INTRA ? 0 : 3) + (int8_t)compID;
const int32_t *q_coeff = encoder->scaling_list.quant_coeff[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled % 6];
const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_height + log2_tr_width) >> 1) - needs_block_size_trafo_scale; //!< Represents scaling through forward transform
const int64_t q_bits = QUANT_SHIFT + qp_scaled / 6 + (is_ts ? 0 : transform_shift );
const int32_t add = ((state->frame->slicetype == UVG_SLICE_I) ? 171 : 85) << (q_bits - 9);
init_quant_block(state, &dep_quant_context.m_quant, cur_tu, log2_tr_width, log2_tr_height, compID, needs_block_size_trafo_scale, -1);
if (compID != COLOR_Y) {
dep_quant_context.m_quant = (quant_block*)& state->quant_blocks[2];
} else if (cur_tu->type == CU_INTRA && cur_tu->intra.isp_mode != ISP_MODE_NO_ISP) {
dep_quant_context.m_quant = (quant_block*)&state->quant_blocks[1];
} else {
dep_quant_context.m_quant = (quant_block*)&state->quant_blocks[0];
}
if (dep_quant_context.m_quant->needs_init) {
init_quant_block(state, dep_quant_context.m_quant, cur_tu, log2_tr_width, log2_tr_height, compID, needs_block_size_trafo_scale, -1);
}
//===== scaling matrix ====
//const int qpDQ = cQP.Qp + 1;
@ -2345,8 +2339,8 @@ int uvg_dep_quant(
height >= 4) {
firstTestPos =((width == 4 && height == 4) || (width == 8 && height == 8)) ? 7 : 15;
}
const int32_t default_quant_coeff = dep_quant_context.m_quant.m_QScale;
const int32_t thres = dep_quant_context.m_quant.m_thresLast;
const int32_t default_quant_coeff = dep_quant_context.m_quant->m_QScale;
const int32_t thres = dep_quant_context.m_quant->m_thresLast;
for (; firstTestPos >= 0; firstTestPos--) {
coeff_t thresTmp = (enableScalingLists) ? (thres / (4 * q_coeff[scan[firstTestPos]])) : (thres / (4 * default_quant_coeff));
if (abs(srcCoeff[scan[firstTestPos]]) > thresTmp) {
@ -2419,7 +2413,7 @@ int uvg_dep_quant(
context_store* ctxs = &dep_quant_context;
if (enableScalingLists) {
init_quant_block(state, &dep_quant_context.m_quant, cur_tu, log2_tr_width, log2_tr_height, compID, needs_block_size_trafo_scale, q_coeff[blkpos]);
init_quant_block(state, dep_quant_context.m_quant, cur_tu, log2_tr_width, log2_tr_height, compID, needs_block_size_trafo_scale, q_coeff[blkpos]);
xDecideAndUpdate(
&rate_estimator,

View file

@ -49,6 +49,22 @@ struct dep_quant_scan_info
uint8_t next_sbb_below;
};
// Quantization parameters for one transform block, used by dependent
// quantization. The fields are filled in by init_quant_block().
// encoder_state_t keeps three of these (luma, ISP, chroma) so that
// uvg_dep_quant can reuse an already initialized block instead of
// recomputing it on every call.
typedef struct
{
int m_QShift;
int64_t m_QAdd;
// Read by uvg_dep_quant as the default quantization coefficient when
// scaling lists are not enabled.
int64_t m_QScale;
int64_t m_maxQIdx;
// Threshold that uvg_dep_quant divides by (4 * quantization coefficient) and
// compares against absolute source coefficients while searching for the
// first scan position to test.
int64_t m_thresLast;
int64_t m_thresSSbb; // NOTE(review): presumably a per-sub-block threshold; confirm against init_quant_block
// distortion normalization
int m_DistShift;
// Set to half of (1 << m_DistShift) by init_quant_block.
int64_t m_DistAdd;
// Set to nomDistFactor * 2^(m_DistShift + m_QShift), rounded to nearest.
int64_t m_DistStepAdd;
// Set to nomDistFactor * 2^(m_DistShift + 1), rounded to nearest.
int64_t m_DistOrgFact;
// True when the block must be (re)computed before use. The intra search
// code sets it before starting a search; init_quant_block clears it; and
// uvg_dep_quant calls init_quant_block only when it is set.
// NOTE(review): with scaling lists enabled, uvg_dep_quant also calls
// init_quant_block unconditionally with a per-coefficient scale, which
// clears this flag as well; confirm the cached values stay valid afterwards.
bool needs_init;
} quant_block;
typedef struct
{
uint8_t num;

View file

@ -369,6 +369,8 @@ typedef struct encoder_state_t {
// luma mode in the lfnst functions, instead store the current
// collocated luma mode in the state.
int8_t collocated_luma_mode;
quant_block quant_blocks[3]; // luma, ISP, chroma
} encoder_state_t;
void uvg_encode_one_frame(encoder_state_t * const state, uvg_picture* frame);

View file

@ -2019,6 +2019,7 @@ double uvg_recon_and_estimate_cost_isp(encoder_state_t* const state,
int split_limit = uvg_get_isp_split_num(width, height, split_type, true);
int cbf_context = 2;
state->quant_blocks[1].needs_init = true;
for (int i = 0; i < split_limit; ++i) {
search_data->pred_cu.intra.isp_index = i;

View file

@ -361,7 +361,6 @@ static double search_intra_trdepth(
search_data->lfnst_costs[i] = MAX_DOUBLE;
}
for (trafo = mts_start; trafo < num_transforms; trafo++) {
for (int lfnst_idx = start_idx; lfnst_idx <= end_lfnst_idx; lfnst_idx++) {
// Initialize lfnst variables
@ -1492,6 +1491,7 @@ int8_t uvg_search_intra_chroma_rdo(
ALIGNED(64) int16_t v_resi[LCU_WIDTH_C * LCU_WIDTH_C];
double original_c_lambda = state->c_lambda;
state->quant_blocks[2].needs_init = true;
for (int8_t mode_i = 0; mode_i < num_modes; ++mode_i) {
const uint8_t mode = chroma_data[mode_i].pred_cu.intra.mode_chroma;
@ -1969,6 +1969,7 @@ void uvg_search_cu_intra(
}
}
state->quant_blocks[0].needs_init = 1;
search_intra_rdo(
state,
number_of_modes_to_search,