[depquant] Initialize quant_block only when necessary

2024-11-23 18:14:06 +00:00 · 2023-04-17 14:18:57 +03:00 · 2023-04-17 14:18:57 +03:00 · 00f838306f
parent 9e27b4056a
commit 00f838306f
5 changed files with 41 additions and 27 deletions
--- a/src/dep_quant.c
+++ b/src/dep_quant.c
@ -65,20 +65,7 @@ static const uint32_t g_goRiceParsCoeff[32] = { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,

 enum ScanPosType { SCAN_ISCSBB = 0, SCAN_SOCSBB = 1, SCAN_EOCSBB = 2 };

-typedef struct
-{
-  int m_QShift;
-  int64_t m_QAdd;
-  int64_t m_QScale;
-  int64_t m_maxQIdx;
-  int64_t m_thresLast;
-  int64_t m_thresSSbb;
-  // distortion normalization
-  int m_DistShift;
-  int64_t m_DistAdd;
-  int64_t m_DistStepAdd;
-  int64_t m_DistOrgFact;
-} quant_block;
+


 typedef struct
@ -172,13 +159,13 @@ typedef struct

 typedef struct
 {
-    common_context   m_common_context;
+    common_context  m_common_context;
    all_depquant_states m_allStates;
    int m_curr_state_offset;
    int m_prev_state_offset;
    int m_skip_state_offset;
    depquant_state       m_startState;
-    quant_block   m_quant;
+    quant_block*   m_quant;
    Decision    m_trellis[TR_MAX_WIDTH * TR_MAX_WIDTH];
 } context_store;

@ -443,6 +430,7 @@ static void init_quant_block(
  qp->m_DistAdd = ((int64_t)(1) << qp->m_DistShift) >> 1;
  qp->m_DistStepAdd = (int64_t)(nomDistFactor * (double)((int64_t)(1) << (qp->m_DistShift + qp->m_QShift)) + .5);
  qp->m_DistOrgFact = (int64_t)(nomDistFactor * (double)((int64_t)(1) << (qp->m_DistShift + 1)) + .5);
+  qp->needs_init = false;
 }

 static void reset_common_context(common_context* ctx, const rate_estimator * rate_estimator, int numSbb, int num_coeff)
@ -2241,7 +2229,7 @@ static void xDecideAndUpdate(
    spt = SCAN_EOCSBB;
  }

-  xDecide(&ctxs->m_allStates, &ctxs->m_startState, &ctxs->m_quant, spt, absCoeff, re->m_lastBitsX[scan_info->pos_x] + re->m_lastBitsY[scan_info->pos_y], decisions, zeroOut, quantCoeff,ctxs->m_skip_state_offset, ctxs->m_prev_state_offset);
+  xDecide(&ctxs->m_allStates, &ctxs->m_startState, ctxs->m_quant, spt, absCoeff, re->m_lastBitsX[scan_info->pos_x] + re->m_lastBitsY[scan_info->pos_y], decisions, zeroOut, quantCoeff,ctxs->m_skip_state_offset, ctxs->m_prev_state_offset);

  if (scan_pos) {
    if (!(scan_pos & 15)) {
@ -2313,11 +2301,17 @@ int uvg_dep_quant(

  const int32_t scalinglist_type = (cur_tu->type == CU_INTRA ? 0 : 3) + (int8_t)compID;
  const int32_t *q_coeff = encoder->scaling_list.quant_coeff[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled % 6];
-  const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_height + log2_tr_width) >> 1) - needs_block_size_trafo_scale; //!< Represents scaling through forward transform
-  const int64_t q_bits = QUANT_SHIFT + qp_scaled / 6 + (is_ts ? 0 : transform_shift );
-  const int32_t add = ((state->frame->slicetype == UVG_SLICE_I) ? 171 : 85) << (q_bits - 9);
-  
-  init_quant_block(state, &dep_quant_context.m_quant, cur_tu, log2_tr_width, log2_tr_height, compID, needs_block_size_trafo_scale, -1);
+
+  if (compID != COLOR_Y) {
+    dep_quant_context.m_quant = (quant_block*)& state->quant_blocks[2];
+  } else if (cur_tu->type == CU_INTRA && cur_tu->intra.isp_mode != ISP_MODE_NO_ISP) {
+    dep_quant_context.m_quant = (quant_block*)&state->quant_blocks[1];    
+  } else {
+    dep_quant_context.m_quant = (quant_block*)&state->quant_blocks[0];   
+  }
+  if (dep_quant_context.m_quant->needs_init) {
+    init_quant_block(state, dep_quant_context.m_quant, cur_tu, log2_tr_width, log2_tr_height, compID, needs_block_size_trafo_scale, -1);
+  }
  
  //===== scaling matrix ====
  //const int         qpDQ = cQP.Qp + 1;
@ -2345,8 +2339,8 @@ int uvg_dep_quant(
    height >= 4) {
    firstTestPos =((width == 4 && height == 4) || (width == 8 && height == 8)) ? 7 : 15;
  }
-  const int32_t default_quant_coeff = dep_quant_context.m_quant.m_QScale;
-  const int32_t thres               = dep_quant_context.m_quant.m_thresLast;
+  const int32_t default_quant_coeff = dep_quant_context.m_quant->m_QScale;
+  const int32_t thres               = dep_quant_context.m_quant->m_thresLast;
  for (; firstTestPos >= 0; firstTestPos--) {
    coeff_t thresTmp = (enableScalingLists) ? (thres / (4 * q_coeff[scan[firstTestPos]])) : (thres / (4 * default_quant_coeff));
    if (abs(srcCoeff[scan[firstTestPos]]) > thresTmp) {
@ -2419,7 +2413,7 @@ int uvg_dep_quant(

    context_store* ctxs = &dep_quant_context;
    if (enableScalingLists) {
-      init_quant_block(state, &dep_quant_context.m_quant, cur_tu, log2_tr_width, log2_tr_height, compID, needs_block_size_trafo_scale, q_coeff[blkpos]);
+      init_quant_block(state, dep_quant_context.m_quant, cur_tu, log2_tr_width, log2_tr_height, compID, needs_block_size_trafo_scale, q_coeff[blkpos]);

      xDecideAndUpdate(
        &rate_estimator,
--- a/src/dep_quant.h
+++ b/src/dep_quant.h
@ -49,6 +49,22 @@ struct dep_quant_scan_info
  uint8_t next_sbb_below;
 };

+typedef struct  // Quantizer parameters filled in by init_quant_block(); encoder_state_t keeps three of these (luma, ISP, chroma).
+{
+  int     m_QShift;       // Shift term; also enters the m_DistStepAdd scale together with m_DistShift.
+  int64_t m_QAdd;
+  int64_t m_QScale;       // Read by uvg_dep_quant() as the default quantization coefficient.
+  int64_t m_maxQIdx;
+  int64_t m_thresLast;    // Threshold uvg_dep_quant() uses when searching for the first scan position to test.
+  int64_t m_thresSSbb;
+  // distortion normalization
+  int     m_DistShift;    // Shift from which m_DistAdd, m_DistStepAdd and m_DistOrgFact are derived.
+  int64_t m_DistAdd;      // (1 << m_DistShift) >> 1
+  int64_t m_DistStepAdd;  // nomDistFactor * 2^(m_DistShift + m_QShift), rounded to nearest.
+  int64_t m_DistOrgFact;  // nomDistFactor * 2^(m_DistShift + 1), rounded to nearest.
+  bool    needs_init;     // Cleared by init_quant_block(); set to true by callers so uvg_dep_quant() re-initializes the block on next use.
+} quant_block;
+
 typedef struct
 {
  uint8_t num;
--- a/src/encoderstate.h
+++ b/src/encoderstate.h
@ -366,6 +366,8 @@ typedef struct encoder_state_t {
  // luma mode in the lfnst functions, instead store the current
  // collocated luma mode in the state.
  int8_t collocated_luma_mode;
+
+  quant_block quant_blocks[3]; // luma, ISP, chroma
 } encoder_state_t;

 void uvg_encode_one_frame(encoder_state_t * const state, uvg_picture* frame);
--- a/src/intra.c
+++ b/src/intra.c
@ -2019,6 +2019,7 @@ double uvg_recon_and_estimate_cost_isp(encoder_state_t* const state,
  int split_limit = uvg_get_isp_split_num(width, height, split_type, true);

  int cbf_context = 2;
+  state->quant_blocks[1].needs_init = true;

  for (int i = 0; i < split_limit; ++i) {
    search_data->pred_cu.intra.isp_index = i;
--- a/src/search_intra.c
+++ b/src/search_intra.c
@ -361,7 +361,6 @@ static double search_intra_trdepth(
      search_data->lfnst_costs[i] = MAX_DOUBLE;
    }

-
    for (trafo = mts_start; trafo < num_transforms; trafo++) {
      for (int lfnst_idx = start_idx; lfnst_idx <= end_lfnst_idx; lfnst_idx++) {
        // Initialize lfnst variables
@ -1492,6 +1491,7 @@ int8_t uvg_search_intra_chroma_rdo(
    ALIGNED(64) int16_t v_resi[LCU_WIDTH_C * LCU_WIDTH_C];

    double original_c_lambda = state->c_lambda;
+    state->quant_blocks[2].needs_init = true;

    for (int8_t mode_i = 0; mode_i < num_modes; ++mode_i) {
      const uint8_t mode = chroma_data[mode_i].pred_cu.intra.mode_chroma;
@ -1968,7 +1968,8 @@ void uvg_search_cu_intra(
        number_of_modes_to_search++;
      }
    }
-    
+
+    state->quant_blocks[0].needs_init = 1;
    search_intra_rdo(
      state,
      number_of_modes_to_search,