Enable chroma scaling

WIP: user-defined scaling array
2024-11-27 19:24:06 +00:00 · 2021-03-16 10:22:40 +02:00 · 2021-03-16 10:22:40 +02:00 · b2076d3b39
parent 412781db41
commit b2076d3b39
12 changed files with 129 additions and 35 deletions
--- a/src/cfg.c
+++ b/src/cfg.c
@ -175,6 +175,20 @@ int kvz_config_init(kvz_config *cfg)
  cfg->file_format = KVZ_FORMAT_AUTO;

  cfg->stats_file_prefix = NULL;
+
+  int8_t in[] = { 17, 27, 32, 44 };
+  int8_t out[] = { 17, 29, 34, 41 };
+
+  cfg->num_used_table = 1;
+  cfg->qp_table_length_minus1[0] = 2;
+  cfg->qp_table_start_minus26[0] = 17 - 26;
+  cfg->delta_qp_in_val_minus1[0] = malloc(cfg->qp_table_length_minus1[0] + 1);
+  cfg->delta_qp_out_val[0] = malloc(cfg->qp_table_length_minus1[0]  +  1);
+  for (int i = 0; i < cfg->qp_table_length_minus1[0] + 1; i++) {
+    cfg->delta_qp_in_val_minus1[0][i] = in[i + 1] - in[i] - (int8_t)1;
+    cfg->delta_qp_out_val[0][i] = out[i + 1] - out[i];
+  }
+
  return 1;
 }

@ -191,6 +205,10 @@ int kvz_config_destroy(kvz_config *cfg)
    {
      FREE_POINTER(cfg->param_set_map);
    }
+    for (int i = 0; i < cfg->num_used_table; i++) {
+      if (cfg->delta_qp_in_val_minus1[i]) FREE_POINTER(cfg->delta_qp_in_val_minus1[i]);
+      if (cfg->delta_qp_out_val[i]) FREE_POINTER(cfg->delta_qp_out_val[i]);
+    }
  }
  free(cfg);

--- a/src/encoder.c
+++ b/src/encoder.c
@ -199,6 +199,49 @@ static void init_erp_aqp_roi(encoder_control_t* encoder,
 }


+/**
+ * \brief Build the QP -> chroma QP lookup table for chroma QP table \p i.
+ *
+ * The table is described in \p cfg as a start pivot
+ * (qp_table_start_minus26[i] + 26) followed by qp_table_length_minus1[i] + 1
+ * segments, each given by an input delta (delta_qp_in_val_minus1[i][j] + 1)
+ * and an output delta (delta_qp_out_val[i][j]). Values between pivots are
+ * linearly interpolated; below the first pivot and above the last pivot the
+ * mapping continues with a slope of one, clamped to [-qpBdOffsetC, 63].
+ *
+ * Only indices 0..63 are stored. Positions that would fall outside of that
+ * range are still stepped through, so that later entries stay correct, but
+ * they are never written to the buffer.
+ *
+ * \param cfg  encoder configuration holding the table description
+ * \param i    index of the chroma QP table to derive
+ * \return     newly allocated table with 64 entries that the caller must
+ *             free, or NULL if the allocation failed
+ */
+static int8_t* derive_chroma_QP_mapping_table(const kvz_config* const cfg, int i)
+{
+  const int max_qp = 63;
+  const int qp_bd_offset_c = (cfg->input_bitdepth - 8) * 6;
+  const int num_segments = cfg->qp_table_length_minus1[i] + 1;
+  const int first_qp_in = cfg->qp_table_start_minus26[i] + 26;
+
+  // The table has no entries below zero, so the downward fill must stop at
+  // index 0 even when -qp_bd_offset_c is negative (bit depths above 8).
+  const int lowest_index = qp_bd_offset_c > 0 ? 0 : -qp_bd_offset_c;
+
+  int8_t* table = calloc(max_qp + 1, sizeof(int8_t));
+  if (table == NULL) {
+    return NULL;
+  }
+
+  // The first pivot maps onto itself.
+  if (first_qp_in >= 0 && first_qp_in <= max_qp) {
+    table[first_qp_in] = first_qp_in;
+  }
+
+  // Below the first pivot: decrease by one per step.
+  int value = first_qp_in;
+  for (int k = first_qp_in - 1; k >= lowest_index; k--) {
+    value = CLIP(-qp_bd_offset_c, max_qp, value - 1);
+    if (k <= max_qp) {
+      table[k] = value;
+    }
+  }
+
+  // Between pivots: rounded linear interpolation from the value at the
+  // segment's start pivot. The pivots are tracked incrementally, which
+  // removes the need for fixed size pivot arrays.
+  int qp_in = first_qp_in;
+  int pivot_value = first_qp_in;
+  for (int j = 0; j < num_segments; j++) {
+    const int delta_in = cfg->delta_qp_in_val_minus1[i][j] + 1;
+    const int delta_out = cfg->delta_qp_out_val[i][j];
+    const int rounding = delta_in >> 1;
+
+    int segment_value = pivot_value;
+    for (int m = 1; m <= delta_in; m++) {
+      // Clamped like the extrapolated ranges so that the value always fits
+      // into the int8_t storage of the table.
+      segment_value = CLIP(-qp_bd_offset_c, max_qp,
+                           pivot_value + (delta_out * m + rounding) / delta_in);
+      const int k = qp_in + m;
+      if (k >= 0 && k <= max_qp) {
+        table[k] = segment_value;
+      }
+    }
+
+    pivot_value = segment_value;
+    qp_in += delta_in;
+  }
+
+  // Above the last pivot: increase by one per step.
+  value = pivot_value;
+  for (int k = qp_in + 1; k <= max_qp; k++) {
+    value = CLIP(-qp_bd_offset_c, max_qp, value + 1);
+    if (k >= 0) {
+      table[k] = value;
+    }
+  }
+
+  return table;
+}
+
+
 /**
 * \brief Allocate and initialize an encoder control structure.
 *
@ -655,6 +698,10 @@ encoder_control_t* kvz_encoder_control_init(const kvz_config *const cfg)
    memcpy(encoder->cfg.optional_key, cfg->optional_key, 16);
  }

+  for (int i = 0; i < cfg->num_used_table; i++) {
+    encoder->qp_map[i] = derive_chroma_QP_mapping_table(cfg, i);
+  }
+
  return encoder;

 init_failed:
@ -691,6 +738,9 @@ void kvz_encoder_control_free(encoder_control_t *const encoder)

  kvz_threadqueue_free(encoder->threadqueue);
  encoder->threadqueue = NULL;
+  for (int i = 0; i < encoder->cfg.num_used_table; i++) {
+    if (encoder->qp_map[i]) FREE_POINTER(encoder->qp_map[i]);
+  }

  free(encoder);
 }
--- a/src/encoder.h
+++ b/src/encoder.h
@ -135,6 +135,8 @@ typedef struct encoder_control_t

  int32_t poc_lsb_bits;

+  int8_t* qp_map[3];
+
 } encoder_control_t;

 encoder_control_t* kvz_encoder_control_init(const kvz_config *cfg);
--- a/src/encoder_state-bitstream.c
+++ b/src/encoder_state-bitstream.c
@ -608,13 +608,17 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream,
  WRITE_U(stream, 0, 1, "sps_joint_cbcr_enabled_flag");

  if (encoder->chroma_format != KVZ_CSP_400) {    
-    WRITE_U(stream, 1, 1, "same_qp_table_for_chroma"); //TODO: Enable chroma QP scaling and fix kvz_get_scaled_qp()
+    WRITE_U(stream, 1, 1, "same_qp_table_for_chroma");

-    WRITE_SE(stream, 0, "qp_table_starts_minus26");    
-    WRITE_UE(stream, 0, "num_points_in_qp_table_minus1");
+    for (int i = 0; i < encoder->cfg.num_used_table; i++) {
+      WRITE_SE(stream, encoder->cfg.qp_table_start_minus26[i], "qp_table_starts_minus26");    
+      WRITE_UE(stream, encoder->cfg.qp_table_length_minus1[i], "num_points_in_qp_table_minus1");

-      WRITE_UE(stream, 0, "delta_qp_in_val_minus1");
-      WRITE_UE(stream, 1, "delta_qp_diff_val");
+      for (int j = 0; j <= encoder->cfg.qp_table_length_minus1[i]; j++) {
+        WRITE_UE(stream, encoder->cfg.delta_qp_in_val_minus1[i][j], "delta_qp_in_val_minus1");
+        WRITE_UE(stream, encoder->cfg.delta_qp_out_val[i][j] ^ encoder->cfg.delta_qp_in_val_minus1[i][j], "delta_qp_diff_val");
+      }
+    }

  }

--- a/src/encoderstate.h
+++ b/src/encoderstate.h
@ -314,6 +314,8 @@ typedef struct encoder_state_t {
  //! \brief Quantization parameter for the current LCU
  int8_t qp;

+  double c_lambda;
+
  /**
   * \brief Whether a QP delta value must be coded for the current LCU.
   */
--- a/src/kvazaar.h
+++ b/src/kvazaar.h
@ -475,6 +475,12 @@ typedef struct kvz_config
  char *stats_file_prefix;

  struct param_set_map *param_set_map;
+
+  int8_t num_used_table;
+  int8_t qp_table_start_minus26[3];
+  int8_t qp_table_length_minus1[3];
+  int8_t* delta_qp_in_val_minus1[3];
+  int8_t* delta_qp_out_val[3];
 } kvz_config;

 /**
--- a/src/rate_control.c
+++ b/src/rate_control.c
@ -787,6 +787,9 @@ static double qp_to_lambda(encoder_state_t* const state, int qp)
  state->lambda = est_lambda;
  state->lambda_sqrt = sqrt(est_lambda);
  state->qp = est_qp;
+  int8_t chroma_qp = encoder->qp_map[0][est_qp];
+  double tmpWeight = pow(2.0, (est_qp - chroma_qp) / 3.0);
+  state->c_lambda = est_lambda / tmpWeight;
  ctu->qp = est_qp;
  ctu->lambda = est_lambda;
  ctu->i_cost = 0;
@ -1086,7 +1089,6 @@ void kvz_set_lcu_lambda_and_qp(encoder_state_t * const state,
    state->qp = CLIP_TO_QP(state->frame->QP + dqp);
    state->lambda = qp_to_lambda(state, state->qp);
    state->lambda_sqrt = sqrt(state->lambda);
-
  }
  else if (ctrl->cfg.target_bitrate > 0) {
    const uint32_t pixels    = MIN(LCU_WIDTH, state->tile->frame->width  - LCU_WIDTH * pos.x) *
@ -1134,6 +1136,10 @@ void kvz_set_lcu_lambda_and_qp(encoder_state_t * const state,
  lcu->lambda = state->lambda;
  lcu->qp = state->qp;

+  int8_t chroma_qp = ctrl->qp_map[0][state->qp];
+  double tmpWeight = pow(2.0, (state->qp - chroma_qp) / 3.0);
+  state->c_lambda = state->lambda / tmpWeight;
+
  // Apply variance adaptive quantization
  if (ctrl->cfg.vaq) {
    vector2d_t lcu_pos = {
--- a/src/rdo.c
+++ b/src/rdo.c
@ -412,9 +412,10 @@ INLINE uint32_t kvz_get_coded_level( encoder_state_t * const state, double *code
  int32_t abs_level;
  int32_t min_abs_level;
  cabac_ctx_t* base_sig_model = type?(cabac->ctx.cu_sig_model_chroma[0]):(cabac->ctx.cu_sig_model_luma[0]);
+  const double lambda = type ? state->c_lambda : state->lambda;

  if( !last && max_abs_level < 3 ) {
-    *coded_cost_sig = state->lambda * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 0);
+    *coded_cost_sig = lambda * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 0);
    *coded_cost     = *coded_cost0 + *coded_cost_sig;
    if (max_abs_level == 0) return best_abs_level;
  } else {
@ -422,13 +423,13 @@ INLINE uint32_t kvz_get_coded_level( encoder_state_t * const state, double *code
  }

  if( !last ) {
-    cur_cost_sig = state->lambda * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 1);
+    cur_cost_sig = lambda * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 1);
  }

  min_abs_level    = ( max_abs_level > 1 ? max_abs_level - 1 : 1 );
  for (abs_level = max_abs_level; abs_level >= min_abs_level ; abs_level-- ) {
    double err       = (double)(level_double - ( abs_level * (1 << q_bits) ) );
-    double cur_cost  = err * err * temp + state->lambda *
+    double cur_cost  = err * err * temp + lambda *
                       kvz_get_ic_rate( state, abs_level, ctx_num_gt1, ctx_num_gt2, ctx_num_par,
                                    abs_go_rice, reg_bins, type);
    cur_cost        += cur_cost_sig;
@ -452,7 +453,7 @@ INLINE uint32_t kvz_get_coded_level( encoder_state_t * const state, double *code
 *
 * From HM 12.0
 */
-static double get_rate_last(const encoder_state_t * const state,
+static double get_rate_last(double lambda,
                            const uint32_t  pos_x, const uint32_t pos_y,
                            int32_t* last_x_bits, int32_t* last_y_bits)
 {
@ -465,7 +466,7 @@ static double get_rate_last(const encoder_state_t * const state,
  if( ctx_y > 3 ) {
    uiCost += CTX_FRAC_ONE_BIT * ((ctx_y - 2) >> 1);
  }
-  return state->lambda * uiCost;
+  return lambda * uiCost;
 }

 static void calc_last_bits(encoder_state_t * const state, int32_t width, int32_t height, int8_t type,
@ -514,16 +515,18 @@ void kvz_rdoq_sign_hiding(
    const struct sh_rates_t *const sh_rates,
    const int32_t last_pos,
    const coeff_t *const coeffs,
-    coeff_t *const quant_coeffs)
+    coeff_t *const quant_coeffs, 
+    const int8_t type)
 {
  const encoder_control_t * const ctrl = state->encoder_control;
+  const double lambda = type ? state->c_lambda : state->lambda;

  int inv_quant = kvz_g_inv_quant_scales[qp_scaled % 6];
  // This somehow scales quant_delta into fractional bits. Instead of the bits
  // being multiplied by lambda, the residual is divided by it, or something
  // like that.
  const int64_t rd_factor = (inv_quant * inv_quant * (1 << (2 * (qp_scaled / 6)))
-                      / state->lambda / 16 / (1 << (2 * (ctrl->bitdepth - 8))) + 0.5);
+                      / lambda / 16 / (1 << (2 * (ctrl->bitdepth - 8))) + 0.5);
  const int last_cg = (last_pos - 1) >> LOG2_SCAN_SET_SIZE;

  for (int32_t cg_scan = last_cg; cg_scan >= 0; cg_scan--) {
@ -665,10 +668,12 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
  const uint32_t log2_block_size   = kvz_g_convert_to_bit[ width ] + 2;
  int32_t  scalinglist_type= (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]);

-  int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth - 8) * 6);
+  int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]);
  
  int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift;

+  const double lambda = type ? state->c_lambda : state->lambda;
+
  const int32_t *quant_coeff  = encoder->scaling_list.quant_coeff[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled%6];
  const double *err_scale     = encoder->scaling_list.error_scale[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled%6];

@ -866,7 +871,7 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
      if (sig_coeffgroup_flag[cg_blkpos] == 0) {
        uint32_t ctx_sig  = kvz_context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x,
                                                        cg_pos_y, cg_width);
-        cost_coeffgroup_sig[cg_scanpos] = state->lambda *CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
+        cost_coeffgroup_sig[cg_scanpos] = lambda *CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
        base_cost += cost_coeffgroup_sig[cg_scanpos]  - rd_stats.sig_cost;
      } else {
        if (cg_scanpos < cg_last_scanpos){
@ -883,9 +888,9 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
          ctx_sig = kvz_context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x,
            cg_pos_y, cg_width);

-          cost_coeffgroup_sig[cg_scanpos] = state->lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 1);
+          cost_coeffgroup_sig[cg_scanpos] = lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 1);
          base_cost += cost_coeffgroup_sig[cg_scanpos];
-          cost_zero_cg += state->lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 0);
+          cost_zero_cg += lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 0);

          // try to convert the current coeff group from non-zero to all-zero
          cost_zero_cg += rd_stats.uncoded_dist;          // distortion for resetting non-zero levels to zero levels
@ -898,7 +903,7 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
            sig_coeffgroup_flag[cg_blkpos] = 0;
            base_cost = cost_zero_cg;

-            cost_coeffgroup_sig[cg_scanpos] = state->lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 0);
+            cost_coeffgroup_sig[cg_scanpos] = lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 0);

            // reset coeffs to 0 in this block
            for (int32_t scanpos_in_cg = cg_size - 1; scanpos_in_cg >= 0; scanpos_in_cg--) {
@ -925,14 +930,14 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
  int32_t best_last_idx_p1 = 0;

  if( block_type != CU_INTRA && !type ) {
-    best_cost  = block_uncoded_cost +  state->lambda * CTX_ENTROPY_BITS(&(cabac->ctx.cu_qt_root_cbf_model),0);
-    base_cost +=   state->lambda * CTX_ENTROPY_BITS(&(cabac->ctx.cu_qt_root_cbf_model),1);
+    best_cost  = block_uncoded_cost +  lambda * CTX_ENTROPY_BITS(&(cabac->ctx.cu_qt_root_cbf_model),0);
+    base_cost +=   lambda * CTX_ENTROPY_BITS(&(cabac->ctx.cu_qt_root_cbf_model),1);
  } else {
    // ToDo: update for VVC contexts
    cabac_ctx_t* base_cbf_model = type?(cabac->ctx.qt_cbf_model_cb):(cabac->ctx.qt_cbf_model_luma);
    ctx_cbf    = ( type ? tr_depth : !tr_depth);
-    best_cost  = block_uncoded_cost +  state->lambda * CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],0);
-    base_cost +=   state->lambda * CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],1);
+    best_cost  = block_uncoded_cost +  lambda * CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],0);
+    base_cost +=   lambda * CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],1);
  }

  for ( int32_t cg_scanpos = cg_last_scanpos; cg_scanpos >= 0; cg_scanpos--) {
@ -949,7 +954,7 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
          uint32_t   pos_y = blkpos >> log2_block_size;
          uint32_t   pos_x = blkpos - ( pos_y << log2_block_size );

-          double cost_last = get_rate_last(state, pos_x, pos_y, last_x_bits,last_y_bits );
+          double cost_last = get_rate_last(lambda, pos_x, pos_y, last_x_bits,last_y_bits );
          double totalCost = base_cost + cost_last - cost_sig[ scanpos ];

          if( totalCost < best_cost ) {
@ -983,7 +988,7 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
  }

  if (encoder->cfg.signhide_enable && abs_sum >= 2) {
-    kvz_rdoq_sign_hiding(state, qp_scaled, scan, &sh_rates, best_last_idx_p1, coef, dest_coeff);
+    kvz_rdoq_sign_hiding(state, qp_scaled, scan, &sh_rates, best_last_idx_p1, coef, dest_coeff, type);
  }
 }

--- a/src/strategies/avx2/quant-avx2.c
+++ b/src/strategies/avx2/quant-avx2.c
@ -368,7 +368,7 @@ void kvz_quant_avx2(const encoder_state_t * const state, const coeff_t * __restr
  const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2;
  const uint32_t * const scan = kvz_g_sig_last_scan[scan_idx][log2_block_size - 1];

-  int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth - 8) * 6);
+  int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]);
  uint32_t log2_tr_width = kvz_math_floor_log2(height);
  uint32_t log2_tr_height = kvz_math_floor_log2(width);
  const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]);
@ -738,7 +738,7 @@ void kvz_dequant_avx2(const encoder_state_t * const state, coeff_t *q_coef, coef
  int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((kvz_math_floor_log2(width) + kvz_math_floor_log2(height)) >> 1); // Represents scaling through forward transform


-  int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth-8)*6);
+  int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth-8)*6, encoder->qp_map[0]);

  shift = 20 - QUANT_SHIFT - transform_shift;

--- a/src/strategies/generic/quant-generic.c
+++ b/src/strategies/generic/quant-generic.c
@ -42,7 +42,7 @@ void kvz_quant_generic(const encoder_state_t * const state, coeff_t *coef, coeff
  const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2;
  const uint32_t * const scan = kvz_g_sig_last_scan[scan_idx][log2_block_size - 1];

-  int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth - 8) * 6);
+  int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]);
  uint32_t log2_tr_width = kvz_math_floor_log2(height);
  uint32_t log2_tr_height = kvz_math_floor_log2(width);
  const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]);
@ -295,7 +295,7 @@ void kvz_dequant_generic(const encoder_state_t * const state, coeff_t *q_coef, c
  int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((kvz_math_floor_log2(width) + kvz_math_floor_log2(height)) >> 1); // Represents scaling through forward transform


-  int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth-8)*6);
+  int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth-8)*6, encoder->qp_map[0]);

  shift = 20 - QUANT_SHIFT - transform_shift;

--- a/src/transform.c
+++ b/src/transform.c
@ -126,17 +126,18 @@ static void rdpcm(const int width,
 * \brief Get scaled QP used in quantization
 *
 */
-int32_t kvz_get_scaled_qp(int8_t type, int8_t qp, int8_t qp_offset)
+int32_t kvz_get_scaled_qp(int8_t type, int8_t qp, int8_t qp_offset, int8_t const * const chroma_scale)
 {
  int32_t qp_scaled = 0;
  if(type == 0) {
    qp_scaled = qp + qp_offset;
  } else {
    qp_scaled = CLIP(-qp_offset, 57, qp);
-    if(true||qp_scaled < 0) { //TODO: Enable chroma QP scaling in the SPS headers
+    if (chroma_scale) {
+      qp_scaled = chroma_scale[qp] + qp_offset;
+    }
+    else {
      qp_scaled = qp_scaled + qp_offset;
-    } else {
-      qp_scaled = kvz_g_chroma_scale[qp_scaled] + qp_offset;
    } 
  }
  return qp_scaled;
--- a/src/transform.h
+++ b/src/transform.h
@ -53,7 +53,7 @@ void kvz_itransform2d(const encoder_control_t * const encoder,
                      const cu_info_t *tu);


-int32_t kvz_get_scaled_qp(int8_t type, int8_t qp, int8_t qp_offset);
+int32_t kvz_get_scaled_qp(int8_t type, int8_t qp, int8_t qp_offset, int8_t const* const chroma_scale);

 void kvz_quantize_lcu_residual(encoder_state_t *state,
                               bool luma,