Add an option to skip CABAC for residual costs

Adds the command line option --fast-residual-cost=<limit>. When QP is below the limit, the cost of coding the residual coefficients is estimated from the sum of absolute coefficients (scaled by a QP-dependent factor) instead of being computed with CABAC. Skipping CABAC is not worth it at high QPs: there are fewer coefficients, so CABAC is not as slow.
2024-11-23 18:14:06 +00:00 · 2018-06-19 13:39:23 +03:00 · 2018-06-19 13:39:23 +03:00 · c438bb4a19
parent cbb5b20449
commit c438bb4a19
9 changed files with 55 additions and 13 deletions
--- a/README.md
+++ b/README.md
@ -195,6 +195,8 @@ Compression tools:
                                   - off: Don't terminate early.
                                   - on: Terminate early.
                                   - sensitive: Terminate even earlier.
+      --fast-residual-cost <int> : Skip CABAC cost for residual coefficients
+                                   when QP is below the limit. [0]
      --(no-)intra-rdo-et    : Check intra modes in rdo stage only until
                               a zero coefficient CU is found. [disabled]
      --(no-)implicit-rdpcm  : Implicit residual DPCM. Currently only supported
--- a/doc/kvazaar.1
+++ b/doc/kvazaar.1
@ -1,4 +1,4 @@
-.TH KVAZAAR "1" "June 2018" "kvazaar v1.2.0" "User Commands"
+.TH KVAZAAR "1" "July 2018" "kvazaar v1.2.0" "User Commands"
 .SH NAME
 kvazaar \- open source HEVC encoder
 .SH SYNOPSIS
@ -257,6 +257,10 @@ Motion estimation termination [on]
    \- on: Terminate early.
    \- sensitive: Terminate even earlier.
 .TP
+\fB\-\-fast\-residual\-cost <int>
+Skip CABAC cost for residual coefficients
+    when QP is below the limit. [0]
+.TP
 \fB\-\-(no\-)intra\-rdo\-et   
 Check intra modes in rdo stage only until
 a zero coefficient CU is found. [disabled]
--- a/src/cfg.c
+++ b/src/cfg.c
@ -79,6 +79,7 @@ int kvz_config_init(kvz_config *cfg)
  cfg->lossless        = false;
  cfg->tmvp_enable     = true;
  cfg->implicit_rdpcm  = false;
+  cfg->fast_residual_cost_limit = 0;

  cfg->cu_split_termination = KVZ_CU_SPLIT_TERMINATION_ZERO;

@ -1194,6 +1195,8 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)

    cfg->me_max_steps = (uint32_t)steps;
  }
+  else if (OPT("fast-residual-cost"))
+    cfg->fast_residual_cost_limit = atoi(value);
  else {
    return 0;
  }
--- a/src/cli.c
+++ b/src/cli.c
@ -129,6 +129,7 @@ static const struct option long_options[] = {
  { "force-level",        required_argument, NULL, 0 },
  { "high-tier",                no_argument, NULL, 0 },
  { "me-steps",           required_argument, NULL, 0 },
+  { "fast-residual-cost", required_argument, NULL, 0 },
  {0, 0, 0, 0}
 };

@ -472,6 +473,8 @@ void print_help(void)
    "                                   - off: Don't terminate early.\n"
    "                                   - on: Terminate early.\n"
    "                                   - sensitive: Terminate even earlier.\n"
+    "      --fast-residual-cost <int> : Skip CABAC cost for residual coefficients\n"
+    "                                   when QP is below the limit. [0]\n"
    "      --(no-)intra-rdo-et    : Check intra modes in rdo stage only until\n"
    "                               a zero coefficient CU is found. [disabled]\n"
    "      --(no-)implicit-rdpcm  : Implicit residual DPCM. Currently only supported\n"
--- a/src/kvazaar.h
+++ b/src/kvazaar.h
@ -365,6 +365,9 @@ typedef struct kvz_config

  /** \brief Maximum steps that hexagonal and diagonal motion estimation can use. -1 to disable */
  uint32_t me_max_steps;
+
+  /** \brief Minimum QP that uses CABAC for residual cost instead of a fast estimate. */
+  int8_t fast_residual_cost_limit;
 } kvz_config;

 /**
--- a/src/rdo.c
+++ b/src/rdo.c
@ -41,6 +41,9 @@
 #define LOG2_SCAN_SET_SIZE    4
 #define SBH_THRESHOLD         4

+static const double COEFF_COST_QP_FACTOR = 0.044407704;
+static const double COEFF_COST_BIAS      = 0.557323653;
+
 const uint32_t kvz_g_go_rice_range[5] = { 7, 14, 26, 46, 78 };
 const uint32_t kvz_g_go_rice_prefix_len[5] = { 8, 7, 6, 5, 4 };

@ -152,7 +155,7 @@ struct sh_rates_t {
 *
 * \returns bits needed to code input coefficients
 */
-uint32_t kvz_get_coeff_cabac_cost(
+static INLINE uint32_t get_coeff_cabac_cost(
    const encoder_state_t * const state,
    const coeff_t *coeff,
    int32_t width,
@ -193,6 +196,31 @@ uint32_t kvz_get_coeff_cabac_cost(
  return (23 - cabac_copy.bits_left) + (cabac_copy.num_buffered_bytes << 3);
 }

+/**
+ * \brief Estimate bitcost for coding coefficients, via CABAC or a fast model.
+ * \param state      encoder state; its qp and cfg.fast_residual_cost_limit pick the method
+ * \param coeff      coefficient array
+ * \param width      coeff block width; width * width is passed to kvz_coeff_abs_sum
+ * \param type       data type (0 == luma), forwarded to the CABAC cost function
+ * \param scan_mode  scan mode, forwarded to the CABAC cost function
+ * \returns          number of bits needed to code coefficients
+ */
+uint32_t kvz_get_coeff_cost(const encoder_state_t * const state,
+                            const coeff_t *coeff,
+                            int32_t width,
+                            int32_t type,
+                            int8_t scan_mode)
+{
+  if (state->qp >= state->encoder_control->cfg.fast_residual_cost_limit) {
+    return get_coeff_cabac_cost(state, coeff, width, type, scan_mode);
+
+  } else {
+    // Fast path: bits = abs sum * (qp * factor + bias); +0.5 rounds before the truncating cast.
+    const uint32_t sum = kvz_coeff_abs_sum(coeff, width * width);
+    return (uint32_t)(sum * (state->qp * COEFF_COST_QP_FACTOR + COEFF_COST_BIAS) + 0.5);
+  }
+}
+
 #define COEF_REMAIN_BIN_REDUCTION 3
 /** Calculates the cost for specific absolute transform level
 * \param abs_level scaled quantized level
--- a/src/rdo.h
+++ b/src/rdo.h
@ -39,12 +39,11 @@ extern const uint32_t kvz_g_go_rice_prefix_len[5];
 void  kvz_rdoq(encoder_state_t *state, coeff_t *coef, coeff_t *dest_coeff, int32_t width,
           int32_t height, int8_t type, int8_t scan_mode, int8_t block_type, int8_t tr_depth);

-uint32_t kvz_get_coeff_cabac_cost(
-  const encoder_state_t * const state,
-  const coeff_t *coeff,
-  int32_t width,
-  int32_t type,
-  int8_t scan_mode);
+uint32_t kvz_get_coeff_cost(const encoder_state_t * const state,
+                            const coeff_t *coeff,
+                            int32_t width,
+                            int32_t type,
+                            int8_t scan_mode);

 int32_t kvz_get_ic_rate(encoder_state_t *state, uint32_t abs_level, uint16_t ctx_num_one, uint16_t ctx_num_abs,
                    uint16_t abs_go_rice, uint32_t c1_idx, uint32_t c2_idx, int8_t type);
--- a/src/search.c
+++ b/src/search.c
@ -263,7 +263,7 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
    int8_t luma_scan_mode = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth);
    const coeff_t *coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)];

-    coeff_bits += kvz_get_coeff_cabac_cost(state, coeffs, width, 0, luma_scan_mode);
+    coeff_bits += kvz_get_coeff_cost(state, coeffs, width, 0, luma_scan_mode);
  }

  double bits = tr_tree_bits + coeff_bits;
@ -332,8 +332,8 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
    int8_t scan_order = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth);
    const int index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y);

-    coeff_bits += kvz_get_coeff_cabac_cost(state, &lcu->coeff.u[index], width, 2, scan_order);
-    coeff_bits += kvz_get_coeff_cabac_cost(state, &lcu->coeff.v[index], width, 2, scan_order);
+    coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.u[index], width, 2, scan_order);
+    coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.v[index], width, 2, scan_order);
  }

  double bits = tr_tree_bits + coeff_bits;
--- a/src/transform.c
+++ b/src/transform.c
@ -248,14 +248,14 @@ int kvz_quantize_residual_trskip(
      0, in_stride, 4,
      ref_in, pred_in, noskip.rec, noskip.coeff);
  noskip.cost = kvz_pixels_calc_ssd(ref_in, noskip.rec, in_stride, 4, 4);
-  noskip.cost += kvz_get_coeff_cabac_cost(state, noskip.coeff, 4, 0, scan_order) * bit_cost;
+  noskip.cost += kvz_get_coeff_cost(state, noskip.coeff, 4, 0, scan_order) * bit_cost;

  skip.has_coeffs = kvz_quantize_residual(
    state, cur_cu, width, color, scan_order,
    1, in_stride, 4,
    ref_in, pred_in, skip.rec, skip.coeff);
  skip.cost = kvz_pixels_calc_ssd(ref_in, skip.rec, in_stride, 4, 4);
-  skip.cost += kvz_get_coeff_cabac_cost(state, skip.coeff, 4, 0, scan_order) * bit_cost;
+  skip.cost += kvz_get_coeff_cost(state, skip.coeff, 4, 0, scan_order) * bit_cost;

  if (noskip.cost <= skip.cost) {
    *trskip_out = 0;