From c438bb4a193192d0059eb57b1b21c6fd5786c5a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arttu=20Yl=C3=A4-Outinen?= Date: Tue, 19 Jun 2018 13:39:23 +0300 Subject: [PATCH] Add an option to skip CABAC for residual costs Adds command line option --fast-residual-cost=<limit>. When QP is below the limit, estimates the cost of coding the residual coefficients from the sum of absolute coefficients. Skipping CABAC is not worth it with high QPs because there are fewer coefficients so CABAC is not as slow. --- README.md | 2 ++ doc/kvazaar.1 | 6 +++++- src/cfg.c | 3 +++ src/cli.c | 3 +++ src/kvazaar.h | 3 +++ src/rdo.c | 30 +++++++++++++++++++++++++++++- src/rdo.h | 11 +++++------ src/search.c | 6 +++--- src/transform.c | 4 ++-- 9 files changed, 55 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 968e98ea..dd23dafc 100644 --- a/README.md +++ b/README.md @@ -195,6 +195,8 @@ Compression tools: - off: Don't terminate early. - on: Terminate early. - sensitive: Terminate even earlier. + --fast-residual-cost : Skip CABAC cost for residual coefficients + when QP is below the limit. [0] --(no-)intra-rdo-et : Check intra modes in rdo stage only until a zero coefficient CU is found. [disabled] --(no-)implicit-rdpcm : Implicit residual DPCM. Currently only supported diff --git a/doc/kvazaar.1 b/doc/kvazaar.1 index c936d27c..6e4411b6 100644 --- a/doc/kvazaar.1 +++ b/doc/kvazaar.1 @@ -1,4 +1,4 @@ -.TH KVAZAAR "1" "June 2018" "kvazaar v1.2.0" "User Commands" +.TH KVAZAAR "1" "July 2018" "kvazaar v1.2.0" "User Commands" .SH NAME kvazaar \- open source HEVC encoder .SH SYNOPSIS @@ -257,6 +257,10 @@ Motion estimation termination [on] \- on: Terminate early. \- sensitive: Terminate even earlier. .TP +\fB\-\-fast\-residual\-cost +Skip CABAC cost for residual coefficients + when QP is below the limit. [0] +.TP \fB\-\-(no\-)intra\-rdo\-et Check intra modes in rdo stage only until a zero coefficient CU is found. 
[disabled] diff --git a/src/cfg.c b/src/cfg.c index 1701826b..98eb2448 100644 --- a/src/cfg.c +++ b/src/cfg.c @@ -79,6 +79,7 @@ int kvz_config_init(kvz_config *cfg) cfg->lossless = false; cfg->tmvp_enable = true; cfg->implicit_rdpcm = false; + cfg->fast_residual_cost_limit = 0; cfg->cu_split_termination = KVZ_CU_SPLIT_TERMINATION_ZERO; @@ -1194,6 +1195,8 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) cfg->me_max_steps = (uint32_t)steps; } + else if (OPT("fast-residual-cost")) + cfg->fast_residual_cost_limit = atoi(value); else { return 0; } diff --git a/src/cli.c b/src/cli.c index 22c78b35..57393ada 100644 --- a/src/cli.c +++ b/src/cli.c @@ -129,6 +129,7 @@ static const struct option long_options[] = { { "force-level", required_argument, NULL, 0 }, { "high-tier", no_argument, NULL, 0 }, { "me-steps", required_argument, NULL, 0 }, + { "fast-residual-cost", required_argument, NULL, 0 }, {0, 0, 0, 0} }; @@ -472,6 +473,8 @@ void print_help(void) " - off: Don't terminate early.\n" " - on: Terminate early.\n" " - sensitive: Terminate even earlier.\n" + " --fast-residual-cost : Skip CABAC cost for residual coefficients\n" + " when QP is below the limit. [0]\n" " --(no-)intra-rdo-et : Check intra modes in rdo stage only until\n" " a zero coefficient CU is found. [disabled]\n" " --(no-)implicit-rdpcm : Implicit residual DPCM. Currently only supported\n" diff --git a/src/kvazaar.h b/src/kvazaar.h index acc0a37d..6f9e9107 100644 --- a/src/kvazaar.h +++ b/src/kvazaar.h @@ -365,6 +365,9 @@ typedef struct kvz_config /** \brief Maximum steps that hexagonal and diagonal motion estimation can use. -1 to disable */ uint32_t me_max_steps; + + /** \brief Minimum QP that uses CABAC for residual cost instead of a fast estimate. 
*/ + int8_t fast_residual_cost_limit; } kvz_config; /** diff --git a/src/rdo.c b/src/rdo.c index 813aeede..fbf1ecb2 100644 --- a/src/rdo.c +++ b/src/rdo.c @@ -41,6 +41,9 @@ #define LOG2_SCAN_SET_SIZE 4 #define SBH_THRESHOLD 4 +static const double COEFF_COST_QP_FACTOR = 0.044407704; +static const double COEFF_COST_BIAS = 0.557323653; + const uint32_t kvz_g_go_rice_range[5] = { 7, 14, 26, 46, 78 }; const uint32_t kvz_g_go_rice_prefix_len[5] = { 8, 7, 6, 5, 4 }; @@ -152,7 +155,7 @@ struct sh_rates_t { * * \returns bits needed to code input coefficients */ -uint32_t kvz_get_coeff_cabac_cost( +static INLINE uint32_t get_coeff_cabac_cost( const encoder_state_t * const state, const coeff_t *coeff, int32_t width, @@ -193,6 +196,31 @@ uint32_t kvz_get_coeff_cabac_cost( return (23 - cabac_copy.bits_left) + (cabac_copy.num_buffered_bytes << 3); } +/** + * \brief Estimate bitcost for coding coefficients. + * + * \param coeff coefficient array + * \param width coeff block width + * \param type data type (0 == luma) + * + * \returns number of bits needed to code coefficients + */ +uint32_t kvz_get_coeff_cost(const encoder_state_t * const state, + const coeff_t *coeff, + int32_t width, + int32_t type, + int8_t scan_mode) +{ + if (state->qp >= state->encoder_control->cfg.fast_residual_cost_limit) { + return get_coeff_cabac_cost(state, coeff, width, type, scan_mode); + + } else { + // Estimate coeff coding cost based on QP and sum of absolute coeffs. 
+ const uint32_t sum = kvz_coeff_abs_sum(coeff, width * width); + return (uint32_t)(sum * (state->qp * COEFF_COST_QP_FACTOR + COEFF_COST_BIAS) + 0.5); + } +} + #define COEF_REMAIN_BIN_REDUCTION 3 /** Calculates the cost for specific absolute transform level * \param abs_level scaled quantized level diff --git a/src/rdo.h b/src/rdo.h index 72450fb7..c8490f39 100644 --- a/src/rdo.h +++ b/src/rdo.h @@ -39,12 +39,11 @@ extern const uint32_t kvz_g_go_rice_prefix_len[5]; void kvz_rdoq(encoder_state_t *state, coeff_t *coef, coeff_t *dest_coeff, int32_t width, int32_t height, int8_t type, int8_t scan_mode, int8_t block_type, int8_t tr_depth); -uint32_t kvz_get_coeff_cabac_cost( - const encoder_state_t * const state, - const coeff_t *coeff, - int32_t width, - int32_t type, - int8_t scan_mode); +uint32_t kvz_get_coeff_cost(const encoder_state_t * const state, + const coeff_t *coeff, + int32_t width, + int32_t type, + int8_t scan_mode); int32_t kvz_get_ic_rate(encoder_state_t *state, uint32_t abs_level, uint16_t ctx_num_one, uint16_t ctx_num_abs, uint16_t abs_go_rice, uint32_t c1_idx, uint32_t c2_idx, int8_t type); diff --git a/src/search.c b/src/search.c index 12c76237..f89f7383 100644 --- a/src/search.c +++ b/src/search.c @@ -263,7 +263,7 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state, int8_t luma_scan_mode = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth); const coeff_t *coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)]; - coeff_bits += kvz_get_coeff_cabac_cost(state, coeffs, width, 0, luma_scan_mode); + coeff_bits += kvz_get_coeff_cost(state, coeffs, width, 0, luma_scan_mode); } double bits = tr_tree_bits + coeff_bits; @@ -332,8 +332,8 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state, int8_t scan_order = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth); const int index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y); - coeff_bits += kvz_get_coeff_cabac_cost(state, &lcu->coeff.u[index], width, 
2, scan_order); - coeff_bits += kvz_get_coeff_cabac_cost(state, &lcu->coeff.v[index], width, 2, scan_order); + coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.u[index], width, 2, scan_order); + coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.v[index], width, 2, scan_order); } double bits = tr_tree_bits + coeff_bits; diff --git a/src/transform.c b/src/transform.c index 92f4c88e..26876efe 100644 --- a/src/transform.c +++ b/src/transform.c @@ -248,14 +248,14 @@ int kvz_quantize_residual_trskip( 0, in_stride, 4, ref_in, pred_in, noskip.rec, noskip.coeff); noskip.cost = kvz_pixels_calc_ssd(ref_in, noskip.rec, in_stride, 4, 4); - noskip.cost += kvz_get_coeff_cabac_cost(state, noskip.coeff, 4, 0, scan_order) * bit_cost; + noskip.cost += kvz_get_coeff_cost(state, noskip.coeff, 4, 0, scan_order) * bit_cost; skip.has_coeffs = kvz_quantize_residual( state, cur_cu, width, color, scan_order, 1, in_stride, 4, ref_in, pred_in, skip.rec, skip.coeff); skip.cost = kvz_pixels_calc_ssd(ref_in, skip.rec, in_stride, 4, 4); - skip.cost += kvz_get_coeff_cabac_cost(state, skip.coeff, 4, 0, scan_order) * bit_cost; + skip.cost += kvz_get_coeff_cost(state, skip.coeff, 4, 0, scan_order) * bit_cost; if (noskip.cost <= skip.cost) { *trskip_out = 0;