From 6c4f2d196a2ac333e80c75478c8db671750bfba1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arttu=20Yl=C3=A4-Outinen?= Date: Sun, 21 Aug 2016 12:27:58 +0900 Subject: [PATCH 01/12] Move fields from encoder_state_t to frame Moves fields prepared and frame_done from encoder_state_t to encoder_state_config_frame_t. --- src/encoder_state-ctors_dtors.c | 5 +++-- src/encoderstate.c | 8 ++++---- src/encoderstate.h | 26 +++++++++++++------------- src/kvazaar.c | 10 +++++----- 4 files changed, 25 insertions(+), 24 deletions(-) diff --git a/src/encoder_state-ctors_dtors.c b/src/encoder_state-ctors_dtors.c index 24b7add2..e53ac4e4 100644 --- a/src/encoder_state-ctors_dtors.c +++ b/src/encoder_state-ctors_dtors.c @@ -48,8 +48,11 @@ static int encoder_state_config_frame_init(encoder_state_t * const state) { state->frame->poc = 0; state->frame->total_bits_coded = 0; state->frame->cur_gop_bits_coded = 0; + state->frame->prepared = 0; + state->frame->done = 1; state->frame->rc_alpha = 3.2003; state->frame->rc_beta = -1.367; + return 1; } @@ -303,8 +306,6 @@ int kvz_encoder_state_init(encoder_state_t * const child_state, encoder_state_t child_state->children[0].encoder_control = NULL; child_state->tqj_bitstream_written = NULL; child_state->tqj_recon_done = NULL; - child_state->prepared = 0; - child_state->frame_done = 1; if (!parent_state) { const encoder_control_t * const encoder = child_state->encoder_control; diff --git a/src/encoderstate.c b/src/encoderstate.c index 172c101a..bd1cbf99 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -967,7 +967,7 @@ void kvz_encode_one_frame(encoder_state_t * const state, kvz_picture* frame) assert(!state->tqj_bitstream_written); state->tqj_bitstream_written = job; } - state->frame_done = 0; + state->frame->done = 0; //kvz_threadqueue_flush(main_state->encoder_control->threadqueue); } @@ -985,7 +985,7 @@ void kvz_encoder_prepare(encoder_state_t *state) const encoder_control_t * const encoder = state->encoder_control; // The previous frame 
must be done before the next one is started. - assert(state->frame_done); + assert(state->frame->done); if (state->frame->num == -1) { // We're at the first frame, so don't care about all this stuff. @@ -993,7 +993,7 @@ void kvz_encoder_prepare(encoder_state_t *state) state->frame->poc = 0; assert(!state->tile->frame->source); assert(!state->tile->frame->rec); - state->prepared = 1; + state->frame->prepared = 1; return; } @@ -1034,7 +1034,7 @@ void kvz_encoder_prepare(encoder_state_t *state) state->frame->num = prev_state->frame->num + 1; state->frame->poc = prev_state->frame->poc + 1; - state->prepared = 1; + state->frame->prepared = 1; } coeff_scan_order_t kvz_get_scan_order(int8_t cu_type, int intra_mode, int depth) diff --git a/src/encoderstate.h b/src/encoderstate.h index de495a98..321f8137 100644 --- a/src/encoderstate.h +++ b/src/encoderstate.h @@ -88,6 +88,19 @@ typedef struct encoder_state_config_frame_t { double rc_alpha; double rc_beta; + /** + * \brief Indicates that this encoder state is ready for encoding the + * next frame i.e. kvz_encoder_prepare has been called. + */ + bool prepared; + + /** + * \brief Indicates that the previous frame has been encoded and the + * encoded data written and the encoding the next frame has not been + * started yet. + */ + bool done; + } encoder_state_config_frame_t; typedef struct encoder_state_config_tile_t { @@ -185,19 +198,6 @@ typedef struct encoder_state_t { bitstream_t stream; cabac_data_t cabac; - /** - * \brief Indicates that this encoder state is ready for encoding the - * next frame i.e. kvz_encoder_prepare has been called. - */ - int prepared; - - /** - * \brief Indicates that the previous frame has been encoded and the - * encoded data written and the encoding the next frame has not been - * started yet. 
- */ - int frame_done; - uint32_t stats_bitstream_length; //Bitstream length written in bytes //Jobs to wait for diff --git a/src/kvazaar.c b/src/kvazaar.c index d5d3dcac..b18e18ac 100644 --- a/src/kvazaar.c +++ b/src/kvazaar.c @@ -213,7 +213,7 @@ static int kvazaar_encode(kvz_encoder *enc, encoder_state_t *state = &enc->states[enc->cur_state_num]; - if (!state->prepared) { + if (!state->frame->prepared) { kvz_encoder_prepare(state); } @@ -235,13 +235,13 @@ static int kvazaar_encode(kvz_encoder *enc, return 1; } - if (!state->frame_done) { + if (!state->frame->done) { // We started encoding a frame; move to the next encoder state. enc->cur_state_num = (enc->cur_state_num + 1) % (enc->num_encoder_states); } encoder_state_t *output_state = &enc->states[enc->out_state_num]; - if (!output_state->frame_done && + if (!output_state->frame->done && (pic_in == NULL || enc->cur_state_num == enc->out_state_num)) { kvz_threadqueue_waitfor(enc->control->threadqueue, output_state->tqj_bitstream_written); @@ -256,8 +256,8 @@ static int kvazaar_encode(kvz_encoder *enc, if (src_out) *src_out = kvz_image_copy_ref(output_state->tile->frame->source); if (info_out) set_frame_info(info_out, output_state); - output_state->frame_done = 1; - output_state->prepared = 0; + output_state->frame->done = 1; + output_state->frame->prepared = 0; enc->frames_done += 1; enc->out_state_num = (enc->out_state_num + 1) % (enc->num_encoder_states); From 435c3873577d5cfaedbfa63525a0747b75073508 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arttu=20Yl=C3=A4-Outinen?= Date: Wed, 14 Sep 2016 12:52:56 +0900 Subject: [PATCH 02/12] Refactor rate control - Defines MIN_LAMBDA and MAX_LAMBDA constants. - Moves resetting state->frame->cur_gop_bits_coded to rate_control.c. - Changes gop_allocate_bits to return the number of bits allocated like pic_allocate_bits does. 
--- src/encoder_state-bitstream.c | 6 +--- src/rate_control.c | 61 ++++++++++++++++++----------------- 2 files changed, 32 insertions(+), 35 deletions(-) diff --git a/src/encoder_state-bitstream.c b/src/encoder_state-bitstream.c index 9fe424f6..7de90bb4 100644 --- a/src/encoder_state-bitstream.c +++ b/src/encoder_state-bitstream.c @@ -972,11 +972,7 @@ static void encoder_state_write_bitstream_main(encoder_state_t * const state) } state->frame->total_bits_coded += newpos - curpos; - if (encoder->cfg->gop_len > 0 && state->frame->gop_offset > 0) { - state->frame->cur_gop_bits_coded = state->previous_encoder_state->frame->cur_gop_bits_coded; - } else { - state->frame->cur_gop_bits_coded = 0; - } + state->frame->cur_gop_bits_coded = state->previous_encoder_state->frame->cur_gop_bits_coded; state->frame->cur_gop_bits_coded += newpos - curpos; } diff --git a/src/rate_control.c b/src/rate_control.c index 07173198..6e86f3f1 100644 --- a/src/rate_control.c +++ b/src/rate_control.c @@ -27,6 +27,8 @@ static const int SMOOTHING_WINDOW = 40; +static const double MIN_LAMBDA = 0.1; +static const double MAX_LAMBDA = 10000; /** * \brief Update alpha and beta parameters. @@ -45,7 +47,7 @@ static void update_rc_parameters(encoder_state_t * state) const double alpha_old = state->frame->rc_alpha; const double beta_old = state->frame->rc_beta; // lambda computed from real bpp - const double lambda_comp = CLIP(0.1, 10000, alpha_old * pow(bpp, beta_old)); + const double lambda_comp = CLIP(MIN_LAMBDA, MAX_LAMBDA, alpha_old * pow(bpp, beta_old)); // lambda used in encoding const double lambda_real = state->frame->cur_lambda_cost; const double lambda_log_ratio = log(lambda_real) - log(lambda_comp); @@ -59,13 +61,10 @@ static void update_rc_parameters(encoder_state_t * state) /** * \brief Allocate bits for the current GOP. - * \param state the main encoder state - * - * If GOPs are not used, allocates bits for a single picture. - * - * Sets the cur_gop_target_bits of the encoder state. 
+ * \param state the main encoder state + * \return target number of bits */ -static void gop_allocate_bits(encoder_state_t * const state) +static double gop_allocate_bits(encoder_state_t * const state) { const encoder_control_t * const encoder = state->encoder_control; @@ -83,21 +82,35 @@ static void gop_allocate_bits(encoder_state_t * const state) pictures_coded -= gop_offset + 1; } + // Equation 12 from https://doi.org/10.1109/TIP.2014.2336550 double gop_target_bits = (encoder->target_avg_bppic * (pictures_coded + SMOOTHING_WINDOW) - bits_coded) * MAX(1, encoder->cfg->gop_len) / SMOOTHING_WINDOW; - state->frame->cur_gop_target_bits = MAX(200, gop_target_bits); + // Allocate at least 200 bits for each GOP like HM does. + return MAX(200, gop_target_bits); } /** * Allocate bits for the current picture. - * \param state the main encoder state - * \return target number of bits + * \param state the main encoder state + * \return target number of bits */ -static double pic_allocate_bits(const encoder_state_t * const state) +static double pic_allocate_bits(encoder_state_t * const state) { const encoder_control_t * const encoder = state->encoder_control; + if (encoder->cfg->gop_len == 0 || + state->frame->gop_offset == 0 || + state->frame->num == 0) + { + // A new GOP starts at this frame. + state->frame->cur_gop_target_bits = gop_allocate_bits(state); + state->frame->cur_gop_bits_coded = 0; + } else { + state->frame->cur_gop_target_bits = + state->previous_encoder_state->frame->cur_gop_target_bits; + } + if (encoder->cfg->gop_len <= 0) { return state->frame->cur_gop_target_bits; } @@ -105,13 +118,14 @@ static double pic_allocate_bits(const encoder_state_t * const state) const double pic_weight = encoder->gop_layer_weights[ encoder->cfg->gop[state->frame->gop_offset].layer - 1]; double pic_target_bits = state->frame->cur_gop_target_bits * pic_weight; + // Allocate at least 100 bits for each picture like HM does. 
return MAX(100, pic_target_bits); } /** * \brief Select a lambda value for encoding the next picture - * \param state the main encoder state - * \return lambda for the next picture + * \param state the main encoder state + * \return lambda for the next picture * * Rate control must be enabled (i.e. cfg->target_bitrate > 0) when this * function is called. @@ -127,24 +141,11 @@ double kvz_select_picture_lambda(encoder_state_t * const state) update_rc_parameters(state); } - if (encoder->cfg->gop_len == 0 || - state->frame->gop_offset == 0 || - state->frame->num == 0) - { - // A new GOP begins at this frame. - gop_allocate_bits(state); - } else { - state->frame->cur_gop_target_bits = - state->previous_encoder_state->frame->cur_gop_target_bits; - } - // TODO: take the picture headers into account - const double target_bits_current_picture = pic_allocate_bits(state); - const double target_bits_per_pixel = - target_bits_current_picture / encoder->in.pixels_per_pic; - const double lambda = - state->frame->rc_alpha * pow(target_bits_per_pixel, state->frame->rc_beta); - return CLIP(0.1, 10000, lambda); + const double pic_target_bits = pic_allocate_bits(state); + const double target_bpp = pic_target_bits / encoder->in.pixels_per_pic; + const double lambda = state->frame->rc_alpha * pow(target_bpp, state->frame->rc_beta); + return CLIP(MIN_LAMBDA, MAX_LAMBDA, lambda); } int8_t kvz_lambda_to_QP(const double lambda) From 640ff94ecd3dd54db515a819613ebb490e9bd1be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arttu=20Yl=C3=A4-Outinen?= Date: Sun, 21 Aug 2016 13:16:59 +0900 Subject: [PATCH 03/12] Use separate lambda and QP for each LCU Adds fields lambda, lambda_sqrt and qp to encoder_state_t. Drops field cur_lambda_cost_sqrt from encoder_state_config_frame_t and renames cur_lambda_cost to lambda. 
--- src/encoderstate.c | 11 +++++--- src/encoderstate.h | 36 ++++++++++++++++++++------ src/filter.c | 4 +-- src/rate_control.c | 4 +-- src/rate_control.h | 2 +- src/rdo.c | 30 ++++++++++----------- src/sao.c | 12 ++++----- src/search.c | 22 ++++++++-------- src/search_inter.c | 2 +- src/search_intra.c | 14 +++++----- src/strategies/avx2/quant-avx2.c | 4 +-- src/strategies/generic/quant-generic.c | 4 +-- src/transform.c | 2 +- 13 files changed, 85 insertions(+), 62 deletions(-) diff --git a/src/encoderstate.c b/src/encoderstate.c index bd1cbf99..25daa1b8 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -201,7 +201,11 @@ static void encoder_state_worker_encode_lcu(void * opaque) { encoder_state_t *state = lcu->encoder_state; const encoder_control_t * const encoder = state->encoder_control; videoframe_t* const frame = state->tile->frame; - + + state->lambda = state->frame->lambda; + state->lambda_sqrt = sqrt(state->frame->lambda); + state->qp = state->frame->QP; + //This part doesn't write to bitstream, it's only search, deblock and sao kvz_search_lcu(state, lcu->position_px.x, lcu->position_px.y, state->tile->hor_buf_search, state->tile->ver_buf_search); @@ -902,7 +906,7 @@ static void encoder_state_init_new_frame(encoder_state_t * const state, kvz_pict if (cfg->target_bitrate > 0) { // Rate control enabled. 
lambda = kvz_select_picture_lambda(state); - state->frame->QP = kvz_lambda_to_QP(lambda); + state->frame->QP = kvz_lambda_to_qp(lambda); } else { if (cfg->gop_len > 0 && state->frame->slicetype != KVZ_SLICE_I) { kvz_gop_config const * const gop = @@ -914,8 +918,7 @@ static void encoder_state_init_new_frame(encoder_state_t * const state, kvz_pict } lambda = kvz_select_picture_lambda_from_qp(state); } - state->frame->cur_lambda_cost = lambda; - state->frame->cur_lambda_cost_sqrt = sqrt(lambda); + state->frame->lambda = lambda; encoder_state_init_children(state); } diff --git a/src/encoderstate.h b/src/encoderstate.h index 321f8137..a2ee3cc2 100644 --- a/src/encoderstate.h +++ b/src/encoderstate.h @@ -51,16 +51,29 @@ typedef enum { typedef struct encoder_state_config_frame_t { - double cur_lambda_cost; //!< \brief Lambda for SSE - double cur_lambda_cost_sqrt; //!< \brief Lambda for SAD and SATD - + /** + * \brief Frame-level lambda. + * + * Use state->lambda or state->lambda_sqrt for cost computations. + * + * \see encoder_state_t::lambda + * \see encoder_state_t::lambda_sqrt + */ + double lambda; + int32_t num; /*!< \brief Frame number */ int32_t poc; /*!< \brief Picture order count */ int8_t gop_offset; /*!< \brief Offset in the gop structure */ - - int8_t QP; //!< \brief Quantization parameter - double QP_factor; //!< \brief Quantization factor - + + /** + * \brief Frame-level quantization parameter + * + * \see encoder_state_t::qp + */ + int8_t QP; + //! \brief quantization factor + double QP_factor; + //Current picture available references image_list_t *ref; int8_t ref_list; @@ -199,7 +212,14 @@ typedef struct encoder_state_t { cabac_data_t cabac; uint32_t stats_bitstream_length; //Bitstream length written in bytes - + + //! \brief Lambda for SSE + double lambda; + //! \brief Lambda for SAD and SATD + double lambda_sqrt; + //! 
\brief Quantization parameter for the current LCU + int8_t qp; + //Jobs to wait for threadqueue_job_t * tqj_recon_done; //Reconstruction is done threadqueue_job_t * tqj_bitstream_written; //Bitstream is written diff --git a/src/filter.c b/src/filter.c index d0fa01bf..dce73199 100644 --- a/src/filter.c +++ b/src/filter.c @@ -291,7 +291,7 @@ static void filter_deblock_edge_luma(encoder_state_t * const state, kvz_pixel *src = orig_src; int8_t strength = 0; - int32_t qp = state->frame->QP; + int32_t qp = state->qp; int32_t bitdepth_scale = 1 << (encoder->bitdepth - 8); int32_t b_index = CLIP(0, 51, qp + (beta_offset_div2 << 1)); int32_t beta = kvz_g_beta_table_8x8[b_index] * bitdepth_scale; @@ -490,7 +490,7 @@ static void filter_deblock_edge_chroma(encoder_state_t * const state, }; int8_t strength = 2; - int32_t QP = kvz_g_chroma_scale[state->frame->QP]; + int32_t QP = kvz_g_chroma_scale[state->qp]; int32_t bitdepth_scale = 1 << (encoder->bitdepth-8); int32_t TC_index = CLIP(0, 51+2, (int32_t)(QP + 2*(strength-1) + (tc_offset_div2 << 1))); int32_t Tc = kvz_g_tc_table_8x8[TC_index]*bitdepth_scale; diff --git a/src/rate_control.c b/src/rate_control.c index 6e86f3f1..6e1f0db8 100644 --- a/src/rate_control.c +++ b/src/rate_control.c @@ -49,7 +49,7 @@ static void update_rc_parameters(encoder_state_t * state) // lambda computed from real bpp const double lambda_comp = CLIP(MIN_LAMBDA, MAX_LAMBDA, alpha_old * pow(bpp, beta_old)); // lambda used in encoding - const double lambda_real = state->frame->cur_lambda_cost; + const double lambda_real = state->frame->lambda; const double lambda_log_ratio = log(lambda_real) - log(lambda_comp); const double alpha = alpha_old + 0.1 * lambda_log_ratio * alpha_old; @@ -148,7 +148,7 @@ double kvz_select_picture_lambda(encoder_state_t * const state) return CLIP(MIN_LAMBDA, MAX_LAMBDA, lambda); } -int8_t kvz_lambda_to_QP(const double lambda) +int8_t kvz_lambda_to_qp(const double lambda) { const int8_t qp = 4.2005 * log(lambda) + 13.7223 + 0.5; 
return CLIP(0, 51, qp); diff --git a/src/rate_control.h b/src/rate_control.h index 3c2d4362..f0b2befb 100644 --- a/src/rate_control.h +++ b/src/rate_control.h @@ -33,7 +33,7 @@ double kvz_select_picture_lambda(encoder_state_t * const state); -int8_t kvz_lambda_to_QP(const double lambda); +int8_t kvz_lambda_to_qp(const double lambda); double kvz_select_picture_lambda_from_qp(encoder_state_t const * const state); diff --git a/src/rdo.c b/src/rdo.c index b64fb5df..65ca6dd9 100644 --- a/src/rdo.c +++ b/src/rdo.c @@ -257,7 +257,7 @@ uint32_t kvz_get_coded_level ( encoder_state_t * const state, double *coded_cost cabac_ctx_t* base_sig_model = type?(cabac->ctx.cu_sig_model_chroma):(cabac->ctx.cu_sig_model_luma); if( !last && max_abs_level < 3 ) { - *coded_cost_sig = state->frame->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 0); + *coded_cost_sig = state->lambda * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 0); *coded_cost = *coded_cost0 + *coded_cost_sig; if (max_abs_level == 0) return best_abs_level; } else { @@ -265,13 +265,13 @@ uint32_t kvz_get_coded_level ( encoder_state_t * const state, double *coded_cost } if( !last ) { - cur_cost_sig = state->frame->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 1); + cur_cost_sig = state->lambda * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 1); } min_abs_level = ( max_abs_level > 1 ? 
max_abs_level - 1 : 1 ); for (abs_level = max_abs_level; abs_level >= min_abs_level ; abs_level-- ) { double err = (double)(level_double - ( abs_level << q_bits ) ); - double cur_cost = err * err * temp + state->frame->cur_lambda_cost * + double cur_cost = err * err * temp + state->lambda * kvz_get_ic_rate( state, abs_level, ctx_num_one, ctx_num_abs, abs_go_rice, c1_idx, c2_idx, type); cur_cost += cur_cost_sig; @@ -308,7 +308,7 @@ static double get_rate_last(const encoder_state_t * const state, if( ctx_y > 3 ) { uiCost += 32768.0 * ((ctx_y-2)>>1); } - return state->frame->cur_lambda_cost*uiCost; + return state->lambda * uiCost; } static void calc_last_bits(encoder_state_t * const state, int32_t width, int32_t height, int8_t type, @@ -358,7 +358,7 @@ void kvz_rdoq_sign_hiding(const encoder_state_t *const state, int64_t rd_factor = (int64_t)( kvz_g_inv_quant_scales[qp_scaled % 6] * kvz_g_inv_quant_scales[qp_scaled % 6] * (1 << (2 * (qp_scaled / 6))) - / state->frame->cur_lambda_cost / 16 / (1 << (2 * (encoder->bitdepth - 8))) + / state->lambda / 16 / (1 << (2 * (encoder->bitdepth - 8))) + 0.5); int32_t lastCG = -1; int32_t absSum = 0; @@ -467,7 +467,7 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff, uint32_t max_num_coeff = width * height; int32_t scalinglist_type= (block_type == CU_INTRA ? 
0 : 3) + (int8_t)("\0\3\1\2"[type]); - int32_t qp_scaled = kvz_get_scaled_qp(type, state->frame->QP, (encoder->bitdepth - 8) * 6); + int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth - 8) * 6); int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift; @@ -669,7 +669,7 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff, if (sig_coeffgroup_flag[cg_blkpos] == 0) { uint32_t ctx_sig = kvz_context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x, cg_pos_y, width); - cost_coeffgroup_sig[cg_scanpos] = state->frame->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0); + cost_coeffgroup_sig[cg_scanpos] = state->lambda *CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0); base_cost += cost_coeffgroup_sig[cg_scanpos] - rd_stats.sig_cost; } else { if (cg_scanpos < cg_last_scanpos){ @@ -686,9 +686,9 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff, ctx_sig = kvz_context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x, cg_pos_y, width); - cost_coeffgroup_sig[cg_scanpos] = state->frame->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 1); + cost_coeffgroup_sig[cg_scanpos] = state->lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 1); base_cost += cost_coeffgroup_sig[cg_scanpos]; - cost_zero_cg += state->frame->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 0); + cost_zero_cg += state->lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 0); // try to convert the current coeff group from non-zero to all-zero cost_zero_cg += rd_stats.uncoded_dist; // distortion for resetting non-zero levels to zero levels @@ -701,7 +701,7 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff, sig_coeffgroup_flag[cg_blkpos] = 0; base_cost = cost_zero_cg; - cost_coeffgroup_sig[cg_scanpos] = state->frame->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 0); + cost_coeffgroup_sig[cg_scanpos] = 
state->lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 0); // reset coeffs to 0 in this block for (int32_t scanpos_in_cg = cg_size - 1; scanpos_in_cg >= 0; scanpos_in_cg--) { @@ -728,13 +728,13 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff, int32_t best_last_idx_p1 = 0; if( block_type != CU_INTRA && !type/* && pcCU->getTransformIdx( uiAbsPartIdx ) == 0*/ ) { - best_cost = block_uncoded_cost + state->frame->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx.cu_qt_root_cbf_model),0); - base_cost += state->frame->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx.cu_qt_root_cbf_model),1); + best_cost = block_uncoded_cost + state->lambda * CTX_ENTROPY_BITS(&(cabac->ctx.cu_qt_root_cbf_model),0); + base_cost += state->lambda * CTX_ENTROPY_BITS(&(cabac->ctx.cu_qt_root_cbf_model),1); } else { cabac_ctx_t* base_cbf_model = type?(cabac->ctx.qt_cbf_model_chroma):(cabac->ctx.qt_cbf_model_luma); ctx_cbf = ( type ? tr_depth : !tr_depth); - best_cost = block_uncoded_cost + state->frame->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],0); - base_cost += state->frame->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],1); + best_cost = block_uncoded_cost + state->lambda * CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],0); + base_cost += state->lambda * CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],1); } for ( int32_t cg_scanpos = cg_last_scanpos; cg_scanpos >= 0; cg_scanpos--) { @@ -1006,5 +1006,5 @@ int kvz_calc_mvd_cost_cabac(encoder_state_t * const state, int x, int y, int mv_ *bitcost = (23 - state_cabac_copy.bits_left) + (state_cabac_copy.num_buffered_bytes << 3); // Store bitcost before restoring cabac - return *bitcost * (int32_t)(state->frame->cur_lambda_cost_sqrt + 0.5); + return *bitcost * (int32_t)(state->lambda_sqrt + 0.5); } diff --git a/src/sao.c b/src/sao.c index 2bafbca5..36a88bd5 100644 --- a/src/sao.c +++ b/src/sao.c @@ -501,7 +501,7 @@ static void sao_search_edge_sao(const encoder_state_t * const state, { float 
mode_bits = sao_mode_bits_edge(state, edge_class, edge_offset, sao_top, sao_left, buf_cnt); - sum_ddistortion += (int)((double)mode_bits*state->frame->cur_lambda_cost+0.5); + sum_ddistortion += (int)((double)mode_bits*state->lambda +0.5); } // SAO is not applied for category 0. edge_offset[SAO_EO_CAT0] = 0; @@ -545,7 +545,7 @@ static void sao_search_band_sao(const encoder_state_t * const state, const kvz_p } temp_rate = sao_mode_bits_band(state, sao_out->band_position, temp_offsets, sao_top, sao_left, buf_cnt); - ddistortion += (int)((double)temp_rate*state->frame->cur_lambda_cost + 0.5); + ddistortion += (int)((double)temp_rate*state->lambda + 0.5); // Select band sao over edge sao when distortion is lower if (ddistortion < sao_out->ddistortion) { @@ -589,7 +589,7 @@ static void sao_search_best_mode(const encoder_state_t * const state, const kvz_ { float mode_bits = sao_mode_bits_edge(state, edge_sao.eo_class, edge_sao.offsets, sao_top, sao_left, buf_cnt); - int ddistortion = (int)(mode_bits * state->frame->cur_lambda_cost + 0.5); + int ddistortion = (int)(mode_bits * state->lambda + 0.5); unsigned buf_i; for (buf_i = 0; buf_i < buf_cnt; ++buf_i) { @@ -603,7 +603,7 @@ static void sao_search_best_mode(const encoder_state_t * const state, const kvz_ { float mode_bits = sao_mode_bits_band(state, band_sao.band_position, band_sao.offsets, sao_top, sao_left, buf_cnt); - int ddistortion = (int)(mode_bits * state->frame->cur_lambda_cost + 0.5); + int ddistortion = (int)(mode_bits * state->lambda + 0.5); unsigned buf_i; for (buf_i = 0; buf_i < buf_cnt; ++buf_i) { @@ -626,7 +626,7 @@ static void sao_search_best_mode(const encoder_state_t * const state, const kvz_ // Choose between SAO and doing nothing, taking into account the // rate-distortion cost of coding do nothing. 
{ - int cost_of_nothing = (int)(sao_mode_bits_none(state, sao_top, sao_left) * state->frame->cur_lambda_cost + 0.5); + int cost_of_nothing = (int)(sao_mode_bits_none(state, sao_top, sao_left) * state->lambda + 0.5); if (sao_out->ddistortion >= cost_of_nothing) { sao_out->type = SAO_TYPE_NONE; merge_cost[0] = cost_of_nothing; @@ -643,7 +643,7 @@ static void sao_search_best_mode(const encoder_state_t * const state, const kvz_ if (merge_cand) { unsigned buf_i; float mode_bits = sao_mode_bits_merge(state, i + 1); - int ddistortion = (int)(mode_bits * state->frame->cur_lambda_cost + 0.5); + int ddistortion = (int)(mode_bits * state->lambda + 0.5); switch (merge_cand->type) { case SAO_TYPE_EDGE: diff --git a/src/search.c b/src/search.c index adb1a890..440e7dca 100644 --- a/src/search.c +++ b/src/search.c @@ -321,7 +321,7 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state, sum += kvz_cu_rd_cost_luma(state, x_px, y_px + offset, depth + 1, pred_cu, lcu); sum += kvz_cu_rd_cost_luma(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu); - return sum + tr_tree_bits * state->frame->cur_lambda_cost; + return sum + tr_tree_bits * state->lambda; } // Add transform_tree cbf_luma bit cost. 
@@ -353,7 +353,7 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state, } double bits = tr_tree_bits + coeff_bits; - return (double)ssd * LUMA_MULT + bits * state->frame->cur_lambda_cost; + return (double)ssd * LUMA_MULT + bits * state->lambda; } @@ -398,7 +398,7 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state, sum += kvz_cu_rd_cost_chroma(state, x_px, y_px + offset, depth + 1, pred_cu, lcu); sum += kvz_cu_rd_cost_chroma(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu); - return sum + tr_tree_bits * state->frame->cur_lambda_cost; + return sum + tr_tree_bits * state->lambda; } // Chroma SSD @@ -428,7 +428,7 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state, } double bits = tr_tree_bits + coeff_bits; - return (double)ssd * CHROMA_MULT + bits * state->frame->cur_lambda_cost; + return (double)ssd * CHROMA_MULT + bits * state->lambda; } @@ -682,7 +682,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, mode_bits = inter_bitcost; } - cost += mode_bits * state->frame->cur_lambda_cost; + cost += mode_bits * state->lambda; } // Recursively split all the way to max search depth. @@ -695,15 +695,15 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, // Add cost of cu_split_flag. uint8_t split_model = get_ctx_cu_split_model(lcu, x, y, depth); const cabac_ctx_t *ctx = &(state->cabac.ctx.split_flag_model[split_model]); - cost += CTX_ENTROPY_FBITS(ctx, 0) * state->frame->cur_lambda_cost; - split_cost += CTX_ENTROPY_FBITS(ctx, 1) * state->frame->cur_lambda_cost; + cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda; + split_cost += CTX_ENTROPY_FBITS(ctx, 1) * state->lambda; } if (cur_cu->type == CU_INTRA && depth == MAX_DEPTH) { // Add cost of intra part_size. 
const cabac_ctx_t *ctx = &(state->cabac.ctx.part_size_model[0]); - cost += CTX_ENTROPY_FBITS(ctx, 1) * state->frame->cur_lambda_cost; // 2Nx2N - split_cost += CTX_ENTROPY_FBITS(ctx, 0) * state->frame->cur_lambda_cost; // NxN + cost += CTX_ENTROPY_FBITS(ctx, 1) * state->lambda; // 2Nx2N + split_cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda; // NxN } // If skip mode was selected for the block, skip further search. @@ -750,11 +750,11 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, // Add the cost of coding no-split. uint8_t split_model = get_ctx_cu_split_model(lcu, x, y, depth); const cabac_ctx_t *ctx = &(state->cabac.ctx.split_flag_model[split_model]); - cost += CTX_ENTROPY_FBITS(ctx, 0) * state->frame->cur_lambda_cost; + cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda; // Add the cost of coding intra mode only once. double mode_bits = calc_mode_bits(state, &work_tree[depth], cur_cu, x, y); - cost += mode_bits * state->frame->cur_lambda_cost; + cost += mode_bits * state->lambda; } } diff --git a/src/search_inter.c b/src/search_inter.c index ebd51524..3b81e709 100644 --- a/src/search_inter.c +++ b/src/search_inter.c @@ -253,7 +253,7 @@ static int calc_mvd_cost(encoder_state_t * const state, int x, int y, int mv_shi temp_bitcost += cur_mv_cand ? 
cand2_cost : cand1_cost; } *bitcost = temp_bitcost; - return temp_bitcost*(int32_t)(state->frame->cur_lambda_cost_sqrt+0.5); + return temp_bitcost*(int32_t)(state->lambda_sqrt + 0.5); } diff --git a/src/search_intra.c b/src/search_intra.c index 0d63ea05..2e79fc96 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -117,7 +117,7 @@ static double get_cost(encoder_state_t * const state, trskip_bits += 2.0 * (CTX_ENTROPY_FBITS(ctx, 1) - CTX_ENTROPY_FBITS(ctx, 0)); } - double sad_cost = TRSKIP_RATIO * sad_func(pred, orig_block) + state->frame->cur_lambda_cost_sqrt * trskip_bits; + double sad_cost = TRSKIP_RATIO * sad_func(pred, orig_block) + state->lambda_sqrt * trskip_bits; if (sad_cost < satd_cost) { return sad_cost; } @@ -164,7 +164,7 @@ static void get_cost_dual(encoder_state_t * const state, double sad_costs[PARALLEL_BLKS] = { 0 }; sad_twin_func(preds, orig_block, PARALLEL_BLKS, unsigned_sad_costs); for (int i = 0; i < PARALLEL_BLKS; ++i) { - sad_costs[i] = TRSKIP_RATIO * (double)unsigned_sad_costs[i] + state->frame->cur_lambda_cost_sqrt * trskip_bits; + sad_costs[i] = TRSKIP_RATIO * (double)unsigned_sad_costs[i] + state->lambda_sqrt * trskip_bits; if (sad_costs[i] < (double)satd_costs[i]) { costs_out[i] = sad_costs[i]; } @@ -254,7 +254,7 @@ static double search_intra_trdepth(encoder_state_t * const state, // max_depth. // - Min transform size hasn't been reached (MAX_PU_DEPTH). 
if (depth < max_depth && depth < MAX_PU_DEPTH) { - split_cost = 3 * state->frame->cur_lambda_cost; + split_cost = 3 * state->lambda; split_cost += search_intra_trdepth(state, x_px, y_px, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu); if (split_cost < nosplit_cost) { @@ -296,7 +296,7 @@ static double search_intra_trdepth(encoder_state_t * const state, } double bits = tr_split_bit + cbf_bits; - split_cost += bits * state->frame->cur_lambda_cost; + split_cost += bits * state->lambda; } else { assert(width <= TR_MAX_WIDTH); } @@ -529,7 +529,7 @@ static int8_t search_intra_rough(encoder_state_t * const state, // Add prediction mode coding cost as the last thing. We don't want this // affecting the halving search. - int lambda_cost = (int)(state->frame->cur_lambda_cost_sqrt + 0.5); + int lambda_cost = (int)(state->lambda_sqrt + 0.5); for (int mode_i = 0; mode_i < modes_selected; ++mode_i) { costs[mode_i] += lambda_cost * kvz_luma_mode_bits(state, modes[mode_i], intra_preds); } @@ -600,7 +600,7 @@ static int8_t search_intra_rdo(encoder_state_t * const state, for(int rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode ++) { int rdo_bitcost = kvz_luma_mode_bits(state, modes[rdo_mode], intra_preds); - costs[rdo_mode] = rdo_bitcost * (int)(state->frame->cur_lambda_cost + 0.5); + costs[rdo_mode] = rdo_bitcost * (int)(state->lambda + 0.5); // Perform transform split search and save mode RD cost for the best one. 
cu_info_t pred_cu; @@ -701,7 +701,7 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state, chroma.cost = kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, tr_cu, lcu); double mode_bits = kvz_chroma_mode_bits(state, chroma.mode, intra_mode); - chroma.cost += mode_bits * state->frame->cur_lambda_cost; + chroma.cost += mode_bits * state->lambda; if (chroma.cost < best_chroma.cost) { best_chroma = chroma; diff --git a/src/strategies/avx2/quant-avx2.c b/src/strategies/avx2/quant-avx2.c index de973122..bb9d5ac4 100644 --- a/src/strategies/avx2/quant-avx2.c +++ b/src/strategies/avx2/quant-avx2.c @@ -52,7 +52,7 @@ void kvz_quant_flat_avx2(const encoder_state_t * const state, coeff_t *coef, coe const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2; const uint32_t * const scan = kvz_g_sig_last_scan[scan_idx][log2_block_size - 1]; - int32_t qp_scaled = kvz_get_scaled_qp(type, state->frame->QP, (encoder->bitdepth - 8) * 6); + int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth - 8) * 6); const uint32_t log2_tr_size = kvz_g_convert_to_bit[width] + 2; const int32_t scalinglist_type = (block_type == CU_INTRA ? 
0 : 3) + (int8_t)("\0\3\1\2"[type]); const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_size - 2][scalinglist_type][qp_scaled % 6]; @@ -457,7 +457,7 @@ void kvz_dequant_avx2(const encoder_state_t * const state, coeff_t *q_coef, coef int32_t n; int32_t transform_shift = 15 - encoder->bitdepth - (kvz_g_convert_to_bit[ width ] + 2); - int32_t qp_scaled = kvz_get_scaled_qp(type, state->frame->QP, (encoder->bitdepth-8)*6); + int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth-8)*6); shift = 20 - QUANT_SHIFT - transform_shift; diff --git a/src/strategies/generic/quant-generic.c b/src/strategies/generic/quant-generic.c index 75dd127c..69f8ca01 100644 --- a/src/strategies/generic/quant-generic.c +++ b/src/strategies/generic/quant-generic.c @@ -41,7 +41,7 @@ void kvz_quant_generic(const encoder_state_t * const state, coeff_t *coef, coeff const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2; const uint32_t * const scan = kvz_g_sig_last_scan[scan_idx][log2_block_size - 1]; - int32_t qp_scaled = kvz_get_scaled_qp(type, state->frame->QP, (encoder->bitdepth - 8) * 6); + int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth - 8) * 6); const uint32_t log2_tr_size = kvz_g_convert_to_bit[width] + 2; const int32_t scalinglist_type = (block_type == CU_INTRA ? 
0 : 3) + (int8_t)("\0\3\1\2"[type]); const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_size - 2][scalinglist_type][qp_scaled % 6]; @@ -286,7 +286,7 @@ void kvz_dequant_generic(const encoder_state_t * const state, coeff_t *q_coef, c int32_t n; int32_t transform_shift = 15 - encoder->bitdepth - (kvz_g_convert_to_bit[ width ] + 2); - int32_t qp_scaled = kvz_get_scaled_qp(type, state->frame->QP, (encoder->bitdepth-8)*6); + int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth-8)*6); shift = 20 - QUANT_SHIFT - transform_shift; diff --git a/src/transform.c b/src/transform.c index 6beb3492..4e5bf236 100644 --- a/src/transform.c +++ b/src/transform.c @@ -232,7 +232,7 @@ int kvz_quantize_residual_trskip( int has_coeffs; } skip, noskip, *best; - const int bit_cost = (int)(state->frame->cur_lambda_cost+0.5); + const int bit_cost = (int)(state->lambda + 0.5); noskip.has_coeffs = kvz_quantize_residual( state, cur_cu, width, color, scan_order, From 71633889ce132d55f993dd427776be11117ae31e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arttu=20Yl=C3=A4-Outinen?= Date: Sun, 21 Aug 2016 13:36:13 +0900 Subject: [PATCH 04/12] Enable CU QP delta when using rate control When rate control is enabled, enable cu_qp_delta_enabled_flag in PPS with diff_cu_qp_delta_depth set to 0. Also adds code for writing the QP deltas and a new cabac context. 
--- src/cabac.h | 1 + src/context.c | 9 +++++++++ src/encode_coding_tree.c | 28 ++++++++++++++++++++++++++-- src/encode_coding_tree.h | 2 +- src/encoder_state-bitstream.c | 11 ++++++++--- src/encoderstate.c | 7 +++++++ src/encoderstate.h | 11 +++++++++++ src/global.h | 1 + 8 files changed, 64 insertions(+), 6 deletions(-) diff --git a/src/cabac.h b/src/cabac.h index 95571c3b..a1779c59 100644 --- a/src/cabac.h +++ b/src/cabac.h @@ -60,6 +60,7 @@ typedef struct cabac_ctx_t trans_subdiv_model[3]; //!< \brief intra mode context models cabac_ctx_t qt_cbf_model_luma[4]; cabac_ctx_t qt_cbf_model_chroma[4]; + cabac_ctx_t cu_qp_delta_abs[4]; cabac_ctx_t part_size_model[4]; cabac_ctx_t cu_sig_coeff_group_model[4]; cabac_ctx_t cu_sig_model_luma[27]; diff --git a/src/context.c b/src/context.c index 1244245c..ada2683e 100644 --- a/src/context.c +++ b/src/context.c @@ -121,6 +121,12 @@ static const uint8_t INIT_QT_CBF[3][8] = { { 111, 141, CNU, CNU, 94, 138, 182, 154 }, }; +static const uint8_t INIT_CU_QP_DELTA_ABS[3][2] = { + { 154, 154 }, + { 154, 154 }, + { 154, 154 }, +}; + static const uint8_t INIT_SIG_CG_FLAG[3][4] = { { 121, 140, 61, 154 }, { 121, 140, 61, 154 }, @@ -243,6 +249,9 @@ void kvz_init_contexts(encoder_state_t *state, int8_t QP, int8_t slice) kvz_ctx_init(&cabac->ctx.mvp_idx_model[0], QP, INIT_MVP_IDX[slice][0]); kvz_ctx_init(&cabac->ctx.mvp_idx_model[1], QP, INIT_MVP_IDX[slice][1]); + kvz_ctx_init(&cabac->ctx.cu_qp_delta_abs[0], QP, INIT_CU_QP_DELTA_ABS[slice][0]); + kvz_ctx_init(&cabac->ctx.cu_qp_delta_abs[1], QP, INIT_CU_QP_DELTA_ABS[slice][1]); + for (i = 0; i < 4; i++) { kvz_ctx_init(&cabac->ctx.cu_sig_coeff_group_model[i], QP, INIT_SIG_CG_FLAG[slice][i]); kvz_ctx_init(&cabac->ctx.cu_abs_model_luma[i], QP, INIT_ABS_FLAG[slice][i]); diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index 42ca1557..f7aa1688 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -517,6 +517,28 @@ static void 
encode_transform_coeff(encoder_state_t * const state, } if (cb_flag_y | cb_flag_u | cb_flag_v) { + if (state->must_code_qp_delta) { + const int qp_delta = state->qp - state->ref_qp; + const int qp_delta_abs = ABS(qp_delta); + cabac_data_t* cabac = &state->cabac; + + // cu_qp_delta_abs prefix + cabac->cur_ctx = &cabac->ctx.cu_qp_delta_abs[0]; + kvz_cabac_write_unary_max_symbol(cabac, cabac->ctx.cu_qp_delta_abs, MIN(qp_delta_abs, 5), 1, 5); + + if (qp_delta_abs >= 5) { + // cu_qp_delta_abs suffix + kvz_cabac_write_ep_ex_golomb(state, cabac, qp_delta_abs - 5, 0); + } + + if (qp_delta != 0) { + CABAC_BIN_EP(cabac, (qp_delta >= 0 ? 0 : 1), "qp_delta_sign_flag"); + } + + state->must_code_qp_delta = false; + state->ref_qp = state->qp; + } + encode_transform_unit(state, x_pu, y_pu, depth); } } @@ -894,14 +916,16 @@ static void encode_part_mode(encoder_state_t * const state, } void kvz_encode_coding_tree(encoder_state_t * const state, - uint16_t x_ctb, uint16_t y_ctb, uint8_t depth) + uint16_t x_ctb, + uint16_t y_ctb, + uint8_t depth) { cabac_data_t * const cabac = &state->cabac; const videoframe_t * const frame = state->tile->frame; const cu_info_t *cur_cu = kvz_videoframe_get_cu_const(frame, x_ctb, y_ctb); uint8_t split_flag = GET_SPLITDATA(cur_cu, depth); uint8_t split_model = 0; - + //Absolute ctb uint16_t abs_x_ctb = x_ctb + (state->tile->lcu_offset_x * LCU_WIDTH) / (LCU_WIDTH >> MAX_DEPTH); uint16_t abs_y_ctb = y_ctb + (state->tile->lcu_offset_y * LCU_WIDTH) / (LCU_WIDTH >> MAX_DEPTH); diff --git a/src/encode_coding_tree.h b/src/encode_coding_tree.h index b8e2dc10..e284d7c2 100644 --- a/src/encode_coding_tree.h +++ b/src/encode_coding_tree.h @@ -29,7 +29,7 @@ #include "encoderstate.h" #include "global.h" -void kvz_encode_coding_tree(encoder_state_t *state, +void kvz_encode_coding_tree(encoder_state_t * const state, uint16_t x_ctb, uint16_t y_ctb, uint8_t depth); diff --git a/src/encoder_state-bitstream.c b/src/encoder_state-bitstream.c index 7de90bb4..edeef983 100644 
--- a/src/encoder_state-bitstream.c +++ b/src/encoder_state-bitstream.c @@ -451,9 +451,14 @@ static void encoder_state_write_bitstream_pic_parameter_set(bitstream_t* stream, WRITE_SE(stream, ((int8_t)encoder->cfg->qp) - 26, "pic_init_qp_minus26"); WRITE_U(stream, 0, 1, "constrained_intra_pred_flag"); WRITE_U(stream, encoder->trskip_enable, 1, "transform_skip_enabled_flag"); - WRITE_U(stream, 0, 1, "cu_qp_delta_enabled_flag"); - //if cu_qp_delta_enabled_flag - //WRITE_UE(stream, 0, "diff_cu_qp_delta_depth"); + + if (encoder->cfg->target_bitrate > 0) { + // Use separate QP for each LCU when rate control is enabled. + WRITE_U(stream, 1, 1, "cu_qp_delta_enabled_flag"); + WRITE_UE(stream, 0, "diff_cu_qp_delta_depth"); + } else { + WRITE_U(stream, 0, 1, "cu_qp_delta_enabled_flag"); + } //TODO: add QP offsets WRITE_SE(stream, 0, "pps_cb_qp_offset"); diff --git a/src/encoderstate.c b/src/encoderstate.c index 25daa1b8..cf8c54e6 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -255,6 +255,10 @@ static void encoder_state_worker_encode_lcu(void * opaque) { encode_sao(state, lcu->position.x, lcu->position.y, &frame->sao_luma[lcu->position.y * frame->width_in_lcu + lcu->position.x], &frame->sao_chroma[lcu->position.y * frame->width_in_lcu + lcu->position.x]); } + + // QP delta is not used when rate control is turned off. + state->must_code_qp_delta = (state->encoder_control->cfg->target_bitrate > 0); + //Encode coding tree kvz_encode_coding_tree(state, lcu->position.x << MAX_DEPTH, lcu->position.y << MAX_DEPTH, 0); @@ -305,6 +309,9 @@ static void encoder_state_encode_leaf(encoder_state_t * const state) { InitC(state->tile->dbs_g); state->tile->m_prev_pos = 0; } + + state->ref_qp = state->frame->QP; + // Select whether to encode the frame/tile in current thread or to define // wavefront jobs for other threads to handle. 
bool wavefront = state->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW; diff --git a/src/encoderstate.h b/src/encoderstate.h index a2ee3cc2..ab3603b9 100644 --- a/src/encoderstate.h +++ b/src/encoderstate.h @@ -220,6 +220,17 @@ typedef struct encoder_state_t { //! \brief Quantization parameter for the current LCU int8_t qp; + /** + * \brief Whether a QP delta value must be coded for the current LCU. + */ + bool must_code_qp_delta; + + /** + * \brief Reference QP for computing the QP delta of the LCU that is coded + * next. Updated whenever a QP delta is coded. + */ + int8_t ref_qp; + //Jobs to wait for threadqueue_job_t * tqj_recon_done; //Reconstruction is done threadqueue_job_t * tqj_bitstream_written; //Bitstream is written diff --git a/src/global.h b/src/global.h index 0a423911..9da0a74d 100644 --- a/src/global.h +++ b/src/global.h @@ -158,6 +158,7 @@ typedef int16_t coeff_t; #define MRG_MAX_NUM_CANDS 5 /* Some tools */ +#define ABS(a) ((a) >= 0 ? (a) : (-(a))) #define MAX(a,b) (((a)>(b))?(a):(b)) #define MIN(a,b) (((a)<(b))?(a):(b)) #define CLIP(low,high,value) MAX((low),MIN((high),(value))) From 2a4243acbe08edad4028e61d86577ceb5d36f77c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arttu=20Yl=C3=A4-Outinen?= Date: Sun, 21 Aug 2016 14:03:57 +0900 Subject: [PATCH 05/12] Refactor rate control Moves all code related to setting QP and lambda values to rate_control module. 
--- src/encoderstate.c | 23 ++------- src/rate_control.c | 116 +++++++++++++++++++++++---------------------- src/rate_control.h | 7 +-- 3 files changed, 65 insertions(+), 81 deletions(-) diff --git a/src/encoderstate.c b/src/encoderstate.c index cf8c54e6..31212ebe 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -202,9 +202,7 @@ static void encoder_state_worker_encode_lcu(void * opaque) { const encoder_control_t * const encoder = state->encoder_control; videoframe_t* const frame = state->tile->frame; - state->lambda = state->frame->lambda; - state->lambda_sqrt = sqrt(state->frame->lambda); - state->qp = state->frame->QP; + kvz_set_lcu_lambda_and_qp(state); //This part doesn't write to bitstream, it's only search, deblock and sao @@ -909,23 +907,8 @@ static void encoder_state_init_new_frame(encoder_state_t * const state, kvz_pict encoder_state_remove_refs(state); encoder_state_ref_sort(state); - double lambda; - if (cfg->target_bitrate > 0) { - // Rate control enabled. - lambda = kvz_select_picture_lambda(state); - state->frame->QP = kvz_lambda_to_qp(lambda); - } else { - if (cfg->gop_len > 0 && state->frame->slicetype != KVZ_SLICE_I) { - kvz_gop_config const * const gop = - cfg->gop + state->frame->gop_offset; - state->frame->QP = cfg->qp + gop->qp_offset; - state->frame->QP_factor = gop->qp_factor; - } else { - state->frame->QP = cfg->qp; - } - lambda = kvz_select_picture_lambda_from_qp(state); - } - state->frame->lambda = lambda; + + kvz_set_picture_lambda_and_qp(state); encoder_state_init_children(state); } diff --git a/src/rate_control.c b/src/rate_control.c index 6e1f0db8..5be244ae 100644 --- a/src/rate_control.c +++ b/src/rate_control.c @@ -122,73 +122,77 @@ static double pic_allocate_bits(encoder_state_t * const state) return MAX(100, pic_target_bits); } -/** - * \brief Select a lambda value for encoding the next picture - * \param state the main encoder state - * \return lambda for the next picture - * - * Rate control must be enabled (i.e. 
cfg->target_bitrate > 0) when this - * function is called. - */ -double kvz_select_picture_lambda(encoder_state_t * const state) -{ - const encoder_control_t * const encoder = state->encoder_control; - - assert(encoder->cfg->target_bitrate > 0); - - if (state->frame->num > encoder->owf) { - // At least one frame has been written. - update_rc_parameters(state); - } - - // TODO: take the picture headers into account - const double pic_target_bits = pic_allocate_bits(state); - const double target_bpp = pic_target_bits / encoder->in.pixels_per_pic; - const double lambda = state->frame->rc_alpha * pow(target_bpp, state->frame->rc_beta); - return CLIP(MIN_LAMBDA, MAX_LAMBDA, lambda); -} - -int8_t kvz_lambda_to_qp(const double lambda) +int8_t lambda_to_qp(const double lambda) { const int8_t qp = 4.2005 * log(lambda) + 13.7223 + 0.5; return CLIP(0, 51, qp); } /** - * \brief Select a lambda value according to current QP value + * \brief Allocate bits and set lambda and QP for the current picture. * \param state the main encoder state - * \return lambda for the next picture - * - * This function should be used to select lambda when rate control is - * disabled. */ -double kvz_select_picture_lambda_from_qp(encoder_state_t const * const state) +void kvz_set_picture_lambda_and_qp(encoder_state_t * const state) { - const int gop_len = state->encoder_control->cfg->gop_len; - const int intra_period = state->encoder_control->cfg->intra_period; - const int keyframe_period = gop_len > 0 ? gop_len : intra_period; - - double lambda = pow(2.0, (state->frame->QP - 12) / 3.0); + const encoder_control_t * const ctrl = state->encoder_control; - if (state->frame->slicetype == KVZ_SLICE_I) { - lambda *= 0.57; - - // Reduce lambda for I-frames according to the number of references. 
- if (keyframe_period == 0) { - lambda *= 0.5; - } else { - lambda *= 1.0 - CLIP(0.0, 0.5, 0.05 * (keyframe_period - 1)); + if (ctrl->cfg->target_bitrate > 0) { + // Rate control enabled + + if (state->frame->num > ctrl->owf) { + // At least one frame has been written. + update_rc_parameters(state); } - } else if (gop_len > 0) { - lambda *= state->frame->QP_factor; - } else { - lambda *= 0.4624; - } - // Increase lambda if not key-frame. - if (keyframe_period > 0 && state->frame->poc % keyframe_period != 0) { - lambda *= CLIP(2.0, 4.0, (state->frame->QP - 12) / 6.0); + // TODO: take the picture headers into account + const double pic_target_bits = pic_allocate_bits(state); + const double target_bpp = pic_target_bits / ctrl->in.pixels_per_pic; + const double lambda = state->frame->rc_alpha * pow(target_bpp, state->frame->rc_beta); + state->frame->lambda = CLIP(MIN_LAMBDA, MAX_LAMBDA, lambda); + state->frame->QP = lambda_to_qp(lambda); + + } else { + // Rate control disabled + kvz_gop_config const * const gop = &ctrl->cfg->gop[state->frame->gop_offset]; + const int gop_len = ctrl->cfg->gop_len; + const int period = gop_len > 0 ? gop_len : ctrl->cfg->intra_period; + + state->frame->QP = ctrl->cfg->qp; + + if (gop_len > 0 && state->frame->slicetype != KVZ_SLICE_I) { + state->frame->QP += gop->qp_offset; + } + + double lambda = pow(2.0, (state->frame->QP - 12) / 3.0); + + if (state->frame->slicetype == KVZ_SLICE_I) { + lambda *= 0.57; + + // Reduce lambda for I-frames according to the number of references. + if (period == 0) { + lambda *= 0.5; + } else { + lambda *= 1.0 - CLIP(0.0, 0.5, 0.05 * (period - 1)); + } + } else if (gop_len > 0) { + lambda *= gop->qp_factor; + + } else { + lambda *= 0.4624; + } + + // Increase lambda if not key-frame. 
+ if (period > 0 && state->frame->poc % period != 0) { + lambda *= CLIP(2.0, 4.0, (state->frame->QP - 12) / 6.0); + } + + state->frame->lambda = lambda; } - - return lambda; +} + +void kvz_set_lcu_lambda_and_qp(encoder_state_t * const state) +{ + state->lambda = state->frame->lambda; + state->lambda_sqrt = sqrt(state->frame->lambda); + state->qp = state->frame->QP; } diff --git a/src/rate_control.h b/src/rate_control.h index f0b2befb..b1e9281d 100644 --- a/src/rate_control.h +++ b/src/rate_control.h @@ -30,11 +30,8 @@ #include "encoderstate.h" +void kvz_set_picture_lambda_and_qp(encoder_state_t * const state); -double kvz_select_picture_lambda(encoder_state_t * const state); - -int8_t kvz_lambda_to_qp(const double lambda); - -double kvz_select_picture_lambda_from_qp(encoder_state_t const * const state); +void kvz_set_lcu_lambda_and_qp(encoder_state_t * const state); #endif // RATE_CONTROL_H_ From ff5e5ec6d42f07ef31af65df83965c5c6fcd0800 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arttu=20Yl=C3=A4-Outinen?= Date: Wed, 24 Aug 2016 10:16:48 +0900 Subject: [PATCH 06/12] Record info about coded LCUs Adds field lcu_stats to encoder_state_config_frame_t. 
The following data is recorded for each LCU: - number of bits - squared cost - used lambda value - alpha parameter used for rate control - beta parameter used for rate control --- src/encoder_state-ctors_dtors.c | 5 +++++ src/encoderstate.c | 33 +++++++++++++++++++++++++++++++-- src/encoderstate.h | 26 ++++++++++++++++++++++++++ src/rate_control.c | 8 +++++++- src/rate_control.h | 3 ++- src/search.c | 5 ++++- 6 files changed, 75 insertions(+), 5 deletions(-) diff --git a/src/encoder_state-ctors_dtors.c b/src/encoder_state-ctors_dtors.c index e53ac4e4..49d438b9 100644 --- a/src/encoder_state-ctors_dtors.c +++ b/src/encoder_state-ctors_dtors.c @@ -53,11 +53,16 @@ static int encoder_state_config_frame_init(encoder_state_t * const state) { state->frame->rc_alpha = 3.2003; state->frame->rc_beta = -1.367; + const encoder_control_t * const encoder = state->encoder_control; + const int num_lcus = encoder->in.width_in_lcu * encoder->in.height_in_lcu; + state->frame->lcu_stats = MALLOC(lcu_stats_t, num_lcus); + return 1; } static void encoder_state_config_frame_finalize(encoder_state_t * const state) { kvz_image_list_destroy(state->frame->ref); + FREE_POINTER(state->frame->lcu_stats); } static int encoder_state_config_tile_init(encoder_state_t * const state, diff --git a/src/encoderstate.c b/src/encoderstate.c index 31212ebe..967cfa8c 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -202,7 +202,7 @@ static void encoder_state_worker_encode_lcu(void * opaque) { const encoder_control_t * const encoder = state->encoder_control; videoframe_t* const frame = state->tile->frame; - kvz_set_lcu_lambda_and_qp(state); + kvz_set_lcu_lambda_and_qp(state, lcu->position); //This part doesn't write to bitstream, it's only search, deblock and sao @@ -241,6 +241,7 @@ static void encoder_state_worker_encode_lcu(void * opaque) { } //Now write data to bitstream (required to have a correct CABAC state) + const uint64_t existing_bits = kvz_bitstream_tell(&state->stream); //First LCU, and 
we are in a slice. We need a slice header if (state->type == ENCODER_STATE_TYPE_SLICE && lcu->index == 0) { @@ -266,7 +267,10 @@ static void encoder_state_worker_encode_lcu(void * opaque) { //Always 0 since otherwise it would be split kvz_cabac_encode_bin_trm(&state->cabac, 0); // end_of_slice_segment_flag } - + + const uint32_t bits = kvz_bitstream_tell(&state->stream) - existing_bits; + kvz_get_lcu_stats(state, lcu->position.x, lcu->position.y)->bits = bits; + //Wavefronts need the context to be copied to the next row if (state->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW && lcu->index == 1) { int j; @@ -860,6 +864,22 @@ static void encoder_state_init_children(encoder_state_t * const state) { } } +static void normalize_lcu_weights(encoder_state_t * const state) +{ + if (state->frame->num == 0) return; + + const uint32_t num_lcus = state->encoder_control->in.width_in_lcu * + state->encoder_control->in.height_in_lcu; + double sum = 0.0; + for (uint32_t i = 0; i < num_lcus; i++) { + sum += state->frame->lcu_stats[i].weight; + } + + for (uint32_t i = 0; i < num_lcus; i++) { + state->frame->lcu_stats[i].weight /= sum; + } +} + static void encoder_state_init_new_frame(encoder_state_t * const state, kvz_picture* frame) { assert(state->type == ENCODER_STATE_TYPE_MAIN); @@ -908,6 +928,7 @@ static void encoder_state_init_new_frame(encoder_state_t * const state, kvz_pict encoder_state_remove_refs(state); encoder_state_ref_sort(state); + normalize_lcu_weights(state); kvz_set_picture_lambda_and_qp(state); encoder_state_init_children(state); @@ -1045,3 +1066,11 @@ coeff_scan_order_t kvz_get_scan_order(int8_t cu_type, int intra_mode, int depth) return SCAN_DIAG; } + +lcu_stats_t* kvz_get_lcu_stats(encoder_state_t *state, int lcu_x, int lcu_y) +{ + const int index = lcu_x + state->tile->lcu_offset_x + + (lcu_y + state->tile->lcu_offset_y) * + state->encoder_control->in.width_in_lcu; + return &state->frame->lcu_stats[index]; +} diff --git a/src/encoderstate.h b/src/encoderstate.h 
index ab3603b9..d883c11a 100644 --- a/src/encoderstate.h +++ b/src/encoderstate.h @@ -49,6 +49,23 @@ typedef enum { } encoder_state_type; +typedef struct lcu_stats_t { + //! \brief Number of bits that were spent + uint32_t bits; + + //! \brief Weight of the LCU for rate control + double weight; + + //! \brief Lambda value which was used for this LCU + double lambda; + + //! \brief Rate control alpha parameter + double rc_alpha; + + //! \brief Rate control beta parameter + double rc_beta; +} lcu_stats_t; + typedef struct encoder_state_config_frame_t { /** @@ -114,6 +131,13 @@ typedef struct encoder_state_config_frame_t { */ bool done; + /** + * \brief Information about the coded LCUs. + * + * Used for rate control. + */ + lcu_stats_t *lcu_stats; + } encoder_state_config_frame_t; typedef struct encoder_state_config_tile_t { @@ -249,6 +273,8 @@ void kvz_encoder_get_ref_lists(const encoder_state_t *const state, int ref_list_len_out[2], int ref_list_poc_out[2][16]); +lcu_stats_t* kvz_get_lcu_stats(encoder_state_t *state, int lcu_x, int lcu_y); + static const uint8_t g_group_idx[32] = { 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, diff --git a/src/rate_control.c b/src/rate_control.c index 5be244ae..9ce91959 100644 --- a/src/rate_control.c +++ b/src/rate_control.c @@ -190,9 +190,15 @@ void kvz_set_picture_lambda_and_qp(encoder_state_t * const state) } } -void kvz_set_lcu_lambda_and_qp(encoder_state_t * const state) +void kvz_set_lcu_lambda_and_qp(encoder_state_t * const state, + vector2d_t pos) { state->lambda = state->frame->lambda; state->lambda_sqrt = sqrt(state->frame->lambda); state->qp = state->frame->QP; + + lcu_stats_t *lcu_stats = kvz_get_lcu_stats(state, pos.x, pos.y); + lcu_stats->lambda = state->lambda; + lcu_stats->rc_alpha = state->frame->rc_alpha; + lcu_stats->rc_beta = state->frame->rc_beta; } diff --git a/src/rate_control.h b/src/rate_control.h index b1e9281d..1ead1ca6 100644 --- a/src/rate_control.h +++ b/src/rate_control.h @@ -32,6 +32,7 
@@ void kvz_set_picture_lambda_and_qp(encoder_state_t * const state); -void kvz_set_lcu_lambda_and_qp(encoder_state_t * const state); +void kvz_set_lcu_lambda_and_qp(encoder_state_t * const state, + vector2d_t pos); #endif // RATE_CONTROL_H_ diff --git a/src/search.c b/src/search.c index 440e7dca..6d31a50f 100644 --- a/src/search.c +++ b/src/search.c @@ -949,7 +949,10 @@ void kvz_search_lcu(encoder_state_t * const state, const int x, const int y, con } // Start search from depth 0. - search_cu(state, x, y, 0, work_tree); + double cost = search_cu(state, x, y, 0, work_tree); + + // Save squared cost for rate control. + kvz_get_lcu_stats(state, x / LCU_WIDTH, y / LCU_WIDTH)->weight = cost * cost; // The best decisions through out the LCU got propagated back to depth 0, // so copy those back to the frame. From 3af4e9cc8a06dbdde4eea385e4a4d19bf9b708b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arttu=20Yl=C3=A4-Outinen?= Date: Wed, 24 Aug 2016 11:38:10 +0900 Subject: [PATCH 07/12] Allocate bits separately for each LCU Bits are allocated based on the costs of the LCUs in the previous completely coded frame. Breaks deblock when rate control is used. --- src/encoderstate.h | 3 ++ src/rate_control.c | 77 +++++++++++++++++++++++++++++++++++++++------- 2 files changed, 69 insertions(+), 11 deletions(-) diff --git a/src/encoderstate.h b/src/encoderstate.h index d883c11a..f0a56e3a 100644 --- a/src/encoderstate.h +++ b/src/encoderstate.h @@ -114,6 +114,9 @@ typedef struct encoder_state_config_frame_t { //! Number of bits targeted for the current GOP. double cur_gop_target_bits; + //! Number of bits targeted for the current picture. 
+ double cur_pic_target_bits; + // Parameters used in rate control double rc_alpha; double rc_beta; diff --git a/src/rate_control.c b/src/rate_control.c index 9ce91959..154570b2 100644 --- a/src/rate_control.c +++ b/src/rate_control.c @@ -30,6 +30,14 @@ static const int SMOOTHING_WINDOW = 40; static const double MIN_LAMBDA = 0.1; static const double MAX_LAMBDA = 10000; +/** + * \brief Clip lambda value to a valid range. + */ +static double clip_lambda(double lambda) { + if (isnan(lambda)) return MAX_LAMBDA; + return CLIP(MIN_LAMBDA, MAX_LAMBDA, lambda); +} + /** * \brief Update alpha and beta parameters. * \param state the main encoder state @@ -47,7 +55,7 @@ static void update_rc_parameters(encoder_state_t * state) const double alpha_old = state->frame->rc_alpha; const double beta_old = state->frame->rc_beta; // lambda computed from real bpp - const double lambda_comp = CLIP(MIN_LAMBDA, MAX_LAMBDA, alpha_old * pow(bpp, beta_old)); + const double lambda_comp = clip_lambda(alpha_old * pow(bpp, beta_old)); // lambda used in encoding const double lambda_real = state->frame->lambda; const double lambda_log_ratio = log(lambda_real) - log(lambda_comp); @@ -147,9 +155,12 @@ void kvz_set_picture_lambda_and_qp(encoder_state_t * const state) // TODO: take the picture headers into account const double pic_target_bits = pic_allocate_bits(state); const double target_bpp = pic_target_bits / ctrl->in.pixels_per_pic; - const double lambda = state->frame->rc_alpha * pow(target_bpp, state->frame->rc_beta); - state->frame->lambda = CLIP(MIN_LAMBDA, MAX_LAMBDA, lambda); - state->frame->QP = lambda_to_qp(lambda); + double lambda = state->frame->rc_alpha * pow(target_bpp, state->frame->rc_beta); + lambda = clip_lambda(lambda); + + state->frame->lambda = lambda; + state->frame->QP = lambda_to_qp(lambda); + state->frame->cur_pic_target_bits = pic_target_bits; } else { // Rate control disabled @@ -190,15 +201,59 @@ void kvz_set_picture_lambda_and_qp(encoder_state_t * const state) } } +/** 
+ * \brief Allocate bits for a LCU. + * \param state the main encoder state + * \param pos location of the LCU as number of LCUs from top left + * \return number of bits allocated for the LCU + */ +static double lcu_allocate_bits(encoder_state_t * const state, + vector2d_t pos) +{ + double lcu_weight; + if (state->frame->num > state->encoder_control->owf) { + lcu_weight = kvz_get_lcu_stats(state, pos.x, pos.y)->weight; + } else { + const uint32_t num_lcus = state->encoder_control->in.width_in_lcu * + state->encoder_control->in.height_in_lcu; + lcu_weight = 1.0 / num_lcus; + } + + // Target number of bits for the current LCU. + const double lcu_target_bits = state->frame->cur_pic_target_bits * lcu_weight; + + // Allocate at least one bit for each LCU. + return MAX(1, lcu_target_bits); +} + void kvz_set_lcu_lambda_and_qp(encoder_state_t * const state, vector2d_t pos) { - state->lambda = state->frame->lambda; - state->lambda_sqrt = sqrt(state->frame->lambda); - state->qp = state->frame->QP; + const encoder_control_t * const ctrl = state->encoder_control; - lcu_stats_t *lcu_stats = kvz_get_lcu_stats(state, pos.x, pos.y); - lcu_stats->lambda = state->lambda; - lcu_stats->rc_alpha = state->frame->rc_alpha; - lcu_stats->rc_beta = state->frame->rc_beta; + if (ctrl->cfg->target_bitrate > 0) { + const int32_t pixels = MIN(LCU_WIDTH, state->tile->frame->width - LCU_WIDTH * pos.x) * + MIN(LCU_WIDTH, state->tile->frame->height - LCU_WIDTH * pos.y); + const double target_bits = lcu_allocate_bits(state, pos); + const double target_bpp = target_bits / pixels; + const double alpha = state->frame->rc_alpha; + const double beta = state->frame->rc_beta; + + double lambda = alpha * pow(target_bpp, beta); + lambda = clip_lambda(lambda); + + state->qp = lambda_to_qp(lambda); + state->lambda = lambda; + state->lambda_sqrt = sqrt(lambda); + + lcu_stats_t *lcu_stats = kvz_get_lcu_stats(state, pos.x, pos.y); + lcu_stats->lambda = lambda; + lcu_stats->rc_alpha = alpha; + lcu_stats->rc_beta = 
beta; + + } else { + state->qp = state->frame->QP; + state->lambda = state->frame->lambda; + state->lambda_sqrt = sqrt(state->frame->lambda); + } } From 93172fd25114aa45cc81539b87122fdecca52e49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arttu=20Yl=C3=A4-Outinen?= Date: Wed, 24 Aug 2016 12:51:54 +0900 Subject: [PATCH 08/12] Use separate alpha, beta and lambda for each LCU Changes rate control to use the alpha and beta values stored in lcu_stats_t instead of the frame-level values when selecting lambda and QP for an LCU. --- src/rate_control.c | 69 +++++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 31 deletions(-) diff --git a/src/rate_control.c b/src/rate_control.c index 154570b2..a631e52a 100644 --- a/src/rate_control.c +++ b/src/rate_control.c @@ -40,31 +40,28 @@ static double clip_lambda(double lambda) { /** * \brief Update alpha and beta parameters. - * \param state the main encoder state * - * Sets global->rc_alpha and global->rc_beta of the encoder state. 
+ * \param bits number of bits spent for coding the area + * \param pixels size of the area in pixels + * \param lambda_real lambda used for coding the area + * \param[in,out] alpha alpha parameter to update + * \param[in,out] beta beta parameter to update */ -static void update_rc_parameters(encoder_state_t * state) +static void update_parameters(uint32_t bits, + uint32_t pixels, + double lambda_real, + double *alpha, + double *beta) { - const encoder_control_t * const encoder = state->encoder_control; - - const double pixels_per_picture = encoder->in.width * encoder->in.height; - const double bpp = state->stats_bitstream_length * 8 / pixels_per_picture; - const double log_bpp = log(bpp); - - const double alpha_old = state->frame->rc_alpha; - const double beta_old = state->frame->rc_beta; - // lambda computed from real bpp - const double lambda_comp = clip_lambda(alpha_old * pow(bpp, beta_old)); - // lambda used in encoding - const double lambda_real = state->frame->lambda; + const double bpp = bits / (double)pixels; + const double lambda_comp = clip_lambda(*alpha * pow(bpp, *beta)); const double lambda_log_ratio = log(lambda_real) - log(lambda_comp); - const double alpha = alpha_old + 0.1 * lambda_log_ratio * alpha_old; - state->frame->rc_alpha = CLIP(0.05, 20, alpha); + *alpha += 0.10 * lambda_log_ratio * (*alpha); + *alpha = CLIP(0.05, 20, *alpha); - const double beta = beta_old + 0.05 * lambda_log_ratio * CLIP(-5, 1, log_bpp); - state->frame->rc_beta = CLIP(-3, -0.1, beta); + *beta += 0.05 * lambda_log_ratio * CLIP(-5.0, -1.0, log(bpp)); + *beta = CLIP(-3, -0.1, *beta); } /** @@ -149,7 +146,11 @@ void kvz_set_picture_lambda_and_qp(encoder_state_t * const state) if (state->frame->num > ctrl->owf) { // At least one frame has been written. 
- update_rc_parameters(state); + update_parameters(state->stats_bitstream_length * 8, + ctrl->in.pixels_per_pic, + state->frame->lambda, + &state->frame->rc_alpha, + &state->frame->rc_beta); } // TODO: take the picture headers into account @@ -232,24 +233,30 @@ void kvz_set_lcu_lambda_and_qp(encoder_state_t * const state, const encoder_control_t * const ctrl = state->encoder_control; if (ctrl->cfg->target_bitrate > 0) { - const int32_t pixels = MIN(LCU_WIDTH, state->tile->frame->width - LCU_WIDTH * pos.x) * + lcu_stats_t *lcu = kvz_get_lcu_stats(state, pos.x, pos.y); + const uint32_t pixels = MIN(LCU_WIDTH, state->tile->frame->width - LCU_WIDTH * pos.x) * MIN(LCU_WIDTH, state->tile->frame->height - LCU_WIDTH * pos.y); + + if (state->frame->num > ctrl->owf) { + update_parameters(lcu->bits, + pixels, + lcu->lambda, + &lcu->rc_alpha, + &lcu->rc_beta); + } else { + lcu->rc_alpha = state->frame->rc_alpha; + lcu->rc_beta = state->frame->rc_beta; + } + const double target_bits = lcu_allocate_bits(state, pos); const double target_bpp = target_bits / pixels; - const double alpha = state->frame->rc_alpha; - const double beta = state->frame->rc_beta; - double lambda = alpha * pow(target_bpp, beta); - lambda = clip_lambda(lambda); + double lambda = clip_lambda(lcu->rc_alpha * pow(target_bpp, lcu->rc_beta)); - state->qp = lambda_to_qp(lambda); + lcu->lambda = lambda; state->lambda = lambda; state->lambda_sqrt = sqrt(lambda); - - lcu_stats_t *lcu_stats = kvz_get_lcu_stats(state, pos.x, pos.y); - lcu_stats->lambda = lambda; - lcu_stats->rc_alpha = alpha; - lcu_stats->rc_beta = beta; + state->qp = lambda_to_qp(lambda); } else { state->qp = state->frame->QP; From 82a98180e43eaf85da60ad76bbdc90161d8b3500 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arttu=20Yl=C3=A4-Outinen?= Date: Wed, 24 Aug 2016 12:57:31 +0900 Subject: [PATCH 09/12] Clip LCU lambda to reduce quality fluctuation Limits lambdas for each LCU based on the computed lambda from the previous frame and the frame-level lambda. 
--- src/rate_control.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/rate_control.c b/src/rate_control.c index a631e52a..76c435a3 100644 --- a/src/rate_control.c +++ b/src/rate_control.c @@ -252,6 +252,19 @@ void kvz_set_lcu_lambda_and_qp(encoder_state_t * const state, const double target_bpp = target_bits / pixels; double lambda = clip_lambda(lcu->rc_alpha * pow(target_bpp, lcu->rc_beta)); + // Clip lambda according to the equations 24 and 26 in + // https://doi.org/10.1109/TIP.2014.2336550 + if (state->frame->num > ctrl->owf) { + const double bpp = lcu->bits / (double)pixels; + const double lambda_comp = clip_lambda(lcu->rc_alpha * pow(bpp, lcu->rc_beta)); + lambda = CLIP(lambda_comp * 0.7937005259840998, + lambda_comp * 1.2599210498948732, + lambda); + } + lambda = CLIP(state->frame->lambda * 0.6299605249474366, + state->frame->lambda * 1.5874010519681994, + lambda); + lambda = clip_lambda(lambda); lcu->lambda = lambda; state->lambda = lambda; From c219d3cd944993c536c038d7ca0e17363380a0d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arttu=20Yl=C3=A4-Outinen?= Date: Tue, 27 Sep 2016 20:39:37 +0900 Subject: [PATCH 10/12] Fix deblock when CU QP delta is enabled Fixes deblock functions so that they use the correct QP for the filtered edge. Adds field qp to cu_info_t. --- src/cu.h | 7 +++++ src/encoderstate.c | 75 +++++++++++++++++++++++++++++++++++++++++++++- src/filter.c | 27 +++++++++++++++-- 3 files changed, 106 insertions(+), 3 deletions(-) diff --git a/src/cu.h b/src/cu.h index cf2a4e9a..6b5bdf38 100644 --- a/src/cu.h +++ b/src/cu.h @@ -126,6 +126,13 @@ typedef struct uint16_t cbf; + /** + * \brief QP used for the CU. + * + * This is required for deblocking when per-LCU QPs are enabled. 
+ */ + uint8_t qp; + union { struct { int8_t mode; diff --git a/src/encoderstate.c b/src/encoderstate.c index 967cfa8c..825a7373 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -196,7 +196,76 @@ static void encode_sao(encoder_state_t * const state, } -static void encoder_state_worker_encode_lcu(void * opaque) { +/** + * \brief Sets the QP for each CU in state->tile->frame->cu_array. + * + * The QPs are used in deblocking. + * + * The delta QP for an LCU is coded when the first CU with coded block flag + * set is encountered. Hence, for the purposes of deblocking, all CUs + * before the first one with cbf set use state->ref_qp and all CUs after + * that use state->qp. + * + * \param state encoder state + * \param x x-coordinate of the left edge of the root CU + * \param y y-coordinate of the top edge of the root CU + * \param depth depth in the CU quadtree + * \param coeffs_coded Used for tracking whether a CU with a residual + * has been encountered. Should be set to false at + * the top level. + * \return Whether there were any CUs with residual or not. + */ +static bool set_cu_qps(encoder_state_t *state, int x, int y, int depth, bool coeffs_coded) +{ + if (state->qp == state->ref_qp) { + // If the QPs are equal there is no need to care about the residuals. + coeffs_coded = true; + } + + cu_info_t *cu = kvz_cu_array_at(state->tile->frame->cu_array, x, y); + const int cu_width = LCU_WIDTH >> depth; + coeffs_coded = coeffs_coded || cbf_is_set_any(cu->cbf, cu->depth); + + if (!coeffs_coded && cu->depth > depth) { + // Recursively process sub-CUs. 
+ const int d = cu_width >> 1; + coeffs_coded = set_cu_qps(state, x, y, depth + 1, coeffs_coded); + coeffs_coded = set_cu_qps(state, x + d, y, depth + 1, coeffs_coded); + coeffs_coded = set_cu_qps(state, x, y + d, depth + 1, coeffs_coded); + coeffs_coded = set_cu_qps(state, x + d, y + d, depth + 1, coeffs_coded); + + } else { + if (!coeffs_coded && cu->tr_depth > depth) { + // The CU is split into smaller transform units. Check whether coded + // block flag is set for any of the TUs. + const int tu_width = LCU_WIDTH >> cu->tr_depth; + for (int y_scu = y; y_scu < y + cu_width; y_scu += tu_width) { + for (int x_scu = x; x_scu < x + cu_width; x_scu += tu_width) { + cu_info_t *tu = kvz_cu_array_at(state->tile->frame->cu_array, x_scu, y_scu); + if (cbf_is_set_any(tu->cbf, cu->depth)) { + coeffs_coded = true; + } + } + } + } + + // Set the correct QP for all state->tile->frame->cu_array elements in + // the area covered by the CU. + const int8_t qp = coeffs_coded ? state->qp : state->ref_qp; + + for (int y_scu = y; y_scu < y + cu_width; y_scu += SCU_WIDTH) { + for (int x_scu = x; x_scu < x + cu_width; x_scu += SCU_WIDTH) { + kvz_cu_array_at(state->tile->frame->cu_array, x_scu, y_scu)->qp = qp; + } + } + } + + return coeffs_coded; +} + + +static void encoder_state_worker_encode_lcu(void * opaque) +{ const lcu_order_element_t * const lcu = opaque; encoder_state_t *state = lcu->encoder_state; const encoder_control_t * const encoder = state->encoder_control; @@ -211,6 +280,10 @@ static void encoder_state_worker_encode_lcu(void * opaque) { encoder_state_recdata_to_bufs(state, lcu, state->tile->hor_buf_search, state->tile->ver_buf_search); if (encoder->deblock_enable) { + if (encoder->cfg->target_bitrate > 0) { + set_cu_qps(state, lcu->position_px.x, lcu->position_px.y, 0, false); + } + kvz_filter_deblock_lcu(state, lcu->position_px.x, lcu->position_px.y); } diff --git a/src/filter.c b/src/filter.c index dce73199..d96db710 100644 --- a/src/filter.c +++ b/src/filter.c @@ -247,6 
+247,27 @@ static bool is_on_8x8_grid(int x, int y, edge_dir dir) } } +static int8_t get_qp_y_pred(const encoder_state_t* state, int x, int y, edge_dir dir) +{ + if (state->encoder_control->cfg->target_bitrate <= 0) { + return state->qp; + } + + int32_t qp_p; + if (dir == EDGE_HOR && y > 0) { + qp_p = kvz_cu_array_at_const(state->tile->frame->cu_array, x, y - 1)->qp; + } else if (dir == EDGE_VER && x > 0) { + qp_p = kvz_cu_array_at_const(state->tile->frame->cu_array, x - 1, y)->qp; + } else { + qp_p = state->frame->QP; + } + + const int32_t qp_q = + kvz_cu_array_at_const(state->tile->frame->cu_array, x, y)->qp; + + return (qp_p + qp_q + 1) >> 1; +} + /** * \brief Apply the deblocking filter to luma pixels on a single edge. * @@ -290,8 +311,9 @@ static void filter_deblock_edge_luma(encoder_state_t * const state, kvz_pixel *orig_src = &frame->rec->y[x + y*stride]; kvz_pixel *src = orig_src; + const int32_t qp = get_qp_y_pred(state, x, y, dir); + int8_t strength = 0; - int32_t qp = state->qp; int32_t bitdepth_scale = 1 << (encoder->bitdepth - 8); int32_t b_index = CLIP(0, 51, qp + (beta_offset_div2 << 1)); int32_t beta = kvz_g_beta_table_8x8[b_index] * bitdepth_scale; @@ -490,7 +512,8 @@ static void filter_deblock_edge_chroma(encoder_state_t * const state, }; int8_t strength = 2; - int32_t QP = kvz_g_chroma_scale[state->qp]; + const int32_t luma_qp = get_qp_y_pred(state, x << 1, y << 1, dir); + int32_t QP = kvz_g_chroma_scale[luma_qp]; int32_t bitdepth_scale = 1 << (encoder->bitdepth-8); int32_t TC_index = CLIP(0, 51+2, (int32_t)(QP + 2*(strength-1) + (tc_offset_div2 << 1))); int32_t Tc = kvz_g_tc_table_8x8[TC_index]*bitdepth_scale; From ee518e8ac4f3b82f3d3a1ed4494ad38ba1796142 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arttu=20Yl=C3=A4-Outinen?= Date: Thu, 6 Oct 2016 21:27:01 +0900 Subject: [PATCH 11/12] Take header bits into account in rate control --- src/encoder_state-bitstream.c | 6 ++--- src/encoderstate.h | 13 ++++++++++ src/rate_control.c | 45 
++++++++++++++++++++++++++++++++--- 3 files changed, 57 insertions(+), 7 deletions(-) diff --git a/src/encoder_state-bitstream.c b/src/encoder_state-bitstream.c index edeef983..cb2be0f9 100644 --- a/src/encoder_state-bitstream.c +++ b/src/encoder_state-bitstream.c @@ -918,10 +918,8 @@ static void encoder_state_write_bitstream_main(encoder_state_t * const state) first_nal_in_au = false; encoder_state_write_bitstream_aud(state); } - - if ((encoder->vps_period > 0 && state->frame->num % encoder->vps_period == 0) - || (state->frame->num == 0 && encoder->vps_period >= 0)) - { + + if (encoder_state_must_write_vps(state)) { first_nal_in_au = false; kvz_encoder_state_write_parameter_sets(&state->stream, state); } diff --git a/src/encoderstate.h b/src/encoderstate.h index f0a56e3a..5354bc34 100644 --- a/src/encoderstate.h +++ b/src/encoderstate.h @@ -278,6 +278,19 @@ void kvz_encoder_get_ref_lists(const encoder_state_t *const state, lcu_stats_t* kvz_get_lcu_stats(encoder_state_t *state, int lcu_x, int lcu_y); + +/** + * Whether the parameter sets should be written with the current frame. + */ +static INLINE bool encoder_state_must_write_vps(const encoder_state_t *state) +{ + const int32_t frame = state->frame->num; + const int32_t vps_period = state->encoder_control->vps_period; + + return (vps_period > 0 && frame % vps_period == 0) || + (vps_period >= 0 && frame == 0); +} + static const uint8_t g_group_idx[32] = { 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, diff --git a/src/rate_control.c b/src/rate_control.c index 76c435a3..aa770528 100644 --- a/src/rate_control.c +++ b/src/rate_control.c @@ -95,10 +95,49 @@ static double gop_allocate_bits(encoder_state_t * const state) return MAX(200, gop_target_bits); } +/** + * Estimate number of bits used for headers of the current picture. 
+ * \param state the main encoder state + * \return number of header bits + */ +static uint64_t pic_header_bits(encoder_state_t * const state) +{ + const kvz_config* cfg = state->encoder_control->cfg; + + // nal type and slice header + uint64_t bits = 48 + 24; + + // entry points + bits += 12 * state->encoder_control->in.height_in_lcu; + + switch (cfg->hash) { + case KVZ_HASH_CHECKSUM: + bits += 168; + break; + + case KVZ_HASH_MD5: + bits += 456; + break; + + case KVZ_HASH_NONE: + break; + } + + if (encoder_state_must_write_vps(state)) { + bits += 613; + } + + if (state->frame->num == 0 && cfg->add_encoder_info) { + bits += 1392; + } + + return bits; +} + /** * Allocate bits for the current picture. * \param state the main encoder state - * \return target number of bits + * \return target number of bits, excluding headers */ static double pic_allocate_bits(encoder_state_t * const state) { @@ -122,7 +161,8 @@ static double pic_allocate_bits(encoder_state_t * const state) const double pic_weight = encoder->gop_layer_weights[ encoder->cfg->gop[state->frame->gop_offset].layer - 1]; - double pic_target_bits = state->frame->cur_gop_target_bits * pic_weight; + const double pic_target_bits = + state->frame->cur_gop_target_bits * pic_weight - pic_header_bits(state); // Allocate at least 100 bits for each picture like HM does. 
return MAX(100, pic_target_bits); } @@ -153,7 +193,6 @@ void kvz_set_picture_lambda_and_qp(encoder_state_t * const state) &state->frame->rc_beta); } - // TODO: take the picture headers into account const double pic_target_bits = pic_allocate_bits(state); const double target_bpp = pic_target_bits / ctrl->in.pixels_per_pic; double lambda = state->frame->rc_alpha * pow(target_bpp, state->frame->rc_beta); From 05794c3548400ab5d9f958e9faaeecedaae380d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arttu=20Yl=C3=A4-Outinen?= Date: Wed, 11 Jan 2017 15:47:53 +0900 Subject: [PATCH 12/12] Add missing static to function lambda_to_qp --- src/rate_control.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rate_control.c b/src/rate_control.c index aa770528..e9c6096e 100644 --- a/src/rate_control.c +++ b/src/rate_control.c @@ -167,7 +167,7 @@ static double pic_allocate_bits(encoder_state_t * const state) return MAX(100, pic_target_bits); } -int8_t lambda_to_qp(const double lambda) +static int8_t lambda_to_qp(const double lambda) { const int8_t qp = 4.2005 * log(lambda) + 13.7223 + 0.5; return CLIP(0, 51, qp);