/*****************************************************************************
* This file is part of Kvazaar HEVC encoder.
*
* Copyright (C) 2013-2015 Tampere University of Technology and others (see
* COPYING file).
*
* Kvazaar is free software: you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the
* Free Software Foundation; either version 2.1 of the License, or (at your
* option) any later version.
*
* Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along
* with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
****************************************************************************/
#include "rate_control.h"

#include <math.h>

#include "encoder.h"
#include "kvazaar.h"
// Window length, in pictures, used by gop_allocate_bits() when computing
// the bit budget of a GOP.
static const int SMOOTHING_WINDOW = 40;

// Bounds applied to lambda by clip_lambda().
static const double MIN_LAMBDA = 0.1;
static const double MAX_LAMBDA = 10000.0;
/**
 * \brief Clip lambda value to a valid range.
 *
 * \param lambda  lambda value to clip; may be NaN
 * \return        MAX_LAMBDA when lambda is NaN, otherwise lambda clipped
 *                with CLIP(MIN_LAMBDA, MAX_LAMBDA, lambda)
 */
static double clip_lambda(double lambda) {
  if (!isnan(lambda)) {
    return CLIP(MIN_LAMBDA, MAX_LAMBDA, lambda);
  }
  // NaN cannot be ordered against the bounds; use the upper bound.
  return MAX_LAMBDA;
}
/**
 * \brief Update alpha and beta parameters.
 *
 * The lambda given by the current model (alpha * bpp^beta) for the bits that
 * were actually spent is compared with the lambda that was actually used.
 * Alpha and beta are then adjusted in proportion to the difference of the
 * logarithms of the two lambdas.
 *
 * \param         bits         number of bits spent for coding the area
 * \param         pixels       size of the area in pixels
 * \param         lambda_real  lambda used for coding the area
 * \param[in,out] alpha        alpha parameter to update, kept in [0.05, 20]
 * \param[in,out] beta         beta parameter to update, kept in [-3, -0.1]
 */
static void update_parameters(uint32_t bits,
                              uint32_t pixels,
                              double lambda_real,
                              double *alpha,
                              double *beta)
{
  const double bits_per_pixel = bits / (double)pixels;

  // Lambda predicted by the model for the realised bits per pixel.
  const double model_lambda = clip_lambda(*alpha * pow(bits_per_pixel, *beta));
  const double log_error = log(lambda_real) - log(model_lambda);

  const double alpha_new = *alpha + 0.10 * log_error * (*alpha);
  *alpha = CLIP(0.05, 20, alpha_new);

  // The logarithm of bpp is limited to [-5, -1] before it scales the step.
  const double ln_bpp = log(bits_per_pixel);
  const double beta_new = *beta + 0.05 * log_error * CLIP(-5.0, -1.0, ln_bpp);
  *beta = CLIP(-3, -0.1, beta_new);
}
/**
 * \brief Allocate bits for the current GOP.
 *
 * Uses equation 12 from https://doi.org/10.1109/TIP.2014.2336550 with a
 * window of SMOOTHING_WINDOW pictures.
 *
 * \param state   the main encoder state
 * \return        target number of bits, never less than 200
 */
static double gop_allocate_bits(encoder_state_t * const state)
{
  const encoder_control_t * const ctrl = state->encoder_control;

  // At this point, total_bits_coded of the current state contains the
  // number of bits written ctrl->cfg.owf frames before the current frame.
  uint64_t coded_bits = state->frame->total_bits_coded;
  int coded_pictures = MAX(0, state->frame->num - ctrl->cfg.owf);

  const int offset_in_gop =
    (state->frame->gop_offset - ctrl->cfg.owf) % MAX(1, ctrl->cfg.gop_len);

  const int gop_partially_coded =
    ctrl->cfg.gop_len > 0 &&
    offset_in_gop != ctrl->cfg.gop_len - 1 &&
    ctrl->cfg.gop_lp_definition.d == 0;

  if (gop_partially_coded) {
    // Leave the bits and pictures of the partially coded GOP out of the
    // totals.
    coded_bits -= state->frame->cur_gop_bits_coded;
    coded_pictures -= offset_in_gop + 1;
  }

  const double target_bits =
    (ctrl->target_avg_bppic * (coded_pictures + SMOOTHING_WINDOW) - coded_bits)
    * MAX(1, ctrl->cfg.gop_len) / SMOOTHING_WINDOW;

  // Allocate at least 200 bits for each GOP like HM does.
  return MAX(200, target_bits);
}
/**
 * \brief Estimate number of bits used for headers of the current picture.
 *
 * \param state   the main encoder state
 * \return        number of header bits
 */
static uint64_t pic_header_bits(encoder_state_t * const state)
{
  const encoder_control_t * const ctrl = state->encoder_control;

  // NAL type (48) and slice header (24).
  uint64_t header_bits = 48 + 24;

  // Entry points: 12 bits for each LCU row.
  header_bits += 12 * ctrl->in.height_in_lcu;

  // Extra bits depending on the configured hash type; KVZ_HASH_NONE adds
  // nothing.
  if (ctrl->cfg.hash == KVZ_HASH_CHECKSUM) {
    header_bits += 168;
  } else if (ctrl->cfg.hash == KVZ_HASH_MD5) {
    header_bits += 456;
  }

  if (encoder_state_must_write_vps(state)) {
    header_bits += 613;
  }

  // Extra bits on the first frame when add_encoder_info is enabled.
  if (state->frame->num == 0 && ctrl->cfg.add_encoder_info) {
    header_bits += 1392;
  }

  return header_bits;
}
/**
 * \brief Allocate bits for the current picture.
 *
 * Also updates cur_gop_target_bits of the current frame, and resets
 * cur_gop_bits_coded when a new GOP starts at this frame.
 *
 * \param state   the main encoder state
 * \return        target number of bits, excluding headers. When GOPs are
 *                not in use (gop_len <= 0) the GOP target is returned as is.
 */
static double pic_allocate_bits(encoder_state_t * const state)
{
  const encoder_control_t * const ctrl = state->encoder_control;

  const int starts_new_gop =
    ctrl->cfg.gop_len == 0 ||
    state->frame->gop_offset == 0 ||
    state->frame->num == 0;

  if (!starts_new_gop) {
    // Continue with the budget of the GOP that is already in progress.
    state->frame->cur_gop_target_bits =
      state->previous_encoder_state->frame->cur_gop_target_bits;
  } else {
    state->frame->cur_gop_target_bits = gop_allocate_bits(state);
    state->frame->cur_gop_bits_coded = 0;
  }

  if (ctrl->cfg.gop_len <= 0) {
    return state->frame->cur_gop_target_bits;
  }

  // The share of the GOP budget depends on the GOP layer of the picture.
  const double layer_weight = ctrl->gop_layer_weights[
    ctrl->cfg.gop[state->frame->gop_offset].layer - 1];
  const double target_bits =
    state->frame->cur_gop_target_bits * layer_weight - pic_header_bits(state);

  // Allocate at least 100 bits for each picture like HM does.
  return MAX(100, target_bits);
}
/**
 * \brief Cube root that keeps the sign of its argument.
 *
 * \param value   any value
 * \return        sign(value) * |value|^(1/3)
 */
static double signed_cube_root(double value)
{
  const double sign = (value < 0.0) ? -1.0 : 1.0;
  const double magnitude = (value < 0.0) ? -value : value;
  return sign * pow(magnitude, 1.0 / 3.0);
}

/**
 * \brief Solve for the lambda at which a range of CTUs is estimated to use
 *        the target number of bits.
 *
 * The bits of CTU i are modelled as (a_i / lambda)^b_i, where a_i and b_i
 * are derived from the c_para and k_para tables of the given layer. The sum
 * over the CTUs is expanded to the third order in ln(lambda) around
 * ln(est_lambda), which yields a cubic A*x^3 + B*x^2 + C*x + D = 0 in
 * x = ln(lambda). The cubic is solved in closed form when its discriminant
 * is positive (the expressions match Shengjin's formula for that case);
 * otherwise est_lambda is used.
 *
 * \param state         frame state holding the lookahead tables and the
 *                      CTU statistics
 * \param ctu_index     first CTU to include
 * \param last_ctu      one past the last CTU to include
 * \param layer         first index into the c_para and k_para tables
 * \param est_lambda    expansion point, also the fallback result
 * \param target_bits   number of bits the CTUs should use in total
 * \return              lambda clipped to [0.001, 100000000.0]
 */
static double solve_cubic_equation(const encoder_state_config_frame_t * const state,
                                   int ctu_index,
                                   int last_ctu,
                                   int layer,
                                   double est_lambda,
                                   double target_bits)
{
  // Coefficients of x^3, x^2, x^1 and x^0.
  double coef3 = 0.0;
  double coef2 = 0.0;
  double coef1 = 0.0;
  double coef0 = 0.0;

  for (int ctu = ctu_index; ctu < last_ctu; ctu++) {
    const double c_lcu = state->new_lookahead.c_para[layer][ctu];
    const double k_lcu = state->new_lookahead.k_para[layer][ctu];

    // Check C and K during each solution
    assert((c_lcu <= 0) || (k_lcu >= 0));

    const double a = -c_lcu * k_lcu / pow(state->lcu_stats[ctu].pixels, k_lcu - 1.0);
    const double b = -1.0 / (k_lcu - 1.0);
    const double ln_d = log(est_lambda);
    // Estimated bits of this CTU at the expansion point.
    const double c = pow(a / est_lambda, b);

    coef3 = coef3 - c * pow(b, 3.0) / 6.0;
    coef2 = coef2 + (pow(b, 2.0) / 2.0 + pow(b, 3.0) * ln_d / 2.0) * c;
    coef1 = coef1 - (pow(b, 3.0) / 2.0 * pow(ln_d, 2.0) + pow(b, 2.0) * ln_d + b) * c;
    coef0 = coef0 + c * (1 + b * ln_d + pow(b, 2.0) / 2 * pow(ln_d, 2.0) + pow(b, 3.0) / 6 * pow(ln_d, 3.0));
  }

  // The root is where the estimated bits equal the target.
  coef0 = coef0 - target_bits;

  const double disc_a = coef2 * coef2 - 3 * coef3 * coef1;
  const double disc_b = coef2 * coef1 - 9 * coef3 * coef0;
  const double disc_c = coef1 * coef1 - 3 * coef2 * coef0;
  const double delta = disc_b * disc_b - 4 * disc_a * disc_c;

  // Use the original picture estimated lambda unless delta is positive.
  double lambda = est_lambda;

  if (delta > 0.0) {
    const double y1 = disc_a * coef2 + 3 * coef3 * (-disc_b - pow(delta, 0.5)) / 2.0;
    const double y2 = disc_a * coef2 + 3 * coef3 * (-disc_b + pow(delta, 0.5)) / 2.0;
    const double x = (-coef2 - signed_cube_root(y1) - signed_cube_root(y2)) / 3 / coef3;
    lambda = exp(x);
  }

  return CLIP(0.001, 100000000.0, lambda);
}
/**
 * \brief Compute the weight of every CTU for the given lambda.
 *
 * The weight of CTU i is (a_i / estLambda)^b_i, with a_i and b_i derived
 * from the c_para and k_para tables of the given layer, and is raised to
 * 0.01 when it falls below that. The weights are stored in
 * state->frame->lcu_stats.
 *
 * \param state       the main encoder state
 * \param layer       first index into the c_para and k_para tables
 * \param ctu_count   number of CTUs to process
 * \param estLambda   lambda for which the weights are computed
 * \return            sum of the stored weights
 */
static INLINE double calculate_weights(encoder_state_t* const state, const int layer, const int ctu_count, double estLambda) {
  double weight_sum = 0;

  for (int ctu = 0; ctu < ctu_count; ctu++) {
    const double c_lcu = state->frame->new_lookahead.c_para[layer][ctu];
    const double k_lcu = state->frame->new_lookahead.k_para[layer][ctu];

    const double a = -c_lcu * k_lcu / pow(state->frame->lcu_stats[ctu].pixels, k_lcu - 1.0);
    const double b = -1.0 / (k_lcu - 1.0);

    state->frame->lcu_stats[ctu].weight = pow(a / estLambda, b);
    if (0.01 > state->frame->lcu_stats[ctu].weight) {
      state->frame->lcu_stats[ctu].weight = 0.01;
    }

    weight_sum += state->frame->lcu_stats[ctu].weight;
  }

  return weight_sum;
}
/**
 * \brief Estimate lambda for the current picture and split the picture's
 *        bit budget between its CTUs.
 *
 * Allocates bits for the picture with pic_allocate_bits(), derives a lambda
 * estimate from the model lambda = alpha * bpp^beta, limits the estimate
 * relative to previously used lambdas and stores it in state->frame->lambda.
 * The weight field of every CTU in state->frame->lcu_stats is set to the
 * number of bits allocated to that CTU.
 *
 * \param state   the main encoder state
 */
void estimatePicLambda(encoder_state_t * const state) {
  double bits = pic_allocate_bits(state);
  const int layer = state->frame->gop_offset - (state->frame->is_irap ? 1 : 0);
  const int ctu_count = state->tile->frame->height_in_lcu * state->tile->frame->width_in_lcu;

  // Model parameters: the frame-level ones for the first picture, otherwise
  // derived from the picture-level C and K parameters of this GOP offset.
  double alpha;
  double beta;
  if (state->frame->poc == 0) {
    alpha = state->frame->rc_alpha;
    beta = state->frame->rc_beta;
  }
  else {
    alpha = -state->frame->new_lookahead.pic_c_para[state->frame->gop_offset] *
      state->frame->new_lookahead.pic_k_para[state->frame->gop_offset];
    beta = state->frame->new_lookahead.pic_k_para[state->frame->gop_offset] - 1;
  }

  double estLambda;
  double bpp = bits / (state->encoder_control->cfg.width * state->encoder_control->cfg.height);
  if (state->frame->is_irap) {
    // TODO: Intra
    estLambda = alpha * pow(bpp, beta) * 0.5;
  }
  else {
    estLambda = alpha * pow(bpp, beta);
  }

  double temp_lambda;
  // Stay within a factor of two of the previous lambda of the same layer.
  // layer is -1 for an IRAP picture at GOP offset 0; there is no valid
  // table entry to read in that case.
  if (layer >= 0 &&
      (temp_lambda = state->frame->new_lookahead.previous_lambdas[layer]) > 0.0) {
    estLambda = CLIP(temp_lambda * pow(2.0, -1), temp_lambda * 2, estLambda);
  }

  // Stay within a factor of 2^(10/3) of the lambda of the last frame.
  if ((temp_lambda = state->frame->new_lookahead.last_frame_lambda) > 0.0) {
    estLambda = CLIP(temp_lambda * pow(2.0, -10.0 / 3.0), temp_lambda * pow(2.0, 10.0 / 3.0), estLambda);
  }

  // Lambda must not go below 0.1. (MIN here would cap every picture at 0.1.)
  estLambda = MAX(estLambda, 0.1);

  double total_weight = 0;

  if (!state->frame->is_irap) {
    if (!state->encoder_control->cfg.frame_allocation) {
      // Refine lambda until the estimated bits of the CTUs match the budget.
      // NOTE(review): best_lambda is not used after the loop; the CTU
      // weights below are still computed from estLambda. Confirm whether
      // the refined lambda was meant to be used instead.
      double best_lambda = 0.0;
      temp_lambda = estLambda;
      double taylor_e3;
      int iteration_number = 0;
      do {
        taylor_e3 = 0.0;
        best_lambda = temp_lambda = solve_cubic_equation(state->frame, 0, ctu_count, layer, temp_lambda, bits);
        for (int i = 0; i < ctu_count; ++i) {
          double CLCU = state->frame->new_lookahead.c_para[layer][i];
          double KLCU = state->frame->new_lookahead.k_para[layer][i];
          double a = -CLCU * KLCU / pow(state->frame->lcu_stats[i].pixels, KLCU - 1.0);
          double b = -1.0 / (KLCU - 1.0);
          taylor_e3 += pow(a / best_lambda, b);
        }
        // Count the round so that the iteration cap below takes effect and
        // the loop terminates even when the solution does not converge.
        iteration_number++;
      }
      while (fabs(taylor_e3 - bits) > 0.01 && iteration_number <= 11);
    }
    total_weight = calculate_weights(state, layer, ctu_count, estLambda);
  }
  else {
    // IRAP picture: weights come from the frame-level alpha and beta.
    for (int i = 0; i < ctu_count; ++i) {
      state->frame->lcu_stats[i].weight = MAX(0.01,
        state->frame->lcu_stats[i].pixels * pow(estLambda / state->frame->rc_alpha,
                                                1.0 / state->frame->rc_beta));
      total_weight += state->frame->lcu_stats[i].weight;
    }
  }

  // Turn the relative weights into bit counts that sum up to the budget.
  for (int i = 0; i < ctu_count; ++i) {
    state->frame->lcu_stats[i].weight = bits * state->frame->lcu_stats[i].weight / total_weight;
  }

  state->frame->lambda = estLambda;
}
/**
 * \brief Convert a lambda value to a QP.
 *
 * Computes 4.2005 * ln(lambda) + 13.7223, adds 0.5 before the truncating
 * conversion to int8_t and clips the result with CLIP_TO_QP.
 *
 * \param lambda  lambda value to convert
 * \return        QP corresponding to lambda
 */
static int8_t lambda_to_qp(const double lambda)
{
  const double ln_lambda = log(lambda);
  const int8_t rounded_qp = 4.2005 * ln_lambda + 13.7223 + 0.5;
  return CLIP_TO_QP(rounded_qp);
}
/**
 * \brief Convert a QP to a lambda value for the current picture.
 *
 * The base value 2^((qp - 12) / 3) is scaled according to the slice type,
 * the GOP configuration and the position of the picture in the period.
 * Note that the final scaling step reads state->frame->QP, not qp.
 *
 * \param state   the main encoder state
 * \param qp      QP to convert
 * \return        lambda corresponding to qp
 */
static double qp_to_lamba(encoder_state_t * const state, int qp)
{
  const encoder_control_t * const ctrl = state->encoder_control;
  const int gop_len = ctrl->cfg.gop_len;
  // The GOP length acts as the period when GOPs are in use.
  const int period = gop_len > 0 ? gop_len : ctrl->cfg.intra_period;

  double lambda = pow(2.0, (qp - 12) / 3.0);

  if (state->frame->slicetype == KVZ_SLICE_I) {
    lambda *= 0.57;

    // Reduce lambda for I-frames according to the number of references.
    if (period != 0) {
      lambda *= 1.0 - CLIP(0.0, 0.5, 0.05 * (period - 1));
    } else {
      lambda *= 0.5;
    }
  } else if (gop_len > 0) {
    lambda *= ctrl->cfg.gop[state->frame->gop_offset].qp_factor;
  } else {
    lambda *= 0.4624;
  }

  // Increase lambda if not key-frame.
  if (period > 0 && state->frame->poc % period != 0) {
    lambda *= CLIP(2.0, 4.0, (state->frame->QP - 12) / 6.0);
  }

  return lambda;
}
/**
 * \brief Allocate bits and set lambda and QP for the current picture.
 *
 * With rate control enabled (target_bitrate > 0) the frame-level alpha and
 * beta are updated from the bitstream length once num exceeds owf, bits are
 * allocated for the picture, and lambda and QP are derived from the
 * resulting bits per pixel. Otherwise QP comes from the configuration and
 * lambda is derived from QP.
 *
 * \param state   the main encoder state
 */
void kvz_set_picture_lambda_and_qp(encoder_state_t * const state)
{
  const encoder_control_t * const ctrl = state->encoder_control;

  if (!(ctrl->cfg.target_bitrate > 0)) {
    // Rate control disabled
    const int gop_len = ctrl->cfg.gop_len;
    const int apply_gop_offset =
      gop_len > 0 && state->frame->slicetype != KVZ_SLICE_I;

    if (apply_gop_offset) {
      state->frame->QP =
        CLIP_TO_QP(ctrl->cfg.qp + ctrl->cfg.gop[state->frame->gop_offset].qp_offset);
    } else {
      state->frame->QP = ctrl->cfg.qp;
    }

    state->frame->lambda = qp_to_lamba(state, state->frame->QP);
    return;
  }

  // Rate control enabled
  if (state->frame->num > ctrl->cfg.owf) {
    // At least one frame has been written.
    update_parameters(state->stats_bitstream_length * 8,
                      ctrl->in.pixels_per_pic,
                      state->frame->lambda,
                      &state->frame->rc_alpha,
                      &state->frame->rc_beta);
  }

  const double target_bits = pic_allocate_bits(state);
  const double bits_per_pixel = target_bits / ctrl->in.pixels_per_pic;
  const double lambda =
    clip_lambda(state->frame->rc_alpha * pow(bits_per_pixel, state->frame->rc_beta));

  state->frame->lambda = lambda;
  state->frame->QP = lambda_to_qp(lambda);
  state->frame->cur_pic_target_bits = target_bits;
}
/**
 * \brief Allocate bits for a LCU.
 *
 * The picture target is multiplied by the stored weight of the LCU once num
 * exceeds owf; before that every LCU gets an equal share.
 *
 * \param state   the main encoder state
 * \param pos     location of the LCU as number of LCUs from top left
 * \return        number of bits allocated for the LCU, at least 1
 */
static double lcu_allocate_bits(encoder_state_t * const state,
                                vector2d_t pos)
{
  double weight;

  if (state->frame->num <= state->encoder_control->cfg.owf) {
    const uint32_t lcu_count = state->encoder_control->in.width_in_lcu *
                               state->encoder_control->in.height_in_lcu;
    weight = 1.0 / lcu_count;
  } else {
    weight = kvz_get_lcu_stats(state, pos.x, pos.y)->weight;
  }

  // Target number of bits for the current LCU.
  const double target_bits = state->frame->cur_pic_target_bits * weight;

  // Allocate at least one bit for each LCU.
  return MAX(1, target_bits);
}
/**
 * \brief Set lambda, its square root and QP of the state for one LCU.
 *
 * Three cases, checked in this order:
 *  - ROI delta QPs are configured: QP is the frame QP plus the delta QP of
 *    the ROI cell covering the LCU, and lambda is derived from that QP.
 *  - Rate control is enabled (target_bitrate > 0): bits are allocated for
 *    the LCU and lambda is derived from the per-LCU alpha and beta.
 *  - Otherwise: the frame-level QP and lambda are used as they are.
 *
 * \param state   the main encoder state
 * \param pos     location of the LCU as number of LCUs from top left
 */
void kvz_set_lcu_lambda_and_qp(encoder_state_t * const state,
                               vector2d_t pos)
{
  const encoder_control_t * const ctrl = state->encoder_control;

  if (ctrl->cfg.roi.dqps != NULL) {
    // Position of the LCU including the LCU offset of the tile.
    const vector2d_t lcu = {
      pos.x + state->tile->lcu_offset_x,
      pos.y + state->tile->lcu_offset_y
    };
    // Cell of the ROI map that the LCU falls into.
    const vector2d_t roi = {
      lcu.x * ctrl->cfg.roi.width / ctrl->in.width_in_lcu,
      lcu.y * ctrl->cfg.roi.height / ctrl->in.height_in_lcu
    };
    const int roi_index = roi.x + roi.y * ctrl->cfg.roi.width;
    const int dqp = ctrl->cfg.roi.dqps[roi_index];

    state->qp = CLIP_TO_QP(state->frame->QP + dqp);
    state->lambda = qp_to_lamba(state, state->qp);
    state->lambda_sqrt = sqrt(state->lambda);
    return;
  }

  if (!(ctrl->cfg.target_bitrate > 0)) {
    // No rate control: use the picture-level values.
    state->qp = state->frame->QP;
    state->lambda = state->frame->lambda;
    state->lambda_sqrt = sqrt(state->frame->lambda);
    return;
  }

  lcu_stats_t * const stats = kvz_get_lcu_stats(state, pos.x, pos.y);

  // LCUs on the right and bottom edges may be smaller than LCU_WIDTH.
  const uint32_t pixels = MIN(LCU_WIDTH, state->tile->frame->width - LCU_WIDTH * pos.x) *
                          MIN(LCU_WIDTH, state->tile->frame->height - LCU_WIDTH * pos.y);

  const int has_feedback = state->frame->num > ctrl->cfg.owf;

  if (has_feedback) {
    update_parameters(stats->bits,
                      pixels,
                      stats->lambda,
                      &stats->rc_alpha,
                      &stats->rc_beta);
  } else {
    stats->rc_alpha = state->frame->rc_alpha;
    stats->rc_beta = state->frame->rc_beta;
  }

  const double target_bits = lcu_allocate_bits(state, pos);
  const double target_bpp = target_bits / pixels;

  double lambda = clip_lambda(stats->rc_alpha * pow(target_bpp, stats->rc_beta));

  // Clip lambda according to the equations 24 and 26 in
  // https://doi.org/10.1109/TIP.2014.2336550
  if (has_feedback) {
    const double coded_bpp = stats->bits / (double)pixels;
    const double coded_lambda = clip_lambda(stats->rc_alpha * pow(coded_bpp, stats->rc_beta));
    // The factors are 2^(-1/3) and 2^(1/3).
    lambda = CLIP(coded_lambda * 0.7937005259840998,
                  coded_lambda * 1.2599210498948732,
                  lambda);
  }
  // The factors are 2^(-2/3) and 2^(2/3).
  lambda = CLIP(state->frame->lambda * 0.6299605249474366,
                state->frame->lambda * 1.5874010519681994,
                lambda);
  lambda = clip_lambda(lambda);

  stats->lambda = lambda;
  state->lambda = lambda;
  state->lambda_sqrt = sqrt(lambda);
  state->qp = lambda_to_qp(lambda);
}