Merge branch 'rate-control-improvements'

This commit is contained in:
Arttu Ylä-Outinen 2017-01-11 15:54:42 +09:00
commit 2318bd77ed
22 changed files with 583 additions and 219 deletions

View file

@ -60,6 +60,7 @@ typedef struct
cabac_ctx_t trans_subdiv_model[3]; //!< \brief intra mode context models
cabac_ctx_t qt_cbf_model_luma[4];
cabac_ctx_t qt_cbf_model_chroma[4];
cabac_ctx_t cu_qp_delta_abs[4];
cabac_ctx_t part_size_model[4];
cabac_ctx_t cu_sig_coeff_group_model[4];
cabac_ctx_t cu_sig_model_luma[27];

View file

@ -121,6 +121,12 @@ static const uint8_t INIT_QT_CBF[3][8] = {
{ 111, 141, CNU, CNU, 94, 138, 182, 154 },
};
// Initial values for the cu_qp_delta_abs context models. The first index is
// the slice value passed to kvz_init_contexts and the second index is the
// context model.
static const uint8_t INIT_CU_QP_DELTA_ABS[3][2] = {
{ 154, 154 },
{ 154, 154 },
{ 154, 154 },
};
static const uint8_t INIT_SIG_CG_FLAG[3][4] = {
{ 121, 140, 61, 154 },
{ 121, 140, 61, 154 },
@ -243,6 +249,9 @@ void kvz_init_contexts(encoder_state_t *state, int8_t QP, int8_t slice)
kvz_ctx_init(&cabac->ctx.mvp_idx_model[0], QP, INIT_MVP_IDX[slice][0]);
kvz_ctx_init(&cabac->ctx.mvp_idx_model[1], QP, INIT_MVP_IDX[slice][1]);
kvz_ctx_init(&cabac->ctx.cu_qp_delta_abs[0], QP, INIT_CU_QP_DELTA_ABS[slice][0]);
kvz_ctx_init(&cabac->ctx.cu_qp_delta_abs[1], QP, INIT_CU_QP_DELTA_ABS[slice][1]);
for (i = 0; i < 4; i++) {
kvz_ctx_init(&cabac->ctx.cu_sig_coeff_group_model[i], QP, INIT_SIG_CG_FLAG[slice][i]);
kvz_ctx_init(&cabac->ctx.cu_abs_model_luma[i], QP, INIT_ABS_FLAG[slice][i]);

View file

@ -126,6 +126,13 @@ typedef struct
uint16_t cbf;
/**
* \brief QP used for the CU.
*
* This is required for deblocking when per-LCU QPs are enabled.
*/
uint8_t qp;
union {
struct {
int8_t mode;

View file

@ -517,6 +517,28 @@ static void encode_transform_coeff(encoder_state_t * const state,
}
if (cb_flag_y | cb_flag_u | cb_flag_v) {
if (state->must_code_qp_delta) {
const int qp_delta = state->qp - state->ref_qp;
const int qp_delta_abs = ABS(qp_delta);
cabac_data_t* cabac = &state->cabac;
// cu_qp_delta_abs prefix
cabac->cur_ctx = &cabac->ctx.cu_qp_delta_abs[0];
kvz_cabac_write_unary_max_symbol(cabac, cabac->ctx.cu_qp_delta_abs, MIN(qp_delta_abs, 5), 1, 5);
if (qp_delta_abs >= 5) {
// cu_qp_delta_abs suffix
kvz_cabac_write_ep_ex_golomb(state, cabac, qp_delta_abs - 5, 0);
}
if (qp_delta != 0) {
CABAC_BIN_EP(cabac, (qp_delta >= 0 ? 0 : 1), "qp_delta_sign_flag");
}
state->must_code_qp_delta = false;
state->ref_qp = state->qp;
}
encode_transform_unit(state, x_pu, y_pu, depth);
}
}
@ -894,14 +916,16 @@ static void encode_part_mode(encoder_state_t * const state,
}
void kvz_encode_coding_tree(encoder_state_t * const state,
uint16_t x_ctb, uint16_t y_ctb, uint8_t depth)
uint16_t x_ctb,
uint16_t y_ctb,
uint8_t depth)
{
cabac_data_t * const cabac = &state->cabac;
const videoframe_t * const frame = state->tile->frame;
const cu_info_t *cur_cu = kvz_videoframe_get_cu_const(frame, x_ctb, y_ctb);
uint8_t split_flag = GET_SPLITDATA(cur_cu, depth);
uint8_t split_model = 0;
//Absolute ctb
uint16_t abs_x_ctb = x_ctb + (state->tile->lcu_offset_x * LCU_WIDTH) / (LCU_WIDTH >> MAX_DEPTH);
uint16_t abs_y_ctb = y_ctb + (state->tile->lcu_offset_y * LCU_WIDTH) / (LCU_WIDTH >> MAX_DEPTH);

View file

@ -29,7 +29,7 @@
#include "encoderstate.h"
#include "global.h"
void kvz_encode_coding_tree(encoder_state_t *state,
void kvz_encode_coding_tree(encoder_state_t * const state,
uint16_t x_ctb,
uint16_t y_ctb,
uint8_t depth);

View file

@ -451,9 +451,14 @@ static void encoder_state_write_bitstream_pic_parameter_set(bitstream_t* stream,
WRITE_SE(stream, ((int8_t)encoder->cfg->qp) - 26, "pic_init_qp_minus26");
WRITE_U(stream, 0, 1, "constrained_intra_pred_flag");
WRITE_U(stream, encoder->trskip_enable, 1, "transform_skip_enabled_flag");
WRITE_U(stream, 0, 1, "cu_qp_delta_enabled_flag");
//if cu_qp_delta_enabled_flag
//WRITE_UE(stream, 0, "diff_cu_qp_delta_depth");
if (encoder->cfg->target_bitrate > 0) {
// Use separate QP for each LCU when rate control is enabled.
WRITE_U(stream, 1, 1, "cu_qp_delta_enabled_flag");
WRITE_UE(stream, 0, "diff_cu_qp_delta_depth");
} else {
WRITE_U(stream, 0, 1, "cu_qp_delta_enabled_flag");
}
//TODO: add QP offsets
WRITE_SE(stream, 0, "pps_cb_qp_offset");
@ -913,10 +918,8 @@ static void encoder_state_write_bitstream_main(encoder_state_t * const state)
first_nal_in_au = false;
encoder_state_write_bitstream_aud(state);
}
if ((encoder->vps_period > 0 && state->frame->num % encoder->vps_period == 0)
|| (state->frame->num == 0 && encoder->vps_period >= 0))
{
if (encoder_state_must_write_vps(state)) {
first_nal_in_au = false;
kvz_encoder_state_write_parameter_sets(&state->stream, state);
}
@ -972,11 +975,7 @@ static void encoder_state_write_bitstream_main(encoder_state_t * const state)
}
state->frame->total_bits_coded += newpos - curpos;
if (encoder->cfg->gop_len > 0 && state->frame->gop_offset > 0) {
state->frame->cur_gop_bits_coded = state->previous_encoder_state->frame->cur_gop_bits_coded;
} else {
state->frame->cur_gop_bits_coded = 0;
}
state->frame->cur_gop_bits_coded = state->previous_encoder_state->frame->cur_gop_bits_coded;
state->frame->cur_gop_bits_coded += newpos - curpos;
}

View file

@ -48,13 +48,21 @@ static int encoder_state_config_frame_init(encoder_state_t * const state) {
state->frame->poc = 0;
state->frame->total_bits_coded = 0;
state->frame->cur_gop_bits_coded = 0;
state->frame->prepared = 0;
state->frame->done = 1;
state->frame->rc_alpha = 3.2003;
state->frame->rc_beta = -1.367;
const encoder_control_t * const encoder = state->encoder_control;
const int num_lcus = encoder->in.width_in_lcu * encoder->in.height_in_lcu;
state->frame->lcu_stats = MALLOC(lcu_stats_t, num_lcus);
return 1;
}
/**
* \brief Release the resources held by the frame configuration of a state.
*
* Passes state->frame->ref to kvz_image_list_destroy and state->frame->lcu_stats
* to FREE_POINTER.
*
* \param state   encoder state whose frame configuration is finalized
*/
static void encoder_state_config_frame_finalize(encoder_state_t * const state) {
kvz_image_list_destroy(state->frame->ref);
FREE_POINTER(state->frame->lcu_stats);
}
static int encoder_state_config_tile_init(encoder_state_t * const state,
@ -303,8 +311,6 @@ int kvz_encoder_state_init(encoder_state_t * const child_state, encoder_state_t
child_state->children[0].encoder_control = NULL;
child_state->tqj_bitstream_written = NULL;
child_state->tqj_recon_done = NULL;
child_state->prepared = 0;
child_state->frame_done = 1;
if (!parent_state) {
const encoder_control_t * const encoder = child_state->encoder_control;

View file

@ -196,12 +196,83 @@ static void encode_sao(encoder_state_t * const state,
}
static void encoder_state_worker_encode_lcu(void * opaque) {
/**
* \brief Sets the QP for each CU in state->tile->frame->cu_array.
*
* The QPs are used in deblocking.
*
* The delta QP for an LCU is coded when the first CU with coded block flag
* set is encountered. Hence, for the purposes of deblocking, all CUs
* before the first one with cbf set use state->ref_qp and all CUs after
* that use state->qp.
*
* \param state encoder state
* \param x x-coordinate of the left edge of the root CU
* \param y y-coordinate of the top edge of the root CU
* \param depth depth in the CU quadtree
* \param coeffs_coded Used for tracking whether a CU with a residual
* has been encountered. Should be set to false at
* the top level.
* \return Whether a CU with a residual has been encountered so far. Always
* true when state->qp equals state->ref_qp.
*/
static bool set_cu_qps(encoder_state_t *state, int x, int y, int depth, bool coeffs_coded)
{
if (state->qp == state->ref_qp) {
// If the QPs are equal there is no need to care about the residuals.
coeffs_coded = true;
}
cu_info_t *cu = kvz_cu_array_at(state->tile->frame->cu_array, x, y);
// Width in pixels of a CU at the current quadtree depth.
const int cu_width = LCU_WIDTH >> depth;
coeffs_coded = coeffs_coded || cbf_is_set_any(cu->cbf, cu->depth);
if (!coeffs_coded && cu->depth > depth) {
// Recursively process sub-CUs.
// The result of each call is passed on to the next one, so every sub-CU
// visited after the first one with a residual is given state->qp.
const int d = cu_width >> 1;
coeffs_coded = set_cu_qps(state, x, y, depth + 1, coeffs_coded);
coeffs_coded = set_cu_qps(state, x + d, y, depth + 1, coeffs_coded);
coeffs_coded = set_cu_qps(state, x, y + d, depth + 1, coeffs_coded);
coeffs_coded = set_cu_qps(state, x + d, y + d, depth + 1, coeffs_coded);
} else {
// Either a residual has already been seen or the CU is not split any
// further. The whole area of cu_width x cu_width pixels gets a single QP.
if (!coeffs_coded && cu->tr_depth > depth) {
// The CU is split into smaller transform units. Check whether coded
// block flag is set for any of the TUs.
const int tu_width = LCU_WIDTH >> cu->tr_depth;
for (int y_scu = y; y_scu < y + cu_width; y_scu += tu_width) {
for (int x_scu = x; x_scu < x + cu_width; x_scu += tu_width) {
cu_info_t *tu = kvz_cu_array_at(state->tile->frame->cu_array, x_scu, y_scu);
if (cbf_is_set_any(tu->cbf, cu->depth)) {
coeffs_coded = true;
}
}
}
}
// Set the correct QP for all state->tile->frame->cu_array elements in
// the area covered by the CU.
const int8_t qp = coeffs_coded ? state->qp : state->ref_qp;
for (int y_scu = y; y_scu < y + cu_width; y_scu += SCU_WIDTH) {
for (int x_scu = x; x_scu < x + cu_width; x_scu += SCU_WIDTH) {
kvz_cu_array_at(state->tile->frame->cu_array, x_scu, y_scu)->qp = qp;
}
}
}
return coeffs_coded;
}
static void encoder_state_worker_encode_lcu(void * opaque)
{
const lcu_order_element_t * const lcu = opaque;
encoder_state_t *state = lcu->encoder_state;
const encoder_control_t * const encoder = state->encoder_control;
videoframe_t* const frame = state->tile->frame;
kvz_set_lcu_lambda_and_qp(state, lcu->position);
//This part doesn't write to bitstream, it's only search, deblock and sao
kvz_search_lcu(state, lcu->position_px.x, lcu->position_px.y, state->tile->hor_buf_search, state->tile->ver_buf_search);
@ -209,6 +280,10 @@ static void encoder_state_worker_encode_lcu(void * opaque) {
encoder_state_recdata_to_bufs(state, lcu, state->tile->hor_buf_search, state->tile->ver_buf_search);
if (encoder->deblock_enable) {
if (encoder->cfg->target_bitrate > 0) {
set_cu_qps(state, lcu->position_px.x, lcu->position_px.y, 0, false);
}
kvz_filter_deblock_lcu(state, lcu->position_px.x, lcu->position_px.y);
}
@ -239,6 +314,7 @@ static void encoder_state_worker_encode_lcu(void * opaque) {
}
//Now write data to bitstream (required to have a correct CABAC state)
const uint64_t existing_bits = kvz_bitstream_tell(&state->stream);
//First LCU, and we are in a slice. We need a slice header
if (state->type == ENCODER_STATE_TYPE_SLICE && lcu->index == 0) {
@ -251,6 +327,10 @@ static void encoder_state_worker_encode_lcu(void * opaque) {
encode_sao(state, lcu->position.x, lcu->position.y, &frame->sao_luma[lcu->position.y * frame->width_in_lcu + lcu->position.x], &frame->sao_chroma[lcu->position.y * frame->width_in_lcu + lcu->position.x]);
}
// QP delta is not used when rate control is turned off.
state->must_code_qp_delta = (state->encoder_control->cfg->target_bitrate > 0);
//Encode coding tree
kvz_encode_coding_tree(state, lcu->position.x << MAX_DEPTH, lcu->position.y << MAX_DEPTH, 0);
@ -260,7 +340,10 @@ static void encoder_state_worker_encode_lcu(void * opaque) {
//Always 0 since otherwise it would be split
kvz_cabac_encode_bin_trm(&state->cabac, 0); // end_of_slice_segment_flag
}
const uint32_t bits = kvz_bitstream_tell(&state->stream) - existing_bits;
kvz_get_lcu_stats(state, lcu->position.x, lcu->position.y)->bits = bits;
//Wavefronts need the context to be copied to the next row
if (state->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW && lcu->index == 1) {
int j;
@ -301,6 +384,9 @@ static void encoder_state_encode_leaf(encoder_state_t * const state) {
InitC(state->tile->dbs_g);
state->tile->m_prev_pos = 0;
}
state->ref_qp = state->frame->QP;
// Select whether to encode the frame/tile in current thread or to define
// wavefront jobs for other threads to handle.
bool wavefront = state->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW;
@ -851,6 +937,22 @@ static void encoder_state_init_children(encoder_state_t * const state) {
}
}
/**
 * \brief Scale the LCU weights of the frame so that they sum to one.
 *
 * Nothing is done when the frame number is 0. If the sum of the weights is
 * not positive (zero, negative or NaN), dividing by it would turn every
 * weight into NaN or infinity, so each LCU is given the equal weight
 * 1 / num_lcus instead.
 *
 * \param state   the main encoder state
 */
static void normalize_lcu_weights(encoder_state_t * const state)
{
  if (state->frame->num == 0) return;

  const uint32_t num_lcus = state->encoder_control->in.width_in_lcu *
                            state->encoder_control->in.height_in_lcu;
  if (num_lcus == 0) return;

  double sum = 0.0;
  for (uint32_t i = 0; i < num_lcus; i++) {
    sum += state->frame->lcu_stats[i].weight;
  }

  // Written as a negated comparison so that a NaN sum is caught as well.
  if (!(sum > 0.0)) {
    const double uniform_weight = 1.0 / num_lcus;
    for (uint32_t i = 0; i < num_lcus; i++) {
      state->frame->lcu_stats[i].weight = uniform_weight;
    }
    return;
  }

  for (uint32_t i = 0; i < num_lcus; i++) {
    state->frame->lcu_stats[i].weight /= sum;
  }
}
static void encoder_state_init_new_frame(encoder_state_t * const state, kvz_picture* frame) {
assert(state->type == ENCODER_STATE_TYPE_MAIN);
@ -898,24 +1000,9 @@ static void encoder_state_init_new_frame(encoder_state_t * const state, kvz_pict
encoder_state_remove_refs(state);
encoder_state_ref_sort(state);
double lambda;
if (cfg->target_bitrate > 0) {
// Rate control enabled.
lambda = kvz_select_picture_lambda(state);
state->frame->QP = kvz_lambda_to_QP(lambda);
} else {
if (cfg->gop_len > 0 && state->frame->slicetype != KVZ_SLICE_I) {
kvz_gop_config const * const gop =
cfg->gop + state->frame->gop_offset;
state->frame->QP = cfg->qp + gop->qp_offset;
state->frame->QP_factor = gop->qp_factor;
} else {
state->frame->QP = cfg->qp;
}
lambda = kvz_select_picture_lambda_from_qp(state);
}
state->frame->cur_lambda_cost = lambda;
state->frame->cur_lambda_cost_sqrt = sqrt(lambda);
normalize_lcu_weights(state);
kvz_set_picture_lambda_and_qp(state);
encoder_state_init_children(state);
}
@ -967,7 +1054,7 @@ void kvz_encode_one_frame(encoder_state_t * const state, kvz_picture* frame)
assert(!state->tqj_bitstream_written);
state->tqj_bitstream_written = job;
}
state->frame_done = 0;
state->frame->done = 0;
//kvz_threadqueue_flush(main_state->encoder_control->threadqueue);
}
@ -985,7 +1072,7 @@ void kvz_encoder_prepare(encoder_state_t *state)
const encoder_control_t * const encoder = state->encoder_control;
// The previous frame must be done before the next one is started.
assert(state->frame_done);
assert(state->frame->done);
if (state->frame->num == -1) {
// We're at the first frame, so don't care about all this stuff.
@ -993,7 +1080,7 @@ void kvz_encoder_prepare(encoder_state_t *state)
state->frame->poc = 0;
assert(!state->tile->frame->source);
assert(!state->tile->frame->rec);
state->prepared = 1;
state->frame->prepared = 1;
return;
}
@ -1034,7 +1121,7 @@ void kvz_encoder_prepare(encoder_state_t *state)
state->frame->num = prev_state->frame->num + 1;
state->frame->poc = prev_state->frame->poc + 1;
state->prepared = 1;
state->frame->prepared = 1;
}
coeff_scan_order_t kvz_get_scan_order(int8_t cu_type, int intra_mode, int depth)
@ -1052,3 +1139,11 @@ coeff_scan_order_t kvz_get_scan_order(int8_t cu_type, int intra_mode, int depth)
return SCAN_DIAG;
}
/**
 * \brief Get the statistics entry of an LCU.
 *
 * The LCU offsets of the tile of the state are added to the given
 * coordinates before indexing state->frame->lcu_stats.
 *
 * \param state   encoder state
 * \param lcu_x   x-coordinate of the LCU, in LCUs
 * \param lcu_y   y-coordinate of the LCU, in LCUs
 * \return pointer to the matching element of state->frame->lcu_stats
 */
lcu_stats_t* kvz_get_lcu_stats(encoder_state_t *state, int lcu_x, int lcu_y)
{
  const int index =
    (lcu_y + state->tile->lcu_offset_y) * state->encoder_control->in.width_in_lcu +
    (lcu_x + state->tile->lcu_offset_x);

  return state->frame->lcu_stats + index;
}

View file

@ -49,18 +49,48 @@ typedef enum {
} encoder_state_type;
/**
* \brief Statistics of a single LCU.
*
* One element of encoder_state_config_frame_t::lcu_stats.
*/
typedef struct lcu_stats_t {
//! \brief Number of bits that were spent
uint32_t bits;
//! \brief Weight of the LCU for rate control
double weight;
//! \brief Lambda value which was used for this LCU
double lambda;
//! \brief Rate control alpha parameter
double rc_alpha;
//! \brief Rate control beta parameter
double rc_beta;
} lcu_stats_t;
typedef struct encoder_state_config_frame_t {
double cur_lambda_cost; //!< \brief Lambda for SSE
double cur_lambda_cost_sqrt; //!< \brief Lambda for SAD and SATD
/**
* \brief Frame-level lambda.
*
* Use state->lambda or state->lambda_sqrt for cost computations.
*
* \see encoder_state_t::lambda
* \see encoder_state_t::lambda_sqrt
*/
double lambda;
int32_t num; /*!< \brief Frame number */
int32_t poc; /*!< \brief Picture order count */
int8_t gop_offset; /*!< \brief Offset in the gop structure */
int8_t QP; //!< \brief Quantization parameter
double QP_factor; //!< \brief Quantization factor
/**
* \brief Frame-level quantization parameter
*
* \see encoder_state_t::qp
*/
int8_t QP;
//! \brief quantization factor
double QP_factor;
//Current picture available references
image_list_t *ref;
int8_t ref_list;
@ -84,10 +114,33 @@ typedef struct encoder_state_config_frame_t {
//! Number of bits targeted for the current GOP.
double cur_gop_target_bits;
//! Number of bits targeted for the current picture.
double cur_pic_target_bits;
// Parameters used in rate control
double rc_alpha;
double rc_beta;
/**
* \brief Indicates that this encoder state is ready for encoding the
* next frame i.e. kvz_encoder_prepare has been called.
*/
bool prepared;
/**
* \brief Indicates that the previous frame has been encoded and the
* encoded data written, and that encoding of the next frame has not been
* started yet.
*/
bool done;
/**
* \brief Information about the coded LCUs.
*
* Used for rate control.
*/
lcu_stats_t *lcu_stats;
} encoder_state_config_frame_t;
typedef struct encoder_state_config_tile_t {
@ -185,21 +238,26 @@ typedef struct encoder_state_t {
bitstream_t stream;
cabac_data_t cabac;
/**
* \brief Indicates that this encoder state is ready for encoding the
* next frame i.e. kvz_encoder_prepare has been called.
*/
int prepared;
/**
* \brief Indicates that the previous frame has been encoded and the
* encoded data written and the encoding the next frame has not been
* started yet.
*/
int frame_done;
uint32_t stats_bitstream_length; //Bitstream length written in bytes
//! \brief Lambda for SSE
double lambda;
//! \brief Lambda for SAD and SATD
double lambda_sqrt;
//! \brief Quantization parameter for the current LCU
int8_t qp;
/**
* \brief Whether a QP delta value must be coded for the current LCU.
*/
bool must_code_qp_delta;
/**
* \brief Reference QP for computing the QP delta of the LCU that is coded
* next. Updated whenever a QP delta is coded.
*/
int8_t ref_qp;
//Jobs to wait for
threadqueue_job_t * tqj_recon_done; //Reconstruction is done
threadqueue_job_t * tqj_bitstream_written; //Bitstream is written
@ -218,6 +276,21 @@ void kvz_encoder_get_ref_lists(const encoder_state_t *const state,
int ref_list_len_out[2],
int ref_list_poc_out[2][16]);
lcu_stats_t* kvz_get_lcu_stats(encoder_state_t *state, int lcu_x, int lcu_y);
/**
 * \brief Check whether the parameter sets should be written with the
 *        current frame.
 *
 * With a negative vps_period the answer is always false. Otherwise the
 * answer is true for frame number 0 and, when vps_period is positive, for
 * every frame number that is a multiple of vps_period.
 *
 * \param state   encoder state
 * \return true if the parameter sets must be written, false otherwise
 */
static INLINE bool encoder_state_must_write_vps(const encoder_state_t *state)
{
  const int32_t period = state->encoder_control->vps_period;
  const int32_t frame_num = state->frame->num;

  if (period < 0) {
    return false;
  }
  if (frame_num == 0) {
    return true;
  }
  // period == 0 must be ruled out before taking the remainder.
  return period > 0 && frame_num % period == 0;
}
static const uint8_t g_group_idx[32] = {
0, 1, 2, 3, 4, 4, 5, 5, 6, 6,
6, 6, 7, 7, 7, 7, 8, 8, 8, 8,

View file

@ -247,6 +247,27 @@ static bool is_on_8x8_grid(int x, int y, edge_dir dir)
}
}
/**
 * \brief Get the luma QP to use when deblocking the edge at (x, y).
 *
 * When rate control is disabled (cfg->target_bitrate <= 0), state->qp is
 * returned. Otherwise the result is the rounded average of the QP stored
 * for the CU at (x, y) and the QP stored for the CU on the other side of
 * the edge: the CU above for EDGE_HOR and the CU to the left for EDGE_VER.
 * If there is no such CU (y == 0 or x == 0 respectively), the frame-level
 * QP is used in its place.
 *
 * \param state   encoder state
 * \param x       x-coordinate used to look up the CU in cu_array
 * \param y       y-coordinate used to look up the CU in cu_array
 * \param dir     direction of the edge
 * \return QP for the edge
 */
static int8_t get_qp_y_pred(const encoder_state_t* state, int x, int y, edge_dir dir)
{
  if (state->encoder_control->cfg->target_bitrate <= 0) {
    return state->qp;
  }

  // Start from the frame-level QP and replace it if a neighbor exists.
  int32_t qp_p = state->frame->QP;
  if (dir == EDGE_HOR) {
    if (y > 0) {
      qp_p = kvz_cu_array_at_const(state->tile->frame->cu_array, x, y - 1)->qp;
    }
  } else if (dir == EDGE_VER) {
    if (x > 0) {
      qp_p = kvz_cu_array_at_const(state->tile->frame->cu_array, x - 1, y)->qp;
    }
  }

  const int32_t qp_q =
    kvz_cu_array_at_const(state->tile->frame->cu_array, x, y)->qp;

  const int32_t qp_sum = qp_p + qp_q + 1;
  return qp_sum >> 1;
}
/**
* \brief Apply the deblocking filter to luma pixels on a single edge.
*
@ -290,8 +311,9 @@ static void filter_deblock_edge_luma(encoder_state_t * const state,
kvz_pixel *orig_src = &frame->rec->y[x + y*stride];
kvz_pixel *src = orig_src;
const int32_t qp = get_qp_y_pred(state, x, y, dir);
int8_t strength = 0;
int32_t qp = state->frame->QP;
int32_t bitdepth_scale = 1 << (encoder->bitdepth - 8);
int32_t b_index = CLIP(0, 51, qp + (beta_offset_div2 << 1));
int32_t beta = kvz_g_beta_table_8x8[b_index] * bitdepth_scale;
@ -490,7 +512,8 @@ static void filter_deblock_edge_chroma(encoder_state_t * const state,
};
int8_t strength = 2;
int32_t QP = kvz_g_chroma_scale[state->frame->QP];
const int32_t luma_qp = get_qp_y_pred(state, x << 1, y << 1, dir);
int32_t QP = kvz_g_chroma_scale[luma_qp];
int32_t bitdepth_scale = 1 << (encoder->bitdepth-8);
int32_t TC_index = CLIP(0, 51+2, (int32_t)(QP + 2*(strength-1) + (tc_offset_div2 << 1)));
int32_t Tc = kvz_g_tc_table_8x8[TC_index]*bitdepth_scale;

View file

@ -158,6 +158,7 @@ typedef int16_t coeff_t;
#define MRG_MAX_NUM_CANDS 5
/* Some tools */
// Absolute value of a. The argument is fully parenthesized in every position
// so that expressions such as ABS(x - y) expand correctly. Note that the
// argument is evaluated twice, so it must not have side effects.
#define ABS(a) ((a) >= 0 ? (a) : (-(a)))
#define MAX(a,b) (((a)>(b))?(a):(b))
#define MIN(a,b) (((a)<(b))?(a):(b))
#define CLIP(low,high,value) MAX((low),MIN((high),(value)))

View file

@ -213,7 +213,7 @@ static int kvazaar_encode(kvz_encoder *enc,
encoder_state_t *state = &enc->states[enc->cur_state_num];
if (!state->prepared) {
if (!state->frame->prepared) {
kvz_encoder_prepare(state);
}
@ -235,13 +235,13 @@ static int kvazaar_encode(kvz_encoder *enc,
return 1;
}
if (!state->frame_done) {
if (!state->frame->done) {
// We started encoding a frame; move to the next encoder state.
enc->cur_state_num = (enc->cur_state_num + 1) % (enc->num_encoder_states);
}
encoder_state_t *output_state = &enc->states[enc->out_state_num];
if (!output_state->frame_done &&
if (!output_state->frame->done &&
(pic_in == NULL || enc->cur_state_num == enc->out_state_num)) {
kvz_threadqueue_waitfor(enc->control->threadqueue, output_state->tqj_bitstream_written);
@ -256,8 +256,8 @@ static int kvazaar_encode(kvz_encoder *enc,
if (src_out) *src_out = kvz_image_copy_ref(output_state->tile->frame->source);
if (info_out) set_frame_info(info_out, output_state);
output_state->frame_done = 1;
output_state->prepared = 0;
output_state->frame->done = 1;
output_state->frame->prepared = 0;
enc->frames_done += 1;
enc->out_state_num = (enc->out_state_num + 1) % (enc->num_encoder_states);

View file

@ -27,45 +27,49 @@
static const int SMOOTHING_WINDOW = 40;
static const double MIN_LAMBDA = 0.1;
static const double MAX_LAMBDA = 10000;
/**
 * \brief Restrict a lambda value to the range [MIN_LAMBDA, MAX_LAMBDA].
 *
 * NaN is mapped to MAX_LAMBDA.
 *
 * \param lambda  value to clip
 * \return clipped lambda
 */
static double clip_lambda(double lambda) {
  if (isnan(lambda)) {
    return MAX_LAMBDA;
  }
  if (MAX_LAMBDA < lambda) {
    return MAX_LAMBDA;
  }
  if (MIN_LAMBDA > lambda) {
    return MIN_LAMBDA;
  }
  return lambda;
}
/**
* \brief Update alpha and beta parameters.
* \param state the main encoder state
*
* Sets global->rc_alpha and global->rc_beta of the encoder state.
* \param bits number of bits spent for coding the area
* \param pixels size of the area in pixels
* \param lambda_real lambda used for coding the area
* \param[in,out] alpha alpha parameter to update
* \param[in,out] beta beta parameter to update
*/
static void update_rc_parameters(encoder_state_t * state)
static void update_parameters(uint32_t bits,
uint32_t pixels,
double lambda_real,
double *alpha,
double *beta)
{
const encoder_control_t * const encoder = state->encoder_control;
const double pixels_per_picture = encoder->in.width * encoder->in.height;
const double bpp = state->stats_bitstream_length * 8 / pixels_per_picture;
const double log_bpp = log(bpp);
const double alpha_old = state->frame->rc_alpha;
const double beta_old = state->frame->rc_beta;
// lambda computed from real bpp
const double lambda_comp = CLIP(0.1, 10000, alpha_old * pow(bpp, beta_old));
// lambda used in encoding
const double lambda_real = state->frame->cur_lambda_cost;
const double bpp = bits / (double)pixels;
const double lambda_comp = clip_lambda(*alpha * pow(bpp, *beta));
const double lambda_log_ratio = log(lambda_real) - log(lambda_comp);
const double alpha = alpha_old + 0.1 * lambda_log_ratio * alpha_old;
state->frame->rc_alpha = CLIP(0.05, 20, alpha);
*alpha += 0.10 * lambda_log_ratio * (*alpha);
*alpha = CLIP(0.05, 20, *alpha);
const double beta = beta_old + 0.05 * lambda_log_ratio * CLIP(-5, 1, log_bpp);
state->frame->rc_beta = CLIP(-3, -0.1, beta);
*beta += 0.05 * lambda_log_ratio * CLIP(-5.0, -1.0, log(bpp));
*beta = CLIP(-3, -0.1, *beta);
}
/**
* \brief Allocate bits for the current GOP.
* \param state the main encoder state
*
* If GOPs are not used, allocates bits for a single picture.
*
* Sets the cur_gop_target_bits of the encoder state.
* \param state the main encoder state
* \return target number of bits
*/
static void gop_allocate_bits(encoder_state_t * const state)
static double gop_allocate_bits(encoder_state_t * const state)
{
const encoder_control_t * const encoder = state->encoder_control;
@ -83,111 +87,232 @@ static void gop_allocate_bits(encoder_state_t * const state)
pictures_coded -= gop_offset + 1;
}
// Equation 12 from https://doi.org/10.1109/TIP.2014.2336550
double gop_target_bits =
(encoder->target_avg_bppic * (pictures_coded + SMOOTHING_WINDOW) - bits_coded)
* MAX(1, encoder->cfg->gop_len) / SMOOTHING_WINDOW;
state->frame->cur_gop_target_bits = MAX(200, gop_target_bits);
// Allocate at least 200 bits for each GOP like HM does.
return MAX(200, gop_target_bits);
}
/**
 * \brief Estimate the number of bits used for headers of the current picture.
 *
 * The estimate is a sum of fixed costs: NAL type and slice header, entry
 * points (12 bits per LCU row), the hash selected by cfg->hash, the
 * parameter sets when they are written with this frame, and the encoder
 * info on frame 0 when cfg->add_encoder_info is set.
 *
 * \param state   the main encoder state
 * \return number of header bits
 */
static uint64_t pic_header_bits(encoder_state_t * const state)
{
  const kvz_config* cfg = state->encoder_control->cfg;

  // nal type and slice header
  uint64_t bits = 48 + 24;

  // entry points
  bits += 12 * state->encoder_control->in.height_in_lcu;

  // hash; KVZ_HASH_NONE adds nothing
  if (cfg->hash == KVZ_HASH_CHECKSUM) {
    bits += 168;
  } else if (cfg->hash == KVZ_HASH_MD5) {
    bits += 456;
  }

  // parameter sets
  if (encoder_state_must_write_vps(state)) {
    bits += 613;
  }

  // encoder info
  if (state->frame->num == 0 && cfg->add_encoder_info) {
    bits += 1392;
  }

  return bits;
}
/**
* Allocate bits for the current picture.
* \param state the main encoder state
* \return target number of bits
* \param state the main encoder state
* \return target number of bits, excluding headers
*/
static double pic_allocate_bits(const encoder_state_t * const state)
static double pic_allocate_bits(encoder_state_t * const state)
{
const encoder_control_t * const encoder = state->encoder_control;
if (encoder->cfg->gop_len == 0 ||
state->frame->gop_offset == 0 ||
state->frame->num == 0)
{
// A new GOP starts at this frame.
state->frame->cur_gop_target_bits = gop_allocate_bits(state);
state->frame->cur_gop_bits_coded = 0;
} else {
state->frame->cur_gop_target_bits =
state->previous_encoder_state->frame->cur_gop_target_bits;
}
if (encoder->cfg->gop_len <= 0) {
return state->frame->cur_gop_target_bits;
}
const double pic_weight = encoder->gop_layer_weights[
encoder->cfg->gop[state->frame->gop_offset].layer - 1];
double pic_target_bits = state->frame->cur_gop_target_bits * pic_weight;
const double pic_target_bits =
state->frame->cur_gop_target_bits * pic_weight - pic_header_bits(state);
// Allocate at least 100 bits for each picture like HM does.
return MAX(100, pic_target_bits);
}
/**
* \brief Select a lambda value for encoding the next picture
* \param state the main encoder state
* \return lambda for the next picture
*
* Rate control must be enabled (i.e. cfg->target_bitrate > 0) when this
* function is called.
*/
double kvz_select_picture_lambda(encoder_state_t * const state)
{
const encoder_control_t * const encoder = state->encoder_control;
assert(encoder->cfg->target_bitrate > 0);
if (state->frame->num > encoder->owf) {
// At least one frame has been written.
update_rc_parameters(state);
}
if (encoder->cfg->gop_len == 0 ||
state->frame->gop_offset == 0 ||
state->frame->num == 0)
{
// A new GOP begins at this frame.
gop_allocate_bits(state);
} else {
state->frame->cur_gop_target_bits =
state->previous_encoder_state->frame->cur_gop_target_bits;
}
// TODO: take the picture headers into account
const double target_bits_current_picture = pic_allocate_bits(state);
const double target_bits_per_pixel =
target_bits_current_picture / encoder->in.pixels_per_pic;
const double lambda =
state->frame->rc_alpha * pow(target_bits_per_pixel, state->frame->rc_beta);
return CLIP(0.1, 10000, lambda);
}
int8_t kvz_lambda_to_QP(const double lambda)
/**
 * \brief Convert a lambda value to a QP.
 *
 * \param lambda  lambda value
 * \return QP limited to the range 0..51
 */
static int8_t lambda_to_qp(const double lambda)
{
  // The 0.5 is added before the truncating conversion to an integer.
  const int8_t unclipped = 4.2005 * log(lambda) + 13.7223 + 0.5;

  if (51 < unclipped) {
    return 51;
  }
  if (0 > unclipped) {
    return 0;
  }
  return unclipped;
}
/**
* \brief Select a lambda value according to current QP value
* \brief Allocate bits and set lambda and QP for the current picture.
* \param state the main encoder state
* \return lambda for the next picture
*
* This function should be used to select lambda when rate control is
* disabled.
*/
double kvz_select_picture_lambda_from_qp(encoder_state_t const * const state)
void kvz_set_picture_lambda_and_qp(encoder_state_t * const state)
{
const int gop_len = state->encoder_control->cfg->gop_len;
const int intra_period = state->encoder_control->cfg->intra_period;
const int keyframe_period = gop_len > 0 ? gop_len : intra_period;
double lambda = pow(2.0, (state->frame->QP - 12) / 3.0);
const encoder_control_t * const ctrl = state->encoder_control;
if (state->frame->slicetype == KVZ_SLICE_I) {
lambda *= 0.57;
// Reduce lambda for I-frames according to the number of references.
if (keyframe_period == 0) {
lambda *= 0.5;
} else {
lambda *= 1.0 - CLIP(0.0, 0.5, 0.05 * (keyframe_period - 1));
if (ctrl->cfg->target_bitrate > 0) {
// Rate control enabled
if (state->frame->num > ctrl->owf) {
// At least one frame has been written.
update_parameters(state->stats_bitstream_length * 8,
ctrl->in.pixels_per_pic,
state->frame->lambda,
&state->frame->rc_alpha,
&state->frame->rc_beta);
}
} else if (gop_len > 0) {
lambda *= state->frame->QP_factor;
const double pic_target_bits = pic_allocate_bits(state);
const double target_bpp = pic_target_bits / ctrl->in.pixels_per_pic;
double lambda = state->frame->rc_alpha * pow(target_bpp, state->frame->rc_beta);
lambda = clip_lambda(lambda);
state->frame->lambda = lambda;
state->frame->QP = lambda_to_qp(lambda);
state->frame->cur_pic_target_bits = pic_target_bits;
} else {
lambda *= 0.4624;
// Rate control disabled
kvz_gop_config const * const gop = &ctrl->cfg->gop[state->frame->gop_offset];
const int gop_len = ctrl->cfg->gop_len;
const int period = gop_len > 0 ? gop_len : ctrl->cfg->intra_period;
state->frame->QP = ctrl->cfg->qp;
if (gop_len > 0 && state->frame->slicetype != KVZ_SLICE_I) {
state->frame->QP += gop->qp_offset;
}
double lambda = pow(2.0, (state->frame->QP - 12) / 3.0);
if (state->frame->slicetype == KVZ_SLICE_I) {
lambda *= 0.57;
// Reduce lambda for I-frames according to the number of references.
if (period == 0) {
lambda *= 0.5;
} else {
lambda *= 1.0 - CLIP(0.0, 0.5, 0.05 * (period - 1));
}
} else if (gop_len > 0) {
lambda *= gop->qp_factor;
} else {
lambda *= 0.4624;
}
// Increase lambda if not key-frame.
if (period > 0 && state->frame->poc % period != 0) {
lambda *= CLIP(2.0, 4.0, (state->frame->QP - 12) / 6.0);
}
state->frame->lambda = lambda;
}
}
/**
* \brief Allocate bits for a LCU.
* \param state the main encoder state
* \param pos location of the LCU as number of LCUs from top left
* \return number of bits allocated for the LCU
*/
static double lcu_allocate_bits(encoder_state_t * const state,
vector2d_t pos)
{
double lcu_weight;
if (state->frame->num > state->encoder_control->owf) {
lcu_weight = kvz_get_lcu_stats(state, pos.x, pos.y)->weight;
} else {
const uint32_t num_lcus = state->encoder_control->in.width_in_lcu *
state->encoder_control->in.height_in_lcu;
lcu_weight = 1.0 / num_lcus;
}
// Increase lambda if not key-frame.
if (keyframe_period > 0 && state->frame->poc % keyframe_period != 0) {
lambda *= CLIP(2.0, 4.0, (state->frame->QP - 12) / 6.0);
}
return lambda;
// Target number of bits for the current LCU.
const double lcu_target_bits = state->frame->cur_pic_target_bits * lcu_weight;
// Allocate at least one bit for each LCU.
return MAX(1, lcu_target_bits);
}
/**
* \brief Set the lambda and QP used for coding a single LCU.
*
* Writes state->lambda, state->lambda_sqrt and state->qp. When rate control
* is enabled (cfg->target_bitrate > 0), the lambda is derived from the bits
* allocated to the LCU and is also stored in the statistics of the LCU.
* Otherwise the frame-level lambda and QP are used as they are.
*
* \param state   encoder state
* \param pos     location of the LCU as number of LCUs from top left
*/
void kvz_set_lcu_lambda_and_qp(encoder_state_t * const state,
vector2d_t pos)
{
const encoder_control_t * const ctrl = state->encoder_control;
if (ctrl->cfg->target_bitrate > 0) {
lcu_stats_t *lcu = kvz_get_lcu_stats(state, pos.x, pos.y);
// Number of pixels of the LCU that are inside the frame. The width and
// height are limited to what is left of the frame at this position.
const uint32_t pixels = MIN(LCU_WIDTH, state->tile->frame->width - LCU_WIDTH * pos.x) *
MIN(LCU_WIDTH, state->tile->frame->height - LCU_WIDTH * pos.y);
if (state->frame->num > ctrl->owf) {
// Adapt alpha and beta of the LCU using the bits and lambda stored in its
// statistics.
// NOTE(review): the stored values are presumably from the previous frame
// coded with this encoder state — confirm.
update_parameters(lcu->bits,
pixels,
lcu->lambda,
&lcu->rc_alpha,
&lcu->rc_beta);
} else {
// Start from the frame-level parameters.
lcu->rc_alpha = state->frame->rc_alpha;
lcu->rc_beta = state->frame->rc_beta;
}
const double target_bits = lcu_allocate_bits(state, pos);
const double target_bpp = target_bits / pixels;
double lambda = clip_lambda(lcu->rc_alpha * pow(target_bpp, lcu->rc_beta));
// Clip lambda according to the equations 24 and 26 in
// https://doi.org/10.1109/TIP.2014.2336550
if (state->frame->num > ctrl->owf) {
// Keep lambda within a factor of 2^(1/3) of the lambda computed from the
// stored bit count (0.7937... = 2^(-1/3), 1.2599... = 2^(1/3)).
const double bpp = lcu->bits / (double)pixels;
const double lambda_comp = clip_lambda(lcu->rc_alpha * pow(bpp, lcu->rc_beta));
lambda = CLIP(lambda_comp * 0.7937005259840998,
lambda_comp * 1.2599210498948732,
lambda);
}
// Keep lambda within a factor of 2^(2/3) of the frame-level lambda
// (0.6299... = 2^(-2/3), 1.5874... = 2^(2/3)).
lambda = CLIP(state->frame->lambda * 0.6299605249474366,
state->frame->lambda * 1.5874010519681994,
lambda);
lambda = clip_lambda(lambda);
lcu->lambda = lambda;
state->lambda = lambda;
state->lambda_sqrt = sqrt(lambda);
state->qp = lambda_to_qp(lambda);
} else {
// Rate control disabled: use the frame-level values.
state->qp = state->frame->QP;
state->lambda = state->frame->lambda;
state->lambda_sqrt = sqrt(state->frame->lambda);
}
}

View file

@ -30,11 +30,9 @@
#include "encoderstate.h"
void kvz_set_picture_lambda_and_qp(encoder_state_t * const state);
double kvz_select_picture_lambda(encoder_state_t * const state);
int8_t kvz_lambda_to_QP(const double lambda);
double kvz_select_picture_lambda_from_qp(encoder_state_t const * const state);
void kvz_set_lcu_lambda_and_qp(encoder_state_t * const state,
vector2d_t pos);
#endif // RATE_CONTROL_H_

View file

@ -257,7 +257,7 @@ uint32_t kvz_get_coded_level ( encoder_state_t * const state, double *coded_cost
cabac_ctx_t* base_sig_model = type?(cabac->ctx.cu_sig_model_chroma):(cabac->ctx.cu_sig_model_luma);
if( !last && max_abs_level < 3 ) {
*coded_cost_sig = state->frame->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 0);
*coded_cost_sig = state->lambda * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 0);
*coded_cost = *coded_cost0 + *coded_cost_sig;
if (max_abs_level == 0) return best_abs_level;
} else {
@ -265,13 +265,13 @@ uint32_t kvz_get_coded_level ( encoder_state_t * const state, double *coded_cost
}
if( !last ) {
cur_cost_sig = state->frame->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 1);
cur_cost_sig = state->lambda * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 1);
}
min_abs_level = ( max_abs_level > 1 ? max_abs_level - 1 : 1 );
for (abs_level = max_abs_level; abs_level >= min_abs_level ; abs_level-- ) {
double err = (double)(level_double - ( abs_level << q_bits ) );
double cur_cost = err * err * temp + state->frame->cur_lambda_cost *
double cur_cost = err * err * temp + state->lambda *
kvz_get_ic_rate( state, abs_level, ctx_num_one, ctx_num_abs,
abs_go_rice, c1_idx, c2_idx, type);
cur_cost += cur_cost_sig;
@ -308,7 +308,7 @@ static double get_rate_last(const encoder_state_t * const state,
if( ctx_y > 3 ) {
uiCost += 32768.0 * ((ctx_y-2)>>1);
}
return state->frame->cur_lambda_cost*uiCost;
return state->lambda * uiCost;
}
static void calc_last_bits(encoder_state_t * const state, int32_t width, int32_t height, int8_t type,
@ -358,7 +358,7 @@ void kvz_rdoq_sign_hiding(const encoder_state_t *const state,
int64_t rd_factor = (int64_t)(
kvz_g_inv_quant_scales[qp_scaled % 6] * kvz_g_inv_quant_scales[qp_scaled % 6] * (1 << (2 * (qp_scaled / 6)))
/ state->frame->cur_lambda_cost / 16 / (1 << (2 * (encoder->bitdepth - 8)))
/ state->lambda / 16 / (1 << (2 * (encoder->bitdepth - 8)))
+ 0.5);
int32_t lastCG = -1;
int32_t absSum = 0;
@ -467,7 +467,7 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
uint32_t max_num_coeff = width * height;
int32_t scalinglist_type= (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]);
int32_t qp_scaled = kvz_get_scaled_qp(type, state->frame->QP, (encoder->bitdepth - 8) * 6);
int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth - 8) * 6);
int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift;
@ -669,7 +669,7 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
if (sig_coeffgroup_flag[cg_blkpos] == 0) {
uint32_t ctx_sig = kvz_context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x,
cg_pos_y, width);
cost_coeffgroup_sig[cg_scanpos] = state->frame->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
cost_coeffgroup_sig[cg_scanpos] = state->lambda *CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
base_cost += cost_coeffgroup_sig[cg_scanpos] - rd_stats.sig_cost;
} else {
if (cg_scanpos < cg_last_scanpos){
@ -686,9 +686,9 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
ctx_sig = kvz_context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x,
cg_pos_y, width);
cost_coeffgroup_sig[cg_scanpos] = state->frame->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 1);
cost_coeffgroup_sig[cg_scanpos] = state->lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 1);
base_cost += cost_coeffgroup_sig[cg_scanpos];
cost_zero_cg += state->frame->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 0);
cost_zero_cg += state->lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 0);
// try to convert the current coeff group from non-zero to all-zero
cost_zero_cg += rd_stats.uncoded_dist; // distortion for resetting non-zero levels to zero levels
@ -701,7 +701,7 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
sig_coeffgroup_flag[cg_blkpos] = 0;
base_cost = cost_zero_cg;
cost_coeffgroup_sig[cg_scanpos] = state->frame->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 0);
cost_coeffgroup_sig[cg_scanpos] = state->lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 0);
// reset coeffs to 0 in this block
for (int32_t scanpos_in_cg = cg_size - 1; scanpos_in_cg >= 0; scanpos_in_cg--) {
@ -728,13 +728,13 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
int32_t best_last_idx_p1 = 0;
if( block_type != CU_INTRA && !type/* && pcCU->getTransformIdx( uiAbsPartIdx ) == 0*/ ) {
best_cost = block_uncoded_cost + state->frame->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx.cu_qt_root_cbf_model),0);
base_cost += state->frame->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx.cu_qt_root_cbf_model),1);
best_cost = block_uncoded_cost + state->lambda * CTX_ENTROPY_BITS(&(cabac->ctx.cu_qt_root_cbf_model),0);
base_cost += state->lambda * CTX_ENTROPY_BITS(&(cabac->ctx.cu_qt_root_cbf_model),1);
} else {
cabac_ctx_t* base_cbf_model = type?(cabac->ctx.qt_cbf_model_chroma):(cabac->ctx.qt_cbf_model_luma);
ctx_cbf = ( type ? tr_depth : !tr_depth);
best_cost = block_uncoded_cost + state->frame->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],0);
base_cost += state->frame->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],1);
best_cost = block_uncoded_cost + state->lambda * CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],0);
base_cost += state->lambda * CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],1);
}
for ( int32_t cg_scanpos = cg_last_scanpos; cg_scanpos >= 0; cg_scanpos--) {
@ -1006,5 +1006,5 @@ int kvz_calc_mvd_cost_cabac(encoder_state_t * const state, int x, int y, int mv_
*bitcost = (23 - state_cabac_copy.bits_left) + (state_cabac_copy.num_buffered_bytes << 3);
// Store bitcost before restoring cabac
return *bitcost * (int32_t)(state->frame->cur_lambda_cost_sqrt + 0.5);
return *bitcost * (int32_t)(state->lambda_sqrt + 0.5);
}

View file

@ -501,7 +501,7 @@ static void sao_search_edge_sao(const encoder_state_t * const state,
{
float mode_bits = sao_mode_bits_edge(state, edge_class, edge_offset, sao_top, sao_left, buf_cnt);
sum_ddistortion += (int)((double)mode_bits*state->frame->cur_lambda_cost+0.5);
sum_ddistortion += (int)((double)mode_bits*state->lambda +0.5);
}
// SAO is not applied for category 0.
edge_offset[SAO_EO_CAT0] = 0;
@ -545,7 +545,7 @@ static void sao_search_band_sao(const encoder_state_t * const state, const kvz_p
}
temp_rate = sao_mode_bits_band(state, sao_out->band_position, temp_offsets, sao_top, sao_left, buf_cnt);
ddistortion += (int)((double)temp_rate*state->frame->cur_lambda_cost + 0.5);
ddistortion += (int)((double)temp_rate*state->lambda + 0.5);
// Select band sao over edge sao when distortion is lower
if (ddistortion < sao_out->ddistortion) {
@ -589,7 +589,7 @@ static void sao_search_best_mode(const encoder_state_t * const state, const kvz_
{
float mode_bits = sao_mode_bits_edge(state, edge_sao.eo_class, edge_sao.offsets, sao_top, sao_left, buf_cnt);
int ddistortion = (int)(mode_bits * state->frame->cur_lambda_cost + 0.5);
int ddistortion = (int)(mode_bits * state->lambda + 0.5);
unsigned buf_i;
for (buf_i = 0; buf_i < buf_cnt; ++buf_i) {
@ -603,7 +603,7 @@ static void sao_search_best_mode(const encoder_state_t * const state, const kvz_
{
float mode_bits = sao_mode_bits_band(state, band_sao.band_position, band_sao.offsets, sao_top, sao_left, buf_cnt);
int ddistortion = (int)(mode_bits * state->frame->cur_lambda_cost + 0.5);
int ddistortion = (int)(mode_bits * state->lambda + 0.5);
unsigned buf_i;
for (buf_i = 0; buf_i < buf_cnt; ++buf_i) {
@ -626,7 +626,7 @@ static void sao_search_best_mode(const encoder_state_t * const state, const kvz_
// Choose between SAO and doing nothing, taking into account the
// rate-distortion cost of coding do nothing.
{
int cost_of_nothing = (int)(sao_mode_bits_none(state, sao_top, sao_left) * state->frame->cur_lambda_cost + 0.5);
int cost_of_nothing = (int)(sao_mode_bits_none(state, sao_top, sao_left) * state->lambda + 0.5);
if (sao_out->ddistortion >= cost_of_nothing) {
sao_out->type = SAO_TYPE_NONE;
merge_cost[0] = cost_of_nothing;
@ -643,7 +643,7 @@ static void sao_search_best_mode(const encoder_state_t * const state, const kvz_
if (merge_cand) {
unsigned buf_i;
float mode_bits = sao_mode_bits_merge(state, i + 1);
int ddistortion = (int)(mode_bits * state->frame->cur_lambda_cost + 0.5);
int ddistortion = (int)(mode_bits * state->lambda + 0.5);
switch (merge_cand->type) {
case SAO_TYPE_EDGE:

View file

@ -321,7 +321,7 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
sum += kvz_cu_rd_cost_luma(state, x_px, y_px + offset, depth + 1, pred_cu, lcu);
sum += kvz_cu_rd_cost_luma(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu);
return sum + tr_tree_bits * state->frame->cur_lambda_cost;
return sum + tr_tree_bits * state->lambda;
}
// Add transform_tree cbf_luma bit cost.
@ -353,7 +353,7 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
}
double bits = tr_tree_bits + coeff_bits;
return (double)ssd * LUMA_MULT + bits * state->frame->cur_lambda_cost;
return (double)ssd * LUMA_MULT + bits * state->lambda;
}
@ -398,7 +398,7 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
sum += kvz_cu_rd_cost_chroma(state, x_px, y_px + offset, depth + 1, pred_cu, lcu);
sum += kvz_cu_rd_cost_chroma(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu);
return sum + tr_tree_bits * state->frame->cur_lambda_cost;
return sum + tr_tree_bits * state->lambda;
}
// Chroma SSD
@ -428,7 +428,7 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
}
double bits = tr_tree_bits + coeff_bits;
return (double)ssd * CHROMA_MULT + bits * state->frame->cur_lambda_cost;
return (double)ssd * CHROMA_MULT + bits * state->lambda;
}
@ -682,7 +682,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
mode_bits = inter_bitcost;
}
cost += mode_bits * state->frame->cur_lambda_cost;
cost += mode_bits * state->lambda;
}
// Recursively split all the way to max search depth.
@ -695,15 +695,15 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
// Add cost of cu_split_flag.
uint8_t split_model = get_ctx_cu_split_model(lcu, x, y, depth);
const cabac_ctx_t *ctx = &(state->cabac.ctx.split_flag_model[split_model]);
cost += CTX_ENTROPY_FBITS(ctx, 0) * state->frame->cur_lambda_cost;
split_cost += CTX_ENTROPY_FBITS(ctx, 1) * state->frame->cur_lambda_cost;
cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda;
split_cost += CTX_ENTROPY_FBITS(ctx, 1) * state->lambda;
}
if (cur_cu->type == CU_INTRA && depth == MAX_DEPTH) {
// Add cost of intra part_size.
const cabac_ctx_t *ctx = &(state->cabac.ctx.part_size_model[0]);
cost += CTX_ENTROPY_FBITS(ctx, 1) * state->frame->cur_lambda_cost; // 2Nx2N
split_cost += CTX_ENTROPY_FBITS(ctx, 0) * state->frame->cur_lambda_cost; // NxN
cost += CTX_ENTROPY_FBITS(ctx, 1) * state->lambda; // 2Nx2N
split_cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda; // NxN
}
// If skip mode was selected for the block, skip further search.
@ -750,11 +750,11 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
// Add the cost of coding no-split.
uint8_t split_model = get_ctx_cu_split_model(lcu, x, y, depth);
const cabac_ctx_t *ctx = &(state->cabac.ctx.split_flag_model[split_model]);
cost += CTX_ENTROPY_FBITS(ctx, 0) * state->frame->cur_lambda_cost;
cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda;
// Add the cost of coding intra mode only once.
double mode_bits = calc_mode_bits(state, &work_tree[depth], cur_cu, x, y);
cost += mode_bits * state->frame->cur_lambda_cost;
cost += mode_bits * state->lambda;
}
}
@ -949,7 +949,10 @@ void kvz_search_lcu(encoder_state_t * const state, const int x, const int y, con
}
// Start search from depth 0.
search_cu(state, x, y, 0, work_tree);
double cost = search_cu(state, x, y, 0, work_tree);
// Save squared cost for rate control.
kvz_get_lcu_stats(state, x / LCU_WIDTH, y / LCU_WIDTH)->weight = cost * cost;
// The best decisions through out the LCU got propagated back to depth 0,
// so copy those back to the frame.

View file

@ -253,7 +253,7 @@ static int calc_mvd_cost(encoder_state_t * const state, int x, int y, int mv_shi
temp_bitcost += cur_mv_cand ? cand2_cost : cand1_cost;
}
*bitcost = temp_bitcost;
return temp_bitcost*(int32_t)(state->frame->cur_lambda_cost_sqrt+0.5);
return temp_bitcost*(int32_t)(state->lambda_sqrt + 0.5);
}

View file

@ -117,7 +117,7 @@ static double get_cost(encoder_state_t * const state,
trskip_bits += 2.0 * (CTX_ENTROPY_FBITS(ctx, 1) - CTX_ENTROPY_FBITS(ctx, 0));
}
double sad_cost = TRSKIP_RATIO * sad_func(pred, orig_block) + state->frame->cur_lambda_cost_sqrt * trskip_bits;
double sad_cost = TRSKIP_RATIO * sad_func(pred, orig_block) + state->lambda_sqrt * trskip_bits;
if (sad_cost < satd_cost) {
return sad_cost;
}
@ -164,7 +164,7 @@ static void get_cost_dual(encoder_state_t * const state,
double sad_costs[PARALLEL_BLKS] = { 0 };
sad_twin_func(preds, orig_block, PARALLEL_BLKS, unsigned_sad_costs);
for (int i = 0; i < PARALLEL_BLKS; ++i) {
sad_costs[i] = TRSKIP_RATIO * (double)unsigned_sad_costs[i] + state->frame->cur_lambda_cost_sqrt * trskip_bits;
sad_costs[i] = TRSKIP_RATIO * (double)unsigned_sad_costs[i] + state->lambda_sqrt * trskip_bits;
if (sad_costs[i] < (double)satd_costs[i]) {
costs_out[i] = sad_costs[i];
}
@ -254,7 +254,7 @@ static double search_intra_trdepth(encoder_state_t * const state,
// max_depth.
// - Min transform size hasn't been reached (MAX_PU_DEPTH).
if (depth < max_depth && depth < MAX_PU_DEPTH) {
split_cost = 3 * state->frame->cur_lambda_cost;
split_cost = 3 * state->lambda;
split_cost += search_intra_trdepth(state, x_px, y_px, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu);
if (split_cost < nosplit_cost) {
@ -296,7 +296,7 @@ static double search_intra_trdepth(encoder_state_t * const state,
}
double bits = tr_split_bit + cbf_bits;
split_cost += bits * state->frame->cur_lambda_cost;
split_cost += bits * state->lambda;
} else {
assert(width <= TR_MAX_WIDTH);
}
@ -529,7 +529,7 @@ static int8_t search_intra_rough(encoder_state_t * const state,
// Add prediction mode coding cost as the last thing. We don't want this
// affecting the halving search.
int lambda_cost = (int)(state->frame->cur_lambda_cost_sqrt + 0.5);
int lambda_cost = (int)(state->lambda_sqrt + 0.5);
for (int mode_i = 0; mode_i < modes_selected; ++mode_i) {
costs[mode_i] += lambda_cost * kvz_luma_mode_bits(state, modes[mode_i], intra_preds);
}
@ -600,7 +600,7 @@ static int8_t search_intra_rdo(encoder_state_t * const state,
for(int rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode ++) {
int rdo_bitcost = kvz_luma_mode_bits(state, modes[rdo_mode], intra_preds);
costs[rdo_mode] = rdo_bitcost * (int)(state->frame->cur_lambda_cost + 0.5);
costs[rdo_mode] = rdo_bitcost * (int)(state->lambda + 0.5);
// Perform transform split search and save mode RD cost for the best one.
cu_info_t pred_cu;
@ -701,7 +701,7 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state,
chroma.cost = kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, tr_cu, lcu);
double mode_bits = kvz_chroma_mode_bits(state, chroma.mode, intra_mode);
chroma.cost += mode_bits * state->frame->cur_lambda_cost;
chroma.cost += mode_bits * state->lambda;
if (chroma.cost < best_chroma.cost) {
best_chroma = chroma;

View file

@ -52,7 +52,7 @@ void kvz_quant_flat_avx2(const encoder_state_t * const state, coeff_t *coef, coe
const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2;
const uint32_t * const scan = kvz_g_sig_last_scan[scan_idx][log2_block_size - 1];
int32_t qp_scaled = kvz_get_scaled_qp(type, state->frame->QP, (encoder->bitdepth - 8) * 6);
int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth - 8) * 6);
const uint32_t log2_tr_size = kvz_g_convert_to_bit[width] + 2;
const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]);
const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_size - 2][scalinglist_type][qp_scaled % 6];
@ -457,7 +457,7 @@ void kvz_dequant_avx2(const encoder_state_t * const state, coeff_t *q_coef, coef
int32_t n;
int32_t transform_shift = 15 - encoder->bitdepth - (kvz_g_convert_to_bit[ width ] + 2);
int32_t qp_scaled = kvz_get_scaled_qp(type, state->frame->QP, (encoder->bitdepth-8)*6);
int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth-8)*6);
shift = 20 - QUANT_SHIFT - transform_shift;

View file

@ -41,7 +41,7 @@ void kvz_quant_generic(const encoder_state_t * const state, coeff_t *coef, coeff
const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2;
const uint32_t * const scan = kvz_g_sig_last_scan[scan_idx][log2_block_size - 1];
int32_t qp_scaled = kvz_get_scaled_qp(type, state->frame->QP, (encoder->bitdepth - 8) * 6);
int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth - 8) * 6);
const uint32_t log2_tr_size = kvz_g_convert_to_bit[width] + 2;
const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]);
const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_size - 2][scalinglist_type][qp_scaled % 6];
@ -286,7 +286,7 @@ void kvz_dequant_generic(const encoder_state_t * const state, coeff_t *q_coef, c
int32_t n;
int32_t transform_shift = 15 - encoder->bitdepth - (kvz_g_convert_to_bit[ width ] + 2);
int32_t qp_scaled = kvz_get_scaled_qp(type, state->frame->QP, (encoder->bitdepth-8)*6);
int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth-8)*6);
shift = 20 - QUANT_SHIFT - transform_shift;

View file

@ -232,7 +232,7 @@ int kvz_quantize_residual_trskip(
int has_coeffs;
} skip, noskip, *best;
const int bit_cost = (int)(state->frame->cur_lambda_cost+0.5);
const int bit_cost = (int)(state->lambda + 0.5);
noskip.has_coeffs = kvz_quantize_residual(
state, cur_cu, width, color, scan_order,