Update cabac context during search

Create a separate cabac that is only used during the search. It should hold
the state that the actual cabac ends up in after encoding said CU. Only
implemented for intra so far.

TODO: 4×4 PUs probably still have some problems
This commit is contained in:
Joose Sainio 2021-12-03 09:09:57 +02:00
parent d5e4e831f4
commit 53264bc764
7 changed files with 97 additions and 40 deletions

View file

@ -60,7 +60,8 @@ typedef struct
uint32_t buffered_byte;
int32_t num_buffered_bytes;
int32_t bits_left;
int8_t only_count;
int8_t only_count : 4;
int8_t update : 4;
bitstream_t *stream;
// CONTEXTS

View file

@ -731,6 +731,8 @@ static void encoder_state_worker_encode_lcu(void * opaque)
kvz_bitstream_align_zero(state->cabac.stream);
kvz_cabac_start(&state->cabac);
memcpy(&state->search_cabac, &state->cabac, sizeof(cabac_data_t));
state->search_cabac.only_count = 1;
kvz_crypto_delete(&state->crypto_hdl);
}
@ -1214,6 +1216,8 @@ static void encoder_state_init_children(encoder_state_t * const state) {
//Leaf states have cabac and context
kvz_cabac_start(&state->cabac);
kvz_init_contexts(state, state->encoder_control->cfg.set_qp_in_cu ? 26 : state->frame->QP, state->frame->slicetype);
memcpy(&state->search_cabac, &state->cabac, sizeof(cabac_data_t));
state->search_cabac.only_count = 1;
}
//Clear the jobs

View file

@ -294,6 +294,7 @@ typedef struct encoder_state_t {
bitstream_t stream;
cabac_data_t cabac;
cabac_data_t search_cabac;
// Crypto stuff
crypto_handle_t *crypto_hdl;

View file

@ -253,12 +253,12 @@ static INLINE uint32_t get_coeff_cabac_cost(
// Take a copy of the CABAC so that we don't overwrite the contexts when
// counting the bits.
cabac_data_t cabac_copy;
memcpy(&cabac_copy, &state->cabac, sizeof(cabac_copy));
memcpy(&cabac_copy, &state->search_cabac, sizeof(cabac_copy));
// Clear bytes and bits and set mode to "count"
cabac_copy.only_count = 1;
cabac_copy.num_buffered_bytes = 0;
cabac_copy.bits_left = 23;
int num_buffered_bytes = cabac_copy.num_buffered_bytes;
int bits_left = cabac_copy.bits_left;
// Execute the coding function.
// It is safe to drop the const modifier since state won't be modified
@ -270,8 +270,11 @@ static INLINE uint32_t get_coeff_cabac_cost(
type,
scan_mode,
0);
if(cabac_copy.update) {
return (23 - cabac_copy.bits_left) + (cabac_copy.num_buffered_bytes << 3);
memcpy(&state->search_cabac, &cabac_copy, sizeof(cabac_copy));
}
return (bits_left - cabac_copy.bits_left) + ((cabac_copy.num_buffered_bytes - num_buffered_bytes) << 3);
}
static INLINE void save_ccc(int qp, const coeff_t *coeff, int32_t size, uint32_t ccc)

View file

@ -52,7 +52,7 @@ static void init_sao_info(sao_info_t *sao) {
static float sao_mode_bits_none(const encoder_state_t * const state, sao_info_t *sao_top, sao_info_t *sao_left)
{
float mode_bits = 0.0;
const cabac_data_t * const cabac = &state->cabac;
const cabac_data_t * const cabac = &state->search_cabac;
const cabac_ctx_t *ctx = NULL;
// FL coded merges.
if (sao_left != NULL) {
@ -74,7 +74,7 @@ static float sao_mode_bits_none(const encoder_state_t * const state, sao_info_t
static float sao_mode_bits_merge(const encoder_state_t * const state,
int8_t merge_cand) {
float mode_bits = 0.0;
const cabac_data_t * const cabac = &state->cabac;
const cabac_data_t * const cabac = &state->search_cabac;
const cabac_ctx_t *ctx = NULL;
// FL coded merges.
ctx = &(cabac->ctx.sao_merge_flag_model);
@ -91,7 +91,7 @@ static float sao_mode_bits_edge(const encoder_state_t * const state,
sao_info_t *sao_top, sao_info_t *sao_left, unsigned buf_cnt)
{
float mode_bits = 0.0;
const cabac_data_t * const cabac = &state->cabac;
const cabac_data_t * const cabac = &state->search_cabac;
const cabac_ctx_t *ctx = NULL;
// FL coded merges.
if (sao_left != NULL) {
@ -131,7 +131,7 @@ static float sao_mode_bits_band(const encoder_state_t * const state,
sao_info_t *sao_top, sao_info_t *sao_left, unsigned buf_cnt)
{
float mode_bits = 0.0;
const cabac_data_t * const cabac = &state->cabac;
const cabac_data_t * const cabac = &state->search_cabac;
const cabac_ctx_t *ctx = NULL;
// FL coded merges.
if (sao_left != NULL) {

View file

@ -245,7 +245,7 @@ static double cu_zero_coeff_cost(const encoder_state_t *state, lcu_t *work_tree,
* Takes into account SSD of reconstruction and the cost of encoding whatever
* prediction unit data needs to be coded.
*/
double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
double kvz_cu_rd_cost_luma(encoder_state_t *const state,
const int x_px, const int y_px, const int depth,
const cu_info_t *const pred_cu,
lcu_t *const lcu,
@ -271,8 +271,12 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
&& width > TR_MIN_WIDTH
&& !intra_split_flag)
{
const cabac_ctx_t *ctx = &(state->cabac.ctx.trans_subdiv_model[5 - (6 - depth)]);
const cabac_ctx_t *ctx = &(state->search_cabac.ctx.trans_subdiv_model[5 - (6 - depth)]);
tr_tree_bits += CTX_ENTROPY_FBITS(ctx, tr_depth > 0);
if (state->search_cabac.update) {
state->search_cabac.cur_ctx = ctx;
CABAC_BIN(&state->search_cabac, tr_depth > 0, "tr_split_search");
}
*bit_cost += tr_tree_bits;
}
@ -294,9 +298,14 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
cbf_is_set(tr_cu->cbf, depth, COLOR_U) ||
cbf_is_set(tr_cu->cbf, depth, COLOR_V))
{
const cabac_ctx_t *ctx = &(state->cabac.ctx.qt_cbf_model_luma[!tr_depth]);
tr_tree_bits += CTX_ENTROPY_FBITS(ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_Y));
*bit_cost += CTX_ENTROPY_FBITS(ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_Y));
const cabac_ctx_t *ctx = &(state->search_cabac.ctx.qt_cbf_model_luma[!tr_depth]);
int is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_Y);
tr_tree_bits += CTX_ENTROPY_FBITS(ctx, is_set);
if (state->search_cabac.update) {
state->search_cabac.cur_ctx = ctx;
CABAC_BIN(&state->search_cabac, is_set, "luma_cbf_search");
}
*bit_cost += CTX_ENTROPY_FBITS(ctx, is_set);
}
@ -346,7 +355,7 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
if (depth < MAX_PU_DEPTH) {
const int tr_depth = depth - pred_cu->depth;
const cabac_ctx_t *ctx = &(state->cabac.ctx.qt_cbf_model_chroma[tr_depth]);
const cabac_ctx_t *ctx = &(state->search_cabac.ctx.qt_cbf_model_chroma[tr_depth]);
if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) {
tr_tree_bits += CTX_ENTROPY_FBITS(ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_U));
}
@ -494,6 +503,8 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
double inter_zero_coeff_cost = MAX_DOUBLE;
uint32_t inter_bitcost = MAX_INT;
cu_info_t *cur_cu;
cabac_data_t pre_search_cabac;
memcpy(&pre_search_cabac, &state->search_cabac, sizeof(pre_search_cabac));
struct {
int32_t min;
@ -699,24 +710,31 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
if (cur_cu->type == CU_INTRA || cur_cu->type == CU_INTER) {
double bits = 0;
cost = kvz_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, lcu, &bits);
if (state->encoder_control->chroma_format != KVZ_CSP_400) {
cost += kvz_cu_rd_cost_chroma(state, x_local, y_local, depth, cur_cu, lcu, & bits);
}
state->search_cabac.update = 1;
uint8_t split_model = get_ctx_cu_split_model(lcu, x, y, depth);
cabac_ctx_t* ctx = &(state->search_cabac.ctx.split_flag_model[split_model]);
state->search_cabac.cur_ctx = ctx;
// TODO: intra 4x4 PUs use different method
bits += CTX_ENTROPY_FBITS(ctx, 0);
CABAC_BIN(&state->search_cabac, 0, "no_split_search");
double mode_bits;
if (cur_cu->type == CU_INTRA) {
mode_bits = calc_mode_bits(state, lcu, cur_cu, x, y);
} else {
}
else {
mode_bits = inter_bitcost;
}
bits += mode_bits;
uint8_t split_model = get_ctx_cu_split_model(lcu, x, y, depth);
const cabac_ctx_t* ctx = &(state->cabac.ctx.split_flag_model[split_model]);
// bits += CTX_ENTROPY_FBITS(ctx, 0);
FILE_BITS(bits, x, y, depth, "final rd bits");
cost = mode_bits * state->lambda;
cost += mode_bits * state->lambda;
cost += kvz_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, lcu, &bits);
if (state->encoder_control->chroma_format != KVZ_CSP_400) {
cost += kvz_cu_rd_cost_chroma(state, x_local, y_local, depth, cur_cu, lcu, & bits);
}
FILE_BITS(bits, x, y, depth, "final rd bits");
if (ctrl->cfg.zero_coeff_rdo && inter_zero_coeff_cost <= cost) {
cost = inter_zero_coeff_cost;
@ -739,7 +757,8 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
cur_cu->cbf = 0;
lcu_fill_cbf(lcu, x_local, y_local, cu_width, cur_cu);
}
}
state->search_cabac.update = 0;
}
bool can_split_cu =
// If the CU is partially outside the frame, we need to split it even
@ -754,24 +773,27 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
int half_cu = cu_width / 2;
double split_cost = 0.0;
int cbf = cbf_is_set_any(cur_cu->cbf, depth);
cabac_data_t post_seach_cabac;
memcpy(&post_seach_cabac, &state->search_cabac, sizeof(post_seach_cabac));
memcpy(&state->search_cabac, &pre_search_cabac, sizeof(post_seach_cabac));
if (depth < MAX_DEPTH) {
// Add cost of cu_split_flag.
uint8_t split_model = get_ctx_cu_split_model(lcu, x, y, depth);
const cabac_ctx_t *ctx = &(state->cabac.ctx.split_flag_model[split_model]);
cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda;
FILE_BITS(CTX_ENTROPY_FBITS(ctx, 0), x, y, depth, "not split");
const cabac_ctx_t *ctx = &(state->search_cabac.ctx.split_flag_model[split_model]);
split_cost += CTX_ENTROPY_FBITS(ctx, 1) * state->lambda;
state->search_cabac.cur_ctx = ctx;
FILE_BITS(CTX_ENTROPY_FBITS(ctx, 1), x, y, depth, "split");
CABAC_BIN(&state->search_cabac, 1, "split_search");
}
if (cur_cu->type == CU_INTRA && depth == MAX_DEPTH) {
// Add cost of intra part_size.
const cabac_ctx_t *ctx = &(state->cabac.ctx.part_size_model[0]);
cost += CTX_ENTROPY_FBITS(ctx, 1) * state->lambda; // 2Nx2N
FILE_BITS(CTX_ENTROPY_FBITS(ctx, 1), x, y, depth, "not split");
const cabac_ctx_t *ctx = &(state->search_cabac.ctx.part_size_model[0]);
split_cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda; // NxN
state->search_cabac.cur_ctx = ctx;
FILE_BITS(CTX_ENTROPY_FBITS(ctx, 1), x, y, depth, "split");
CABAC_BIN(&state->search_cabac, 1, "split_search");
}
// If skip mode was selected for the block, skip further search.
@ -826,7 +848,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
FILE_BITS(bits, x, y, depth, "merged intra bits");
// Add the cost of coding no-split.
uint8_t split_model = get_ctx_cu_split_model(lcu, x, y, depth);
const cabac_ctx_t *ctx = &(state->cabac.ctx.split_flag_model[split_model]);
const cabac_ctx_t *ctx = &(state->search_cabac.ctx.split_flag_model[split_model]);
cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda;
// Add the cost of coding intra mode only once.
@ -845,6 +867,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
} else if (depth > 0) {
// Copy this CU's mode all the way down for use in adjacent CUs mode
// search.
memcpy(&state->search_cabac, &post_seach_cabac, sizeof(post_seach_cabac));
work_tree_copy_down(x_local, y_local, depth, work_tree);
}
} else if (depth >= 0 && depth < MAX_PU_DEPTH) {

View file

@ -98,11 +98,11 @@ static double get_cost(encoder_state_t * const state,
// Add the offset bit costs of signaling 'luma and chroma use trskip',
// versus signaling 'luma and chroma don't use trskip' to the SAD cost.
const cabac_ctx_t *ctx = &state->cabac.ctx.transform_skip_model_luma;
const cabac_ctx_t *ctx = &state->search_cabac.ctx.transform_skip_model_luma;
double trskip_bits = CTX_ENTROPY_FBITS(ctx, 1) - CTX_ENTROPY_FBITS(ctx, 0);
if (state->encoder_control->chroma_format != KVZ_CSP_400) {
ctx = &state->cabac.ctx.transform_skip_model_chroma;
ctx = &state->search_cabac.ctx.transform_skip_model_chroma;
trskip_bits += 2.0 * (CTX_ENTROPY_FBITS(ctx, 1) - CTX_ENTROPY_FBITS(ctx, 0));
}
@ -269,7 +269,7 @@ static double search_intra_trdepth(encoder_state_t * const state,
// Add bits for split_transform_flag = 1, because transform depth search bypasses
// the normal recursion in the cost functions.
if (depth >= 1 && depth <= 3) {
const cabac_ctx_t *ctx = &(state->cabac.ctx.trans_subdiv_model[5 - (6 - depth)]);
const cabac_ctx_t *ctx = &(state->search_cabac.ctx.trans_subdiv_model[5 - (6 - depth)]);
tr_split_bit += CTX_ENTROPY_FBITS(ctx, 1);
*bit_cost += tr_split_bit;
}
@ -283,7 +283,7 @@ static double search_intra_trdepth(encoder_state_t * const state,
if (state->encoder_control->chroma_format != KVZ_CSP_400) {
const uint8_t tr_depth = depth - pred_cu->depth;
const cabac_ctx_t *ctx = &(state->cabac.ctx.qt_cbf_model_chroma[tr_depth]);
const cabac_ctx_t *ctx = &(state->search_cabac.ctx.qt_cbf_model_chroma[tr_depth]);
if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) {
cbf_bits += CTX_ENTROPY_FBITS(ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_U));
}
@ -647,8 +647,9 @@ static int8_t search_intra_rdo(encoder_state_t * const state,
}
double kvz_luma_mode_bits(const encoder_state_t *state, int8_t luma_mode, const int8_t *intra_preds)
double kvz_luma_mode_bits(encoder_state_t *state, int8_t luma_mode, const int8_t *intra_preds)
{
cabac_data_t* cabac = &state->search_cabac;
double mode_bits;
bool mode_in_preds = false;
@ -658,8 +659,23 @@ double kvz_luma_mode_bits(const encoder_state_t *state, int8_t luma_mode, const
}
}
const cabac_ctx_t *ctx = &(state->cabac.ctx.intra_mode_model);
const cabac_ctx_t *ctx = &(cabac->ctx.intra_mode_model);
mode_bits = CTX_ENTROPY_FBITS(ctx, mode_in_preds);
if (state->search_cabac.update) {
state->search_cabac.cur_ctx = ctx;
CABAC_BIN(&state->search_cabac, mode_in_preds, "prev_intra_luma_pred_flag_search");
if(mode_in_preds) {
CABAC_BIN_EP(cabac, !(luma_mode == intra_preds[0]), "mpm_idx");
if(luma_mode != intra_preds[0]) {
CABAC_BIN_EP(cabac, !(luma_mode == intra_preds[1]), "mpm_idx");
}
}
else {
// This value should be transformed for actual coding,
// but here the value does not actually matter, just that we write 5 bits
CABAC_BINS_EP(cabac, luma_mode, 5, "rem_intra_luma_pred_mode");
}
}
if (mode_in_preds) {
mode_bits += ((luma_mode == intra_preds[0]) ? 1 : 2);
@ -673,13 +689,22 @@ double kvz_luma_mode_bits(const encoder_state_t *state, int8_t luma_mode, const
double kvz_chroma_mode_bits(const encoder_state_t *state, int8_t chroma_mode, int8_t luma_mode)
{
const cabac_ctx_t *ctx = &(state->cabac.ctx.chroma_pred_model[0]);
cabac_data_t* cabac = &state->search_cabac;
const cabac_ctx_t *ctx = &(cabac->ctx.chroma_pred_model[0]);
double mode_bits;
if (chroma_mode == luma_mode) {
mode_bits = CTX_ENTROPY_FBITS(ctx, 0);
} else {
mode_bits = 2.0 + CTX_ENTROPY_FBITS(ctx, 1);
}
if(cabac->update) {
cabac->cur_ctx = ctx;
CABAC_BIN(cabac, chroma_mode != luma_mode, "intra_chroma_pred_mode");
if(chroma_mode != luma_mode) {
// Again it does not matter what we actually write here
CABAC_BINS_EP(cabac, 0, 2, "intra_chroma_pred_mode");
}
}
return mode_bits;
}