diff --git a/src/cabac.h b/src/cabac.h index fa17c799..7dd65a54 100644 --- a/src/cabac.h +++ b/src/cabac.h @@ -60,7 +60,8 @@ typedef struct uint32_t buffered_byte; int32_t num_buffered_bytes; int32_t bits_left; - int8_t only_count; + int8_t only_count : 4; + int8_t update : 4; bitstream_t *stream; // CONTEXTS diff --git a/src/encoderstate.c b/src/encoderstate.c index 483dfb6a..012476df 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -731,6 +731,8 @@ static void encoder_state_worker_encode_lcu(void * opaque) kvz_bitstream_align_zero(state->cabac.stream); kvz_cabac_start(&state->cabac); + memcpy(&state->search_cabac, &state->cabac, sizeof(cabac_data_t)); + state->search_cabac.only_count = 1; kvz_crypto_delete(&state->crypto_hdl); } @@ -1214,6 +1216,8 @@ static void encoder_state_init_children(encoder_state_t * const state) { //Leaf states have cabac and context kvz_cabac_start(&state->cabac); kvz_init_contexts(state, state->encoder_control->cfg.set_qp_in_cu ? 26 : state->frame->QP, state->frame->slicetype); + memcpy(&state->search_cabac, &state->cabac, sizeof(cabac_data_t)); + state->search_cabac.only_count = 1; } //Clear the jobs diff --git a/src/encoderstate.h b/src/encoderstate.h index 00885aa4..ac62a5a7 100644 --- a/src/encoderstate.h +++ b/src/encoderstate.h @@ -294,6 +294,7 @@ typedef struct encoder_state_t { bitstream_t stream; cabac_data_t cabac; + cabac_data_t search_cabac; // Crypto stuff crypto_handle_t *crypto_hdl; diff --git a/src/rdo.c b/src/rdo.c index 5403fa61..6b8960ee 100644 --- a/src/rdo.c +++ b/src/rdo.c @@ -253,12 +253,12 @@ static INLINE uint32_t get_coeff_cabac_cost( // Take a copy of the CABAC so that we don't overwrite the contexts when // counting the bits. cabac_data_t cabac_copy; - memcpy(&cabac_copy, &state->cabac, sizeof(cabac_copy)); + memcpy(&cabac_copy, &state->search_cabac, sizeof(cabac_copy)); // Clear bytes and bits and set mode to "count" cabac_copy.only_count = 1; - cabac_copy.num_buffered_bytes = 0; - cabac_copy.bits_left = 23; + int num_buffered_bytes = cabac_copy.num_buffered_bytes; + int bits_left = cabac_copy.bits_left; // Execute the coding function. // It is safe to drop the const modifier since state won't be modified @@ -270,8 +270,11 @@ static INLINE uint32_t get_coeff_cabac_cost( type, scan_mode, 0); + if(cabac_copy.update) { - return (23 - cabac_copy.bits_left) + (cabac_copy.num_buffered_bytes << 3); + memcpy(&state->search_cabac, &cabac_copy, sizeof(cabac_copy)); + } + return (bits_left - cabac_copy.bits_left) + ((cabac_copy.num_buffered_bytes - num_buffered_bytes) << 3); } static INLINE void save_ccc(int qp, const coeff_t *coeff, int32_t size, uint32_t ccc) diff --git a/src/sao.c b/src/sao.c index e9fab518..35be7176 100644 --- a/src/sao.c +++ b/src/sao.c @@ -52,7 +52,7 @@ static void init_sao_info(sao_info_t *sao) { static float sao_mode_bits_none(const encoder_state_t * const state, sao_info_t *sao_top, sao_info_t *sao_left) { float mode_bits = 0.0; - const cabac_data_t * const cabac = &state->cabac; + const cabac_data_t * const cabac = &state->search_cabac; const cabac_ctx_t *ctx = NULL; // FL coded merges. if (sao_left != NULL) { @@ -74,7 +74,7 @@ static float sao_mode_bits_none(const encoder_state_t * const state, sao_info_t static float sao_mode_bits_merge(const encoder_state_t * const state, int8_t merge_cand) { float mode_bits = 0.0; - const cabac_data_t * const cabac = &state->cabac; + const cabac_data_t * const cabac = &state->search_cabac; const cabac_ctx_t *ctx = NULL; // FL coded merges. ctx = &(cabac->ctx.sao_merge_flag_model); @@ -91,7 +91,7 @@ static float sao_mode_bits_edge(const encoder_state_t * const state, sao_info_t *sao_top, sao_info_t *sao_left, unsigned buf_cnt) { float mode_bits = 0.0; - const cabac_data_t * const cabac = &state->cabac; + const cabac_data_t * const cabac = &state->search_cabac; const cabac_ctx_t *ctx = NULL; // FL coded merges. if (sao_left != NULL) { @@ -131,7 +131,7 @@ static float sao_mode_bits_band(const encoder_state_t * const state, sao_info_t *sao_top, sao_info_t *sao_left, unsigned buf_cnt) { float mode_bits = 0.0; - const cabac_data_t * const cabac = &state->cabac; + const cabac_data_t * const cabac = &state->search_cabac; const cabac_ctx_t *ctx = NULL; // FL coded merges. if (sao_left != NULL) { diff --git a/src/search.c b/src/search.c index 1fc47a06..2cb34608 100644 --- a/src/search.c +++ b/src/search.c @@ -245,7 +245,7 @@ static double cu_zero_coeff_cost(const encoder_state_t *state, lcu_t *work_tree, * Takes into account SSD of reconstruction and the cost of encoding whatever * prediction unit data needs to be coded. */ -double kvz_cu_rd_cost_luma(const encoder_state_t *const state, +double kvz_cu_rd_cost_luma(encoder_state_t *const state, const int x_px, const int y_px, const int depth, const cu_info_t *const pred_cu, lcu_t *const lcu, @@ -271,8 +271,12 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state, && width > TR_MIN_WIDTH && !intra_split_flag) { - const cabac_ctx_t *ctx = &(state->cabac.ctx.trans_subdiv_model[5 - (6 - depth)]); + const cabac_ctx_t *ctx = &(state->search_cabac.ctx.trans_subdiv_model[5 - (6 - depth)]); tr_tree_bits += CTX_ENTROPY_FBITS(ctx, tr_depth > 0); + if (state->search_cabac.update) { + state->search_cabac.cur_ctx = ctx; + CABAC_BIN(&state->search_cabac, tr_depth > 0, "tr_split_search"); + } *bit_cost += tr_tree_bits; } @@ -294,9 +298,14 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state, cbf_is_set(tr_cu->cbf, depth, COLOR_U) || cbf_is_set(tr_cu->cbf, depth, COLOR_V)) { - const cabac_ctx_t *ctx = &(state->cabac.ctx.qt_cbf_model_luma[!tr_depth]); - tr_tree_bits += CTX_ENTROPY_FBITS(ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_Y)); - *bit_cost += CTX_ENTROPY_FBITS(ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_Y)); + const cabac_ctx_t *ctx = &(state->search_cabac.ctx.qt_cbf_model_luma[!tr_depth]); + int is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_Y); + tr_tree_bits += CTX_ENTROPY_FBITS(ctx, is_set); + if (state->search_cabac.update) { + state->search_cabac.cur_ctx = ctx; + CABAC_BIN(&state->search_cabac, is_set, "luma_cbf_search"); + } + *bit_cost += CTX_ENTROPY_FBITS(ctx, is_set); } @@ -346,7 +355,7 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state, if (depth < MAX_PU_DEPTH) { const int tr_depth = depth - pred_cu->depth; - const cabac_ctx_t *ctx = &(state->cabac.ctx.qt_cbf_model_chroma[tr_depth]); + const cabac_ctx_t *ctx = &(state->search_cabac.ctx.qt_cbf_model_chroma[tr_depth]); if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) { tr_tree_bits += CTX_ENTROPY_FBITS(ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_U)); } @@ -494,6 +503,8 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, double inter_zero_coeff_cost = MAX_DOUBLE; uint32_t inter_bitcost = MAX_INT; cu_info_t *cur_cu; + cabac_data_t pre_search_cabac; + memcpy(&pre_search_cabac, &state->search_cabac, sizeof(pre_search_cabac)); struct { int32_t min; @@ -699,24 +710,31 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, if (cur_cu->type == CU_INTRA || cur_cu->type == CU_INTER) { double bits = 0; - cost = kvz_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, lcu, &bits); - if (state->encoder_control->chroma_format != KVZ_CSP_400) { - cost += kvz_cu_rd_cost_chroma(state, x_local, y_local, depth, cur_cu, lcu, & bits); - } + state->search_cabac.update = 1; + + uint8_t split_model = get_ctx_cu_split_model(lcu, x, y, depth); + cabac_ctx_t* ctx = &(state->search_cabac.ctx.split_flag_model[split_model]); + state->search_cabac.cur_ctx = ctx; + // TODO: intra 4x4 PUs use different method + bits += CTX_ENTROPY_FBITS(ctx, 0); + CABAC_BIN(&state->search_cabac, 0, "no_split_search"); double mode_bits; if (cur_cu->type == CU_INTRA) { mode_bits = calc_mode_bits(state, lcu, cur_cu, x, y); - } else { + } + else { mode_bits = inter_bitcost; } bits += mode_bits; - uint8_t split_model = get_ctx_cu_split_model(lcu, x, y, depth); - const cabac_ctx_t* ctx = &(state->cabac.ctx.split_flag_model[split_model]); - // bits += CTX_ENTROPY_FBITS(ctx, 0); - FILE_BITS(bits, x, y, depth, "final rd bits"); + cost = mode_bits * state->lambda; - cost += mode_bits * state->lambda; + cost += kvz_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, lcu, &bits); + if (state->encoder_control->chroma_format != KVZ_CSP_400) { + cost += kvz_cu_rd_cost_chroma(state, x_local, y_local, depth, cur_cu, lcu, & bits); + } + + FILE_BITS(bits, x, y, depth, "final rd bits"); if (ctrl->cfg.zero_coeff_rdo && inter_zero_coeff_cost <= cost) { cost = inter_zero_coeff_cost; @@ -739,7 +757,8 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, cur_cu->cbf = 0; lcu_fill_cbf(lcu, x_local, y_local, cu_width, cur_cu); } - } + state->search_cabac.update = 0; + } bool can_split_cu = // If the CU is partially outside the frame, we need to split it even @@ -754,24 +773,27 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, int half_cu = cu_width / 2; double split_cost = 0.0; int cbf = cbf_is_set_any(cur_cu->cbf, depth); + cabac_data_t post_seach_cabac; + memcpy(&post_seach_cabac, &state->search_cabac, sizeof(post_seach_cabac)); + memcpy(&state->search_cabac, &pre_search_cabac, sizeof(post_seach_cabac)); if (depth < MAX_DEPTH) { // Add cost of cu_split_flag. uint8_t split_model = get_ctx_cu_split_model(lcu, x, y, depth); - const cabac_ctx_t *ctx = &(state->cabac.ctx.split_flag_model[split_model]); - cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda; - FILE_BITS(CTX_ENTROPY_FBITS(ctx, 0), x, y, depth, "not split"); + const cabac_ctx_t *ctx = &(state->search_cabac.ctx.split_flag_model[split_model]); split_cost += CTX_ENTROPY_FBITS(ctx, 1) * state->lambda; + state->search_cabac.cur_ctx = ctx; FILE_BITS(CTX_ENTROPY_FBITS(ctx, 1), x, y, depth, "split"); + CABAC_BIN(&state->search_cabac, 1, "split_search"); } if (cur_cu->type == CU_INTRA && depth == MAX_DEPTH) { // Add cost of intra part_size. - const cabac_ctx_t *ctx = &(state->cabac.ctx.part_size_model[0]); - cost += CTX_ENTROPY_FBITS(ctx, 1) * state->lambda; // 2Nx2N - FILE_BITS(CTX_ENTROPY_FBITS(ctx, 1), x, y, depth, "not split"); + const cabac_ctx_t *ctx = &(state->search_cabac.ctx.part_size_model[0]); split_cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda; // NxN + state->search_cabac.cur_ctx = ctx; FILE_BITS(CTX_ENTROPY_FBITS(ctx, 1), x, y, depth, "split"); + CABAC_BIN(&state->search_cabac, 1, "split_search"); } // If skip mode was selected for the block, skip further search. @@ -826,7 +848,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, FILE_BITS(bits, x, y, depth, "merged intra bits"); // Add the cost of coding no-split. uint8_t split_model = get_ctx_cu_split_model(lcu, x, y, depth); - const cabac_ctx_t *ctx = &(state->cabac.ctx.split_flag_model[split_model]); + const cabac_ctx_t *ctx = &(state->search_cabac.ctx.split_flag_model[split_model]); cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda; // Add the cost of coding intra mode only once. @@ -845,6 +867,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, } else if (depth > 0) { // Copy this CU's mode all the way down for use in adjacent CUs mode // search. + memcpy(&state->search_cabac, &post_seach_cabac, sizeof(post_seach_cabac)); work_tree_copy_down(x_local, y_local, depth, work_tree); } } else if (depth >= 0 && depth < MAX_PU_DEPTH) { diff --git a/src/search_intra.c b/src/search_intra.c index bd259e22..ccf1ca91 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -98,11 +98,11 @@ static double get_cost(encoder_state_t * const state, // Add the offset bit costs of signaling 'luma and chroma use trskip', // versus signaling 'luma and chroma don't use trskip' to the SAD cost. - const cabac_ctx_t *ctx = &state->cabac.ctx.transform_skip_model_luma; + const cabac_ctx_t *ctx = &state->search_cabac.ctx.transform_skip_model_luma; double trskip_bits = CTX_ENTROPY_FBITS(ctx, 1) - CTX_ENTROPY_FBITS(ctx, 0); if (state->encoder_control->chroma_format != KVZ_CSP_400) { - ctx = &state->cabac.ctx.transform_skip_model_chroma; + ctx = &state->search_cabac.ctx.transform_skip_model_chroma; trskip_bits += 2.0 * (CTX_ENTROPY_FBITS(ctx, 1) - CTX_ENTROPY_FBITS(ctx, 0)); } @@ -269,7 +269,7 @@ static double search_intra_trdepth(encoder_state_t * const state, // Add bits for split_transform_flag = 1, because transform depth search bypasses // the normal recursion in the cost functions. if (depth >= 1 && depth <= 3) { - const cabac_ctx_t *ctx = &(state->cabac.ctx.trans_subdiv_model[5 - (6 - depth)]); + const cabac_ctx_t *ctx = &(state->search_cabac.ctx.trans_subdiv_model[5 - (6 - depth)]); tr_split_bit += CTX_ENTROPY_FBITS(ctx, 1); *bit_cost += tr_split_bit; } @@ -283,7 +283,7 @@ static double search_intra_trdepth(encoder_state_t * const state, if (state->encoder_control->chroma_format != KVZ_CSP_400) { const uint8_t tr_depth = depth - pred_cu->depth; - const cabac_ctx_t *ctx = &(state->cabac.ctx.qt_cbf_model_chroma[tr_depth]); + const cabac_ctx_t *ctx = &(state->search_cabac.ctx.qt_cbf_model_chroma[tr_depth]); if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) { cbf_bits += CTX_ENTROPY_FBITS(ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_U)); } @@ -647,8 +647,9 @@ static int8_t search_intra_rdo(encoder_state_t * const state, } -double kvz_luma_mode_bits(const encoder_state_t *state, int8_t luma_mode, const int8_t *intra_preds) +double kvz_luma_mode_bits(encoder_state_t *state, int8_t luma_mode, const int8_t *intra_preds) { + cabac_data_t* cabac = &state->search_cabac; double mode_bits; bool mode_in_preds = false; @@ -658,8 +659,23 @@ double kvz_luma_mode_bits(const encoder_state_t *state, int8_t luma_mode, const } } - const cabac_ctx_t *ctx = &(state->cabac.ctx.intra_mode_model); + const cabac_ctx_t *ctx = &(cabac->ctx.intra_mode_model); mode_bits = CTX_ENTROPY_FBITS(ctx, mode_in_preds); + if (state->search_cabac.update) { + state->search_cabac.cur_ctx = ctx; + CABAC_BIN(&state->search_cabac, mode_in_preds, "prev_intra_luma_pred_flag_search"); + if(mode_in_preds) { + CABAC_BIN_EP(cabac, !(luma_mode == intra_preds[0]), "mpm_idx"); + if(luma_mode != intra_preds[0]) { + CABAC_BIN_EP(cabac, !(luma_mode == intra_preds[1]), "mpm_idx"); + } + } + else { + // This value should be transformed for actual coding, + // but here the value does not actually matter, just that we write 5 bits + CABAC_BINS_EP(cabac, luma_mode, 5, "rem_intra_luma_pred_mode"); + } + } if (mode_in_preds) { mode_bits += ((luma_mode == intra_preds[0]) ? 1 : 2); @@ -673,13 +689,22 @@ double kvz_luma_mode_bits(const encoder_state_t *state, int8_t luma_mode, const double kvz_chroma_mode_bits(const encoder_state_t *state, int8_t chroma_mode, int8_t luma_mode) { - const cabac_ctx_t *ctx = &(state->cabac.ctx.chroma_pred_model[0]); + cabac_data_t* cabac = &state->search_cabac; + const cabac_ctx_t *ctx = &(cabac->ctx.chroma_pred_model[0]); double mode_bits; if (chroma_mode == luma_mode) { mode_bits = CTX_ENTROPY_FBITS(ctx, 0); } else { mode_bits = 2.0 + CTX_ENTROPY_FBITS(ctx, 1); } + if(cabac->update) { + cabac->cur_ctx = ctx; + CABAC_BIN(cabac, chroma_mode != luma_mode, "intra_chroma_pred_mode"); + if(chroma_mode != luma_mode) { + // Again it does not matter what we actually write here + CABAC_BINS_EP(cabac, 0, 2, "intra_chroma_pred_mode"); + } + } return mode_bits; }