From 6a2658cc74226ccf962f42fef6579415bdd9f557 Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Fri, 21 Aug 2015 09:11:30 +0300 Subject: [PATCH] Added calc_mvd_cost_cabac() to calculate real bits used for motion vectors Conflicts: src/rdo.h src/search_inter.c Conflicts: src/rdo.c --- src/rdo.c | 230 +++++++++++++++++++++++++++++++++++++++++++++ src/rdo.h | 9 +- src/search_inter.c | 19 ++-- 3 files changed, 247 insertions(+), 11 deletions(-) diff --git a/src/rdo.c b/src/rdo.c index eb687ef5..b4cc0c69 100644 --- a/src/rdo.c +++ b/src/rdo.c @@ -32,6 +32,7 @@ #include "cabac.h" #include "transform.h" #include "strategies/strategies-quant.h" +#include "inter.h" #define QUANT_SHIFT 14 @@ -890,3 +891,232 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff width, coef, dest_coeff); } } + +static uint32_t get_ep_ex_golomb_bitcost(uint32_t symbol, uint32_t count) { + int32_t num_bins = 0; + while (symbol >= (uint32_t)(1 << count)) { + ++num_bins; + symbol -= 1 << count; + ++count; + } + num_bins++; + + return num_bins; +} + +static uint32_t get_mvd_coding_cost(vector2d_t *mvd) { + uint32_t bitcost = 0; + const int32_t mvd_hor = mvd->x; + const int32_t mvd_ver = mvd->y; + const int8_t hor_abs_gr0 = mvd_hor != 0; + const int8_t ver_abs_gr0 = mvd_ver != 0; + const uint32_t mvd_hor_abs = abs(mvd_hor); + const uint32_t mvd_ver_abs = abs(mvd_ver); + + // Greater than 0 for x/y + bitcost += 2; + + if (hor_abs_gr0) { + if (mvd_hor_abs > 1) { + bitcost += get_ep_ex_golomb_bitcost(mvd_hor_abs - 2, 1) - 2; // TODO: tune the costs + } + // Greater than 1 + sign + bitcost += 2; + } + + if (ver_abs_gr0) { + if (mvd_ver_abs > 1) { + bitcost += get_ep_ex_golomb_bitcost(mvd_ver_abs - 2, 1) - 2; // TODO: tune the costs + } + // Greater than 1 + sign + bitcost += 2; + } + + return bitcost; +} + +int kvz_calc_mvd_cost_cabac(const encoder_state_t * const state, int x, int y, int mv_shift, + int16_t mv_cand[2][2], inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], 
+ int16_t num_cand, int32_t ref_idx, uint32_t *bitcost) { + + cabac_data_t state_cabac_copy; + cabac_data_t* cabac; + uint32_t temp_bitcost = 0; + uint32_t merge_idx; + int cand1_cost, cand2_cost; + vector2d_t mvd_temp1, mvd_temp2, mvd; + int8_t merged = 0; + int8_t cur_mv_cand = 0; + + x <<= mv_shift; + y <<= mv_shift; + + // Check every candidate to find a match + for (merge_idx = 0; merge_idx < (uint32_t)num_cand; merge_idx++) { + if (merge_cand[merge_idx].dir == 3) continue; + if (merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][0] == x && + merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][1] == y && + merge_cand[merge_idx].ref[merge_cand[merge_idx].dir - 1] == ref_idx) { + merged = 1; + break; + } + } + + if (!merged) { + mvd_temp1.x = x - mv_cand[0][0]; + mvd_temp1.y = y - mv_cand[0][1]; + cand1_cost = get_mvd_coding_cost(&mvd_temp1); + + mvd_temp2.x = x - mv_cand[1][0]; + mvd_temp2.y = y - mv_cand[1][1]; + cand2_cost = get_mvd_coding_cost(&mvd_temp2); + + // Select candidate 1 if it has lower cost + if (cand2_cost < cand1_cost) { + cur_mv_cand = 1; + mvd = mvd_temp2; + } else { + mvd = mvd_temp1; + } + } + + // Store cabac state and contexts + memcpy(&state_cabac_copy, &state->cabac, sizeof(cabac_data_t)); + + // Clear bytes and bits and set mode to "count" + state_cabac_copy.only_count = 1; + state_cabac_copy.num_buffered_bytes = 0; + state_cabac_copy.bits_left = 23; + + cabac = &state_cabac_copy; + cabac->stream = NULL; + + cabac->cur_ctx = &(cabac->ctx.cu_merge_flag_ext_model); + + CABAC_BIN(cabac, merged, "MergeFlag"); + num_cand = MRG_MAX_NUM_CANDS; + if (merged) { //merge + if (num_cand > 1) { + int32_t ui; + for (ui = 0; ui < num_cand - 1; ui++) { + int32_t symbol = (ui != merge_idx); + if (ui == 0) { + cabac->cur_ctx = &(cabac->ctx.cu_merge_idx_ext_model); + CABAC_BIN(cabac, symbol, "MergeIndex"); + } else { + CABAC_BIN_EP(cabac, symbol, "MergeIndex"); + } + if (symbol == 0) break; + } + } + } else { + uint32_t ref_list_idx; + uint32_t 
j; + int ref_list[2] = { 0, 0 }; + for (j = 0; j < state->global->ref->used_size; j++) { + if (state->global->ref->pocs[j] < state->global->poc) { + ref_list[0]++; + } else { + ref_list[1]++; + } + } + + // Void TEncSbac::codeInterDir( TComDataCU* pcCU, UInt uiAbsPartIdx ) + /* + if (state->global->slicetype == SLICE_B) { + // Code Inter Dir + uint8_t inter_dir = cur_cu->inter.mv_dir - 1; + uint8_t ctx = depth; + + + if (cur_cu->part_size == SIZE_2Nx2N || (LCU_WIDTH >> depth) != 8) { + cabac->cur_ctx = &(cabac->ctx.inter_dir[ctx]); + CABAC_BIN(cabac, (inter_dir == 2), "inter_pred_idc"); + } + if (inter_dir < 2) { + cabac->cur_ctx = &(cabac->ctx.inter_dir[4]); + CABAC_BIN(cabac, inter_dir, "inter_pred_idc"); + } + }*/ + + for (ref_list_idx = 0; ref_list_idx < 2; ref_list_idx++) { + if (/*cur_cu->inter.mv_dir*/ 1 & (1 << ref_list_idx)) { + if (ref_list[ref_list_idx] > 1) { + // parseRefFrmIdx + int32_t ref_frame = ref_idx /*cur_cu->inter.mv_ref_coded[ref_list_idx]*/; + + cabac->cur_ctx = &(cabac->ctx.cu_ref_pic_model[0]); + CABAC_BIN(cabac, (ref_frame != 0), "ref_idx_lX"); + + if (ref_frame > 0) { + int32_t i; + int32_t ref_num = ref_list[ref_list_idx] - 2; + + cabac->cur_ctx = &(cabac->ctx.cu_ref_pic_model[1]); + ref_frame--; + + for (i = 0; i < ref_num; ++i) { + const uint32_t symbol = (i == ref_frame) ? 
0 : 1; + + if (i == 0) { + CABAC_BIN(cabac, symbol, "ref_idx_lX"); + } else { + CABAC_BIN_EP(cabac, symbol, "ref_idx_lX"); + } + if (symbol == 0) break; + } + } + } + + if (!(/*pcCU->getSlice()->getMvdL1ZeroFlag() &&*/ state->global->ref_list == REF_PIC_LIST_1 && /*cur_cu->inter.mv_dir == 3*/ 0)) { + const int32_t mvd_hor = mvd.x; + const int32_t mvd_ver = mvd.y; + const int8_t hor_abs_gr0 = mvd_hor != 0; + const int8_t ver_abs_gr0 = mvd_ver != 0; + const uint32_t mvd_hor_abs = abs(mvd_hor); + const uint32_t mvd_ver_abs = abs(mvd_ver); + + cabac->cur_ctx = &(cabac->ctx.cu_mvd_model[0]); + CABAC_BIN(cabac, (mvd_hor != 0), "abs_mvd_greater0_flag_hor"); + CABAC_BIN(cabac, (mvd_ver != 0), "abs_mvd_greater0_flag_ver"); + + cabac->cur_ctx = &(cabac->ctx.cu_mvd_model[1]); + + if (hor_abs_gr0) { + CABAC_BIN(cabac, (mvd_hor_abs > 1), "abs_mvd_greater1_flag_hor"); + } + + if (ver_abs_gr0) { + CABAC_BIN(cabac, (mvd_ver_abs > 1), "abs_mvd_greater1_flag_ver"); + } + + if (hor_abs_gr0) { + if (mvd_hor_abs > 1) { + cabac_write_ep_ex_golomb(cabac, mvd_hor_abs - 2, 1); + } + + CABAC_BIN_EP(cabac, (mvd_hor > 0) ? 0 : 1, "mvd_sign_flag_hor"); + } + + if (ver_abs_gr0) { + if (mvd_ver_abs > 1) { + cabac_write_ep_ex_golomb(cabac, mvd_ver_abs - 2, 1); + } + + CABAC_BIN_EP(cabac, (mvd_ver > 0) ? 
0 : 1, "mvd_sign_flag_ver"); + } + } + + // Signal which candidate MV to use + cabac_write_unary_max_symbol(cabac, cabac->ctx.mvp_idx_model, /*mv_cand[ref_list_idx]*/cur_mv_cand, 1, + AMVP_MAX_NUM_CANDS - 1); + } + + } + } + + *bitcost = (23 - state_cabac_copy.bits_left) + (state_cabac_copy.num_buffered_bytes << 3); + + // Store bitcost before restoring cabac + return *bitcost * (int32_t)(state->global->cur_lambda_cost_sqrt + 0.5); +} diff --git a/src/rdo.h b/src/rdo.h index c3364e99..5824a9b4 100644 --- a/src/rdo.h +++ b/src/rdo.h @@ -29,6 +29,7 @@ #include "encoder.h" #include "encoderstate.h" +#include "inter.h" extern const uint32_t kvz_g_go_rice_range[5]; @@ -52,7 +53,11 @@ uint32_t kvz_get_coded_level(encoder_state_t * state, double* coded_cost, double uint32_t c1_idx, uint32_t c2_idx, int32_t q_bits,double temp, int8_t last, int8_t type); -extern const float kvz_f_entropy_bits[128]; -#define CTX_ENTROPY_FBITS(ctx,val) kvz_f_entropy_bits[(ctx)->uc_state ^ (val)] +int kvz_calc_mvd_cost_cabac(const encoder_state_t * const state, int x, int y, int mv_shift, + int16_t mv_cand[2][2], inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], + int16_t num_cand, int32_t ref_idx, uint32_t *bitcost); + +extern const float kvz_f_entropy_bits[128]; +#define CTX_ENTROPY_FBITS(ctx,val) kvz_f_entropy_bits[(ctx)->uc_state ^ (val)] #endif diff --git a/src/search_inter.c b/src/search_inter.c index 29f4ed83..73b1718d 100644 --- a/src/search_inter.c +++ b/src/search_inter.c @@ -29,6 +29,7 @@ #include "inter.h" #include "strategies/strategies-picture.h" #include "strategies/strategies-ipol.h" +#include "rdo.h" // Temporarily for debugging. 
@@ -553,7 +554,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y, block_width, block_width, max_lcu_below); - best_cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); + best_cost += kvz_calc_mvd_cost_cabac(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); best_bitcost = bitcost; best_index = num_cand; @@ -577,7 +578,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y, block_width, block_width, max_lcu_below); - cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); + cost += kvz_calc_mvd_cost_cabac(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, @@ -610,7 +611,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y, block_width, block_width, max_lcu_below); - cost += calc_mvd_cost(state, mv.x + pattern->x, mv.y + pattern->y, 2, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); + cost += kvz_calc_mvd_cost_cabac(state, mv.x + pattern->x, mv.y + pattern->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, 
"type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x, @@ -653,7 +654,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y, block_width, block_width, max_lcu_below); - cost += calc_mvd_cost(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); + cost += kvz_calc_mvd_cost_cabac(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs_iterative,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width, @@ -685,7 +686,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y, block_width, block_width, max_lcu_below); - cost += calc_mvd_cost(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); + cost += kvz_calc_mvd_cost_cabac(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=small_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, 
orig->x, orig->x + block_width, orig->y, orig->y + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width, @@ -855,7 +856,7 @@ static unsigned search_frac(const encoder_state_t * const state, cost = satd(tmp_pic,tmp_filtered); - cost += calc_mvd_cost(state, mv.x + pattern->x, mv.y + pattern->y, 1, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); + cost += kvz_calc_mvd_cost_cabac(state, mv.x + pattern->x, mv.y + pattern->y, 1, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); if (cost < best_cost) { best_cost = cost; @@ -891,7 +892,7 @@ static unsigned search_frac(const encoder_state_t * const state, cost = satd(tmp_pic,tmp_filtered); - cost += calc_mvd_cost(state, mv.x + pattern->x, mv.y + pattern->y, 0, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); + cost += kvz_calc_mvd_cost_cabac(state, mv.x + pattern->x, mv.y + pattern->y, 0, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); if (cost < best_cost) { best_cost = cost; @@ -1109,8 +1110,8 @@ int kvz_search_cu_inter(const encoder_state_t * const state, int x, int y, int d cost = satd(tmp_pic, tmp_block); - cost += calc_mvd_cost(state, merge_cand[i].mv[0][0], merge_cand[i].mv[0][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[0]); - cost += calc_mvd_cost(state, merge_cand[i].mv[1][0], merge_cand[i].mv[1][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[1]); + cost += kvz_calc_mvd_cost_cabac(state, merge_cand[i].mv[0][0], merge_cand[i].mv[0][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[0]); + cost += kvz_calc_mvd_cost_cabac(state, merge_cand[i].mv[1][0], merge_cand[i].mv[1][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[1]); if (cost < cur_cu->inter.cost) {