From ec2f4e0bac18f9c5b077713168fb91495ab5e17a Mon Sep 17 00:00:00 2001 From: Ari Lemmetti Date: Fri, 26 Nov 2021 19:32:45 +0200 Subject: [PATCH] Use double for RD costs in most places --- src/rdo.c | 20 ++++++++--------- src/search.c | 4 ++-- src/search_inter.c | 53 +++++++++++++++++++++++----------------------- src/search_inter.h | 2 +- src/search_intra.c | 5 ++--- src/transform.c | 8 +++---- 6 files changed, 45 insertions(+), 47 deletions(-) diff --git a/src/rdo.c b/src/rdo.c index ec713603..5403fa61 100644 --- a/src/rdo.c +++ b/src/rdo.c @@ -1029,15 +1029,15 @@ uint32_t kvz_get_mvd_coding_cost_cabac(const encoder_state_t *state, * \returns int * Calculates Motion Vector cost and related costs using CABAC coding */ -uint32_t kvz_calc_mvd_cost_cabac(const encoder_state_t * state, - int x, - int y, - int mv_shift, - int16_t mv_cand[2][2], - inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], - int16_t num_cand, - int32_t ref_idx, - uint32_t *bitcost) +double kvz_calc_mvd_cost_cabac(const encoder_state_t * state, + int x, + int y, + int mv_shift, + int16_t mv_cand[2][2], + inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], + int16_t num_cand, + int32_t ref_idx, + uint32_t *bitcost) { cabac_data_t state_cabac_copy; cabac_data_t* cabac; @@ -1174,7 +1174,7 @@ uint32_t kvz_calc_mvd_cost_cabac(const encoder_state_t * state, *bitcost = (23 - state_cabac_copy.bits_left) + (state_cabac_copy.num_buffered_bytes << 3); // Store bitcost before restoring cabac - return *bitcost * (uint32_t)(state->lambda_sqrt + 0.5); + return *bitcost * state->lambda_sqrt; } void kvz_close_rdcost_outfiles(void) diff --git a/src/search.c b/src/search.c index 909e7aa5..4345ad75 100644 --- a/src/search.c +++ b/src/search.c @@ -462,8 +462,8 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, const encoder_control_t* ctrl = state->encoder_control; const videoframe_t * const frame = state->tile->frame; int cu_width = LCU_WIDTH >> depth; - double cost = MAX_INT; - double inter_zero_coeff_cost = MAX_INT; + double cost = MAX_DOUBLE; + double inter_zero_coeff_cost = MAX_DOUBLE; uint32_t inter_bitcost = MAX_INT; cu_info_t *cur_cu; diff --git a/src/search_inter.c b/src/search_inter.c index 216bbb49..1b705e4f 100644 --- a/src/search_inter.c +++ b/src/search_inter.c @@ -85,7 +85,7 @@ typedef struct { /** * \brief Cost of best_mv */ - uint32_t best_cost; + double best_cost; /** * \brief Bit cost of best_mv */ @@ -390,15 +390,15 @@ static int select_mv_cand(const encoder_state_t *state, } -static uint32_t calc_mvd_cost(const encoder_state_t *state, - int x, - int y, - int mv_shift, - int16_t mv_cand[2][2], - inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], - int16_t num_cand, - int32_t ref_idx, - uint32_t *bitcost) +static double calc_mvd_cost(const encoder_state_t *state, + int x, + int y, + int mv_shift, + int16_t mv_cand[2][2], + inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], + int16_t num_cand, + int32_t ref_idx, + uint32_t *bitcost) { uint32_t temp_bitcost = 0; uint32_t merge_idx; @@ -428,7 +428,7 @@ static uint32_t calc_mvd_cost(const encoder_state_t *state, temp_bitcost += mvd_cost; } *bitcost = temp_bitcost; - return temp_bitcost*(int32_t)(state->lambda_sqrt + 0.5); + return temp_bitcost * state->lambda_sqrt; } @@ -624,7 +624,7 @@ static void tz_search(inter_search_info_t *info, vector2d_t extra_mv) const bool use_star_refinement = true; // enable step 4 mode 2 (only one mode will be executed) int best_dist = 0; - info->best_cost = UINT32_MAX; + info->best_cost = MAX_DOUBLE; // Select starting point from among merge candidates. These should // include both mv_cand vectors and (0, 0). @@ -732,7 +732,7 @@ static void hexagon_search(inter_search_info_t *info, vector2d_t extra_mv, uint3 { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } }; - info->best_cost = UINT32_MAX; + info->best_cost = MAX_DOUBLE; // Select starting point from among merge candidates. These should // include both mv_cand vectors and (0, 0). @@ -832,7 +832,7 @@ static void diamond_search(inter_search_info_t *info, vector2d_t extra_mv, uint3 {0, 0} }; - info->best_cost = UINT32_MAX; + info->best_cost = MAX_DOUBLE; // Select starting point from among merge candidates. These should // include both mv_cand vectors and (0, 0). @@ -997,11 +997,12 @@ static void search_frac(inter_search_info_t *info) // Set mv to pixel precision vector2d_t mv = { info->best_mv.x >> 2, info->best_mv.y >> 2 }; - unsigned best_cost = UINT32_MAX; + double best_cost = MAX_DOUBLE; uint32_t best_bitcost = 0; uint32_t bitcosts[4] = { 0 }; unsigned best_index = 0; +// Keep this as unsigned until SAD / SATD functions are updated unsigned costs[4] = { 0 }; ALIGNED(64) kvz_pixel filtered[4][LCU_LUMA_SIZE]; @@ -1338,7 +1339,7 @@ static void search_pu_inter_ref(inter_search_info_t *info, default: break; } - info->best_cost = UINT32_MAX; + info->best_cost = MAX_DOUBLE; switch (cfg->ime_algorithm) { case KVZ_IME_TZ: @@ -1365,7 +1366,7 @@ static void search_pu_inter_ref(inter_search_info_t *info, if (cfg->fme_level > 0 && info->best_cost < *inter_cost) { search_frac(info); - } else if (info->best_cost < UINT32_MAX) { + } else if (info->best_cost < MAX_DOUBLE) { // Recalculate inter cost with SATD. info->best_cost = kvz_image_calc_satd( info->state->tile->frame->source, @@ -1376,7 +1377,7 @@ static void search_pu_inter_ref(inter_search_info_t *info, info->state->tile->offset_y + info->origin.y + (info->best_mv.y >> 2), info->width, info->height); - info->best_cost += info->best_bitcost * (int)(info->state->lambda_sqrt + 0.5); + info->best_cost += info->best_bitcost * info->state->lambda_sqrt; } mv = info->best_mv; @@ -1504,7 +1505,7 @@ static void search_pu_inter_bipred(inter_search_info_t *info, const kvz_pixel *rec = &lcu->rec.y[SUB_SCU(y) * LCU_WIDTH + SUB_SCU(x)]; const kvz_pixel *src = &frame->source->y[x + y * frame->source->width]; - uint32_t cost = + double cost = kvz_satd_any_size(width, height, rec, LCU_WIDTH, src, frame->source->width); uint32_t bitcost[2] = { 0, 0 }; @@ -1529,7 +1530,7 @@ static void search_pu_inter_bipred(inter_search_info_t *info, merge_cand[j].ref[1] }; const int extra_bits = mv_ref_coded[0] + mv_ref_coded[1] + 2 /* mv dir cost */; - cost += info->state->lambda_sqrt * extra_bits + 0.5; + cost += info->state->lambda_sqrt * extra_bits; if (cost < *inter_cost) { cur_cu->inter.mv_dir = 3; @@ -1630,7 +1631,7 @@ static void search_pu_inter(encoder_state_t * const state, double *inter_cost, uint32_t *inter_bitcost) { - *inter_cost = MAX_INT; + *inter_cost = MAX_DOUBLE; *inter_bitcost = MAX_INT; const kvz_config *cfg = &state->encoder_control->cfg; @@ -1826,7 +1827,7 @@ static void search_pu_inter(encoder_state_t * const state, const kvz_pixel *rec = &lcu->rec.y[SUB_SCU(y) * LCU_WIDTH + SUB_SCU(x)]; const kvz_pixel *src = &lcu->ref.y[SUB_SCU(y) * LCU_WIDTH + SUB_SCU(x)]; - uint32_t cost = + double cost = kvz_satd_any_size(width, height, rec, LCU_WIDTH, src, LCU_WIDTH); uint32_t bitcost[2] = { 0, 0 }; @@ -1851,7 +1852,7 @@ static void search_pu_inter(encoder_state_t * const state, unipreds[1].inter.mv_ref[1] }; const int extra_bits = mv_ref_coded[0] + mv_ref_coded[1] + 2 /* mv dir cost */; - cost += info.state->lambda_sqrt * extra_bits + 0.5; + cost += info.state->lambda_sqrt * extra_bits; if (cost < *inter_cost) { cur_cu->inter.mv_dir = 3; @@ -2056,14 +2057,14 @@ void kvz_search_cu_smp(encoder_state_t * const state, cur_pu->depth = depth; cur_pu->qp = state->qp; - double cost = MAX_INT; + double cost = MAX_DOUBLE; uint32_t bitcost = MAX_INT; search_pu_inter(state, x, y, depth, part_mode, i, lcu, &cost, &bitcost); - if (cost >= MAX_INT) { + if (cost == MAX_DOUBLE) { // Could not find any motion vector. - *inter_cost = MAX_INT; + *inter_cost = MAX_DOUBLE; *inter_bitcost = MAX_INT; return; } diff --git a/src/search_inter.h b/src/search_inter.h index 0d7fb81b..8b4b16f2 100644 --- a/src/search_inter.h +++ b/src/search_inter.h @@ -64,7 +64,7 @@ enum hpel_position { HPEL_POS_DIA = 2 }; -typedef uint32_t kvz_mvd_cost_func(const encoder_state_t *state, +typedef double kvz_mvd_cost_func(const encoder_state_t *state, int x, int y, int mv_shift, int16_t mv_cand[2][2], diff --git a/src/search_intra.c b/src/search_intra.c index 9cf984db..6d3aa141 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -524,9 +524,8 @@ static int8_t search_intra_rough(encoder_state_t * const state, // Add prediction mode coding cost as the last thing. We don't want this // affecting the halving search. - int lambda_cost = (int)(state->lambda_sqrt + 0.5); for (int mode_i = 0; mode_i < modes_selected; ++mode_i) { - costs[mode_i] += lambda_cost * kvz_luma_mode_bits(state, modes[mode_i], intra_preds); + costs[mode_i] += state->lambda_sqrt * kvz_luma_mode_bits(state, modes[mode_i], intra_preds); } #undef PARALLEL_BLKS @@ -595,7 +594,7 @@ static int8_t search_intra_rdo(encoder_state_t * const state, for(int rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode ++) { int rdo_bitcost = kvz_luma_mode_bits(state, modes[rdo_mode], intra_preds); - costs[rdo_mode] = rdo_bitcost * (int)(state->lambda + 0.5); + costs[rdo_mode] = rdo_bitcost * state->lambda; // Perform transform split search and save mode RD cost for the best one. cu_info_t pred_cu; diff --git a/src/transform.c b/src/transform.c index f8e6325f..7a339e27 100644 --- a/src/transform.c +++ b/src/transform.c @@ -250,25 +250,23 @@ int kvz_quantize_residual_trskip( struct { kvz_pixel rec[4*4]; coeff_t coeff[4*4]; - uint32_t cost; + double cost; int has_coeffs; } skip, noskip, *best; - - const int bit_cost = (int)(state->lambda + 0.5); noskip.has_coeffs = kvz_quantize_residual( state, cur_cu, width, color, scan_order, 0, in_stride, 4, ref_in, pred_in, noskip.rec, noskip.coeff, false); noskip.cost = kvz_pixels_calc_ssd(ref_in, noskip.rec, in_stride, 4, 4); - noskip.cost += kvz_get_coeff_cost(state, noskip.coeff, 4, 0, scan_order) * bit_cost; + noskip.cost += kvz_get_coeff_cost(state, noskip.coeff, 4, 0, scan_order) * state->lambda; skip.has_coeffs = kvz_quantize_residual( state, cur_cu, width, color, scan_order, 1, in_stride, 4, ref_in, pred_in, skip.rec, skip.coeff, false); skip.cost = kvz_pixels_calc_ssd(ref_in, skip.rec, in_stride, 4, 4); - skip.cost += kvz_get_coeff_cost(state, skip.coeff, 4, 0, scan_order) * bit_cost; + skip.cost += kvz_get_coeff_cost(state, skip.coeff, 4, 0, scan_order) * state->lambda; if (noskip.cost <= skip.cost) { *trskip_out = 0;