Use double for RD costs in most places

This commit is contained in:
Ari Lemmetti 2021-11-26 19:32:45 +02:00
parent d6b2ec5814
commit ec2f4e0bac
6 changed files with 45 additions and 47 deletions

View file

@ -1029,15 +1029,15 @@ uint32_t kvz_get_mvd_coding_cost_cabac(const encoder_state_t *state,
* \returns double
* Calculates Motion Vector cost and related costs using CABAC coding
*/
uint32_t kvz_calc_mvd_cost_cabac(const encoder_state_t * state,
int x,
int y,
int mv_shift,
int16_t mv_cand[2][2],
inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
int16_t num_cand,
int32_t ref_idx,
uint32_t *bitcost)
double kvz_calc_mvd_cost_cabac(const encoder_state_t * state,
int x,
int y,
int mv_shift,
int16_t mv_cand[2][2],
inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
int16_t num_cand,
int32_t ref_idx,
uint32_t *bitcost)
{
cabac_data_t state_cabac_copy;
cabac_data_t* cabac;
@ -1174,7 +1174,7 @@ uint32_t kvz_calc_mvd_cost_cabac(const encoder_state_t * state,
*bitcost = (23 - state_cabac_copy.bits_left) + (state_cabac_copy.num_buffered_bytes << 3);
// Store bitcost before restoring cabac
return *bitcost * (uint32_t)(state->lambda_sqrt + 0.5);
return *bitcost * state->lambda_sqrt;
}
void kvz_close_rdcost_outfiles(void)

View file

@ -462,8 +462,8 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
const encoder_control_t* ctrl = state->encoder_control;
const videoframe_t * const frame = state->tile->frame;
int cu_width = LCU_WIDTH >> depth;
double cost = MAX_INT;
double inter_zero_coeff_cost = MAX_INT;
double cost = MAX_DOUBLE;
double inter_zero_coeff_cost = MAX_DOUBLE;
uint32_t inter_bitcost = MAX_INT;
cu_info_t *cur_cu;

View file

@ -85,7 +85,7 @@ typedef struct {
/**
* \brief Cost of best_mv
*/
uint32_t best_cost;
double best_cost;
/**
* \brief Bit cost of best_mv
*/
@ -390,15 +390,15 @@ static int select_mv_cand(const encoder_state_t *state,
}
static uint32_t calc_mvd_cost(const encoder_state_t *state,
int x,
int y,
int mv_shift,
int16_t mv_cand[2][2],
inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
int16_t num_cand,
int32_t ref_idx,
uint32_t *bitcost)
static double calc_mvd_cost(const encoder_state_t *state,
int x,
int y,
int mv_shift,
int16_t mv_cand[2][2],
inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
int16_t num_cand,
int32_t ref_idx,
uint32_t *bitcost)
{
uint32_t temp_bitcost = 0;
uint32_t merge_idx;
@ -428,7 +428,7 @@ static uint32_t calc_mvd_cost(const encoder_state_t *state,
temp_bitcost += mvd_cost;
}
*bitcost = temp_bitcost;
return temp_bitcost*(int32_t)(state->lambda_sqrt + 0.5);
return temp_bitcost * state->lambda_sqrt;
}
@ -624,7 +624,7 @@ static void tz_search(inter_search_info_t *info, vector2d_t extra_mv)
const bool use_star_refinement = true; // enable step 4 mode 2 (only one mode will be executed)
int best_dist = 0;
info->best_cost = UINT32_MAX;
info->best_cost = MAX_DOUBLE;
// Select starting point from among merge candidates. These should
// include both mv_cand vectors and (0, 0).
@ -732,7 +732,7 @@ static void hexagon_search(inter_search_info_t *info, vector2d_t extra_mv, uint3
{ -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 }
};
info->best_cost = UINT32_MAX;
info->best_cost = MAX_DOUBLE;
// Select starting point from among merge candidates. These should
// include both mv_cand vectors and (0, 0).
@ -832,7 +832,7 @@ static void diamond_search(inter_search_info_t *info, vector2d_t extra_mv, uint3
{0, 0}
};
info->best_cost = UINT32_MAX;
info->best_cost = MAX_DOUBLE;
// Select starting point from among merge candidates. These should
// include both mv_cand vectors and (0, 0).
@ -997,11 +997,12 @@ static void search_frac(inter_search_info_t *info)
// Set mv to pixel precision
vector2d_t mv = { info->best_mv.x >> 2, info->best_mv.y >> 2 };
unsigned best_cost = UINT32_MAX;
double best_cost = MAX_DOUBLE;
uint32_t best_bitcost = 0;
uint32_t bitcosts[4] = { 0 };
unsigned best_index = 0;
// Keep this as unsigned until SAD / SATD functions are updated
unsigned costs[4] = { 0 };
ALIGNED(64) kvz_pixel filtered[4][LCU_LUMA_SIZE];
@ -1338,7 +1339,7 @@ static void search_pu_inter_ref(inter_search_info_t *info,
default: break;
}
info->best_cost = UINT32_MAX;
info->best_cost = MAX_DOUBLE;
switch (cfg->ime_algorithm) {
case KVZ_IME_TZ:
@ -1365,7 +1366,7 @@ static void search_pu_inter_ref(inter_search_info_t *info,
if (cfg->fme_level > 0 && info->best_cost < *inter_cost) {
search_frac(info);
} else if (info->best_cost < UINT32_MAX) {
} else if (info->best_cost < MAX_DOUBLE) {
// Recalculate inter cost with SATD.
info->best_cost = kvz_image_calc_satd(
info->state->tile->frame->source,
@ -1376,7 +1377,7 @@ static void search_pu_inter_ref(inter_search_info_t *info,
info->state->tile->offset_y + info->origin.y + (info->best_mv.y >> 2),
info->width,
info->height);
info->best_cost += info->best_bitcost * (int)(info->state->lambda_sqrt + 0.5);
info->best_cost += info->best_bitcost * info->state->lambda_sqrt;
}
mv = info->best_mv;
@ -1504,7 +1505,7 @@ static void search_pu_inter_bipred(inter_search_info_t *info,
const kvz_pixel *rec = &lcu->rec.y[SUB_SCU(y) * LCU_WIDTH + SUB_SCU(x)];
const kvz_pixel *src = &frame->source->y[x + y * frame->source->width];
uint32_t cost =
double cost =
kvz_satd_any_size(width, height, rec, LCU_WIDTH, src, frame->source->width);
uint32_t bitcost[2] = { 0, 0 };
@ -1529,7 +1530,7 @@ static void search_pu_inter_bipred(inter_search_info_t *info,
merge_cand[j].ref[1]
};
const int extra_bits = mv_ref_coded[0] + mv_ref_coded[1] + 2 /* mv dir cost */;
cost += info->state->lambda_sqrt * extra_bits + 0.5;
cost += info->state->lambda_sqrt * extra_bits;
if (cost < *inter_cost) {
cur_cu->inter.mv_dir = 3;
@ -1630,7 +1631,7 @@ static void search_pu_inter(encoder_state_t * const state,
double *inter_cost,
uint32_t *inter_bitcost)
{
*inter_cost = MAX_INT;
*inter_cost = MAX_DOUBLE;
*inter_bitcost = MAX_INT;
const kvz_config *cfg = &state->encoder_control->cfg;
@ -1826,7 +1827,7 @@ static void search_pu_inter(encoder_state_t * const state,
const kvz_pixel *rec = &lcu->rec.y[SUB_SCU(y) * LCU_WIDTH + SUB_SCU(x)];
const kvz_pixel *src = &lcu->ref.y[SUB_SCU(y) * LCU_WIDTH + SUB_SCU(x)];
uint32_t cost =
double cost =
kvz_satd_any_size(width, height, rec, LCU_WIDTH, src, LCU_WIDTH);
uint32_t bitcost[2] = { 0, 0 };
@ -1851,7 +1852,7 @@ static void search_pu_inter(encoder_state_t * const state,
unipreds[1].inter.mv_ref[1]
};
const int extra_bits = mv_ref_coded[0] + mv_ref_coded[1] + 2 /* mv dir cost */;
cost += info.state->lambda_sqrt * extra_bits + 0.5;
cost += info.state->lambda_sqrt * extra_bits;
if (cost < *inter_cost) {
cur_cu->inter.mv_dir = 3;
@ -2056,14 +2057,14 @@ void kvz_search_cu_smp(encoder_state_t * const state,
cur_pu->depth = depth;
cur_pu->qp = state->qp;
double cost = MAX_INT;
double cost = MAX_DOUBLE;
uint32_t bitcost = MAX_INT;
search_pu_inter(state, x, y, depth, part_mode, i, lcu, &cost, &bitcost);
if (cost >= MAX_INT) {
if (cost == MAX_DOUBLE) {
// Could not find any motion vector.
*inter_cost = MAX_INT;
*inter_cost = MAX_DOUBLE;
*inter_bitcost = MAX_INT;
return;
}

View file

@ -64,7 +64,7 @@ enum hpel_position {
HPEL_POS_DIA = 2
};
typedef uint32_t kvz_mvd_cost_func(const encoder_state_t *state,
typedef double kvz_mvd_cost_func(const encoder_state_t *state,
int x, int y,
int mv_shift,
int16_t mv_cand[2][2],

View file

@ -524,9 +524,8 @@ static int8_t search_intra_rough(encoder_state_t * const state,
// Add prediction mode coding cost as the last thing. We don't want this
// affecting the halving search.
int lambda_cost = (int)(state->lambda_sqrt + 0.5);
for (int mode_i = 0; mode_i < modes_selected; ++mode_i) {
costs[mode_i] += lambda_cost * kvz_luma_mode_bits(state, modes[mode_i], intra_preds);
costs[mode_i] += state->lambda_sqrt * kvz_luma_mode_bits(state, modes[mode_i], intra_preds);
}
#undef PARALLEL_BLKS
@ -595,7 +594,7 @@ static int8_t search_intra_rdo(encoder_state_t * const state,
for(int rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode ++) {
int rdo_bitcost = kvz_luma_mode_bits(state, modes[rdo_mode], intra_preds);
costs[rdo_mode] = rdo_bitcost * (int)(state->lambda + 0.5);
costs[rdo_mode] = rdo_bitcost * state->lambda;
// Perform transform split search and save mode RD cost for the best one.
cu_info_t pred_cu;

View file

@ -250,25 +250,23 @@ int kvz_quantize_residual_trskip(
struct {
kvz_pixel rec[4*4];
coeff_t coeff[4*4];
uint32_t cost;
double cost;
int has_coeffs;
} skip, noskip, *best;
const int bit_cost = (int)(state->lambda + 0.5);
noskip.has_coeffs = kvz_quantize_residual(
state, cur_cu, width, color, scan_order,
0, in_stride, 4,
ref_in, pred_in, noskip.rec, noskip.coeff, false);
noskip.cost = kvz_pixels_calc_ssd(ref_in, noskip.rec, in_stride, 4, 4);
noskip.cost += kvz_get_coeff_cost(state, noskip.coeff, 4, 0, scan_order) * bit_cost;
noskip.cost += kvz_get_coeff_cost(state, noskip.coeff, 4, 0, scan_order) * state->lambda;
skip.has_coeffs = kvz_quantize_residual(
state, cur_cu, width, color, scan_order,
1, in_stride, 4,
ref_in, pred_in, skip.rec, skip.coeff, false);
skip.cost = kvz_pixels_calc_ssd(ref_in, skip.rec, in_stride, 4, 4);
skip.cost += kvz_get_coeff_cost(state, skip.coeff, 4, 0, scan_order) * bit_cost;
skip.cost += kvz_get_coeff_cost(state, skip.coeff, 4, 0, scan_order) * state->lambda;
if (noskip.cost <= skip.cost) {
*trskip_out = 0;