Use double for RD costs in most places

2024-11-27 19:24:06 +00:00 · 2021-11-26 19:32:45 +02:00 · 2021-11-26 19:32:45 +02:00 · ec2f4e0bac
parent d6b2ec5814
commit ec2f4e0bac
6 changed files with 45 additions and 47 deletions
--- a/src/rdo.c
+++ b/src/rdo.c
@@ -1029,15 +1029,15 @@ uint32_t kvz_get_mvd_coding_cost_cabac(const encoder_state_t *state,
 * \returns int
 * Calculates Motion Vector cost and related costs using CABAC coding
 */
-uint32_t kvz_calc_mvd_cost_cabac(const encoder_state_t * state,
-                                 int x,
-                                 int y,
-                                 int mv_shift,
-                                 int16_t mv_cand[2][2],
-                                 inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
-                                 int16_t num_cand,
-                                 int32_t ref_idx,
-                                 uint32_t *bitcost)
+double kvz_calc_mvd_cost_cabac(const encoder_state_t * state,
+                               int x,
+                               int y,
+                               int mv_shift,
+                               int16_t mv_cand[2][2],
+                               inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
+                               int16_t num_cand,
+                               int32_t ref_idx,
+                               uint32_t *bitcost)
 {
  cabac_data_t state_cabac_copy;
  cabac_data_t* cabac;
@@ -1174,7 +1174,7 @@ uint32_t kvz_calc_mvd_cost_cabac(const encoder_state_t * state,
  *bitcost = (23 - state_cabac_copy.bits_left) + (state_cabac_copy.num_buffered_bytes << 3);

  // Store bitcost before restoring cabac
-  return *bitcost * (uint32_t)(state->lambda_sqrt + 0.5);
+  return *bitcost * state->lambda_sqrt;
 }

 void kvz_close_rdcost_outfiles(void)
--- a/src/search.c
+++ b/src/search.c
@@ -462,8 +462,8 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
  const encoder_control_t* ctrl = state->encoder_control;
  const videoframe_t * const frame = state->tile->frame;
  int cu_width = LCU_WIDTH >> depth;
-  double cost = MAX_INT;
-  double inter_zero_coeff_cost = MAX_INT;
+  double cost = MAX_DOUBLE;
+  double inter_zero_coeff_cost = MAX_DOUBLE;
  uint32_t inter_bitcost = MAX_INT;
  cu_info_t *cur_cu;

--- a/src/search_inter.c
+++ b/src/search_inter.c
@@ -85,7 +85,7 @@ typedef struct {
  /**
   * \brief Cost of best_mv
   */
-  uint32_t best_cost;
+  double best_cost;
  /**
   * \brief Bit cost of best_mv
   */
@@ -390,15 +390,15 @@ static int select_mv_cand(const encoder_state_t *state,
 }


-static uint32_t calc_mvd_cost(const encoder_state_t *state,
-                              int x,
-                              int y,
-                              int mv_shift,
-                              int16_t mv_cand[2][2],
-                              inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
-                              int16_t num_cand,
-                              int32_t ref_idx,
-                              uint32_t *bitcost)
+static double calc_mvd_cost(const encoder_state_t *state,
+                            int x,
+                            int y,
+                            int mv_shift,
+                            int16_t mv_cand[2][2],
+                            inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
+                            int16_t num_cand,
+                            int32_t ref_idx,
+                            uint32_t *bitcost)
 {
  uint32_t temp_bitcost = 0;
  uint32_t merge_idx;
@@ -428,7 +428,7 @@ static uint32_t calc_mvd_cost(const encoder_state_t *state,
    temp_bitcost += mvd_cost;
  }
  *bitcost = temp_bitcost;
-  return temp_bitcost*(int32_t)(state->lambda_sqrt + 0.5);
+  return temp_bitcost * state->lambda_sqrt;
 }


@@ -624,7 +624,7 @@ static void tz_search(inter_search_info_t *info, vector2d_t extra_mv)
  const bool use_star_refinement = true;   // enable step 4 mode 2 (only one mode will be executed)

  int best_dist = 0;
-  info->best_cost = UINT32_MAX;
+  info->best_cost = MAX_DOUBLE;

  // Select starting point from among merge candidates. These should
  // include both mv_cand vectors and (0, 0).
@@ -732,7 +732,7 @@ static void hexagon_search(inter_search_info_t *info, vector2d_t extra_mv, uint3
      { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 }
  };

-  info->best_cost = UINT32_MAX;
+  info->best_cost = MAX_DOUBLE;

  // Select starting point from among merge candidates. These should
  // include both mv_cand vectors and (0, 0).
@@ -832,7 +832,7 @@ static void diamond_search(inter_search_info_t *info, vector2d_t extra_mv, uint3
    {0, 0}
  };

-  info->best_cost = UINT32_MAX;
+  info->best_cost = MAX_DOUBLE;

  // Select starting point from among merge candidates. These should
  // include both mv_cand vectors and (0, 0).
@@ -997,11 +997,12 @@ static void search_frac(inter_search_info_t *info)
  // Set mv to pixel precision
  vector2d_t mv = { info->best_mv.x >> 2, info->best_mv.y >> 2 };

-  unsigned best_cost = UINT32_MAX;
+  double best_cost = MAX_DOUBLE;
  uint32_t best_bitcost = 0;
  uint32_t bitcosts[4] = { 0 };
  unsigned best_index = 0;

+// Keep this as unsigned until SAD / SATD functions are updated
  unsigned costs[4] = { 0 };

  ALIGNED(64) kvz_pixel filtered[4][LCU_LUMA_SIZE];
@@ -1338,7 +1339,7 @@ static void search_pu_inter_ref(inter_search_info_t *info,
    default: break;
  }

-  info->best_cost = UINT32_MAX;
+  info->best_cost = MAX_DOUBLE;

  switch (cfg->ime_algorithm) {
    case KVZ_IME_TZ:
@@ -1365,7 +1366,7 @@ static void search_pu_inter_ref(inter_search_info_t *info,
  if (cfg->fme_level > 0 && info->best_cost < *inter_cost) {
    search_frac(info);

-  } else if (info->best_cost < UINT32_MAX) {
+  } else if (info->best_cost < MAX_DOUBLE) {
    // Recalculate inter cost with SATD.
    info->best_cost = kvz_image_calc_satd(
        info->state->tile->frame->source,
@@ -1376,7 +1377,7 @@ static void search_pu_inter_ref(inter_search_info_t *info,
        info->state->tile->offset_y + info->origin.y + (info->best_mv.y >> 2),
        info->width,
        info->height);
-    info->best_cost += info->best_bitcost * (int)(info->state->lambda_sqrt + 0.5);
+    info->best_cost += info->best_bitcost * info->state->lambda_sqrt;
  }

  mv = info->best_mv;
@@ -1504,7 +1505,7 @@ static void search_pu_inter_bipred(inter_search_info_t *info,

    const kvz_pixel *rec = &lcu->rec.y[SUB_SCU(y) * LCU_WIDTH + SUB_SCU(x)];
    const kvz_pixel *src = &frame->source->y[x + y * frame->source->width];
-    uint32_t cost =
+    double cost =
      kvz_satd_any_size(width, height, rec, LCU_WIDTH, src, frame->source->width);

    uint32_t bitcost[2] = { 0, 0 };
@@ -1529,7 +1530,7 @@ static void search_pu_inter_bipred(inter_search_info_t *info,
      merge_cand[j].ref[1]
    };
    const int extra_bits = mv_ref_coded[0] + mv_ref_coded[1] + 2 /* mv dir cost */;
-    cost += info->state->lambda_sqrt * extra_bits + 0.5;
+    cost += info->state->lambda_sqrt * extra_bits;

    if (cost < *inter_cost) {
      cur_cu->inter.mv_dir = 3;
@@ -1630,7 +1631,7 @@ static void search_pu_inter(encoder_state_t * const state,
                            double *inter_cost,
                            uint32_t *inter_bitcost)
 {
-  *inter_cost = MAX_INT;
+  *inter_cost = MAX_DOUBLE;
  *inter_bitcost = MAX_INT;

  const kvz_config *cfg = &state->encoder_control->cfg;
@@ -1826,7 +1827,7 @@ static void search_pu_inter(encoder_state_t * const state,

      const kvz_pixel *rec = &lcu->rec.y[SUB_SCU(y) * LCU_WIDTH + SUB_SCU(x)];
      const kvz_pixel *src = &lcu->ref.y[SUB_SCU(y) * LCU_WIDTH + SUB_SCU(x)];
-      uint32_t cost =
+      double cost =
        kvz_satd_any_size(width, height, rec, LCU_WIDTH, src, LCU_WIDTH);

      uint32_t bitcost[2] = { 0, 0 };
@@ -1851,7 +1852,7 @@ static void search_pu_inter(encoder_state_t * const state,
        unipreds[1].inter.mv_ref[1]
      };
      const int extra_bits = mv_ref_coded[0] + mv_ref_coded[1] + 2 /* mv dir cost */;
-      cost += info.state->lambda_sqrt * extra_bits + 0.5;
+      cost += info.state->lambda_sqrt * extra_bits;

      if (cost < *inter_cost) {
        cur_cu->inter.mv_dir = 3;
@@ -2056,14 +2057,14 @@ void kvz_search_cu_smp(encoder_state_t * const state,
    cur_pu->depth     = depth;
    cur_pu->qp        = state->qp;

-    double cost      = MAX_INT;
+    double cost      = MAX_DOUBLE;
    uint32_t bitcost = MAX_INT;

    search_pu_inter(state, x, y, depth, part_mode, i, lcu, &cost, &bitcost);

-    if (cost >= MAX_INT) {
+    if (cost == MAX_DOUBLE) {
      // Could not find any motion vector.
-      *inter_cost    = MAX_INT;
+      *inter_cost    = MAX_DOUBLE;
      *inter_bitcost = MAX_INT;
      return;
    }
--- a/src/search_inter.h
+++ b/src/search_inter.h
@@ -64,7 +64,7 @@ enum hpel_position {
  HPEL_POS_DIA = 2
 };

-typedef uint32_t kvz_mvd_cost_func(const encoder_state_t *state,
+typedef double kvz_mvd_cost_func(const encoder_state_t *state,
                                  int x, int y,
                                  int mv_shift,
                                  int16_t mv_cand[2][2],
--- a/src/search_intra.c
+++ b/src/search_intra.c
@@ -524,9 +524,8 @@ static int8_t search_intra_rough(encoder_state_t * const state,

  // Add prediction mode coding cost as the last thing. We don't want this
  // affecting the halving search.
-  int lambda_cost = (int)(state->lambda_sqrt + 0.5);
  for (int mode_i = 0; mode_i < modes_selected; ++mode_i) {
-    costs[mode_i] += lambda_cost * kvz_luma_mode_bits(state, modes[mode_i], intra_preds);
+    costs[mode_i] += state->lambda_sqrt * kvz_luma_mode_bits(state, modes[mode_i], intra_preds);
  }

  #undef PARALLEL_BLKS
@@ -595,7 +594,7 @@ static int8_t search_intra_rdo(encoder_state_t * const state,

  for(int rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode ++) {
    int rdo_bitcost = kvz_luma_mode_bits(state, modes[rdo_mode], intra_preds);
-    costs[rdo_mode] = rdo_bitcost * (int)(state->lambda + 0.5);
+    costs[rdo_mode] = rdo_bitcost * state->lambda;

    // Perform transform split search and save mode RD cost for the best one.
    cu_info_t pred_cu;
--- a/src/transform.c
+++ b/src/transform.c
@@ -250,25 +250,23 @@ int kvz_quantize_residual_trskip(
  struct {
    kvz_pixel rec[4*4];
    coeff_t coeff[4*4];
-    uint32_t cost;
+    double cost;
    int has_coeffs;
  } skip, noskip, *best;
  
-  const int bit_cost = (int)(state->lambda + 0.5);
-  
  noskip.has_coeffs = kvz_quantize_residual(
      state, cur_cu, width, color, scan_order,
      0, in_stride, 4,
      ref_in, pred_in, noskip.rec, noskip.coeff, false);
  noskip.cost = kvz_pixels_calc_ssd(ref_in, noskip.rec, in_stride, 4, 4);
-  noskip.cost += kvz_get_coeff_cost(state, noskip.coeff, 4, 0, scan_order) * bit_cost;
+  noskip.cost += kvz_get_coeff_cost(state, noskip.coeff, 4, 0, scan_order) * state->lambda;

  skip.has_coeffs = kvz_quantize_residual(
    state, cur_cu, width, color, scan_order,
    1, in_stride, 4,
    ref_in, pred_in, skip.rec, skip.coeff, false);
  skip.cost = kvz_pixels_calc_ssd(ref_in, skip.rec, in_stride, 4, 4);
-  skip.cost += kvz_get_coeff_cost(state, skip.coeff, 4, 0, scan_order) * bit_cost;
+  skip.cost += kvz_get_coeff_cost(state, skip.coeff, 4, 0, scan_order) * state->lambda;

  if (noskip.cost <= skip.cost) {
    *trskip_out = 0;