Preparation for accurately counting inter bits

2024-11-27 19:24:06 +00:00 · 2021-12-13 10:43:19 +02:00 · 2021-12-13 10:43:19 +02:00 · d8648fe1de
parent 311fceade7
commit d8648fe1de
11 changed files with 96 additions and 80 deletions
--- a/src/encode_coding_tree.c
+++ b/src/encode_coding_tree.c
@@ -308,11 +308,11 @@ static void encode_transform_coeff(encoder_state_t * const state,
  }
 }

-static void encode_inter_prediction_unit(encoder_state_t * const state,
+void kvz_encode_inter_prediction_unit(encoder_state_t * const state,
                                         cabac_data_t * const cabac,
                                         const cu_info_t * const cur_cu,
                                         int x, int y, int width, int height,
-                                         int depth)
+                                         int depth, lcu_t* lcu)
 {
  // Mergeflag
  int16_t num_cand = 0;
@@ -385,10 +385,20 @@ static void encode_inter_prediction_unit(encoder_state_t * const state,
      if (state->frame->ref_list != REF_PIC_LIST_1 || cur_cu->inter.mv_dir != 3) {

        int16_t mv_cand[2][2];
-        kvz_inter_get_mv_cand_cua(
+        if (lcu) {
+          kvz_inter_get_mv_cand(
            state, 
            x, y, width, height,
-            mv_cand, cur_cu, ref_list_idx);
+            mv_cand, cur_cu, 
+            lcu, ref_list_idx);
+        }
+        else {
+          kvz_inter_get_mv_cand_cua(
+            state,
+            x, y, width, height,
+            mv_cand, cur_cu, ref_list_idx
+          );
+        }

        uint8_t cu_mv_cand = CU_GET_MV_CAND(cur_cu, ref_list_idx);
        const int32_t mvd_hor = cur_cu->inter.mv[ref_list_idx][0] - mv_cand[cu_mv_cand][0];
@@ -855,7 +865,7 @@ void kvz_encode_coding_tree(encoder_state_t * const state,
      const int pu_h = PU_GET_H(cur_cu->part_size, cu_width, i);
      const cu_info_t *cur_pu = kvz_cu_array_at_const(frame->cu_array, pu_x, pu_y);

-      encode_inter_prediction_unit(state, cabac, cur_pu, pu_x, pu_y, pu_w, pu_h, depth);
+      kvz_encode_inter_prediction_unit(state, cabac, cur_pu, pu_x, pu_y, pu_w, pu_h, depth, NULL);
    }

    {
--- a/src/encode_coding_tree.h
+++ b/src/encode_coding_tree.h
@@ -51,6 +51,12 @@ void kvz_encode_mvd(encoder_state_t * const state,
                    int32_t mvd_hor,
                    int32_t mvd_ver);

+void kvz_encode_inter_prediction_unit(encoder_state_t* const state,
+  cabac_data_t* const cabac,
+  const cu_info_t* const cur_cu,
+  int x, int y, int width, int height,
+  int depth, lcu_t* lcu);
+
 void kvz_encode_last_significant_xy(cabac_data_t * const cabac,
                                    uint8_t lastpos_x, uint8_t lastpos_y,
                                    uint8_t width, uint8_t height,
--- a/src/fast_coeff_cost.c
+++ b/src/fast_coeff_cost.c
@@ -40,7 +40,7 @@ static uint16_t to_q88(float f)
  return (uint16_t)(f * 256.0f + 0.5f);
 }

-static uint64_t to_4xq88(const float f[4])
+static uint64_t to_4xq88(const double f[4])
 {
  int i;
  uint64_t result = 0;
@@ -58,9 +58,9 @@ int kvz_fast_coeff_table_parse(fast_coeff_table_t *fast_coeff_table, FILE *fast_
  uint64_t *wts_by_qp = fast_coeff_table->wts_by_qp;

  for (i = 0; i < MAX_FAST_COEFF_COST_QP; i++) {
-    float curr_wts[4];
+    double curr_wts[4];

-    if (fscanf(fast_coeff_table_f, "%f %f %f %f\n", curr_wts + 0,
+    if (fscanf(fast_coeff_table_f, "%lf %lf %lf %lf\n", curr_wts + 0,
                                                    curr_wts + 1,
                                                    curr_wts + 2,
                                                    curr_wts + 3) != 4) {
--- a/src/fast_coeff_cost.h
+++ b/src/fast_coeff_cost.h
@@ -45,7 +45,7 @@ typedef struct {

 // Weights for 4 buckets (coeff 0, coeff 1, coeff 2, coeff >= 3), for QPs from
 // 0 to MAX_FAST_COEFF_COST_QP
-static const float default_fast_coeff_cost_wts[][4] = {
+static const double default_fast_coeff_cost_wts[][4] = {
  // Just extend it by stretching the first actual values..
  {0.164240, 4.161530, 3.509033, 6.928047},
  {0.164240, 4.161530, 3.509033, 6.928047},
--- a/src/global.h
+++ b/src/global.h
@@ -110,7 +110,7 @@

 typedef int16_t coeff_t;

-// #define VERBOSE 1
+#define VERBOSE 1

 /* CONFIG VARIABLES */

--- a/src/inter.c
+++ b/src/inter.c
@@ -1228,7 +1228,7 @@ static void get_mv_cand_from_candidates(const encoder_state_t * const state,
                                        int32_t width,
                                        int32_t height,
                                        const merge_candidates_t *merge_cand,
-                                        const cu_info_t *cur_cu,
+                                        const cu_info_t * const cur_cu,
                                        int8_t reflist,
                                        int16_t mv_cand[2][2])
 {
@@ -1335,7 +1335,7 @@ void kvz_inter_get_mv_cand(const encoder_state_t * const state,
                           int32_t width,
                           int32_t height,
                           int16_t mv_cand[2][2],
-                           cu_info_t* cur_cu,
+                           const cu_info_t  * const cur_cu,
                           lcu_t *lcu,
                           int8_t reflist)
 {
--- a/src/inter.h
+++ b/src/inter.h
@@ -88,7 +88,7 @@ void kvz_inter_get_mv_cand(const encoder_state_t * const state,
                           int32_t width,
                           int32_t height,
                           int16_t mv_cand[2][2],
-                           cu_info_t* cur_cu,
+                           const cu_info_t* cur_cu,
                           lcu_t *lcu,
                           int8_t reflist);

--- a/src/search.c
+++ b/src/search.c
@@ -318,7 +318,7 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
      // However, if we have different tr_depth, the bits cannot be written in correct
      // order anyways so do not touch the chroma cbf here.
      if (state->encoder_control->chroma_format != KVZ_CSP_400) {
-        const cabac_ctx_t* cr_ctx = &(state->search_cabac.ctx.qt_cbf_model_chroma[tr_depth]);
+        cabac_ctx_t* cr_ctx = &(state->search_cabac.ctx.qt_cbf_model_chroma[tr_depth]);
        cabac->cur_ctx = cr_ctx;
        int u_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U);
        int v_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_V);
@@ -536,7 +536,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
  int cu_width = LCU_WIDTH >> depth;
  double cost = MAX_DOUBLE;
  double inter_zero_coeff_cost = MAX_DOUBLE;
-  uint32_t inter_bitcost = MAX_INT;
+  double inter_bitcost = MAX_INT;
  cu_info_t *cur_cu;
  cabac_data_t pre_search_cabac;
  memcpy(&pre_search_cabac, &state->search_cabac, sizeof(pre_search_cabac));
@@ -600,7 +600,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,

    if (can_use_inter) {
      double mode_cost;
-      uint32_t mode_bitcost;
+      double mode_bitcost;
      kvz_search_cu_inter(state,
                          x, y,
                          depth,
@@ -760,7 +760,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
    }
    else if(depth == MAX_DEPTH && cur_cu->type == CU_INTRA) {
      // Add cost of intra part_size.
-      const cabac_ctx_t* ctx = &(state->search_cabac.ctx.part_size_model[0]);
+      cabac_ctx_t* ctx = &(state->search_cabac.ctx.part_size_model[0]);
      bits += CTX_ENTROPY_FBITS(ctx, 1);  // NxN
      state->search_cabac.cur_ctx = ctx;
      FILE_BITS(CTX_ENTROPY_FBITS(ctx, 1), x, y, depth, "split");
@@ -835,7 +835,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
    if (depth < MAX_DEPTH) {
      // Add cost of cu_split_flag.
      uint8_t split_model = get_ctx_cu_split_model(lcu, x, y, depth);
-      const cabac_ctx_t *ctx = &(state->search_cabac.ctx.split_flag_model[split_model]);
+      cabac_ctx_t *ctx = &(state->search_cabac.ctx.split_flag_model[split_model]);
      split_cost += CTX_ENTROPY_FBITS(ctx, 1) * state->lambda;
      state->search_cabac.cur_ctx = ctx;
      FILE_BITS(CTX_ENTROPY_FBITS(ctx, 1), x, y, depth, "split");
@@ -844,7 +844,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,

    if (cur_cu->type == CU_INTRA && depth == MAX_DEPTH) {
      // Add cost of intra part_size.
-      const cabac_ctx_t *ctx = &(state->search_cabac.ctx.part_size_model[0]);
+      cabac_ctx_t *ctx = &(state->search_cabac.ctx.part_size_model[0]);
      split_cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda;  // NxN
      state->search_cabac.cur_ctx = ctx;
      FILE_BITS(CTX_ENTROPY_FBITS(ctx, 0), x, y, depth, "split");
@@ -893,7 +893,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
        }
        else if (depth == MAX_DEPTH && cur_cu->type == CU_INTRA) {
          // Add cost of intra part_size.
-          const cabac_ctx_t* ctx = &(state->search_cabac.ctx.part_size_model[0]);
+          cabac_ctx_t* ctx = &(state->search_cabac.ctx.part_size_model[0]);
          bits += CTX_ENTROPY_FBITS(ctx, 1);  // NxN
          state->search_cabac.cur_ctx = ctx;
          FILE_BITS(CTX_ENTROPY_FBITS(ctx, 1), x, y, depth, "split");
--- a/src/search.h
+++ b/src/search.h
@@ -59,7 +59,7 @@ typedef struct unit_stats_map_t {

  cu_info_t unit[MAX_UNIT_STATS_MAP_SIZE]; //!< list of searched units
  double    cost[MAX_UNIT_STATS_MAP_SIZE]; //!< list of matching RD costs
-  uint32_t  bits[MAX_UNIT_STATS_MAP_SIZE]; //!< list of matching bit costs  
+  double    bits[MAX_UNIT_STATS_MAP_SIZE]; //!< list of matching bit costs  
  int8_t    keys[MAX_UNIT_STATS_MAP_SIZE]; //!< list of keys (indices) to elements in the other arrays
  int       size;                    //!< number of active elements in the lists
 } unit_stats_map_t;
--- a/src/search_inter.c
+++ b/src/search_inter.c
@@ -199,15 +199,15 @@ static INLINE bool intmv_within_tile(const inter_search_info_t *info, int x, int
 * \return true if best_mv was changed, false otherwise
 */
 static bool check_mv_cost(inter_search_info_t *info,
-  int x,
-  int y,
-  double *best_cost,
-  uint32_t *best_bits,
-  vector2d_t *best_mv)
+                          int x,
+                          int y,
+                          double *best_cost,
+                          double* best_bits,
+                          vector2d_t *best_mv)
 {
  if (!intmv_within_tile(info, x, y)) return false;

-  uint32_t bitcost = 0;
+  double bitcost = 0;
  double cost = kvz_image_calc_sad(
      info->pic,
      info->ref,
@@ -292,10 +292,10 @@ static bool mv_in_merge(const inter_search_info_t *info, vector2d_t mv)
 * best_mv to the best one.
 */
 static void select_starting_point(inter_search_info_t *info,
-  vector2d_t extra_mv,
-  double *best_cost,
-  uint32_t *best_bits,
-  vector2d_t *best_mv)
+                                  vector2d_t extra_mv,
+                                  double *best_cost,
+                                  double* best_bits,
+                                  vector2d_t *best_mv)
 {
  // Check the 0-vector, so we can ignore all 0-vectors in the merge cand list.
  check_mv_cost(info, 0, 0, best_cost, best_bits, best_mv);
@@ -394,9 +394,9 @@ static double calc_mvd_cost(const encoder_state_t *state,
                            inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
                            int16_t num_cand,
                            int32_t ref_idx,
-                            uint32_t *bitcost)
+                            double* bitcost)
 {
-  uint32_t temp_bitcost = 0;
+  double temp_bitcost = 0;
  uint32_t merge_idx;
  int8_t merged      = 0;

@@ -429,9 +429,9 @@ static double calc_mvd_cost(const encoder_state_t *state,


 static bool early_terminate(inter_search_info_t *info,
-  double *best_cost,
-  uint32_t *best_bits,
-  vector2d_t *best_mv)
+                            double *best_cost,
+                            double* best_bits,
+                            vector2d_t *best_mv)
 {
  static const vector2d_t small_hexbs[7] = {
      { 0, -1 }, { -1, 0 }, { 0, 1 }, { 1, 0 },
@@ -485,7 +485,7 @@ void kvz_tz_pattern_search(inter_search_info_t *info,
                           vector2d_t mv,
                           int *best_dist,
                           double *best_cost,
-                           uint32_t *best_bits,
+                           double* best_bits,
                           vector2d_t *best_mv)
 {
  assert(pattern_type < 4);
@@ -603,7 +603,7 @@ void kvz_tz_raster_search(inter_search_info_t *info,
                          int iSearchRange,
                          int iRaster,
                          double *best_cost,
-                          uint32_t *best_bits,
+                          double* best_bits,
                          vector2d_t *best_mv)
 {
  const vector2d_t mv = { best_mv->x >> 2, best_mv->y >> 2 };
@@ -618,10 +618,10 @@ void kvz_tz_raster_search(inter_search_info_t *info,


 static void tz_search(inter_search_info_t *info,
-  vector2d_t extra_mv,
-  double *best_cost,
-  uint32_t *best_bits,
-  vector2d_t *best_mv)
+                      vector2d_t extra_mv,
+                      double *best_cost,
+                      double* best_bits,
+                      vector2d_t *best_mv)
 {
  //TZ parameters
  const int iSearchRange = 96;  // search range for each stage
@@ -705,11 +705,11 @@ static void tz_search(inter_search_info_t *info,
 * points like 0,0 might be used, such as vectors from top or left.
 */
 static void hexagon_search(inter_search_info_t *info,
-  vector2d_t extra_mv,
-  uint32_t steps,
-  double *best_cost,
-  uint32_t *best_bits,
-  vector2d_t *best_mv)
+                           vector2d_t extra_mv,
+                           uint32_t steps,
+                           double *best_cost,
+                           double* best_bits,
+                           vector2d_t *best_mv)
 {
  // The start of the hexagonal pattern has been repeated at the end so that
  // the indices between 1-6 can be used as the start of a 3-point list of new
@@ -803,11 +803,11 @@ static void hexagon_search(inter_search_info_t *info,
 * points like 0,0 might be used, such as vectors from top or left.
 **/
 static void diamond_search(inter_search_info_t *info,
-  vector2d_t extra_mv,
-  uint32_t steps,
-  double *best_cost,
-  uint32_t *best_bits,
-  vector2d_t *best_mv)
+                           vector2d_t extra_mv,
+                           uint32_t steps,
+                           double *best_cost,
+                           double* best_bits,
+                           vector2d_t *best_mv)
 {
  enum diapos {
    DIA_UP = 0,
@@ -888,7 +888,7 @@ static void search_mv_full(inter_search_info_t *info,
                           int32_t search_range,
                           vector2d_t extra_mv,
                           double *best_cost,
-                           uint32_t *best_bits,
+                           double* best_bits,
                           vector2d_t *best_mv)
 {
  // Search around the 0-vector.
@@ -968,7 +968,7 @@ static void search_mv_full(inter_search_info_t *info,
 */
 static void search_frac(inter_search_info_t *info,
                        double *best_cost,
-                        uint32_t *best_bits,
+                        double *best_bits,
                        vector2d_t *best_mv)
 {
  // Map indexes to relative coordinates in the following way:
@@ -985,8 +985,8 @@ static void search_frac(inter_search_info_t *info,
  vector2d_t mv = { best_mv->x >> 2, best_mv->y >> 2 };

  double cost = MAX_DOUBLE;
-  uint32_t bitcost = 0;
-  uint32_t bitcosts[4] = { 0 };
+  double bitcost = 0;
+  double bitcosts[4] = { 0 };
  unsigned best_index = 0;

 // Keep this as unsigned until SAD / SATD functions are updated
@@ -1314,7 +1314,7 @@ static void search_pu_inter_ref(inter_search_info_t *info,
  }

  double best_cost = MAX_DOUBLE;
-  uint32_t best_bits = MAX_INT;
+  double best_bits = MAX_INT;

  // Select starting point from among merge candidates. These should
  // include both mv_cand vectors and (0, 0).
@@ -1338,12 +1338,12 @@ static void search_pu_inter_ref(inter_search_info_t *info,

      case KVZ_IME_DIA:
        diamond_search(info, best_mv, info->state->encoder_control->cfg.me_max_steps,
-          &best_cost, &best_bits, &best_mv);
+                       &best_cost, &best_bits, &best_mv);
        break;

      default:
        hexagon_search(info, best_mv, info->state->encoder_control->cfg.me_max_steps,
-          &best_cost, &best_bits, &best_mv);
+                       &best_cost, &best_bits, &best_mv);
        break;
    }
  }
@@ -1484,7 +1484,7 @@ static void search_pu_inter_bipred(inter_search_info_t *info,
    double cost =
      kvz_satd_any_size(width, height, rec, LCU_WIDTH, src, frame->source->width);

-    uint32_t bitcost[2] = { 0, 0 };
+    double bitcost[2] = { 0, 0 };

    cost += info->mvd_cost_func(info->state,
                               merge_cand[i].mv[0][0],
@@ -1827,7 +1827,7 @@ static void search_pu_inter(encoder_state_t * const state,
          list);

        double     frac_cost = MAX_DOUBLE;
-        uint32_t   frac_bits = MAX_INT;
+        double   frac_bits = MAX_INT;
        vector2d_t frac_mv = { unipred_pu->inter.mv[list][0], unipred_pu->inter.mv[list][1] };

        search_frac(info, &frac_cost, &frac_bits, &frac_mv);
@@ -1917,7 +1917,7 @@ static void search_pu_inter(encoder_state_t * const state,
      best_bipred_cost =
        kvz_satd_any_size(width, height, rec, LCU_WIDTH, src, LCU_WIDTH);

-      uint32_t bitcost[2] = { 0, 0 };
+      double bitcost[2] = { 0, 0 };

      best_bipred_cost += info->mvd_cost_func(info->state,
        bipred_pu->inter.mv[0][0],
@@ -1990,10 +1990,10 @@ static void search_pu_inter(encoder_state_t * const state,
 * \param inter_bitcost Return inter bitcost
 */
 void kvz_cu_cost_inter_rd2(encoder_state_t * const state,
-  int x, int y, int depth,
-  lcu_t *lcu,
-  double   *inter_cost,
-  uint32_t *inter_bitcost){
+                           int x, int y, int depth,
+                           lcu_t *lcu,
+                           double   *inter_cost,
+                           double* inter_bitcost){

  cu_info_t *cur_cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y));
  int tr_depth = MAX(1, depth);
@@ -2040,7 +2040,7 @@ void kvz_search_cu_inter(encoder_state_t * const state,
                         int x, int y, int depth,
                         lcu_t *lcu,
                         double   *inter_cost,
-                         uint32_t *inter_bitcost)
+                         double* inter_bitcost)
 {
  *inter_cost = MAX_DOUBLE;
  *inter_bitcost = MAX_INT;
@@ -2108,10 +2108,10 @@ void kvz_search_cu_inter(encoder_state_t * const state,
  // Calculate more accurate cost when needed
  if (state->encoder_control->cfg.rdo >= 2) {
    kvz_cu_cost_inter_rd2(state,
-      x, y, depth,
-      lcu,
-      inter_cost,
-      inter_bitcost);
+                          x, y, depth,
+                          lcu,
+                          inter_cost,
+                          inter_bitcost);
  }

  if (*inter_cost < MAX_DOUBLE && cur_pu->inter.mv_dir & 1) {
@@ -2146,7 +2146,7 @@ void kvz_search_cu_smp(encoder_state_t * const state,
                       part_mode_t part_mode,
                       lcu_t *lcu,
                       double *inter_cost,
-                       uint32_t *inter_bitcost)
+                       double* inter_bitcost)
 {
  *inter_cost = MAX_DOUBLE;
  *inter_bitcost = MAX_INT;
@@ -2173,7 +2173,7 @@ void kvz_search_cu_smp(encoder_state_t * const state,
    const int height_pu = PU_GET_H(part_mode, width, i);

    double cost      = MAX_DOUBLE;
-    uint32_t bitcost = MAX_INT;
+    double bitcost = MAX_INT;

    search_pu_inter(state, x, y, depth, part_mode, i, lcu, amvp, &merge, &info);

@@ -2250,10 +2250,10 @@ void kvz_search_cu_smp(encoder_state_t * const state,
  // Calculate more accurate cost when needed
  if (state->encoder_control->cfg.rdo >= 2) {
    kvz_cu_cost_inter_rd2(state,
-      x, y, depth,
-      lcu,
-      inter_cost,
-      inter_bitcost);
+                          x, y, depth,
+                          lcu,
+                          inter_cost,
+                          inter_bitcost);
  } else {
    *inter_cost += state->lambda_sqrt * smp_extra_bits;
  }
--- a/src/search_inter.h
+++ b/src/search_inter.h
@@ -71,13 +71,13 @@ typedef double kvz_mvd_cost_func(const encoder_state_t *state,
                                  inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
                                  int16_t num_cand,
                                  int32_t ref_idx,
-                                  uint32_t *bitcost);
+                                  double *bitcost);

 void kvz_search_cu_inter(encoder_state_t * const state,
                         int x, int y, int depth,
                         lcu_t *lcu,
                         double *inter_cost,
-                         uint32_t *inter_bitcost);
+                         double* inter_bitcost);

 void kvz_search_cu_smp(encoder_state_t * const state,
                       int x, int y,
@@ -85,7 +85,7 @@ void kvz_search_cu_smp(encoder_state_t * const state,
                       part_mode_t part_mode,
                       lcu_t *lcu,
                       double *inter_cost,
-                       uint32_t *inter_bitcost);
+                       double* inter_bitcost);


 unsigned kvz_inter_satd_cost(const encoder_state_t* state,