Make usage of MV RDO selectable with a config parameter (in hexbs, tz, frac, bipred)

2024-12-18 03:04:06 +00:00 · 2015-11-05 12:24:03 +02:00 · 2015-11-05 12:24:03 +02:00 · 4e7e9eefbf
parent 9a535e1c56
commit 4e7e9eefbf
2 changed files with 61 additions and 13 deletions
--- a/src/kvazaar.h
+++ b/src/kvazaar.h
@ -175,6 +175,8 @@ typedef struct kvz_config
  kvz_gop_config gop[KVZ_MAX_GOP_LENGTH];  /*!< \brief Array of GOP settings */

  int32_t target_bitrate;
+
+  int8_t mv_rdo;            /*!< \brief MV RDO calculation in search (0: estimation, 1: RDO). */
 } kvz_config;

 /**
--- a/src/search_inter.c
+++ b/src/search_inter.c
@ -141,6 +141,14 @@ unsigned kvz_tz_pattern_search(const encoder_state_t * const state, const kvz_pi
  
  vector2d_t mv_best = { 0, 0 };

+
+  int(*calc_mvd)(const encoder_state_t * const, int, int, int,
+    int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
+    int16_t, int32_t, uint32_t *) = calc_mvd_cost;
+  if (state->encoder_control->cfg->mv_rdo) {
+    calc_mvd = kvz_calc_mvd_cost_cabac;
+  }
+
  assert(pattern_type < 4);

  //implemented search patterns
@ -252,7 +260,7 @@ unsigned kvz_tz_pattern_search(const encoder_state_t * const state, const kvz_pi
                            (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x,
                            (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + current->y,
                            block_width, block_width, max_lcu_below);
-      cost += calc_mvd_cost(state, mv->x + current->x, mv->y + current->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
+      cost += calc_mvd(state, mv->x + current->x, mv->y + current->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);

      PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
        (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x,
@ -293,6 +301,13 @@ unsigned kvz_tz_raster_search(const encoder_state_t * const state, const kvz_pic
  int k;

  vector2d_t mv_best = { 0, 0 };
+
+  int(*calc_mvd)(const encoder_state_t * const, int, int, int,
+    int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
+    int16_t, int32_t, uint32_t *) = calc_mvd_cost;
+  if (state->encoder_control->cfg->mv_rdo) {
+    calc_mvd = kvz_calc_mvd_cost_cabac;
+  }
  
  //compute SAD values for every point in the iRaster downsampled version of the current search area
  for (i = iSearchRange; i >= -iSearchRange; i -= iRaster)
@ -309,7 +324,7 @@ unsigned kvz_tz_raster_search(const encoder_state_t * const state, const kvz_pic
          (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k,
          (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + i,
          block_width, block_width, max_lcu_below);
-        cost += calc_mvd_cost(state, mv->x + k, mv->y + i, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
+        cost += calc_mvd(state, mv->x + k, mv->y + i, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);

        PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
          (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k,
@ -362,6 +377,13 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth,
  unsigned best_index = num_cand;
  int max_lcu_below = -1;

+  int(*calc_mvd)(const encoder_state_t * const, int, int, int,
+    int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
+    int16_t, int32_t, uint32_t *) = calc_mvd_cost;
+  if (state->encoder_control->cfg->mv_rdo) {
+    calc_mvd = kvz_calc_mvd_cost_cabac;
+  }
+
  if (state->encoder_control->owf) {
    max_lcu_below = 1;
  }
@ -377,7 +399,7 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth,
                                        (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
                                        (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
                                        block_width, block_width, max_lcu_below);
-    best_cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost);
+    best_cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost);

    PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
                            (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
@ -402,7 +424,7 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth,
                                   (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
                                   (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
                                   block_width, block_width, max_lcu_below);
-    cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
+    cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);

    PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
                            (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
@ -531,6 +553,14 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
  unsigned i;
  unsigned best_index = 0; // Index of large_hexbs or finally small_hexbs.
  int max_lcu_below = -1;
+
+  int (*calc_mvd)(const encoder_state_t * const, int, int, int,
+    int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
+    int16_t, int32_t, uint32_t *) = calc_mvd_cost;
+  if (state->encoder_control->cfg->mv_rdo) {
+    calc_mvd = kvz_calc_mvd_cost_cabac;
+  }
+
  
  if (state->encoder_control->owf) {
    max_lcu_below = 1;
@ -554,7 +584,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
                                        (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
                                        (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
                                        block_width, block_width, max_lcu_below);
-    best_cost += kvz_calc_mvd_cost_cabac(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
+    best_cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
    best_bitcost = bitcost;
    best_index = num_cand; 

@ -578,7 +608,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
                                   (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
                                   (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
                                   block_width, block_width, max_lcu_below);
-    cost += kvz_calc_mvd_cost_cabac(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
+    cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);

    PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
                            (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
@ -611,7 +641,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
                             (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x, 
                             (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y,
                             block_width, block_width, max_lcu_below);
-      cost += kvz_calc_mvd_cost_cabac(state, mv.x + pattern->x, mv.y + pattern->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
+      cost += calc_mvd(state, mv.x + pattern->x, mv.y + pattern->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);

      PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
                              (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x, 
@ -654,7 +684,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
                               (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
                               (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
                               block_width, block_width, max_lcu_below);
-        cost += kvz_calc_mvd_cost_cabac(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
+        cost += calc_mvd(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
        PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs_iterative,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
              (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, 
              (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width, 
@ -686,7 +716,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
                             (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
                             (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
                             block_width, block_width, max_lcu_below);
-      cost += kvz_calc_mvd_cost_cabac(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
+      cost += calc_mvd(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
      PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=small_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
            (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, 
            (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width, 
@ -823,6 +853,13 @@ static unsigned search_frac(const encoder_state_t * const state,
  kvz_pixel dst[(LCU_WIDTH+1) * (LCU_WIDTH+1) * 16];
  kvz_pixel* dst_off = &dst[dst_stride*4+4];

+  int(*calc_mvd)(const encoder_state_t * const, int, int, int,
+    int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
+    int16_t, int32_t, uint32_t *) = calc_mvd_cost;
+  if (state->encoder_control->cfg->mv_rdo) {
+    calc_mvd = kvz_calc_mvd_cost_cabac;
+  }
+
  kvz_get_extended_block(orig->x, orig->y, mv.x-1, mv.y-1,
                state->tile->lcu_offset_x * LCU_WIDTH,
                state->tile->lcu_offset_y * LCU_WIDTH,
@ -856,7 +893,7 @@ static unsigned search_frac(const encoder_state_t * const state,

    cost = satd(tmp_pic,tmp_filtered);

-    cost += kvz_calc_mvd_cost_cabac(state, mv.x + pattern->x, mv.y + pattern->y, 1, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
+    cost += calc_mvd(state, mv.x + pattern->x, mv.y + pattern->y, 1, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);

    if (cost < best_cost) {
      best_cost    = cost;
@ -892,7 +929,7 @@ static unsigned search_frac(const encoder_state_t * const state,

    cost = satd(tmp_pic,tmp_filtered);

-    cost += kvz_calc_mvd_cost_cabac(state, mv.x + pattern->x, mv.y + pattern->y, 0, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
+    cost += calc_mvd(state, mv.x + pattern->x, mv.y + pattern->y, 0, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);

    if (cost < best_cost) {
      best_cost    = cost;
@ -1060,6 +1097,15 @@ int kvz_search_cu_inter(const encoder_state_t * const state, int x, int y, int d
    static const uint8_t priorityList0[] = { 0, 1, 0, 2, 1, 2, 0, 3, 1, 3, 2, 3 };
    static const uint8_t priorityList1[] = { 1, 0, 2, 0, 2, 1, 3, 0, 3, 1, 3, 2 };
    uint8_t cutoff = num_cand;
+
+
+    int(*calc_mvd)(const encoder_state_t * const, int, int, int,
+      int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
+      int16_t, int32_t, uint32_t *) = calc_mvd_cost;
+    if (state->encoder_control->cfg->mv_rdo) {
+      calc_mvd = kvz_calc_mvd_cost_cabac;
+    }
+
    for (int32_t idx = 0; idx<cutoff*(cutoff - 1); idx++) {
      uint8_t i = priorityList0[idx];
      uint8_t j = priorityList1[idx];
@ -1110,8 +1156,8 @@ int kvz_search_cu_inter(const encoder_state_t * const state, int x, int y, int d

          cost = satd(tmp_pic, tmp_block);

-          cost += kvz_calc_mvd_cost_cabac(state, merge_cand[i].mv[0][0], merge_cand[i].mv[0][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[0]);
-          cost += kvz_calc_mvd_cost_cabac(state, merge_cand[i].mv[1][0], merge_cand[i].mv[1][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[1]);
+          cost += calc_mvd(state, merge_cand[i].mv[0][0], merge_cand[i].mv[0][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[0]);
+          cost += calc_mvd(state, merge_cand[i].mv[1][0], merge_cand[i].mv[1][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[1]);

          if (cost < cur_cu->inter.cost) {