Combine tile mv constraint with OWF mv constraint

This also fixes movement vectors in tiles when OWF is on. The OWF mv constraint assumed WPP, so it didn't work with tiles.
2024-11-24 02:24:07 +00:00 · 2016-02-24 18:10:32 +02:00 · 2016-02-24 18:10:32 +02:00 · f4ebff12b0
parent 7981609cd0
commit f4ebff12b0
1 changed files with 55 additions and 46 deletions
--- a/src/search_inter.c
+++ b/src/search_inter.c
@ -28,20 +28,21 @@
 #include "rdo.h"


-
 /**
 * \return  True if referred block is within current tile.
 */
-static INLINE bool fracmv_within_tile(const encoder_state_t *state, const vector2d_t* orig, int x, int y, int width)
+static INLINE bool fracmv_within_tile(const encoder_state_t *state, const vector2d_t* orig, int x, int y, int width, int wpp_limit)
 {
-  if (state->encoder_control->cfg->mv_constraint != KVZ_MV_CONSTRAIN_FRAME_AND_TILE) return true;
+  if (state->encoder_control->cfg->mv_constraint != KVZ_MV_CONSTRAIN_FRAME_AND_TILE) {
+    return (wpp_limit == -1 || y + (width << 2) <= (wpp_limit << 2));
+  };

  // TODO implement KVZ_MV_CONSTRAIN_FRAM and KVZ_MV_CONSTRAIN_TILE.
-
-  vector2d_t abs_mv = { (orig->x << 2) + x, (orig->y << 2) + y };
+  const vector2d_t abs_mv = { (orig->x << 2) + x, (orig->y << 2) + y };

  if (abs_mv.x >= 0 && abs_mv.x + (width << 2) <= (state->tile->frame->width << 2) &&
-      abs_mv.y >= 0 && abs_mv.y + (width << 2) <= (state->tile->frame->height << 2))
+      abs_mv.y >= 0 && abs_mv.y + (width << 2) <= (state->tile->frame->height << 2) &&
+      (wpp_limit == -1 || y + (width << 2) <= (wpp_limit << 2)))
  {
    return true;
  } else {
@ -53,9 +54,9 @@ static INLINE bool fracmv_within_tile(const encoder_state_t *state, const vector
 /**
 * \return  True if referred block is within current tile.
 */
-static INLINE bool intmv_within_tile(const encoder_state_t *state, const vector2d_t* orig, int x, int y, int width)
+static INLINE bool intmv_within_tile(const encoder_state_t *state, const vector2d_t* orig, int x, int y, int width, int wpp_limit)
 {
-  return fracmv_within_tile(state, orig, x << 2, y << 2, width);
+  return fracmv_within_tile(state, orig, x << 2, y << 2, width, wpp_limit);
 }


@ -156,7 +157,7 @@ static int calc_mvd_cost(const encoder_state_t * const state, int x, int y, int
 unsigned kvz_tz_pattern_search(const encoder_state_t * const state, const kvz_picture *pic, const kvz_picture *ref, unsigned pattern_type,
                           const vector2d_t *orig, const int iDist, vector2d_t *mv, unsigned best_cost, int *best_dist,
                           int16_t mv_cand[2][2], inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], int16_t num_cand, int32_t ref_idx, uint32_t *best_bitcost,
-                           int width, int height, int max_px_below_lcu)
+                           int width, int height, int wpp_limit)
 {
  int n_points;
  int best_index = -1;
@ -274,7 +275,7 @@ unsigned kvz_tz_pattern_search(const encoder_state_t * const state, const kvz_pi
  for (i = 0; i < n_points; i++)
  {
    vector2d_t *current = &pattern[pattern_type][i];
-    if (!intmv_within_tile(state, orig, mv->x + current->x, mv->y + current->y, width)) {
+    if (!intmv_within_tile(state, orig, mv->x + current->x, mv->y + current->y, width, wpp_limit)) {
      continue;
    }

@ -286,7 +287,7 @@ unsigned kvz_tz_pattern_search(const encoder_state_t * const state, const kvz_pi
      cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
                            (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x,
                            (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + current->y,
-                            width, height, max_px_below_lcu);
+                            width, height, -1);
      cost += calc_mvd(state, mv->x + current->x, mv->y + current->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);

      PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
@ -322,7 +323,7 @@ unsigned kvz_tz_pattern_search(const encoder_state_t * const state, const kvz_pi
 unsigned kvz_tz_raster_search(const encoder_state_t * const state, const kvz_picture *pic, const kvz_picture *ref,
                          const vector2d_t *orig, vector2d_t *mv, unsigned best_cost,
                          int16_t mv_cand[2][2], inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], int16_t num_cand, int32_t ref_idx, uint32_t *best_bitcost,
-                          int width, int height, int iSearchRange, int iRaster, int max_px_below_lcu)
+                          int width, int height, int iSearchRange, int iRaster, int wpp_limit)
 {
  int i;
  int k;
@ -342,7 +343,7 @@ unsigned kvz_tz_raster_search(const encoder_state_t * const state, const kvz_pic
    for (k = -iSearchRange; k <= iSearchRange; k += iRaster)
    {
      vector2d_t current = { k, i };
-      if (!intmv_within_tile(state, orig, mv->x + current.x, mv->y + current.y, width)) {
+      if (!intmv_within_tile(state, orig, mv->x + current.x, mv->y + current.y, width, wpp_limit)) {
        continue;
      }

@ -354,7 +355,7 @@ unsigned kvz_tz_raster_search(const encoder_state_t * const state, const kvz_pic
        cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
          (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k,
          (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + i,
-          width, height, max_px_below_lcu);
+          width, height, -1);
        cost += calc_mvd(state, mv->x + k, mv->y + i, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);

        PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
@ -405,7 +406,7 @@ static unsigned tz_search(const encoder_state_t * const state,
  int iDist;
  int best_dist = 0;
  unsigned best_index = num_cand;
-  int max_px_below_lcu = -1;
+  int wpp_limit = -1;

  int(*calc_mvd)(const encoder_state_t * const, int, int, int,
    int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
@ -415,28 +416,28 @@ static unsigned tz_search(const encoder_state_t * const state,
  }

  if (state->encoder_control->owf) {
-    max_px_below_lcu = LCU_WIDTH;
+    wpp_limit = 2 * LCU_WIDTH - orig->x % LCU_WIDTH;
    if (state->encoder_control->fme_level > 0) {
      // Fractional motion estimation can change the mv by at most 1 pixel.
-      max_px_below_lcu -= 1;
+      wpp_limit -= 1;
    }
    if (state->encoder_control->deblock_enable) {
      // Strong deblock filter modifies 3 pixels.
-      max_px_below_lcu -= 3;
+      wpp_limit -= 3;
    }
  }

  //step 1, compare (0,0) vector to predicted vectors
  
  // Check whatever input vector we got, unless its (0, 0) which will be checked later.
-  if ((mv.x || mv.y) && intmv_within_tile(state, orig, mv.x, mv.y, width))
+  if ((mv.x || mv.y) && intmv_within_tile(state, orig, mv.x, mv.y, width, wpp_limit))
  {
    PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);

    best_cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
                                        (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
                                        (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
-                                        width, height, max_px_below_lcu);
+                                        width, height, -1);
    best_cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost);

    PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
@ -454,7 +455,7 @@ static unsigned tz_search(const encoder_state_t * const state,
    if (merge_cand[i].dir == 3) continue;
    mv.x = merge_cand[i].mv[merge_cand[i].dir - 1][0] >> 2;
    mv.y = merge_cand[i].mv[merge_cand[i].dir - 1][1] >> 2;
-    if (!intmv_within_tile(state, orig, mv.x, mv.y, width)) {
+    if (!intmv_within_tile(state, orig, mv.x, mv.y, width, wpp_limit)) {
      continue;
    }

@ -464,7 +465,7 @@ static unsigned tz_search(const encoder_state_t * const state,
    unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
                                   (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
                                   (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
-                                   width, height, max_px_below_lcu);
+                                   width, height, -1);
    cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);

    PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
@ -492,7 +493,7 @@ static unsigned tz_search(const encoder_state_t * const state,
  for (iDist = 1; iDist <= iSearchRange; iDist *= 2)
  {
    best_cost = kvz_tz_pattern_search(state, pic, ref, step2_type, orig, iDist, &mv, best_cost, &best_dist,
-                                  mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost, width, height, max_px_below_lcu);
+                                  mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost, width, height, wpp_limit);
  }

  //step 3, raster scan
@ -501,7 +502,7 @@ static unsigned tz_search(const encoder_state_t * const state,
    best_dist = iRaster;

    best_cost = kvz_tz_raster_search(state, pic, ref, orig, &mv, best_cost, mv_cand, merge_cand,
-                                 num_cand, ref_idx, &best_bitcost, width, height, iSearchRange, iRaster, max_px_below_lcu);
+                                 num_cand, ref_idx, &best_bitcost, width, height, iSearchRange, iRaster, wpp_limit);
  }

  //step 4
@ -513,7 +514,7 @@ static unsigned tz_search(const encoder_state_t * const state,
    while (iDist > 0)
    {
      best_cost = kvz_tz_pattern_search(state, pic, ref, step4_type, orig, iDist, &mv, best_cost, &best_dist,
-                                   mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost, width, height, max_px_below_lcu);
+                                   mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost, width, height, wpp_limit);

      iDist = iDist >> 1;
    }
@ -525,7 +526,7 @@ static unsigned tz_search(const encoder_state_t * const state,
    for (iDist = 1; iDist <= iSearchRange; iDist *= 2)
    {
      best_cost = kvz_tz_pattern_search(state, pic, ref, step4_type, orig, iDist, &mv, best_cost, &best_dist,
-                                   mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost, width, height, max_px_below_lcu);
+                                   mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost, width, height, wpp_limit);
    }
  }

@ -594,7 +595,7 @@ static unsigned hexagon_search(const encoder_state_t * const state,
  uint32_t best_bitcost = 0, bitcost;
  unsigned i;
  unsigned best_index = 0; // Index of large_hexbs or finally small_hexbs.
-  int max_px_below_lcu = -1;
+  int wpp_limit = -1;

  int (*calc_mvd)(const encoder_state_t * const, int, int, int,
    int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
@ -603,16 +604,15 @@ static unsigned hexagon_search(const encoder_state_t * const state,
    calc_mvd = kvz_calc_mvd_cost_cabac;
  }

-  
  if (state->encoder_control->owf) {
-    max_px_below_lcu = LCU_WIDTH;
+    wpp_limit = 2 * LCU_WIDTH - orig->x % LCU_WIDTH;
    if (state->encoder_control->fme_level > 0) {
      // Fractional motion estimation can change the mv by at most 1 pixel.
-      max_px_below_lcu -= 1;
+      wpp_limit -= 1;
    }
    if (state->encoder_control->deblock_enable) {
      // Strong deblock filter modifies 3 pixels.
-      max_px_below_lcu -= 3;
+      wpp_limit -= 3;
    }
  }

@ -628,14 +628,14 @@ static unsigned hexagon_search(const encoder_state_t * const state,
  }

  if (!mv_in_merge_cand &&
-      intmv_within_tile(state, orig, mv.x, mv.y, width))
+    intmv_within_tile(state, orig, mv.x, mv.y, width, wpp_limit))
  {
    PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);

    best_cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
                                        (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
                                        (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
-                                        width, height, max_px_below_lcu);
+                                        width, height, -1);
    best_cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
    best_bitcost = bitcost;
    best_index = num_cand; 
@ -653,7 +653,7 @@ static unsigned hexagon_search(const encoder_state_t * const state,
    if (merge_cand[i].dir == 3) continue;
    mv.x = merge_cand[i].mv[merge_cand[i].dir - 1][0] >> 2;
    mv.y = merge_cand[i].mv[merge_cand[i].dir - 1][1] >> 2;
-    if (!intmv_within_tile(state, orig, mv.x, mv.y, width)) {
+    if (!intmv_within_tile(state, orig, mv.x, mv.y, width, wpp_limit)) {
      continue;
    }

@ -662,7 +662,7 @@ static unsigned hexagon_search(const encoder_state_t * const state,
    unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
                                   (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
                                   (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
-                                   width, height, max_px_below_lcu);
+                                   width, height, -1);
    cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);

    PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
@ -689,7 +689,7 @@ static unsigned hexagon_search(const encoder_state_t * const state,
  best_index = 0;
  for (i = 0; i < 7; ++i) {
    const vector2d_t *pattern = &large_hexbs[i];
-    if (!intmv_within_tile(state, orig, mv.x + pattern->x, mv.y + pattern->y, width)) {
+    if (!intmv_within_tile(state, orig, mv.x + pattern->x, mv.y + pattern->y, width, wpp_limit)) {
      continue;
    }

@ -699,7 +699,7 @@ static unsigned hexagon_search(const encoder_state_t * const state,
      cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
                             (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x, 
                             (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y,
-                             width, height, max_px_below_lcu);
+                             width, height, -1);
      cost += calc_mvd(state, mv.x + pattern->x, mv.y + pattern->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);

      PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
@ -736,7 +736,7 @@ static unsigned hexagon_search(const encoder_state_t * const state,
    // Iterate through the next 3 points.
    for (i = 0; i < 3; ++i) {
      const vector2d_t *offset = &large_hexbs[start + i];
-      if (!intmv_within_tile(state, orig, mv.x + offset->x, mv.y + offset->y, width)) {
+      if (!intmv_within_tile(state, orig, mv.x + offset->x, mv.y + offset->y, width, wpp_limit)) {
        continue;
      }

@ -746,7 +746,7 @@ static unsigned hexagon_search(const encoder_state_t * const state,
        cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
                               (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
                               (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
-                               width, height, max_px_below_lcu);
+                               width, height, -1);
        cost += calc_mvd(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
        PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs_iterative,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
              (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, 
@ -772,7 +772,7 @@ static unsigned hexagon_search(const encoder_state_t * const state,
  // Do the final step of the search with a small pattern.
  for (i = 1; i < 5; ++i) {
    const vector2d_t *offset = &small_hexbs[i];
-    if (!intmv_within_tile(state, orig, mv.x + offset->x, mv.y + offset->y, width)) {
+    if (!intmv_within_tile(state, orig, mv.x + offset->x, mv.y + offset->y, width, wpp_limit)) {
      continue;
    }

@ -782,7 +782,7 @@ static unsigned hexagon_search(const encoder_state_t * const state,
      cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
                             (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
                             (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
-                             width, height, max_px_below_lcu);
+                             width, height, -1);
      cost += calc_mvd(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
      PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=small_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
            (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, 
@ -906,6 +906,15 @@ static unsigned search_frac(const encoder_state_t * const state,
      { -1, -1 }, { 0, -1 }, { 1, -1 }
  };

+  int wpp_limit = -1;
+  if (state->encoder_control->owf) {
+    wpp_limit = 2 * LCU_WIDTH - orig->x % LCU_WIDTH;
+    if (state->encoder_control->deblock_enable) {
+      // Strong deblock filter modifies 3 pixels.
+      wpp_limit -= 3;
+    }
+  }
+
  //Set mv to halfpel precision
  vector2d_t mv = { mv_in_out->x >> 2, mv_in_out->y >> 2 };
  unsigned best_cost = UINT32_MAX;
@ -955,7 +964,7 @@ static unsigned search_frac(const encoder_state_t * const state,
  // Search halfpel positions around best integer mv
  for (i = 0; i < 9; ++i) {
    const vector2d_t *pattern = &square[i];
-    if (!fracmv_within_tile(state, orig, (mv.x + pattern->x) << 1, (mv.y + pattern->y) << 1, width)) {
+    if (!fracmv_within_tile(state, orig, (mv.x + pattern->x) << 1, (mv.y + pattern->y) << 1, width, wpp_limit)) {
      continue;
    }

@ -995,7 +1004,7 @@ static unsigned search_frac(const encoder_state_t * const state,
  //Search quarterpel points around best halfpel mv
  for (i = 0; i < 9; ++i) {
    const vector2d_t *pattern = &square[i];
-    if (!fracmv_within_tile(state, orig, mv.x + pattern->x, mv.y + pattern->y, width)) {
+    if (!fracmv_within_tile(state, orig, mv.x + pattern->x, mv.y + pattern->y, width, wpp_limit)) {
      continue;
    }

@ -1132,8 +1141,8 @@ static void search_pu_inter_ref(const encoder_state_t * const state,
      break;
  }

-  assert(fracmv_within_tile(state, &orig, mv.x, mv.y, width));
-
+  assert(fracmv_within_tile(state, &orig, mv.x, mv.y, width, -1));
+  
  if (state->encoder_control->cfg->fme_level > 0) {
    temp_cost = search_frac(state,
                            width, height,
@ -1146,7 +1155,7 @@ static void search_pu_inter_ref(const encoder_state_t * const state,
                            num_cand,
                            ref_idx,
                            &temp_bitcost);
-    assert(fracmv_within_tile(state, &orig, mv.x, mv.y, width));
+    assert(fracmv_within_tile(state, &orig, mv.x, mv.y, width, -1));
  }
  
  merged = 0;