Merge branch 'smp-merge-analysis'

2024-11-27 19:24:06 +00:00 · 2019-09-03 16:47:07 +03:00 · 2019-09-03 16:47:07 +03:00 · 4e94d60552
parent 557bcbc6aa 147378e1f9
commit 4e94d60552
3 changed files with 144 additions and 107 deletions
--- a/src/inter.c
+++ b/src/inter.c
@ -519,48 +519,75 @@ void kvz_inter_recon_cu(const encoder_state_t * const state,
                        bool predict_chroma)
 {
  cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y));
-
  const int num_pu = kvz_part_mode_num_parts[cu->part_size];
  for (int i = 0; i < num_pu; ++i) {
-    const int pu_x = PU_GET_X(cu->part_size, width, x, i);
-    const int pu_y = PU_GET_Y(cu->part_size, width, y, i);
-    const int pu_w = PU_GET_W(cu->part_size, width, i);
-    const int pu_h = PU_GET_H(cu->part_size, width, i);
+    kvz_inter_pred_pu(state, lcu, x, y, width, predict_luma, predict_chroma, i);
+  }
+}

-    cu_info_t *pu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(pu_x), SUB_SCU(pu_y));
+/**
+ * Predict a single PU of the CU located at (x, y).
+ *
+ * Biprediction is used when the PU's inter.mv_dir is 3, uniprediction otherwise.
+ *
+ * \param state          encoder state
+ * \param lcu            containing LCU
+ * \param x              x-coordinate of the CU in pixels
+ * \param y              y-coordinate of the CU in pixels
+ * \param width          CU width
+ * \param predict_luma   Enable or disable luma prediction for this call.
+ * \param predict_chroma Enable or disable chroma prediction for this call.
+ * \param i_pu           Index of the PU. Always zero for 2Nx2N. Used for SMP+AMP.
+ */
+void kvz_inter_pred_pu(const encoder_state_t * const state,
+                       lcu_t *lcu,
+                       int32_t x,
+                       int32_t y,
+                       int32_t width,
+                       bool predict_luma,
+                       bool predict_chroma,
+                       int i_pu)

-    if (pu->inter.mv_dir == 3) {
-      const kvz_picture *const refs[2] = {
-        state->frame->ref->images[
-          state->frame->ref_LX[0][
-            pu->inter.mv_ref[0]]],
-        state->frame->ref->images[
-          state->frame->ref_LX[1][
-            pu->inter.mv_ref[1]]],
-      };
-      kvz_inter_recon_bipred(state,
-                             refs[0], refs[1],
-                             pu_x, pu_y,
-                             pu_w, pu_h,
-                             pu->inter.mv,
-                             lcu,
-                             predict_luma, predict_chroma);
-    } else {
-      const int mv_idx = pu->inter.mv_dir - 1;
-      const kvz_picture *const ref =
-        state->frame->ref->images[
-          state->frame->ref_LX[mv_idx][
-            pu->inter.mv_ref[mv_idx]]];
+{
+  cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y));
+  const int pu_x = PU_GET_X(cu->part_size, width, x, i_pu);
+  const int pu_y = PU_GET_Y(cu->part_size, width, y, i_pu);
+  const int pu_w = PU_GET_W(cu->part_size, width, i_pu);
+  const int pu_h = PU_GET_H(cu->part_size, width, i_pu);
+  cu_info_t *pu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(pu_x), SUB_SCU(pu_y));

-      inter_recon_unipred(state,
-                          ref,
-                          pu_x, pu_y,
-                          pu_w, pu_h,
-                          pu->inter.mv[mv_idx],
-                          lcu,
-                          NULL,
-                          predict_luma, predict_chroma);
-    }
+  if (pu->inter.mv_dir == 3) {
+    const kvz_picture *const refs[2] = {
+      state->frame->ref->images[
+        state->frame->ref_LX[0][
+          pu->inter.mv_ref[0]]],
+      state->frame->ref->images[
+        state->frame->ref_LX[1][
+          pu->inter.mv_ref[1]]],
+    };
+    kvz_inter_recon_bipred(state,
+      refs[0], refs[1],
+      pu_x, pu_y,
+      pu_w, pu_h,
+      pu->inter.mv,
+      lcu,
+      predict_luma, predict_chroma);
+  }
+  else {
+    const int mv_idx = pu->inter.mv_dir - 1;
+    const kvz_picture *const ref =
+      state->frame->ref->images[
+        state->frame->ref_LX[mv_idx][
+          pu->inter.mv_ref[mv_idx]]];
+
+    inter_recon_unipred(state,
+      ref,
+      pu_x, pu_y,
+      pu_w, pu_h,
+      pu->inter.mv[mv_idx],
+      lcu,
+      NULL,
+      predict_luma, predict_chroma);
  }
 }

--- a/src/inter.h
+++ b/src/inter.h
@ -48,6 +48,15 @@ void kvz_inter_recon_cu(const encoder_state_t * const state,
                        bool predict_luma,
                        bool predict_chroma);

+void kvz_inter_pred_pu(const encoder_state_t * const state,
+  lcu_t *lcu,
+  int32_t x,
+  int32_t y,
+  int32_t width,
+  bool predict_luma,
+  bool predict_chroma,
+  int i_pu);
+
 void kvz_inter_recon_bipred(const encoder_state_t * const state,
                            const kvz_picture * ref1,
                            const kvz_picture * ref2,
--- a/src/search_inter.c
+++ b/src/search_inter.c
@ -1551,87 +1551,88 @@ static void search_pu_inter(encoder_state_t * const state,
    mrg_costs[i] = MAX_DOUBLE;
  }

-  if (cur_cu->part_size == SIZE_2Nx2N) {
+  int num_rdo_cands = 0;

-    int num_rdo_cands = 0;
+  // Check motion vector constraints and perform rough search
+  for (int merge_idx = 0; merge_idx < info.num_merge_cand; ++merge_idx) {

-    // Check motion vector constraints and perform rough search
-    for (int merge_idx = 0; merge_idx < info.num_merge_cand; ++merge_idx) {
+    inter_merge_cand_t *cur_cand = &info.merge_cand[merge_idx];
+    cur_cu->inter.mv_dir = cur_cand->dir;
+    cur_cu->inter.mv_ref[0] = cur_cand->ref[0];
+    cur_cu->inter.mv_ref[1] = cur_cand->ref[1];
+    cur_cu->inter.mv[0][0] = cur_cand->mv[0][0];
+    cur_cu->inter.mv[0][1] = cur_cand->mv[0][1];
+    cur_cu->inter.mv[1][0] = cur_cand->mv[1][0];
+    cur_cu->inter.mv[1][1] = cur_cand->mv[1][1];

-      inter_merge_cand_t *cur_cand = &info.merge_cand[merge_idx];
-      cur_cu->inter.mv_dir = cur_cand->dir;
-      cur_cu->inter.mv_ref[0] = cur_cand->ref[0];
-      cur_cu->inter.mv_ref[1] = cur_cand->ref[1];
-      cur_cu->inter.mv[0][0] = cur_cand->mv[0][0];
-      cur_cu->inter.mv[0][1] = cur_cand->mv[0][1];
-      cur_cu->inter.mv[1][0] = cur_cand->mv[1][0];
-      cur_cu->inter.mv[1][1] = cur_cand->mv[1][1];
+    // If bipred is not enabled, do not try candidates with mv_dir == 3.
+    // Bipred is also forbidden for 4x8 and 8x4 blocks by the standard. 
+    if (cur_cu->inter.mv_dir == 3 && !state->encoder_control->cfg.bipred) continue;
+    if (cur_cu->inter.mv_dir == 3 && !(width + height > 12)) continue;

-      if (cur_cu->inter.mv_dir == 3 && !state->encoder_control->cfg.bipred) continue;
-      bool is_duplicate = merge_candidate_in_list(info.merge_cand, cur_cand,
-        mrg_cands, 
-        num_rdo_cands);
+    bool is_duplicate = merge_candidate_in_list(info.merge_cand, cur_cand,
+      mrg_cands, 
+      num_rdo_cands);

-      // Don't try merge candidates that don't satisfy mv constraints.
-      // Don't add duplicates to list
-      if (!fracmv_within_tile(&info, cur_cu->inter.mv[0][0], cur_cu->inter.mv[0][1]) ||
-          !fracmv_within_tile(&info, cur_cu->inter.mv[1][0], cur_cu->inter.mv[1][1]) ||
-          is_duplicate)
-      {
-        continue;
-      }
-
-      kvz_inter_recon_cu(state, lcu, x, y, width, true, false);
-      mrg_costs[num_rdo_cands] = kvz_satd_any_size(width, height,
-        lcu->rec.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH,
-        lcu->ref.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH);
-
-      mrg_cands[num_rdo_cands] = merge_idx;
-      num_rdo_cands++;
+    // Don't try merge candidates that don't satisfy mv constraints.
+    // Don't add duplicates to list
+    if (!fracmv_within_tile(&info, cur_cu->inter.mv[0][0], cur_cu->inter.mv[0][1]) ||
+        !fracmv_within_tile(&info, cur_cu->inter.mv[1][0], cur_cu->inter.mv[1][1]) ||
+        is_duplicate)
+    {
+      continue;
    }

-    // Sort candidates by cost
-    kvz_sort_modes(mrg_cands, mrg_costs, num_rdo_cands);
+    kvz_inter_pred_pu(state, lcu, x_cu, y_cu, width_cu, true, false, i_pu);
+    mrg_costs[num_rdo_cands] = kvz_satd_any_size(width, height,
+      lcu->rec.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH,
+      lcu->ref.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH);

-    // Limit by availability
-    // TODO: Do not limit to just 1
-    num_rdo_cands = MIN(1, num_rdo_cands);
+    mrg_cands[num_rdo_cands] = merge_idx;
+    num_rdo_cands++;
+  }
+
+  // Sort candidates by cost
+  kvz_sort_modes(mrg_cands, mrg_costs, num_rdo_cands);
+
+  // Limit by availability
+  // TODO: Do not limit to just 1
+  num_rdo_cands = MIN(1, num_rdo_cands);
    
-    // Early Skip Mode Decision
-    bool has_chroma = state->encoder_control->chroma_format != KVZ_CSP_400;
-    if (cfg->early_skip) {
-      for (int merge_rdo_idx = 0; merge_rdo_idx < num_rdo_cands; ++merge_rdo_idx) {
+  // Early Skip Mode Decision
+  bool has_chroma = state->encoder_control->chroma_format != KVZ_CSP_400;
+  if (cfg->early_skip && cur_cu->part_size == SIZE_2Nx2N) {
+    for (int merge_rdo_idx = 0; merge_rdo_idx < num_rdo_cands; ++merge_rdo_idx) {

-        // Reconstruct blocks with merge candidate.
-        // Check luma CBF. Then, check chroma CBFs if luma CBF is not set
-        // and chroma exists.
-        // Early terminate if merge candidate with zero CBF is found.
-        int merge_idx = mrg_cands[merge_rdo_idx];
-        cur_cu->inter.mv_dir = info.merge_cand[merge_idx].dir;
-        cur_cu->inter.mv_ref[0] = info.merge_cand[merge_idx].ref[0];
-        cur_cu->inter.mv_ref[1] = info.merge_cand[merge_idx].ref[1];
-        cur_cu->inter.mv[0][0] = info.merge_cand[merge_idx].mv[0][0];
-        cur_cu->inter.mv[0][1] = info.merge_cand[merge_idx].mv[0][1];
-        cur_cu->inter.mv[1][0] = info.merge_cand[merge_idx].mv[1][0];
-        cur_cu->inter.mv[1][1] = info.merge_cand[merge_idx].mv[1][1];
-        kvz_lcu_fill_trdepth(lcu, x, y, depth, MAX(1, depth));
-        kvz_inter_recon_cu(state, lcu, x, y, width, true, false);
-        kvz_quantize_lcu_residual(state, true, false, x, y, depth, cur_cu, lcu);
+      // Reconstruct blocks with merge candidate.
+      // Check luma CBF. Then, check chroma CBFs if luma CBF is not set
+      // and chroma exists.
+      // Early terminate if merge candidate with zero CBF is found.
+      int merge_idx = mrg_cands[merge_rdo_idx];
+      cur_cu->inter.mv_dir = info.merge_cand[merge_idx].dir;
+      cur_cu->inter.mv_ref[0] = info.merge_cand[merge_idx].ref[0];
+      cur_cu->inter.mv_ref[1] = info.merge_cand[merge_idx].ref[1];
+      cur_cu->inter.mv[0][0] = info.merge_cand[merge_idx].mv[0][0];
+      cur_cu->inter.mv[0][1] = info.merge_cand[merge_idx].mv[0][1];
+      cur_cu->inter.mv[1][0] = info.merge_cand[merge_idx].mv[1][0];
+      cur_cu->inter.mv[1][1] = info.merge_cand[merge_idx].mv[1][1];
+      kvz_lcu_fill_trdepth(lcu, x, y, depth, MAX(1, depth));
+      kvz_inter_recon_cu(state, lcu, x, y, width, true, false);
+      kvz_quantize_lcu_residual(state, true, false, x, y, depth, cur_cu, lcu);

-        if (cbf_is_set(cur_cu->cbf, depth, COLOR_Y)) {
-          continue;
-        }
-        else if (has_chroma) {
-          kvz_inter_recon_cu(state, lcu, x, y, width, false, has_chroma);
-          kvz_quantize_lcu_residual(state, false, has_chroma, x, y, depth, cur_cu, lcu);
-          if (!cbf_is_set_any(cur_cu->cbf, depth)) {
-            cur_cu->type = CU_INTER;
-            cur_cu->merge_idx = merge_idx;
-            cur_cu->skipped = true;
-            *inter_cost = 0.0;  // TODO: Check this
-            *inter_bitcost = 0; // TODO: Check this
-            return;
-          }
+      if (cbf_is_set(cur_cu->cbf, depth, COLOR_Y)) {
+        continue;
+      }
+      else if (has_chroma) {
+        kvz_inter_recon_cu(state, lcu, x, y, width, false, has_chroma);
+        kvz_quantize_lcu_residual(state, false, has_chroma, x, y, depth, cur_cu, lcu);
+        if (!cbf_is_set_any(cur_cu->cbf, depth)) {
+          cur_cu->type = CU_INTER;
+          cur_cu->merge_idx = merge_idx;
+          cur_cu->skipped = true;
+          *inter_cost = 0.0;  // TODO: Check this
+          *inter_bitcost = 0; // TODO: Check this
+          return;
        }
      }
    }