Implemented inter search and reconstruction with lcu structure

2024-11-23 18:14:06 +00:00 · 2014-03-03 14:51:36 +02:00 · 2014-03-03 14:51:36 +02:00 · 1f82239367
parent b5fca8b206
commit 1f82239367
3 changed files with 68 additions and 32 deletions
--- a/src/inter.c
+++ b/src/inter.c
@@ -71,15 +71,15 @@ void inter_set_block(picture* pic, uint32_t x_cu, uint32_t y_cu, uint8_t depth,
 * \param ypos block y position
 * \param width block width
 * \param mv[2] motion vector
- * \param dst destination picture
+ * \param lcu destination lcu
 * \returns Void
 */
-void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const int16_t mv_param[2], picture *dst)
+void inter_recon_lcu(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const int16_t mv_param[2], lcu_t *lcu)
 {
  int x,y,coord_x,coord_y;
  int16_t mv[2] = { mv_param[0], mv_param[1] };

-  int32_t dst_width_c = dst->width>>1; //!< Destination picture width in chroma pixels
+  int32_t dst_width_c = LCU_WIDTH>>1; //!< Destination LCU width in chroma pixels
  int32_t ref_width_c = ref->width>>1; //!< Reference picture width in chroma pixels

  // negative overflow flag
@@ -146,8 +146,10 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const in
    // Assign filtered pixels to output, take every second half-pel sample with offset of abs_mv_y/x
    for (halfpel_y = abs_mv_y, y = ypos>>1; y < (ypos + width)>>1; halfpel_y += 2, y++) {
      for (halfpel_x = abs_mv_x, x = xpos>>1; x < (xpos + width)>>1; halfpel_x += 2, x++) {
-        dst->u_recdata[y*dst_width_c + x] = (uint8_t)halfpel_u[halfpel_y*LCU_WIDTH + halfpel_x];
-        dst->v_recdata[y*dst_width_c + x] = (uint8_t)halfpel_v[halfpel_y*LCU_WIDTH + halfpel_x];
+        int x_in_lcu = (x & ((LCU_WIDTH>>1)-1));
+        int y_in_lcu = (y & ((LCU_WIDTH>>1)-1));
+        lcu->rec.u[y_in_lcu*dst_width_c + x_in_lcu] = (uint8_t)halfpel_u[halfpel_y*LCU_WIDTH + halfpel_x];
+        lcu->rec.v[y_in_lcu*dst_width_c + x_in_lcu] = (uint8_t)halfpel_v[halfpel_y*LCU_WIDTH + halfpel_x];
      }
    }
  }
@@ -157,6 +159,9 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const in
    // Copy Luma with boundary checking
    for (y = ypos; y < ypos + width; y++) {
      for (x = xpos; x < xpos + width; x++) {
+        int x_in_lcu = (x & ((LCU_WIDTH)-1));
+        int y_in_lcu = (y & ((LCU_WIDTH)-1));
+
        coord_x = x + mv[0];
        coord_y = y + mv[1];
        overflow_neg_x = (coord_x < 0)?1:0;
@@ -178,9 +183,9 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const in
        } else if (overflow_pos_y) {
          coord_y = ref->height - 1;
        }
-
+        
        // set destination to (corrected) pixel value from the reference
-        dst->y_recdata[y * dst->width + x] = ref->y_recdata[coord_y*ref->width + coord_x];
+        lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = ref->y_recdata[coord_y*ref->width + coord_x];
      }
    }

@@ -189,6 +194,9 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const in
      // TODO: chroma fractional pixel interpolation
      for (y = ypos>>1; y < (ypos + width)>>1; y++) {
        for (x = xpos>>1; x < (xpos + width)>>1; x++) {
+          int x_in_lcu = (x & ((LCU_WIDTH>>1)-1));
+          int y_in_lcu = (y & ((LCU_WIDTH>>1)-1));
+
          coord_x = x + (mv[0]>>1);
          coord_y = y + (mv[1]>>1);

@@ -213,8 +221,8 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const in
          }

          // set destinations to (corrected) pixel value from the reference
-          dst->u_recdata[y*dst_width_c + x] = ref->u_recdata[coord_y*ref_width_c + coord_x];
-          dst->v_recdata[y*dst_width_c + x] = ref->v_recdata[coord_y*ref_width_c + coord_x];
+          lcu->rec.u[y_in_lcu*dst_width_c + x_in_lcu] = ref->u_recdata[coord_y*ref_width_c + coord_x];
+          lcu->rec.v[y_in_lcu*dst_width_c + x_in_lcu] = ref->v_recdata[coord_y*ref_width_c + coord_x];

        }
      }
@@ -222,9 +230,12 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const in
  } else { //If no overflow, we can copy without checking boundaries
    // Copy Luma
    for (y = ypos; y < ypos + width; y++) {
+      int y_in_lcu = (y & ((LCU_WIDTH)-1));
      coord_y = (y + mv[1]) * ref->width; // pre-calculate
      for (x = xpos; x < xpos + width; x++) {
-        dst->y_recdata[y * dst->width + x] = ref->y_recdata[coord_y + x + mv[0]];
+        int x_in_lcu = (x & ((LCU_WIDTH)-1));
+        
+        lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = ref->y_recdata[coord_y + x + mv[0]];
      }
    }

@@ -232,10 +243,12 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const in
      // Copy Chroma
      // TODO: chroma fractional pixel interpolation
      for (y = ypos>>1; y < (ypos + width)>>1; y++) {
+        int y_in_lcu = (y & ((LCU_WIDTH>>1)-1));
        coord_y = (y + (mv[1]>>1)) * ref_width_c; // pre-calculate
        for (x = xpos>>1; x < (xpos + width)>>1; x++) {
-          dst->u_recdata[y*dst_width_c + x] = ref->u_recdata[coord_y + x + (mv[0]>>1)];
-          dst->v_recdata[y*dst_width_c + x] = ref->v_recdata[coord_y + x + (mv[0]>>1)];
+          int x_in_lcu = (x & ((LCU_WIDTH>>1)-1));
+          lcu->rec.u[y_in_lcu*dst_width_c + x_in_lcu] = ref->u_recdata[coord_y + x + (mv[0]>>1)];
+          lcu->rec.v[y_in_lcu*dst_width_c + x_in_lcu] = ref->v_recdata[coord_y + x + (mv[0]>>1)];
        }
      }
    }
--- a/src/inter.h
+++ b/src/inter.h
@@ -31,7 +31,7 @@


 void inter_set_block(picture* pic,uint32_t x_cu, uint32_t y_cu, uint8_t depth, cu_info *cur_cu);
-void inter_recon(picture *ref,int32_t xpos, int32_t ypos,int32_t width, const int16_t mv[2], picture* dst);
+void inter_recon_lcu(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const int16_t mv_param[2], lcu_t *lcu);

 void inter_get_spatial_merge_candidates(encoder_control *encoder, int32_t x_cu, int32_t y_cu, int8_t depth,
                                        cu_info **b0, cu_info **b1,cu_info **b2,cu_info **a0,cu_info **a1);
--- a/src/search.c
+++ b/src/search.c
@@ -279,22 +279,33 @@ static unsigned search_mv_full(unsigned depth,
 }
 #endif

-static void search_inter(encoder_control *encoder, uint16_t x_ctb,
-                         uint16_t y_ctb, uint8_t depth)
+
+
+/**
+ * Update lcu to have best modes at this depth.
+ * \return Cost of best mode.
+ */
+static int search_cu_inter(encoder_control *encoder, int x, int y, int depth, lcu_t *lcu)
 {
  picture *cur_pic = encoder->in.cur_pic;
  int32_t ref_idx = 0;
-  cu_info *cur_cu = &cur_pic->cu_array[depth][x_ctb + y_ctb * (encoder->in.width_in_lcu << MAX_DEPTH)];
+  int x_local = (x&0x3f), y_local = (y&0x3f);
+  int x_cu = x>>3;
+  int y_cu = y>>3;
+  int cu_pos = LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH;
+
+  cu_info *cur_cu = &lcu->cu[cu_pos];
+
  cur_cu->inter.cost = UINT_MAX;

  for (ref_idx = 0; ref_idx < encoder->ref->used_size; ref_idx++) {
    picture *ref_pic = encoder->ref->pics[ref_idx];
    unsigned width_in_scu = NO_SCU_IN_LCU(ref_pic->width_in_lcu);
-    cu_info *ref_cu = &ref_pic->cu_array[MAX_DEPTH][y_ctb * width_in_scu + x_ctb];
+    cu_info *ref_cu = &ref_pic->cu_array[MAX_DEPTH][y_cu * width_in_scu + x_cu];
    uint32_t temp_cost = (int)(g_lambda_cost[encoder->QP] * ref_idx);
    vector2d orig, mv;
-    orig.x = x_ctb * CU_MIN_SIZE_PIXELS;
-    orig.y = y_ctb * CU_MIN_SIZE_PIXELS;
+    orig.x = x_cu * CU_MIN_SIZE_PIXELS;
+    orig.y = y_cu * CU_MIN_SIZE_PIXELS;
    mv.x = 0;
    mv.y = 0;
    if (ref_cu->type == CU_INTER) {
@@ -303,7 +314,7 @@ static void search_inter(encoder_control *encoder, uint16_t x_ctb,
    }

  #if SEARCH_MV_FULL_RADIUS
-    cur_cu->inter.cost = search_mv_full(depth, cur_pic, ref_pic, &orig, &mv);
+    temp_cost += search_mv_full(depth, cur_pic, ref_pic, &orig, &mv);
  #else
    temp_cost += hexagon_search(depth, cur_pic, ref_pic, &orig, &mv);
  #endif
@@ -316,9 +327,9 @@ static void search_inter(encoder_control *encoder, uint16_t x_ctb,
    }
  }

+  return cur_cu->inter.cost;
 }

-
 /**
 * Copy all non-reference CU data from depth+1 to depth.
 */
@@ -453,15 +464,25 @@ static void lcu_set_intra_mode(lcu_t *lcu, int x_px, int y_px, int depth, int pr
  }
 }

-
-/**
- * Update lcu to have best modes at this depth.
- * \return Cost of best mode.
- */
-static int search_cu_inter(encoder_control *encoder, int x, int y, int depth, lcu_t lcu)
+static void lcu_set_inter(lcu_t *lcu, int x_px, int y_px, int depth, cu_info *cur_cu)
 {
-  int cost = MAX_INT;
-  return cost;
+  const int width_cu = LCU_CU_WIDTH >> depth;
+  const int x_cu = SUB_SCU(x_px) >> MAX_DEPTH;
+  const int y_cu = SUB_SCU(y_px) >> MAX_DEPTH;
+  cu_info *const lcu_cu = &lcu->cu[LCU_CU_OFFSET];
+  int x, y;
+
+  // Set inter prediction data in every CU covered by this block at this depth.
+  for (y = y_cu; y < y_cu + width_cu; ++y) {
+    for (x = x_cu; x < x_cu + width_cu; ++x) {
+      cu_info *cu = &lcu_cu[x + y * LCU_T_CU_WIDTH];
+      cu->depth = depth;
+      cu->type = CU_INTER;
+      cu->merged = cur_cu->merged;
+      cu->skipped = cur_cu->skipped;
+      memcpy(&cu->inter, &cur_cu->inter, sizeof(cu_info_inter));
+    }
+  }
 }


@@ -575,6 +596,7 @@ static int search_cu(encoder_control *encoder, int x, int y, int depth, lcu_t wo

  cur_cu = &(&work_tree[depth])->cu[LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH];
  // Assign correct depth
+  cur_cu->skipped = 0; cur_cu->merged = 0;
  cur_cu->depth = depth; cur_cu->tr_depth = depth;
  cur_cu->type = CU_NOTSET; cur_cu->part_size = SIZE_2Nx2N;
  // If the CU is completely inside the frame at this depth, search for
@@ -588,8 +610,8 @@ static int search_cu(encoder_control *encoder, int x, int y, int depth, lcu_t wo
        depth >= MIN_INTER_SEARCH_DEPTH &&
        depth <= MAX_INTER_SEARCH_DEPTH)
    {
-      int mode_cost = search_cu_inter(encoder, x, y, depth, work_tree[depth]);
-      if (mode_cost < cost) {
+      int mode_cost = search_cu_inter(encoder, x, y, depth, &work_tree[depth]);
+      if (0&&mode_cost < cost) {
        cost = mode_cost;
        cur_cu->type = CU_INTER;
      }
@@ -611,7 +633,8 @@ static int search_cu(encoder_control *encoder, int x, int y, int depth, lcu_t wo
      lcu_set_intra_mode(&work_tree[depth], x, y, depth, cur_cu->intra[0].mode, cur_cu->part_size);
      intra_recon_lcu(encoder, x, y, depth,&work_tree[depth],encoder->in.cur_pic->width,encoder->in.cur_pic->height);
    } else if (cur_cu->type == CU_INTER) {
-      // TODO
+      lcu_set_inter(&work_tree[depth], x, y, depth, cur_cu);            
+      inter_recon_lcu(encoder->ref->pics[cur_cu->inter.mv_ref], x, y, LCU_WIDTH>>depth, cur_cu->inter.mv, &work_tree[depth]);
    }
  }