Try and get NxN to work again.

- Moved the NxN search to be done on the same level as the other searches, as it's really not any different from 2Nx2N.
- Produces a working bitstream, but the reconstruction is different.
2024-11-27 19:24:06 +00:00 · 2014-03-11 19:19:20 +02:00 · 2014-03-11 19:19:20 +02:00 · 409b094acf
parent d2d877933a
commit 409b094acf
4 changed files with 64 additions and 127 deletions
--- a/src/encoder.c
+++ b/src/encoder.c
@ -1944,7 +1944,6 @@ void encode_transform_tree(encoder_control* encoder, int32_t x, int32_t y, uint8

    // Check for non-zero coeffs
    cbf_y = 0;
-    memset(cur_cu->coeff_top_y, 0, MAX_PU_DEPTH + 4);
    for (i = 0; i < width * width; i++) {
      if (coeff_y[i] != 0) {
        // Found one, we can break here
@ -1955,8 +1954,12 @@ void encode_transform_tree(encoder_control* encoder, int32_t x, int32_t y, uint8
            cur_cu->coeff_top_y[d] = 1;
          }
        } else {
-          int pu_index = x_pu&1 + 2 * (y_pu&1);
+          int pu_index = (x_pu & 1) + 2 * (y_pu & 1);
+          int d;
          cur_cu->coeff_top_y[depth + pu_index] = 1;
+          for (d = 0; d < depth; ++d) {
+            cur_cu->coeff_top_y[d] = 1;
+          }
        }
        break;
      }
@ -2014,7 +2017,6 @@ void encode_transform_tree(encoder_control* encoder, int32_t x, int32_t y, uint8
      }

      transform_chroma(encoder, cur_cu, chroma_depth, base_u, pred_u, coeff_u, scan_idx_chroma, pre_quant_coeff, block);
-      memset(cur_cu->coeff_top_u, 0, MAX_PU_DEPTH + 4);
      for (i = 0; i < chroma_size; i++) {
        if (coeff_u[i] != 0) {
          int d;
@ -2025,7 +2027,6 @@ void encode_transform_tree(encoder_control* encoder, int32_t x, int32_t y, uint8
        }
      }
      transform_chroma(encoder, cur_cu, chroma_depth, base_v, pred_v, coeff_v, scan_idx_chroma, pre_quant_coeff, block);
-      memset(cur_cu->coeff_top_v, 0, MAX_PU_DEPTH + 4);
      for (i = 0; i < chroma_size; i++) {
        if (coeff_v[i] != 0) {
          int d;
--- a/src/global.h
+++ b/src/global.h
@ -59,7 +59,7 @@ typedef int16_t coefficient;
 #define MAX_INTER_SEARCH_DEPTH 3
 #define MIN_INTER_SEARCH_DEPTH 0

-#define MAX_INTRA_SEARCH_DEPTH 3 /*!< Max search depth -> min block size (3 == 8x8) */
+#define MAX_INTRA_SEARCH_DEPTH 4 /*!< Max search depth -> min block size (3 == 8x8) */
 #define MIN_INTRA_SEARCH_DEPTH 1 /*!< Min search depth -> max block size (0 == 64x64) */


@ -99,6 +99,7 @@ typedef int16_t coefficient;
 #define NO_SCU_IN_LCU(no_lcu) ((no_lcu) << MAX_DEPTH)
 #define WITHIN(val, min_val, max_val) ((min_val) <= (val) && (val) <= (max_val))
 #define UNREFERENCED_PARAMETER(p) (p)
+#define PU_INDEX(x_pu, y_pu) (((x_pu) % 2)  + 2 * ((y_pu) % 2))

 #define LOG2_LCU_WIDTH 6
 // CU_TO_PIXEL = y * lcu_width * pic_width + x * lcu_width
--- a/src/intra.c
+++ b/src/intra.c
@ -100,8 +100,6 @@ pixel intra_get_dc_pred(pixel *pic, uint16_t picwidth, uint8_t width)
  return (pixel)((sum + width) / (width + width));
 }

-#define PU_INDEX(x_pu, y_pu) (((x_pu) % 2)  + 2 * ((y_pu) % 2))
-
 /**
 * \brief Function for deriving intra luma predictions
 * \param pic picture to use
@ -120,24 +118,24 @@ int8_t intra_get_dir_luma_predictor(uint32_t x, uint32_t y, int8_t* preds,
  int8_t left_intra_dir  = 1;
  int8_t above_intra_dir = 1;

-  if (cur_cu->part_size == SIZE_NxN && (x & 7) == 1) {
+  if (x & 4) {
    // If current CU is NxN and PU is on the right half, take mode from the
    // left half of the same CU.
-    left_intra_dir = cur_cu->intra[PU_INDEX(0, y_cu<<1)].mode;
+    left_intra_dir = cur_cu->intra[PU_INDEX(0, y >> 2)].mode;
  } else if (left_cu && left_cu->type == CU_INTRA) {
    // Otherwise take the mode from the right side of the CU on the left.
-    left_intra_dir = left_cu->intra[PU_INDEX(1, y_cu<<1)].mode;
+    left_intra_dir = left_cu->intra[PU_INDEX(1, y >> 2)].mode;
  }

-  if (cur_cu->part_size == SIZE_NxN && (y & 7) == 1) {
+  if (y & 4) {
    // If current CU is NxN and PU is on the bottom half, take mode from the
    // top half of the same CU.
-    above_intra_dir = cur_cu->intra[PU_INDEX(x_cu<<1, 0)].mode;
+    above_intra_dir = cur_cu->intra[PU_INDEX(x >> 2, 0)].mode;
  } else if (above_cu && above_cu->type == CU_INTRA &&
             (y_cu * (LCU_WIDTH>>MAX_DEPTH)) % LCU_WIDTH != 0)
  {
    // Otherwise take the mode from the bottom half of the CU above.
-    above_intra_dir = above_cu->intra[PU_INDEX(x_cu<<1, 1)].mode;
+    above_intra_dir = above_cu->intra[PU_INDEX(x >> 2, 1)].mode;
  }

  // If the predictions are the same, add new predictions
@ -158,7 +156,7 @@ int8_t intra_get_dir_luma_predictor(uint32_t x, uint32_t y, int8_t* preds,
    // add planar mode if it's not yet present
    if (left_intra_dir && above_intra_dir ) {
      preds[2] = 0; // PLANAR_IDX;
-    } else { // else we add 26 or 1
+    } else {  // Add DC mode if it's not present, otherwise 26.
      preds[2] =  (left_intra_dir+above_intra_dir)<2? 26 : 1;
    }
  }
@ -774,69 +772,50 @@ void intra_recon_lcu(encoder_control* encoder, int x, int y, int depth, lcu_t *l
  pixel *rec_shift  = &rec[(LCU_WIDTH >> (depth)) * 2 + 8 + 1];

  int8_t width = LCU_WIDTH >> depth;
-  int8_t width_c = LCU_WIDTH >> (depth + 1);
+  int8_t width_c = (depth == MAX_PU_DEPTH ? width : width / 2);
  static vector2d offsets[4] = {{0,0},{1,0},{0,1},{1,1}};
  int num_pu = (cur_cu->part_size == SIZE_2Nx2N ? 1 : 4);
-  int i;
-
-  if (cur_cu->part_size == SIZE_NxN) {
-    width = width_c;
-  }
+  int i = PU_INDEX(x >> 2, y >> 2);

  cur_cu->intra[0].mode_chroma = 36; // TODO: Chroma intra prediction

-  // Reconstruct chroma
-  rec_shift  = &rec[width_c * 2 + 8 + 1];
-  intra_build_reference_border(x, y,(int16_t)width_c * 2 + 8, rec, (int16_t)width_c * 2 + 8, 1,
-                                   pic_width/2, pic_height/2, lcu);
-  intra_recon(rec_shift,
-              width_c * 2 + 8,
-              width_c,
-              recbase_u,
-              rec_stride >> 1,
-              cur_cu->intra[0].mode_chroma != 36 ? cur_cu->intra[0].mode_chroma : cur_cu->intra[0].mode,
-              1);
+  // Reconstruct chroma.
+  if (!(x & 4 || y & 4)) {
+    rec_shift  = &rec[width_c * 2 + 8 + 1];
+    intra_build_reference_border(x, y,(int16_t)width_c * 2 + 8, rec, (int16_t)width_c * 2 + 8, 1,
+                                     pic_width/2, pic_height/2, lcu);
+    intra_recon(rec_shift,
+                width_c * 2 + 8,
+                width_c,
+                recbase_u,
+                rec_stride >> 1,
+                cur_cu->intra[0].mode_chroma != 36 ? cur_cu->intra[0].mode_chroma : cur_cu->intra[0].mode,
+                1);

-  intra_build_reference_border(x, y,(int16_t)width_c * 2 + 8, rec, (int16_t)width_c * 2 + 8, 2,
-                                   pic_width/2, pic_height/2, lcu);
-  intra_recon(rec_shift,
-              width_c * 2 + 8,
-              width_c,
-              recbase_v,
-              rec_stride >> 1,
-              cur_cu->intra[0].mode_chroma != 36 ? cur_cu->intra[0].mode_chroma : cur_cu->intra[0].mode,
-              2);
-
-  for (i = 0; i < num_pu; ++i) {
-    // Build reconstructed block to use in prediction with extrapolated borders
-    int x_off = offsets[i].x * width;
-    int y_off = offsets[i].y * width;
-    recbase_y = &lcu->rec.y[x_local + x_off + (y_local+y_off) * LCU_WIDTH];
-
-    rec_shift  = &rec[width * 2 + 8 + 1];
-    intra_build_reference_border(x+x_off, y+y_off,(int16_t)width * 2 + 8, rec, (int16_t)width * 2 + 8, 0,
-                                 pic_width, pic_height, lcu);
-    intra_recon(rec_shift, width * 2 + 8,
-                width, recbase_y, rec_stride, cur_cu->intra[i].mode, 0);
-
-    // Filter DC-prediction
-    if (cur_cu->intra[i].mode == 1 && width < 32) {
-      intra_dc_pred_filtering(rec_shift, width * 2 + 8, recbase_y,
-                              rec_stride, width, width);
-    }
-
-    // Handle NxN mode by doing quant/transform and inverses for the next NxN block
-    if (cur_cu->part_size == SIZE_NxN) {
-      encode_transform_tree(encoder, x + x_off, y + y_off, depth+1, lcu);
-    }
+    intra_build_reference_border(x, y,(int16_t)width_c * 2 + 8, rec, (int16_t)width_c * 2 + 8, 2,
+                                     pic_width/2, pic_height/2, lcu);
+    intra_recon(rec_shift,
+                width_c * 2 + 8,
+                width_c,
+                recbase_v,
+                rec_stride >> 1,
+                cur_cu->intra[0].mode_chroma != 36 ? cur_cu->intra[0].mode_chroma : cur_cu->intra[0].mode,
+                2);
  }

-  // If we coded NxN block, fetch the coded block flags to this level
-  if (cur_cu->part_size == SIZE_NxN) {
-    cur_cu->coeff_top_y[depth] = cur_cu->coeff_top_y[depth+1] | cur_cu->coeff_top_y[depth+2] | cur_cu->coeff_top_y[depth+3] | cur_cu->coeff_top_y[depth+4];
-    cur_cu->coeff_top_u[depth] = cur_cu->coeff_top_u[depth+1];
-    cur_cu->coeff_top_v[depth] = cur_cu->coeff_top_v[depth+1];
-    return;
+  // Build reconstructed block to use in prediction with extrapolated borders
+  recbase_y = &lcu->rec.y[x_local + y_local * LCU_WIDTH];
+
+  rec_shift  = &rec[width * 2 + 8 + 1];
+  intra_build_reference_border(x, y,(int16_t)width * 2 + 8, rec, (int16_t)width * 2 + 8, 0,
+                                pic_width, pic_height, lcu);
+  intra_recon(rec_shift, width * 2 + 8,
+              width, recbase_y, rec_stride, cur_cu->intra[i].mode, 0);
+
+  // Filter DC-prediction
+  if (cur_cu->intra[i].mode == 1 && width < 32) {
+    intra_dc_pred_filtering(rec_shift, width * 2 + 8, recbase_y,
+                            rec_stride, width, width);
  }

  encode_transform_tree(encoder, x, y, depth, lcu);
--- a/src/search.c
+++ b/src/search.c
@ -572,11 +572,11 @@ static void lcu_set_intra_mode(lcu_t *lcu, int x_px, int y_px, int depth, int pr
  // NxN can only be applied to a single CU at a time.
  if (part_mode == SIZE_NxN) {
    cu_info *cu = &lcu_cu[x_cu + y_cu * LCU_T_CU_WIDTH];
-    cu->depth = depth;
+    cu->depth = MAX_DEPTH;
    cu->type = CU_INTRA;
    // It is assumed that cu->intra[].mode's are already set.
    cu->part_size = part_mode;
-    cu->tr_depth = depth + 1;
+    cu->tr_depth = depth;
    return;
  }

@ -691,62 +691,16 @@ static int search_cu_intra(encoder_control *encoder,
    uint32_t cost = -1;
    int16_t mode = -1;
    pixel *ref_pixels = &lcu->ref.y[lcu_px.x + lcu_px.y * LCU_WIDTH];
+    unsigned pu_index = PU_INDEX(x_px >> 2, y_px >> 2);
    mode = intra_prediction(ref_pixels, LCU_WIDTH,
                            cu_in_rec_buffer, cu_width * 2 + 8, cu_width,
                            pred_buffer, cu_width,
                            &cost, candidate_modes);
-    cur_cu->intra[0].mode = (int8_t)mode;
-    cur_cu->intra[0].cost = cost;
-    cur_cu->part_size = SIZE_2Nx2N;
+    cur_cu->intra[pu_index].mode = (int8_t)mode;
+    cur_cu->intra[pu_index].cost = cost;
  }

-  // Do search for NxN split.
-  if (0 && depth == MAX_DEPTH) { //TODO: reactivate NxN when _something_ is done to make it better
-    static const vector2d offsets[4] = {{0,0},{4,0},{0,4},{4,4}};
-    const int nxn_width = 4;
-
-    // Save 2Nx2N information to compare with NxN.
-    int nn_cost = cur_cu->intra[0].cost;
-    int8_t nn_mode = cur_cu->intra[0].mode;
-    int cost = (int)(g_cur_lambda_cost * 4.5);  // +0.5 to round to nearest
-
-    int nxn_i;
-
-    cu_in_rec_buffer = &rec_buffer[nxn_width * 2 + 8 + 1];
-
-    for (nxn_i = 0; nxn_i < 4; ++nxn_i) {
-      const vector2d nxn_px = { x_px + offsets[nxn_i].x,
-                                y_px + offsets[nxn_i].y };
-      intra_get_dir_luma_predictor(nxn_px.x, nxn_px.y, candidate_modes,
-                                   cur_cu, left_cu, above_cu);
-      intra_build_reference_border(nxn_px.x, nxn_px.y, nxn_width * 2 + 8,
-                                   rec_buffer, nxn_width * 2 + 8, 0,
-                                   encoder->in.cur_pic->width, encoder->in.cur_pic->height,
-                                   lcu);
-      {
-        uint32_t nxn_cost = -1;
-        int16_t nxn_mode = -1;
-        pixel *ref_pixels = &lcu->ref.y[nxn_px.x + nxn_px.y * LCU_WIDTH];
-        nxn_mode = intra_prediction(ref_pixels, encoder->in.width,
-                                    cu_in_rec_buffer, nxn_width * 2 + 8, nxn_width,
-                                    pred_buffer, nxn_width,
-                                    &nxn_cost, candidate_modes);
-        cur_cu->intra[nxn_i].mode = (int8_t)nxn_mode;
-        cost += nxn_cost;
-      }
-    }
-
-    // Choose between 2Nx2N and NxN.
-    if (nn_cost <= cost) {
-      cur_cu->intra[0].cost = nn_cost;
-      cur_cu->intra[0].mode = nn_mode;
-    } else {
-      cur_cu->intra[0].cost = cost;
-      cur_cu->part_size = SIZE_NxN;
-    }
-  }
-
-  return cur_cu->intra[0].cost;
+  return cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].cost;
 }


@ -775,8 +729,10 @@ static int search_cu(encoder_control *encoder, int x, int y, int depth, lcu_t wo

  cur_cu = &(&work_tree[depth])->cu[LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH];
  // Assign correct depth
-  cur_cu->depth = depth; cur_cu->tr_depth = depth ? depth : 1;
-  cur_cu->type = CU_NOTSET; cur_cu->part_size = SIZE_2Nx2N;
+  cur_cu->depth = depth > MAX_DEPTH ? MAX_DEPTH : depth;
+  cur_cu->tr_depth = depth > 0 ? depth : 1;
+  cur_cu->type = CU_NOTSET;
+  cur_cu->part_size = depth > MAX_DEPTH ? SIZE_NxN : SIZE_2Nx2N;
  // If the CU is completely inside the frame at this depth, search for
  // prediction modes at this depth.
  if (x + cu_width <= encoder->in.width &&
@ -808,7 +764,7 @@ static int search_cu(encoder_control *encoder, int x, int y, int depth, lcu_t wo
    // Reconstruct best mode because we need the reconstructed pixels for
    // mode search of adjacent CUs.
    if (cur_cu->type == CU_INTRA) {
-      lcu_set_intra_mode(&work_tree[depth], x, y, depth, cur_cu->intra[0].mode, cur_cu->part_size);
+      lcu_set_intra_mode(&work_tree[depth], x, y, depth, cur_cu->intra[PU_INDEX(x >> 2, y >> 2)].mode, cur_cu->part_size);
      intra_recon_lcu(encoder, x, y, depth,&work_tree[depth],encoder->in.cur_pic->width,encoder->in.cur_pic->height);
    } else if (cur_cu->type == CU_INTER) {
      inter_recon_lcu(encoder->ref->pics[cur_cu->inter.mv_ref], x, y, LCU_WIDTH>>depth, cur_cu->inter.mv, &work_tree[depth]);
@ -1046,11 +1002,11 @@ static void copy_lcu_to_cu_data(encoder_control *encoder, int x_px, int y_px, co
 */
 static void search_lcu(encoder_control *encoder, int x, int y)
 {
-  lcu_t work_tree[MAX_PU_DEPTH];
+  lcu_t work_tree[MAX_PU_DEPTH + 1];
  int depth;
-  memset(work_tree, 0, sizeof(lcu_t)*MAX_PU_DEPTH);
  // Initialize work tree.
-  for (depth = 0; depth < MAX_PU_DEPTH; ++depth) {
+  for (depth = 0; depth <= MAX_PU_DEPTH; ++depth) {
+    memset(&work_tree[depth], 0, sizeof(work_tree[depth]));
    init_lcu_t(encoder, x, y, &work_tree[depth]);
  }