From b4181dd3984a35c2feb82e1f77968ecf6da8a961 Mon Sep 17 00:00:00 2001
From: Marko Viitanen <fador@iki.fi>
Date: Wed, 26 Feb 2014 14:57:57 +0200
Subject: [PATCH] Fixed problems with the new structure and commented out old
 search

---
 src/encoder.c | 39 ++++++++++++++++++---------
 src/encoder.h |  3 +--
 src/intra.c   | 23 +++-------------
 src/intra.h   |  8 +++---
 src/picture.h | 50 +++++++++++++++++++++++++++++++++++
 src/search.c  | 73 ++++++++++++++++++---------------------------------
 6 files changed, 111 insertions(+), 85 deletions(-)

diff --git a/src/encoder.c b/src/encoder.c
index 86a1938f..9b68bb69 100644
--- a/src/encoder.c
+++ b/src/encoder.c
@@ -1556,10 +1556,21 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb,
     // 5 EP bins with the full predmode
     for (j = 0; j < num_pred_units; ++j) {
       static const vector2d offset[4] = {{0,0},{1,0},{0,1},{1,1}};
-      intra_get_dir_luma_predictor(encoder->in.cur_pic,
-                                   x_ctb * 2 + offset[j].x,
-                                   y_ctb * 2 + offset[j].y,
-                                   intra_preds[j]);
+      cu_info *left_cu = 0;
+      cu_info *above_cu = 0;
+      // The left neighbour is read at x_ctb - 1, so it only exists when x_ctb > 0.
+      if (x_ctb > 0) {
+        left_cu = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_ctb - 1 + y_ctb * (encoder->in.width_in_lcu << MAX_DEPTH)];
+      }
+      // Don't take the above CU across the LCU boundary.
+      if (y_ctb > 0 && (y_ctb & 7) != 0) {
+        above_cu = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_ctb + (y_ctb - 1) * (encoder->in.width_in_lcu << MAX_DEPTH)];
+      }
+
+      intra_get_dir_luma_predictor((x_ctb<<3) + (offset[j].x<<2),
+                                   (y_ctb<<3) + (offset[j].y<<2),
+                                   intra_preds[j], cur_cu,
+                                   left_cu, above_cu);
       for (i = 0; i < 3; i++) {
         if (intra_preds[j][i] == intra_pred_mode[j]) {
           mpm_preds[j] = (int8_t)i;
@@ -1769,7 +1780,7 @@ void encode_transform_tree(encoder_control* encoder, int32_t x, int32_t y, uint8
 {
   // we have 64>>depth transform size
   int x_local = (x&0x3f), y_local = (y&0x3f);
-  cu_info *cur_cu = &lcu->cu[LCU_CU_OFFSET + (x_local>>3) + (y_local>>3)*LCU_CU_STRUCT_WIDTH];
+  cu_info *cur_cu = &lcu->cu[LCU_CU_OFFSET + (x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH];
   
   int i;
   int8_t width = LCU_WIDTH>>depth;
@@ -1778,10 +1789,10 @@ void encode_transform_tree(encoder_control* encoder, int32_t x, int32_t y, uint8
   // Split transform and increase depth
   if (depth == 0 || cur_cu->tr_depth > depth) {
     int offset = 1 << (MAX_DEPTH - (depth + 1));
-    encode_transform_tree_lcu(encoder, x,          y,          depth+1, lcu);
-    encode_transform_tree_lcu(encoder, x + offset, y,          depth+1, lcu);
-    encode_transform_tree_lcu(encoder, x,          y + offset, depth+1, lcu);
-    encode_transform_tree_lcu(encoder, x + offset, y + offset, depth+1, lcu);
+    encode_transform_tree(encoder, x,          y,          depth+1, lcu);
+    encode_transform_tree(encoder, x + offset, y,          depth+1, lcu);
+    encode_transform_tree(encoder, x,          y + offset, depth+1, lcu);
+    encode_transform_tree(encoder, x + offset, y + offset, depth+1, lcu);
 
     // Derive coded coeff flags from the next depth
     if (depth == MAX_DEPTH) {
@@ -1789,9 +1800,9 @@ void encode_transform_tree(encoder_control* encoder, int32_t x, int32_t y, uint8
       cur_cu->coeff_top_u[depth] = cur_cu->coeff_top_u[depth+1];
       cur_cu->coeff_top_v[depth] = cur_cu->coeff_top_v[depth+1];
     } else {
-      cu_info *cu_a =  &lcu->cu[LCU_CU_OFFSET + (x_local + offset)>>3 + (y_local>>3)         *LCU_CU_STRUCT_WIDTH];
-      cu_info *cu_b =  &lcu->cu[LCU_CU_OFFSET + x_local>>3            + ((y_local+offset)>>3)*LCU_CU_STRUCT_WIDTH];
-      cu_info *cu_c =  &lcu->cu[LCU_CU_OFFSET + (x_local + offset)>>3 + ((y_local+offset)>>3)*LCU_CU_STRUCT_WIDTH];
+      cu_info *cu_a =  &lcu->cu[LCU_CU_OFFSET + ((x_local + offset)>>3) + (y_local>>3)         *LCU_T_CU_WIDTH];
+      cu_info *cu_b =  &lcu->cu[LCU_CU_OFFSET + (x_local>>3)            + ((y_local+offset)>>3)*LCU_T_CU_WIDTH];
+      cu_info *cu_c =  &lcu->cu[LCU_CU_OFFSET + ((x_local + offset)>>3) + ((y_local+offset)>>3)*LCU_T_CU_WIDTH];
       cur_cu->coeff_top_y[depth] = cur_cu->coeff_top_y[depth+1] | cu_a->coeff_top_y[depth+1] | cu_b->coeff_top_y[depth+1]
                                     | cu_c->coeff_top_y[depth+1];
       cur_cu->coeff_top_u[depth] = cur_cu->coeff_top_u[depth+1] | cu_a->coeff_top_u[depth+1] | cu_b->coeff_top_u[depth+1]
@@ -2573,6 +2584,7 @@ void encode_last_significant_xy(encoder_control *encoder,
 /**
  * \brief This function reconstructs inter/intra predictions and produces coded residual to the buffer
  */
+/*
 void encode_block_residual(encoder_control *encoder,
                            uint16_t x_ctb, uint16_t y_ctb, uint8_t depth)
 {
@@ -2584,7 +2596,7 @@ void encode_block_residual(encoder_control *encoder,
   uint8_t border_y = ((encoder->in.height) < (y_ctb * (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> depth))) ? 1 : 0;
   uint8_t border_split_x = ((encoder->in.width)  < ((x_ctb + 1) * (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> (depth + 1)))) ? 0 : 1;
   uint8_t border_split_y = ((encoder->in.height) < ((y_ctb + 1) * (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> (depth + 1)))) ? 0 : 1;
-  uint8_t border = border_x | border_y; /*!< are we in any border CU */
+  uint8_t border = border_x | border_y; //!< are we in any border CU
 
   // When not in MAX_DEPTH, insert split flag and split the blocks if needed
   if (depth != MAX_DEPTH) {
@@ -2783,3 +2795,4 @@ void encode_block_residual(encoder_control *encoder,
   }
 
 }
+*/
\ No newline at end of file
diff --git a/src/encoder.h b/src/encoder.h
index 57f425d0..99a1ec1b 100644
--- a/src/encoder.h
+++ b/src/encoder.h
@@ -120,8 +120,7 @@ void encode_last_significant_xy(encoder_control *encoder, uint8_t lastpos_x,
                                 uint8_t type, uint8_t scan);
 void encode_coeff_nxn(encoder_control *encoder, int16_t *coeff, uint8_t width,
                       uint8_t type, int8_t scan_mode);
-void encode_transform_tree(encoder_control *encoder, int32_t x_cu, int32_t y_cu,
-                           uint8_t depth);
+void encode_transform_tree(encoder_control* encoder, int32_t x, int32_t y, uint8_t depth, lcu_t *lcu);
 void encode_transform_coeff(encoder_control *encoder, int32_t x_cu, int32_t y_cu,
                             int8_t depth, int8_t tr_depth, uint8_t parent_coeff_u, uint8_t parent_coeff_v);
 void encode_block_residual(encoder_control *encoder,
diff --git a/src/intra.c b/src/intra.c
index 026197d9..751aea57 100644
--- a/src/intra.c
+++ b/src/intra.c
@@ -110,7 +110,8 @@ pixel intra_get_dc_pred(pixel *pic, uint16_t picwidth, uint8_t width)
  * \param preds output buffer for 3 predictions
  * \returns (predictions are found)?1:0
  */
-int8_t intra_get_dir_luma_predictor(lcu_t* lcu, uint32_t x, uint32_t y, int8_t* preds)
+int8_t intra_get_dir_luma_predictor(uint32_t x, uint32_t y, int8_t* preds,
+                                    cu_info* cur_cu, cu_info* left_cu, cu_info* above_cu)
 {
   int x_cu = x>>3;
   int y_cu = y>>3;
@@ -119,22 +120,6 @@ int8_t intra_get_dir_luma_predictor(lcu_t* lcu, uint32_t x, uint32_t y, int8_t*
   int8_t left_intra_dir  = 1;
   int8_t above_intra_dir = 1;
     
-  int32_t cu_pos = ((x&0x3f)>>3) + ((y&0x3f)>>3) * LCU_CU_STRUCT_WIDTH;
-
-  cu_info* cur_cu = &lcu->cu[LCU_CU_OFFSET+cu_pos];
-  cu_info* left_cu = 0;
-  cu_info* above_cu = 0;
-
-  if (x_cu > 0) {
-    left_cu = &lcu->cu[LCU_CU_OFFSET + cu_pos - 1];
-  }
-  // Don't take the above CU across the LCU boundary.
-  if (y_cu > 0 &&
-      ((y_cu * (LCU_WIDTH>>MAX_DEPTH)) % LCU_WIDTH) != 0)
-  {
-    above_cu = &lcu->cu[LCU_CU_OFFSET + cu_pos - LCU_CU_STRUCT_WIDTH];
-  }
-
   if (cur_cu->part_size == SIZE_NxN && (x & 7) == 1) {
     // If current CU is NxN and PU is on the right half, take mode from the
     // left half of the same CU.
@@ -769,7 +754,7 @@ void intra_get_planar_pred(pixel* src, int32_t srcstride, uint32_t width, pixel*
 void intra_recon_lcu(encoder_control* encoder, int x, int y, int depth, lcu_t *lcu, uint32_t pic_width, uint32_t pic_height)
 {
   int x_local = (x&0x3f), y_local = (y&0x3f);
-  cu_info *cur_cu = &lcu->cu[LCU_CU_OFFSET + x_local>>3 + (y_local>>3)*LCU_CU_STRUCT_WIDTH];
+  cu_info *cur_cu = &lcu->cu[LCU_CU_OFFSET + (x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH];
   
   // Pointers to reconstruction arrays
   pixel *recbase_y = &lcu->rec.y[x_local + y_local * LCU_WIDTH];
@@ -835,7 +820,7 @@ void intra_recon_lcu(encoder_control* encoder, int x, int y, int depth, lcu_t *l
 
     // Handle NxN mode by doing quant/transform and inverses for the next NxN block
     if (cur_cu->part_size == SIZE_NxN) {
-      //encode_transform_tree_lcu(encoder, x + x_off, y + y_off, depth+1, lcu);
+      encode_transform_tree(encoder, x + x_off, y + y_off, depth+1, lcu);
     }
   }
 
diff --git a/src/intra.h b/src/intra.h
index bc3dde79..1bf797bf 100644
--- a/src/intra.h
+++ b/src/intra.h
@@ -27,11 +27,12 @@
 #include "global.h"
 
 #include "picture.h"
-
+#include "encoder.h"
 
 void intra_set_block_mode(picture* pic,uint32_t x_ctb, uint32_t y_ctb, uint8_t depth, uint8_t mode, uint8_t part_mode);
 
-int8_t intra_get_dir_luma_predictor(lcu_t* lcu, uint32_t x, uint32_t y, int8_t* preds);
+int8_t intra_get_dir_luma_predictor(uint32_t x, uint32_t y, int8_t* preds,
+                                    cu_info* cur_cu, cu_info* left_cu, cu_info* above_cu);
 void intra_dc_pred_filtering(pixel* src, int32_t src_stride, pixel* dst, int32_t dst_stride, int32_t width, int32_t height );
 
 void intra_build_reference_border(int32_t x_luma, int32_t y_luma, int16_t out_width, pixel *dst, int32_t dst_stride, int8_t chroma, int32_t pic_width, int32_t pic_height, lcu_t *lcu);
@@ -46,6 +47,7 @@ void intra_get_planar_pred(pixel* src,int32_t srcstride, uint32_t width, pixel*
 void intra_get_angular_pred(pixel* src, int32_t src_stride, pixel* p_dst, int32_t dst_stride, int32_t width, int32_t dir_mode, int8_t filter);
 
 void intra_recon(pixel* rec, uint32_t rec_stride, uint32_t width, pixel* dst, int32_t dst_stride, int8_t mode, int8_t chroma);
-void intra_recon_lcu(encoder_control* encoder, int x, int y, int depth, lcu_t *lcu, uint32_t pic_width, uint32_t pic_height);
+
+void intra_recon_lcu(encoder_control *encoder, int32_t x, int32_t y, int32_t depth, lcu_t *lcu, uint32_t pic_width, uint32_t pic_height);
 
 #endif
diff --git a/src/picture.h b/src/picture.h
index 2c76ac23..0d1f31cb 100644
--- a/src/picture.h
+++ b/src/picture.h
@@ -146,6 +146,56 @@ typedef struct
 } picture_list;
 
 
+
+#define SUB_SCU_BIT_MASK (64 - 1)  //!< Mask selecting the low six bits (values 0..63).
+#define SUB_SCU(xy) ((xy) & SUB_SCU_BIT_MASK)  //!< xy masked with SUB_SCU_BIT_MASK.
+#define LCU_CU_WIDTH 8    //!< NOTE(review): presumably the LCU width in CUs - confirm.
+#define LCU_T_CU_WIDTH 9  //!< Row stride of the 9x9 lcu_t::cu array.
+#define LCU_CU_OFFSET 10  //!< Index in lcu_t::cu of row 1, column 1 (first CU past the reference row/column).
+
+// Reference width counted from the LCU's top-left corner; buffers add +1 for the corner pixel.
+#define LCU_REF_PX_WIDTH (LCU_WIDTH + LCU_WIDTH / 2)
+
+/**
+ * Top and left intra reference pixels for an LCU.
+ * - Intra needs at most 32 pixels beyond the LCU border, to the right and down.
+ * - The first element of each array is the top-left (corner) pixel.
+ */
+typedef struct {
+  pixel y[LCU_REF_PX_WIDTH + 1];
+  pixel u[LCU_REF_PX_WIDTH / 2 + 1];
+  pixel v[LCU_REF_PX_WIDTH / 2 + 1];
+} lcu_ref_px_t;
+
+typedef struct {
+  coefficient y[LCU_LUMA_SIZE];   //!< Sized like lcu_yuv_t::y, not like a reference border.
+  coefficient u[LCU_CHROMA_SIZE]; //!< Sized like lcu_yuv_t::u.
+  coefficient v[LCU_CHROMA_SIZE]; //!< Sized like lcu_yuv_t::v.
+} lcu_coeff_t;
+
+typedef struct {
+  pixel y[LCU_LUMA_SIZE];   //!< Y plane; callers index it as x + y * LCU_WIDTH.
+  pixel u[LCU_CHROMA_SIZE]; //!< U plane, LCU_CHROMA_SIZE samples.
+  pixel v[LCU_CHROMA_SIZE]; //!< V plane, LCU_CHROMA_SIZE samples.
+} lcu_yuv_t;
+
+typedef struct {
+  lcu_ref_px_t top_ref;  //!< Top reference pixels from adjacent LCUs.
+  lcu_ref_px_t left_ref; //!< Left reference pixels from adjacent LCUs.
+  lcu_yuv_t ref; //!< LCU reference pixels
+  lcu_yuv_t rec; //!< LCU reconstructed pixels
+  lcu_coeff_t coeff; //!< LCU coefficients
+
+  /**
+   * A 9x9 CU array for the LCU, plus one extra CU (row stride LCU_T_CU_WIDTH).
+   * - Top reference CUs on row 0.
+   * - Left reference CUs on column 0.
+   * - The LCU's own CUs on rows 1-8, columns 1-8, starting at LCU_CU_OFFSET.
+   * - Top right reference CU in the last slot (index 9*9).
+   */
+  cu_info cu[9*9+1];
+} lcu_t;
+
 //////////////////////////////////////////////////////////////////////////
 // FUNCTIONS
 
diff --git a/src/search.c b/src/search.c
index cc3c26fd..1213deeb 100644
--- a/src/search.c
+++ b/src/search.c
@@ -34,12 +34,6 @@
 #include "inter.h"
 #include "filter.h"
 
-#define SUB_SCU_BIT_MASK (64 - 1);
-#define SUB_SCU(xy) (xy & SUB_SCU_BIT_MASK)
-#define LCU_CU_WIDTH 8
-#define LCU_T_CU_WIDTH 9
-#define LCU_CU_OFFSET 10
-
 // Temporarily for debugging.
 #define USE_INTRA_IN_P 1
 //#define RENDER_CU encoder->frame==2
@@ -324,41 +318,6 @@ static void search_inter(encoder_control *encoder, uint16_t x_ctb,
 
 }
 
-// Width from top left of the LCU, so +1 for ref buffer size.
-#define LCU_REF_PX_WIDTH (LCU_WIDTH + LCU_WIDTH / 2)
-
-/**
- * Top and left intra reference pixels for LCU.
- * - Intra needs maximum of 32 to the right and down from LCU border.
- * - First pixel is the top-left pixel.
- */
-typedef struct {
-  pixel y[LCU_REF_PX_WIDTH + 1];
-  pixel u[LCU_REF_PX_WIDTH / 2 + 1];
-  pixel v[LCU_REF_PX_WIDTH / 2 + 1];
-} lcu_ref_px_t;
-
-typedef struct {
-  pixel y[LCU_LUMA_SIZE];
-  pixel u[LCU_CHROMA_SIZE];
-  pixel v[LCU_CHROMA_SIZE];
-} lcu_yuv_t;
-
-typedef struct {
-  lcu_ref_px_t top_ref;  //!< Reference pixels from adjacent LCUs.
-  lcu_ref_px_t left_ref; //!< Reference pixels from adjacent LCUs.
-  lcu_yuv_t ref; //!< LCU reference pixels
-  lcu_yuv_t rec; //!< LCU reconstructed pixels
-
-  /**
-   * A 9x9 CU array for the LCU, +1 CU.
-   * - Top reference CUs on row 0.
-   * - Left reference CUs on column 0.
-   * - All of LCUs CUs on 1:9, 1:9.
-   * - Top right reference CU on the last slot.
-   */
-  cu_info cu[9*9+1];
-} lcu_t;
 
 
 /**
@@ -398,8 +357,11 @@ static int search_cu_intra(encoder_control *encoder, int x, int y, int depth, lc
 {
   int width = (LCU_WIDTH >> (depth));
   int x_local = (x&0x3f), y_local = (y&0x3f);
+  int x_cu = x>>3;
+  int y_cu = y>>3;
+  int cu_pos = LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH;
 
-  cu_info *cur_cu = &lcu->cu[LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH];
+  cu_info *cur_cu = &lcu->cu[cu_pos];
 
   // INTRAPREDICTION
   pixel pred[LCU_WIDTH * LCU_WIDTH + 1];
@@ -408,8 +370,20 @@ static int search_cu_intra(encoder_control *encoder, int x, int y, int depth, lc
 
   int8_t intra_preds[3];
 
+  cu_info* left_cu = 0;
+  cu_info* above_cu = 0;
+  // cu_pos already includes LCU_CU_OFFSET, so neighbours are plain offsets from it.
+  if (x_cu > 0) {
+    left_cu = &lcu->cu[cu_pos - 1];
+  }
+  // Don't take the above CU across the LCU boundary.
+  if (y_cu > 0 &&
+      ((y_cu * (LCU_WIDTH>>MAX_DEPTH)) % LCU_WIDTH) != 0) {
+    above_cu = &lcu->cu[cu_pos - LCU_T_CU_WIDTH];
+  }
+
   // Get intra predictors
-  intra_get_dir_luma_predictor(lcu, x, y, intra_preds);
+  intra_get_dir_luma_predictor(x, y, intra_preds, cur_cu, left_cu, above_cu);
 
   // Build reconstructed block to use in prediction with extrapolated borders
   intra_build_reference_border(x, y,(int16_t)width * 2 + 8, rec, (int16_t)width * 2 + 8, 0,
@@ -436,8 +410,7 @@ static int search_cu_intra(encoder_control *encoder, int x, int y, int depth, lc
     for (i = 0; i < 4; ++i) {
       int x_pos = x + offsets[i].x * width;
       int y_pos = y + offsets[i].y * width;
-      intra_get_dir_luma_predictor(lcu,x_pos,y_pos,
-                                   intra_preds);
+      intra_get_dir_luma_predictor(x_pos,y_pos, intra_preds, cur_cu, left_cu, above_cu);
       intra_build_reference_border(x_pos, y_pos,(int16_t)width * 2 + 8, rec, (int16_t)width * 2 + 8, 0,
                                    encoder->in.cur_pic->width, encoder->in.cur_pic->height,
                                    lcu);
@@ -484,7 +457,7 @@ static int search_cu(encoder_control *encoder, int x, int y, int depth, lcu_t wo
     return 0;
   }
 
-  cur_cu = &(&work_tree[depth])->cu[LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_CU_STRUCT_WIDTH];
+  cur_cu = &(&work_tree[depth])->cu[LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH];
 
   // If the CU is completely inside the frame at this depth, search for
   // prediction modes at this depth.
@@ -713,7 +686,7 @@ static void search_frame(encoder_control *encoder)
   }
 }
 
-
+/*
 static void search_intra(encoder_control *encoder, uint16_t x_ctb,
                          uint16_t y_ctb, uint8_t depth)
 {
@@ -772,6 +745,7 @@ static void search_intra(encoder_control *encoder, uint16_t x_ctb,
     }
   }
 }
+*/
 
 /**
  * \brief Search best modes at each depth for the whole picture.
@@ -780,6 +754,7 @@ static void search_intra(encoder_control *encoder, uint16_t x_ctb,
  * with the best mode and it's cost for each CU at each depth for the whole
  * frame.
  */
+/*
 void search_tree(encoder_control *encoder,
                  int x, int y, uint8_t depth)
 {
@@ -835,6 +810,7 @@ void search_tree(encoder_control *encoder,
     search_tree(encoder, x + half_cu, y + half_cu, depth + 1);
   }
 }
+*/
 
 /**
  * \brief
@@ -902,7 +878,7 @@ void search_slice_data(encoder_control *encoder)
       }
     }
   }
-
+  /*
   // Loop through every LCU in the slice
   for (y_lcu = 0; y_lcu < encoder->in.height_in_lcu; y_lcu++) {
     for (x_lcu = 0; x_lcu < encoder->in.width_in_lcu; x_lcu++) {
@@ -918,5 +894,6 @@ void search_slice_data(encoder_control *encoder)
 
     }
   }
+  */
 #endif
 }