From 3af65b84778105ad2d30fc26cee592e0b68aceec Mon Sep 17 00:00:00 2001
From: Ari Koivula <ari@koivu.la>
Date: Tue, 12 Nov 2013 11:55:39 +0200
Subject: [PATCH] Add SAO searching and reconstruction for chroma.

Results for one I-frame and 99 P-frames, encoded first with SAO off and then with SAO on:
Processed 100 frames,    6693224 bits AVG PSNR: 30.7248 37.8978 37.8287
Processed 100 frames,    6295072 bits AVG PSNR: 32.2511 38.9373 38.9818
---
 src/encoder.c |  11 +++-
 src/sao.c     | 144 +++++++++++++++++++++++++++++++-------------------
 src/sao.h     |   5 +-
 3 files changed, 103 insertions(+), 57 deletions(-)

diff --git a/src/encoder.c b/src/encoder.c
index 15e2ce63..45e33f9a 100644
--- a/src/encoder.c
+++ b/src/encoder.c
@@ -876,7 +876,11 @@ void encode_slice_data(encoder_control* encoder)
 
   if (encoder->sao_enable) {
     pixel *new_y_data = MALLOC(pixel, pic->width * pic->height);
+    pixel *new_u_data = MALLOC(pixel, (pic->width * pic->height) >> 2);
+    pixel *new_v_data = MALLOC(pixel, (pic->width * pic->height) >> 2);
     memcpy(new_y_data, pic->y_recdata, sizeof(pixel) * pic->width * pic->height);
+    memcpy(new_u_data, pic->u_recdata, sizeof(pixel) * ((pic->width * pic->height) >> 2));
+    memcpy(new_v_data, pic->v_recdata, sizeof(pixel) * ((pic->width * pic->height) >> 2));
 
     for (y_ctb = 0; y_ctb < encoder->in.height_in_lcu; y_ctb++) {
       for (x_ctb = 0; x_ctb < encoder->in.width_in_lcu; x_ctb++) {
@@ -887,13 +891,18 @@ void encode_slice_data(encoder_control* encoder)
         init_sao_info(sao_chroma);
 
         sao_search_luma(encoder->in.cur_pic, x_ctb, y_ctb, sao_luma);
+        sao_search_chroma(encoder->in.cur_pic, x_ctb, y_ctb, sao_chroma);
         // sao_do_merge(encoder, x_ctb, y_ctb, sao_luma, sao_chroma);
         // sao_do_rdo(encoder, x_ctb, y_ctb, sao_luma, sao_chroma);
-        sao_reconstruct(encoder->in.cur_pic, new_y_data, x_ctb, y_ctb, sao_luma, sao_chroma);
+        sao_reconstruct(pic, new_y_data, x_ctb, y_ctb, sao_luma, COLOR_Y);
+        sao_reconstruct(pic, new_u_data, x_ctb, y_ctb, sao_chroma, COLOR_U);
+        sao_reconstruct(pic, new_v_data, x_ctb, y_ctb, sao_chroma, COLOR_V);
       }
     }
 
     free(new_y_data);
+    free(new_u_data);
+    free(new_v_data);
   }
 
   init_contexts(encoder,encoder->in.cur_pic->slicetype);
diff --git a/src/sao.c b/src/sao.c
index c92e7c39..ff50e0ba 100644
--- a/src/sao.c
+++ b/src/sao.c
@@ -118,11 +118,16 @@ void sao_reconstruct_color(const pixel *rec_data, pixel *new_rec_data, const sao
  * \param sao  Sao parameters.
  * \param rec  Top-left corner of the LCU, modified to be top-left corner of 
  */
-void sao_calc_block_dims(const picture *pic, const sao_info *sao, vector2d *rec, 
+void sao_calc_block_dims(const picture *pic, color_index color_i, 
+                         const sao_info *sao, vector2d *rec, 
                          vector2d *tl, vector2d *br, vector2d *block)
 {
   vector2d a_ofs = g_sao_edge_offsets[sao->eo_class][0];
   vector2d b_ofs = g_sao_edge_offsets[sao->eo_class][1];
+  const int is_chroma = (color_i != COLOR_Y ? 1 : 0);
+  int width = pic->width >> is_chroma;
+  int height = pic->height >> is_chroma;
+  int block_width = LCU_WIDTH >> is_chroma;
 
   // Handle top and left.
   if (rec->y == 0) {
@@ -141,20 +146,20 @@ void sao_calc_block_dims(const picture *pic, const sao_info *sao, vector2d *rec,
   }
 
   // Handle right and bottom, taking care of non-LCU sized CUs.
-  if (rec->y + LCU_WIDTH >= pic->height) {
+  if (rec->y + block_width >= height) {
     br->y = 0;
-    if (rec->y + LCU_WIDTH >= pic->height) {
-      block->y = pic->height - rec->y;
+    if (rec->y + block_width >= height) {
+      block->y = height - rec->y;
     }
     if (a_ofs.y == 1 || b_ofs.y == 1) {
       block->y -= 1;
       br->y += 1;
     }
   }
-  if (rec->x + LCU_WIDTH >= pic->width) {
+  if (rec->x + block_width >= width) {
     br->x = 0;
-    if (rec->x + LCU_WIDTH > pic->width) {
-      block->x = pic->width - rec->x;
+    if (rec->x + block_width > width) {
+      block->x = width - rec->x;
     }
     if (a_ofs.x == 1 || b_ofs.x == 1) {
       block->x -= 1;
@@ -166,58 +171,62 @@ void sao_calc_block_dims(const picture *pic, const sao_info *sao, vector2d *rec,
   rec->x = (rec->x == 0 ? 0 : -1);
 }
 
-void sao_reconstruct(picture *pic, pixel *new_y_data, unsigned x_ctb, unsigned y_ctb, 
-                     const sao_info *sao_luma, const sao_info *sao_chroma)
+void sao_reconstruct(picture *pic, const pixel *old_rec, 
+                     unsigned x_ctb, unsigned y_ctb, 
+                     const sao_info *sao, color_index color_i)
 {
-  pixel rec_y[(LCU_WIDTH + 2) * (LCU_WIDTH + 2)];
-  pixel new_rec_y[LCU_LUMA_SIZE];
-  pixel *y_recdata = &pic->y_recdata[CU_TO_PIXEL(x_ctb, y_ctb, 0, pic->width)];
-  pixel *new_y_recdata = &new_y_data[CU_TO_PIXEL(x_ctb, y_ctb, 0, pic->width)];
+  const int is_chroma = (color_i != COLOR_Y ? 1 : 0);
+  const int pic_stride = pic->width >> is_chroma;
+  const int lcu_stride = LCU_WIDTH >> is_chroma;
+  const int buf_stride = lcu_stride + 2;
 
-  int x = x_ctb * LCU_WIDTH, y = y_ctb * LCU_WIDTH;
-  
-  vector2d rec;
+  pixel *recdata = (color_i == COLOR_Y ? pic->y_recdata : 
+                    (color_i == COLOR_U ? pic->u_recdata : pic->v_recdata));
+  pixel buf_rec[(LCU_WIDTH + 2) * (LCU_WIDTH + 2)];
+  pixel new_rec[LCU_WIDTH * LCU_WIDTH];
+  // Calling CU_TO_PIXEL with depth 1 is the same as using block size of 32.
+  pixel *lcu_rec = &recdata[CU_TO_PIXEL(x_ctb, y_ctb, is_chroma, pic_stride)];
+  const pixel *old_lcu_rec = &old_rec[CU_TO_PIXEL(x_ctb, y_ctb, is_chroma, pic_stride)];
+
+  vector2d ofs;
   vector2d tl = { 1, 1 };
   vector2d br = { 1, 1 };
   vector2d block = { LCU_WIDTH, LCU_WIDTH };
 
-  if (sao_luma->type == SAO_TYPE_NONE) {
+  if (sao->type == SAO_TYPE_NONE) {
     return;
   }
 
-  rec.x = x;
-  rec.y = y;
-
-  sao_calc_block_dims(pic, sao_luma, &rec, &tl, &br, &block);
+  ofs.x = x_ctb * lcu_stride;
+  ofs.y = y_ctb * lcu_stride;
+  block.x = lcu_stride;
+  block.y = lcu_stride;
+  sao_calc_block_dims(pic, color_i, sao, &ofs, &tl, &br, &block);
 
   // Data to tmp buffer.
-  picture_blit_pixels(&new_y_data[(y + rec.y) * pic->width + x + rec.x], rec_y,
+  picture_blit_pixels(&old_lcu_rec[ofs.y * pic_stride + ofs.x], 
+                      buf_rec,
                       tl.x + block.x + br.x,
                       tl.y + block.y + br.y,
-                      pic->width, LCU_WIDTH + 2);
+                      pic_stride, buf_stride);
 
-  //picture_blit_pixels(y_recdata, new_rec_y, LCU_WIDTH, LCU_WIDTH, pic->width, LCU_WIDTH);
-
-  sao_reconstruct_color(&rec_y[tl.y * (LCU_WIDTH + 2) + tl.x], 
-                        &new_rec_y[(rec.y + tl.y) * LCU_WIDTH + rec.x + tl.x],
-                        sao_luma, 
-                        LCU_WIDTH + 2, LCU_WIDTH,
+  sao_reconstruct_color(&buf_rec[tl.y * buf_stride + tl.x], 
+                        &new_rec[(ofs.y + tl.y) * lcu_stride + ofs.x + tl.x],
+                        sao, 
+                        buf_stride, lcu_stride,
                         block.x, block.y);
-  //sao_reconstruct_color(rec_u, sao_chroma, COLOR_U);
-  //sao_reconstruct_color(rec_v, sao_chroma, COLOR_V);
-  
+
   // Copy reconstructed block from tmp buffer to rec image.
-  // 
-  picture_blit_pixels(&new_rec_y[(tl.y + rec.y) * LCU_WIDTH + (tl.x + rec.x)], 
-                      &y_recdata[(tl.y + rec.y) * (pic->width) + (tl.x + rec.x)],
-                      block.x, block.y, LCU_WIDTH, pic->width);
+  picture_blit_pixels(&new_rec[(tl.y + ofs.y) * lcu_stride + (tl.x + ofs.x)], 
+                      &lcu_rec[(tl.y + ofs.y) * pic_stride + (tl.x + ofs.x)],
+                      block.x, block.y, lcu_stride, pic_stride);
 }
 
 
 
-void sao_search_best_mode(const pixel *data, const pixel *recdata, 
+void sao_search_best_mode(const pixel *data[], const pixel *recdata[], 
                           int block_width, int block_height,
-                          unsigned buf_size, unsigned buf_cnt,
+                          unsigned buf_cnt,
                           sao_info *sao_out)
 {
   sao_eo_class edge_class;
@@ -235,7 +244,7 @@ void sao_search_best_mode(const pixel *data, const pixel *recdata,
 
     // Call calc_sao_edge_dir once for luma and twice for chroma.
     for (i = 0; i < buf_cnt; ++i) {
-      calc_sao_edge_dir(data + i * buf_size, recdata + i * buf_size, edge_class,
+      calc_sao_edge_dir(data[i], recdata[i], edge_class,
                         block_width, block_height, cat_sum_cnt);
     }
     
@@ -284,17 +293,50 @@ void sao_search_best_mode(const pixel *data, const pixel *recdata,
 
  void sao_search_chroma(const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao)
 {
-  
+  pixel orig_u[LCU_CHROMA_SIZE];
+  pixel rec_u[LCU_CHROMA_SIZE];
+  pixel orig_v[LCU_CHROMA_SIZE];
+  pixel rec_v[LCU_CHROMA_SIZE];
+  // Element type is pointer-to-const so these arrays convert to the
+  // 'const pixel *[]' parameters of sao_search_best_mode without a cast.
+  const pixel *orig[2] = { orig_u, orig_v };
+  const pixel *rec[2] = { rec_u, rec_v };
+  pixel *u_data = &pic->u_data[CU_TO_PIXEL(x_ctb, y_ctb, 1, pic->width / 2)];
+  pixel *u_recdata = &pic->u_recdata[CU_TO_PIXEL(x_ctb, y_ctb, 1, pic->width / 2)];
+  pixel *v_data = &pic->v_data[CU_TO_PIXEL(x_ctb, y_ctb, 1, pic->width / 2)];
+  pixel *v_recdata = &pic->v_recdata[CU_TO_PIXEL(x_ctb, y_ctb, 1, pic->width / 2)];
+  int block_width  = (LCU_WIDTH / 2);
+  int block_height = (LCU_WIDTH / 2);
+
+  // Shrink the block where the LCU reaches the right or bottom picture edge.
+  if (x_ctb * (LCU_WIDTH / 2) + (LCU_WIDTH / 2) >= (unsigned)pic->width / 2) {
+    block_width = (pic->width - x_ctb * LCU_WIDTH) / 2;
+  }
+  if (y_ctb * (LCU_WIDTH / 2) + (LCU_WIDTH / 2) >= (unsigned)pic->height / 2) {
+    block_height = (pic->height - y_ctb * LCU_WIDTH) / 2;
+  }
+
+  sao->type = SAO_TYPE_EDGE;
+
+  // Fill temporary buffers with picture data.
+  // These buffers are needed only until we switch to a LCU based data
+  // structure for pixels. Then we can give pointers directly to that structure
+  // without making copies.
+  picture_blit_pixels(u_data, orig_u, block_width, block_height, pic->width / 2, LCU_WIDTH / 2);
+  picture_blit_pixels(v_data, orig_v, block_width, block_height, pic->width / 2, LCU_WIDTH / 2);
+  picture_blit_pixels(u_recdata, rec_u, block_width, block_height, pic->width / 2, LCU_WIDTH / 2);
+  picture_blit_pixels(v_recdata, rec_v, block_width, block_height, pic->width / 2, LCU_WIDTH / 2);
+
+  // One search over both planes: U and V share the resulting sao parameters.
+  sao_search_best_mode(orig, rec, block_width, block_height, 2, sao);
 }
 
 void sao_search_luma(const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao)
 {
-  // These buffers are needed only until we switch to a LCU based data
-  // structure for pixels. Then we can give pointers directly to that structure
-  // without making copies.
-  // It's 2-dimensional because sao_search_best_mode takes arguments as arrays.
   pixel orig_y[LCU_LUMA_SIZE];
   pixel rec_y[LCU_LUMA_SIZE];
+  const pixel *orig[1] = { orig_y };
+  const pixel *rec[1] = { rec_y };
   pixel *y_data = &pic->y_data[CU_TO_PIXEL(x_ctb, y_ctb, 0, pic->width)];
   pixel *y_recdata = &pic->y_recdata[CU_TO_PIXEL(x_ctb, y_ctb, 0, pic->width)];
   int block_width = LCU_WIDTH;
@@ -306,21 +348,15 @@ void sao_search_luma(const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_inf
   if (y_ctb * LCU_WIDTH + LCU_WIDTH >= (unsigned)pic->height) {
     block_height = pic->height - y_ctb * LCU_WIDTH;
   }
-  
-  /*sao->offsets[SAO_EO_CAT0] = 0;
-  sao->offsets[SAO_EO_CAT1] = 7;
-  sao->offsets[SAO_EO_CAT2] = 7;
-  sao->offsets[SAO_EO_CAT3] = -7;
-  sao->offsets[SAO_EO_CAT4] = -7;
-  sao->eo_class = SAO_EO0;
-  sao->type = SAO_TYPE_EDGE;
-  return;*/
 
   sao->type = SAO_TYPE_EDGE;
 
   // Fill temporary buffers with picture data.
+  // These buffers are needed only until we switch to a LCU based data
+  // structure for pixels. Then we can give pointers directly to that structure
+  // without making copies.
   picture_blit_pixels(y_data, orig_y, block_width, block_height, pic->width, LCU_WIDTH);
   picture_blit_pixels(y_recdata, rec_y, block_width, block_height, pic->width, LCU_WIDTH);
 
-  sao_search_best_mode(orig_y, rec_y, block_width, block_height, LCU_LUMA_SIZE, 1, sao);
+  sao_search_best_mode(orig, rec, block_width, block_height, 1, sao);
 }
diff --git a/src/sao.h b/src/sao.h
index e32010fe..0be99eea 100644
--- a/src/sao.h
+++ b/src/sao.h
@@ -47,7 +47,8 @@ typedef struct sao_info_struct {
 void init_sao_info(sao_info *sao);
 void sao_search_chroma(const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao);
 void sao_search_luma(const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao);
-void sao_reconstruct(picture *pic, pixel *new_y_data, unsigned x_ctb, unsigned y_ctb, 
-                     const sao_info *sao_luma, const sao_info *sao_chroma);
+void sao_reconstruct(picture *pic, const pixel *old_rec, 
+                     unsigned x_ctb, unsigned y_ctb, 
+                     const sao_info *sao, color_index color_i);
 
 #endif
\ No newline at end of file