From e06d980a9692c8ef0641bd09b7e14e35f332260f Mon Sep 17 00:00:00 2001
From: Marko Viitanen <fador@iki.fi>
Date: Thu, 8 Aug 2024 14:48:48 +0300
Subject: [PATCH 1/5] [sao] Use correct pixel type in
 sao_edge_ddistortion_generic

---
 src/strategies/generic/sao_shared_generics.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/strategies/generic/sao_shared_generics.h b/src/strategies/generic/sao_shared_generics.h
index ca6ccfd6..c86a25b7 100644
--- a/src/strategies/generic/sao_shared_generics.h
+++ b/src/strategies/generic/sao_shared_generics.h
@@ -67,10 +67,10 @@ static int sao_edge_ddistortion_generic(const uvg_pixel *orig_data,
       uint32_t a_pos = (y + a_ofs.y) * block_width + x + a_ofs.x;
       uint32_t b_pos = (y + b_ofs.y) * block_width + x + b_ofs.x;
 
-      uint8_t   a    =  rec_data[a_pos];
-      uint8_t   b    =  rec_data[b_pos];
-      uint8_t   c    =  rec_data[c_pos];
-      uint8_t   orig = orig_data[c_pos];
+      uvg_pixel a      = rec_data[a_pos];
+      uvg_pixel b      = rec_data[b_pos];
+      uvg_pixel c      = rec_data[c_pos];
+      uvg_pixel orig   = orig_data[c_pos];
 
       int32_t eo_cat = sao_calc_eo_cat(a, b, c);
       int32_t offset = offsets[eo_cat];

From 67496d18749a27d8ca0ec28ecb52ca23c54f4705 Mon Sep 17 00:00:00 2001
From: Marko Viitanen <fador@iki.fi>
Date: Thu, 8 Aug 2024 14:49:33 +0300
Subject: [PATCH 2/5] [10bit] Fix reading 8-bit data when using BIT_DEPTH 10+
 and the frame needs to be filled

---
 src/yuv_io.c | 53 ++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 37 insertions(+), 16 deletions(-)

diff --git a/src/yuv_io.c b/src/yuv_io.c
index 1b72deb9..38da9ea6 100644
--- a/src/yuv_io.c
+++ b/src/yuv_io.c
@@ -53,28 +53,49 @@ static void fill_after_frame(unsigned height, unsigned array_width,
 }
 
 
-static int read_and_fill_frame_data(FILE *file,
-                                    unsigned width, unsigned height, unsigned bytes_per_sample,
-                                    unsigned array_width, uvg_pixel *data)
+static int read_and_fill_frame_data(
+  FILE*      file,
+  unsigned   width,
+  unsigned   height,
+  unsigned   bytes_per_sample,
+  unsigned   array_width,
+  uvg_pixel* data)
 {
-  uvg_pixel* p = data;
-  uvg_pixel* end = data + array_width * height;
-  uvg_pixel fill_char;
-  unsigned i;
 
-  while (p < end) {
-    // Read the beginning of the line from input.
-    if (width != fread(p, bytes_per_sample, width, file))
-      return 0;
+  unsigned   i;
+  if (bytes_per_sample != sizeof(uvg_pixel)) {
+    uint8_t* p   = (uint8_t*)data;
+    uint8_t*  end = (uint8_t*)data + array_width * height;
+    uint8_t  fill_char;
+    while (p < end) {
+      // Read the beginning of the line from input.
+      if (width != fread(p, bytes_per_sample, width, file)) return 0;
+      // Fill the rest with the last pixel value.
+      // Fill the rest with the last pixel value.
+      fill_char = p[width - 1];
 
-    // Fill the rest with the last pixel value.
-    fill_char = p[width - 1];
+      for (i = width; i < array_width; ++i) {
+        p[i] = fill_char;
+      }
 
-    for (i = width; i < array_width; ++i) {
-      p[i] = fill_char;
+      p += array_width;
     }
+  } else {
+    uvg_pixel* p   = data;
+    uvg_pixel* end = data + array_width * height;
+    uvg_pixel  fill_char;
+    while (p < end) {
+      // Read the beginning of the line from input.
+      if (width != fread(p, bytes_per_sample, width, file)) return 0;
+      // Fill the rest with the last pixel value.
+      fill_char = p[width - 1];
 
-    p += array_width;
+      for (i = width; i < array_width; ++i) {
+        p[i] = fill_char;
+      }
+
+      p += array_width;
+    }
   }
   return 1;
 }

From cd65044000daab20a5239895662e1bdabd783c66 Mon Sep 17 00:00:00 2001
From: Marko Viitanen <fador@iki.fi>
Date: Wed, 14 Aug 2024 11:26:25 +0300
Subject: [PATCH 3/5] [10bit] Fix second instance of satd shifting based on bit
 depth; already shifted in the base function

---
 src/image.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/image.c b/src/image.c
index 85f88c6b..68faa245 100644
--- a/src/image.c
+++ b/src/image.c
@@ -504,7 +504,7 @@ unsigned uvg_image_calc_satd(const uvg_picture *pic,
                              pic_data,
                              pic->stride,
                              ref_data,
-                             ref->stride) >> (UVG_BIT_DEPTH - 8);
+                             ref->stride);
   } else {
     // Extrapolate pixels from outside the frame.
 
@@ -550,7 +550,7 @@ unsigned uvg_image_calc_satd(const uvg_picture *pic,
       pic_data,
       pic->stride,
       ext_origin,
-      ext_s) >> (UVG_BIT_DEPTH - 8);
+      ext_s);
 
     return satd;
   }

From 862cc92e9748af3ec3e781368f19623be9bf38ac Mon Sep 17 00:00:00 2001
From: Marko Viitanen <fador@iki.fi>
Date: Thu, 5 Sep 2024 14:42:50 +0300
Subject: [PATCH 4/5] [cli] Fix yuv_io_seek() buffer type

---
 src/yuv_io.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/yuv_io.c b/src/yuv_io.c
index 38da9ea6..1b94fc78 100644
--- a/src/yuv_io.c
+++ b/src/yuv_io.c
@@ -53,16 +53,16 @@ static void fill_after_frame(unsigned height, unsigned array_width,
 }
 
 
-static int read_and_fill_frame_data(
-  FILE*      file,
-  unsigned   width,
-  unsigned   height,
-  unsigned   bytes_per_sample,
-  unsigned   array_width,
-  uvg_pixel* data)
+static int read_and_fill_frame_data(FILE*      file,
+                                    unsigned   width,
+                                    unsigned   height,
+                                    unsigned   bytes_per_sample,
+                                    unsigned   array_width,
+                                    uvg_pixel* data)
 {
 
   unsigned   i;
+  // Handle separately the case where we use UVG_BIT_DEPTH 10+ but the input is 8-bit.
   if (bytes_per_sample != sizeof(uvg_pixel)) {
     uint8_t* p   = (uint8_t*)data;
     uint8_t*  end = (uint8_t*)data + array_width * height;
@@ -71,7 +71,6 @@ static int read_and_fill_frame_data(
       // Read the beginning of the line from input.
       if (width != fread(p, bytes_per_sample, width, file)) return 0;
       // Fill the rest with the last pixel value.
-      // Fill the rest with the last pixel value.
       fill_char = p[width - 1];
 
       for (i = width; i < array_width; ++i) {
@@ -334,7 +333,7 @@ int yuv_io_seek(FILE* file, unsigned frames,
 
     // Seek failed. Skip data by reading.
     error = 0;
-    unsigned char* tmp[4096];
+    unsigned char tmp[4096];
     size_t bytes_left = skip_bytes;
     while (bytes_left > 0 && !error) {
       const size_t skip = MIN(4096, bytes_left);

From f6ef70adfa28652c74caa919d08fced0d67786cc Mon Sep 17 00:00:00 2001
From: Marko Viitanen <fador@iki.fi>
Date: Thu, 5 Sep 2024 16:55:46 +0300
Subject: [PATCH 5/5] [10bit] Enable AVX2 optimized DCT functions for all bit
 depths

---
 src/strategies/avx2/dct-avx2.c | 29 +++++++++++------------------
 1 file changed, 11 insertions(+), 18 deletions(-)

diff --git a/src/strategies/avx2/dct-avx2.c b/src/strategies/avx2/dct-avx2.c
index 01b1b4bb..036eed98 100644
--- a/src/strategies/avx2/dct-avx2.c
+++ b/src/strategies/avx2/dct-avx2.c
@@ -54,7 +54,6 @@ extern const int16_t uvg_g_dct_32_t[32][32];
 
 #if COMPILE_INTEL_AVX2 
 #include "uvg266.h"
-#if UVG_BIT_DEPTH == 8
 #include <immintrin.h>
 #include "strategies/avx2/dct_avx2_tables.h"
 #define MAX_LOG2_TR_DYNAMIC_RANGE 15
@@ -8039,34 +8038,28 @@ static void mts_idct_avx2(
   }
 }
 
-#endif // UVG_BIT_DEPTH == 8
 #endif //COMPILE_INTEL_AVX2
 
 int uvg_strategy_register_dct_avx2(void* opaque, uint8_t bitdepth)
 {
   bool success = true;
 #if COMPILE_INTEL_AVX2
-#if UVG_BIT_DEPTH == 8
-  if (bitdepth == 8){
-    //success &= uvg_strategyselector_register(opaque, "fast_forward_dst_4x4", "avx2", 40, &matrix_dst_4x4_avx2);
 
-    success &= uvg_strategyselector_register(opaque, "dct_4x4", "avx2", 40, &matrix_dct_4x4_avx2);
-    success &= uvg_strategyselector_register(opaque, "dct_8x8", "avx2", 40, &matrix_dct_8x8_avx2);
-    success &= uvg_strategyselector_register(opaque, "dct_16x16", "avx2", 40, &matrix_dct_16x16_avx2);
-    success &= uvg_strategyselector_register(opaque, "dct_32x32", "avx2", 40, &matrix_dct_32x32_avx2);
+  success &= uvg_strategyselector_register(opaque, "dct_4x4", "avx2", 40, &matrix_dct_4x4_avx2);
+  success &= uvg_strategyselector_register(opaque, "dct_8x8", "avx2", 40, &matrix_dct_8x8_avx2);
+  success &= uvg_strategyselector_register(opaque, "dct_16x16", "avx2", 40, &matrix_dct_16x16_avx2);
+  success &= uvg_strategyselector_register(opaque, "dct_32x32", "avx2", 40, &matrix_dct_32x32_avx2);
 
-    // success &= uvg_strategyselector_register(opaque, "fast_inverse_dst_4x4", "avx2", 40, &matrix_idst_4x4_avx2);
 
-    success &= uvg_strategyselector_register(opaque, "idct_4x4", "avx2", 40, &matrix_idct_4x4_avx2);
-    success &= uvg_strategyselector_register(opaque, "idct_8x8", "avx2", 40, &matrix_idct_8x8_avx2);
-    success &= uvg_strategyselector_register(opaque, "idct_16x16", "avx2", 40, &matrix_idct_16x16_avx2);
-    success &= uvg_strategyselector_register(opaque, "idct_32x32", "avx2", 40, &matrix_idct_32x32_avx2);
+  success &= uvg_strategyselector_register(opaque, "idct_4x4", "avx2", 40, &matrix_idct_4x4_avx2);
+  success &= uvg_strategyselector_register(opaque, "idct_8x8", "avx2", 40, &matrix_idct_8x8_avx2);
+  success &= uvg_strategyselector_register(opaque, "idct_16x16", "avx2", 40, &matrix_idct_16x16_avx2);
+  success &= uvg_strategyselector_register(opaque, "idct_32x32", "avx2", 40, &matrix_idct_32x32_avx2);
+
+  success &= uvg_strategyselector_register(opaque, "mts_dct", "avx2", 40, &mts_dct_avx2);
+  success &= uvg_strategyselector_register(opaque, "mts_idct", "avx2", 40, &mts_idct_avx2);
 
-    success &= uvg_strategyselector_register(opaque, "mts_dct", "avx2", 40, &mts_dct_avx2);
-    success &= uvg_strategyselector_register(opaque, "mts_idct", "avx2", 40, &mts_idct_avx2);
 
-  }
-#endif // UVG_BIT_DEPTH == 8
 #endif //COMPILE_INTEL_AVX2  
   return success;
 }