mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-24 02:24:07 +00:00
Add support for 4x4 blocks to SATD_ANY_SIZE.
Makes functions satd_any_size_generic and satd_any_size_8bit_avx2 work on blocks whose width and/or height are not multiples of 8.
This commit is contained in:
parent
2ae260e422
commit
bf26661782
|
@ -31,6 +31,7 @@
|
|||
#include "strategies/strategies-picture.h"
|
||||
#include "strategyselector.h"
|
||||
#include "strategies/strategies-common.h"
|
||||
#include "strategies/generic/picture-generic.h"
|
||||
|
||||
|
||||
/**
|
||||
|
@ -454,6 +455,19 @@ INLINE static void hor_transform_block_dual_avx2(__m256i (*row_diff)[8])
|
|||
hor_transform_row_dual_avx2((*row_diff) + 7);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Calculate SATD between two 4x4 blocks inside bigger arrays.
|
||||
*/
|
||||
/**
 * \brief Calculate SATD between two 4x4 blocks inside bigger arrays.
 *
 * Placeholder: no vectorized version exists yet, so this simply forwards
 * the call to the generic scalar implementation.
 * TODO: AVX2 implementation
 */
static unsigned kvz_satd_4x4_subblock_8bit_avx2(const kvz_pixel *buf1,
                                                const int32_t stride1,
                                                const kvz_pixel *buf2,
                                                const int32_t stride2)
{
  // Delegate to the generic scalar code until an AVX2 kernel is written.
  return kvz_satd_4x4_subblock_generic(buf1, stride1, buf2, stride2);
}
|
||||
|
||||
static unsigned satd_8x8_subblock_8bit_avx2(const kvz_pixel * buf1, unsigned stride1, const kvz_pixel * buf2, unsigned stride2)
|
||||
{
|
||||
__m128i temp[8];
|
||||
|
|
|
@ -99,19 +99,13 @@ static unsigned reg_sad_generic(const kvz_pixel * const data1, const kvz_pixel *
|
|||
return sad;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Calculate SATD between two 4x4 blocks inside bigger arrays.
|
||||
* \brief Transform differences between two 4x4 blocks.
|
||||
* From HM 13.0
|
||||
*/
|
||||
static unsigned satd_4x4_generic(const kvz_pixel *piOrg, const kvz_pixel *piCur)
|
||||
static int32_t hadamard_4x4_generic(int32_t diff[4*4])
|
||||
{
|
||||
int32_t k, satd = 0, diff[16], m[16], d[16];
|
||||
for (k = 0; k < 16; ++k) {
|
||||
diff[k] = piOrg[k] - piCur[k];
|
||||
}
|
||||
|
||||
/*===== hadamard transform =====*/
|
||||
int32_t m[4 * 4];
|
||||
m[0] = diff[0] + diff[12];
|
||||
m[1] = diff[1] + diff[13];
|
||||
m[2] = diff[2] + diff[14];
|
||||
|
@ -129,6 +123,7 @@ static unsigned satd_4x4_generic(const kvz_pixel *piOrg, const kvz_pixel *piCur)
|
|||
m[14] = diff[2] - diff[14];
|
||||
m[15] = diff[3] - diff[15];
|
||||
|
||||
int32_t d[4 * 4];
|
||||
d[0] = m[0] + m[4];
|
||||
d[1] = m[1] + m[5];
|
||||
d[2] = m[2] + m[6];
|
||||
|
@ -180,14 +175,45 @@ static unsigned satd_4x4_generic(const kvz_pixel *piOrg, const kvz_pixel *piCur)
|
|||
d[14] = m[14] + m[15];
|
||||
d[15] = m[15] - m[14];
|
||||
|
||||
for (k = 0; k<16; ++k) {
|
||||
satd += abs(d[k]);
|
||||
int32_t satd = 0;
|
||||
for (int i = 0; i < 16; i++) {
|
||||
satd += abs(d[i]);
|
||||
}
|
||||
satd = ((satd + 1) >> 1);
|
||||
|
||||
return satd;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Calculate SATD between two 4x4 blocks.
|
||||
*/
|
||||
/**
 * \brief Calculate SATD between two contiguous 4x4 blocks.
 */
static unsigned satd_4x4_generic(const kvz_pixel *piOrg, const kvz_pixel *piCur)
{
  // Pixel-wise residual of the two blocks, in raster order.
  int32_t residual[4 * 4];
  int k = 0;
  while (k < 16) {
    residual[k] = piOrg[k] - piCur[k];
    ++k;
  }
  // The Hadamard transform of the residual yields the SATD.
  return hadamard_4x4_generic(residual);
}
|
||||
|
||||
/**
|
||||
* \brief Calculate SATD between two 4x4 blocks inside bigger arrays.
|
||||
*/
|
||||
/**
 * \brief Calculate SATD between two 4x4 blocks inside bigger arrays.
 */
unsigned kvz_satd_4x4_subblock_generic(const kvz_pixel * buf1,
                                       const int32_t stride1,
                                       const kvz_pixel * buf2,
                                       const int32_t stride2)
{
  int32_t diff[4 * 4];
  int32_t *dst = diff;
  const kvz_pixel *row1 = buf1;
  const kvz_pixel *row2 = buf2;

  // Gather the strided 4x4 residual into a dense raster-order buffer.
  for (int y = 0; y < 4; y++) {
    for (int x = 0; x < 4; x++) {
      *dst++ = row1[x] - row2[x];
    }
    row1 += stride1;
    row2 += stride2;
  }

  return hadamard_4x4_generic(diff);
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Calculate SATD between two 8x8 blocks inside bigger arrays.
|
||||
*/
|
||||
|
|
|
@ -39,4 +39,9 @@ kvz_pixel kvz_fast_clip_16bit_to_pixel(int16_t value);
|
|||
// Assumes PIXEL_MAX to be 2^n-1
|
||||
kvz_pixel kvz_fast_clip_32bit_to_pixel(int32_t value);
|
||||
|
||||
unsigned kvz_satd_4x4_subblock_generic(const kvz_pixel * buf1,
|
||||
const int32_t stride1,
|
||||
const kvz_pixel * buf2,
|
||||
const int32_t stride2);
|
||||
|
||||
#endif //STRATEGIES_PICTURE_GENERIC_H_
|
||||
|
|
|
@ -66,11 +66,33 @@ static unsigned satd_ ## n ## x ## n ## _ ## suffix ( \
|
|||
const kvz_pixel *block2, int stride2) \
|
||||
{ \
|
||||
unsigned sum = 0; \
|
||||
if (width % 8 != 0) { \
|
||||
/* Process the first column using 4x4 blocks. */ \
|
||||
for (int y = 0; y < height; y += 4) { \
|
||||
sum += kvz_satd_4x4_subblock_ ## suffix(&block1[y * stride1], stride1, \
|
||||
&block2[y * stride2], stride2); \
|
||||
} \
|
||||
block1 += 4; \
|
||||
block2 += 4; \
|
||||
width -= 4; \
|
||||
} \
|
||||
if (height % 8 != 0) { \
|
||||
/* Process the first row using 4x4 blocks. */ \
|
||||
for (int x = 0; x < width; x += 4) { \
|
||||
sum += kvz_satd_4x4_subblock_ ## suffix(&block1[x], stride1, \
|
||||
&block2[x], stride2); \
|
||||
} \
|
||||
block1 += 4 * stride1; \
|
||||
block2 += 4 * stride2; \
|
||||
height -= 4; \
|
||||
} \
|
||||
/* The rest can now be processed with 8x8 blocks. */ \
|
||||
for (int y = 0; y < height; y += 8) { \
|
||||
const kvz_pixel *row1 = &block1[y * stride1]; \
|
||||
const kvz_pixel *row2 = &block2[y * stride2]; \
|
||||
for (int x = 0; x < width; x += 8) { \
|
||||
sum += satd_8x8_subblock_ ## suffix(&row1[x], stride1, &row2[x], stride2); \
|
||||
sum += satd_8x8_subblock_ ## suffix(&row1[x], stride1, \
|
||||
&row2[x], stride2); \
|
||||
} \
|
||||
} \
|
||||
return sum >> (KVZ_BIT_DEPTH - 8); \
|
||||
|
|
Loading…
Reference in a new issue