Mirror of https://github.com/ultravideo/uvg266.git (synced 2024-11-27 19:24:06 +00:00)
Extract macro SATD_NxN.
Combines the definitions of the SATD_NXN and SATD_NXN_AVX2 macros into a single macro, SATD_NxN, and moves it to strategies-picture.h.
parent 1eebfde0c5
commit 728a6abecc
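The technique in miniature: the sketch below is a self-contained toy version of the shared macro, not the kvazaar code itself. The KVZ_BIT_DEPTH normalization and the cost_pixel_nxn_func pre-declaration of the real macro are left out, and the 8x8 kernel is a stub that just counts calls. It shows how one macro can stamp out a satd_NxN_<suffix> wrapper for any backend that provides a satd_8x8_subblock_<suffix> kernel:

#include <stdio.h>

typedef unsigned char kvz_pixel;

/* Stand-in for a per-backend 8x8 kernel; the real ones live in
 * picture-generic.c and picture-avx2.c. This stub just returns 1
 * so the number of subblock calls is easy to observe. */
static unsigned satd_8x8_subblock_generic(const kvz_pixel *b1, unsigned s1,
                                          const kvz_pixel *b2, unsigned s2)
{
  (void)b1; (void)s1; (void)b2; (void)s2;
  return 1;
}

/* Simplified version of the shared macro: one definition serves any
 * backend, selected by token-pasting the suffix onto the kernel name. */
#define SATD_NxN(suffix, n) \
static unsigned satd_ ## n ## x ## n ## _ ## suffix( \
    const kvz_pixel * const block1, const kvz_pixel * const block2) \
{ \
  unsigned sum = 0; \
  for (unsigned y = 0; y < (n); y += 8) { \
    unsigned row = y * (n); \
    for (unsigned x = 0; x < (n); x += 8) { \
      sum += satd_8x8_subblock_ ## suffix(&block1[row + x], (n), &block2[row + x], (n)); \
    } \
  } \
  return sum; \
}

SATD_NxN(generic, 16)  /* expands to satd_16x16_generic */

int main(void)
{
  kvz_pixel a[16 * 16] = {0}, b[16 * 16] = {0};
  /* A 16x16 block is four 8x8 subblocks, so this prints 4. */
  printf("%u\n", satd_16x16_generic(a, b));
  return 0;
}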
src/strategies/avx2/picture-avx2.c

@@ -137,7 +137,7 @@ static unsigned sad_8bit_64x64_avx2(const kvz_pixel * buf1, const kvz_pixel * bu
   return m256i_horizontal_sum(sum0);
 }
 
-static unsigned satd_8bit_4x4_avx2(const kvz_pixel *org, const kvz_pixel *cur)
+static unsigned satd_4x4_8bit_avx2(const kvz_pixel *org, const kvz_pixel *cur)
 {
   __m128i original = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)org));
@@ -411,7 +411,7 @@ INLINE static void diff_blocks_and_hor_transform_dual_avx2(__m256i (*row_diff)[8
   hor_add_sub_dual_avx2((*row_diff) + 6, (*row_diff) + 7);
 }
 
-static unsigned kvz_satd_8bit_8x8_general_avx2(const kvz_pixel * buf1, unsigned stride1, const kvz_pixel * buf2, unsigned stride2)
+static unsigned satd_8x8_subblock_8bit_avx2(const kvz_pixel * buf1, unsigned stride1, const kvz_pixel * buf2, unsigned stride2)
 {
   __m128i temp_hor[8];
   __m128i temp_ver[8];
@@ -426,42 +426,12 @@ static unsigned kvz_satd_8bit_8x8_general_avx2(const kvz_pixel * buf1, unsigned
   return result;
 }
 
-// Function macro for defining hadamard calculating functions
-// for fixed size blocks. They calculate hadamard for integer
-// multiples of 8x8 with the 8x8 hadamard function.
-#define SATD_NXN_AVX2(n) \
-static unsigned satd_8bit_ ## n ## x ## n ## _avx2( \
-  const kvz_pixel * const block1, const kvz_pixel * const block2) \
-{ \
-  unsigned x, y; \
-  unsigned sum = 0; \
-  for (y = 0; y < (n); y += 8) { \
-    unsigned row = y * (n); \
-    for (x = 0; x < (n); x += 8) { \
-      sum += kvz_satd_8bit_8x8_general_avx2(&block1[row + x], (n), &block2[row + x], (n)); \
-    } \
-  } \
-  return sum>>(KVZ_BIT_DEPTH-8); \
-}
-
-static unsigned satd_8bit_8x8_avx2(
-  const kvz_pixel * const block1, const kvz_pixel * const block2)
-{
-  unsigned x, y;
-  unsigned sum = 0;
-  for (y = 0; y < (8); y += 8) {
-    unsigned row = y * (8);
-    for (x = 0; x < (8); x += 8) {
-      sum += kvz_satd_8bit_8x8_general_avx2(&block1[row + x], (8), &block2[row + x], (8));
-    }
-  }
-  return sum>>(KVZ_BIT_DEPTH-8); \
-}
+SATD_NxN(8bit_avx2, 8)
+SATD_NxN(8bit_avx2, 16)
+SATD_NxN(8bit_avx2, 32)
+SATD_NxN(8bit_avx2, 64)
 
-//SATD_NXN_AVX2(8) //Use the non-macro version
-SATD_NXN_AVX2(16)
-SATD_NXN_AVX2(32)
-SATD_NXN_AVX2(64)
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 static void kvz_satd_8bit_8x8_general_dual_avx2(const kvz_pixel * buf1, unsigned stride1,
@@ -629,11 +599,11 @@ int kvz_strategy_register_picture_avx2(void* opaque, uint8_t bitdepth)
   success &= kvz_strategyselector_register(opaque, "sad_32x32", "avx2", 40, &sad_8bit_32x32_avx2);
   success &= kvz_strategyselector_register(opaque, "sad_64x64", "avx2", 40, &sad_8bit_64x64_avx2);
 
-  success &= kvz_strategyselector_register(opaque, "satd_4x4", "avx2", 40, &satd_8bit_4x4_avx2);
-  success &= kvz_strategyselector_register(opaque, "satd_8x8", "avx2", 40, &satd_8bit_8x8_avx2);
-  success &= kvz_strategyselector_register(opaque, "satd_16x16", "avx2", 40, &satd_8bit_16x16_avx2);
-  success &= kvz_strategyselector_register(opaque, "satd_32x32", "avx2", 40, &satd_8bit_32x32_avx2);
-  success &= kvz_strategyselector_register(opaque, "satd_64x64", "avx2", 40, &satd_8bit_64x64_avx2);
+  success &= kvz_strategyselector_register(opaque, "satd_4x4", "avx2", 40, &satd_4x4_8bit_avx2);
+  success &= kvz_strategyselector_register(opaque, "satd_8x8", "avx2", 40, &satd_8x8_8bit_avx2);
+  success &= kvz_strategyselector_register(opaque, "satd_16x16", "avx2", 40, &satd_16x16_8bit_avx2);
+  success &= kvz_strategyselector_register(opaque, "satd_32x32", "avx2", 40, &satd_32x32_8bit_avx2);
+  success &= kvz_strategyselector_register(opaque, "satd_64x64", "avx2", 40, &satd_64x64_8bit_avx2);
 
   success &= kvz_strategyselector_register(opaque, "satd_4x4_dual", "avx2", 40, &satd_8bit_4x4_dual_avx2);
   success &= kvz_strategyselector_register(opaque, "satd_8x8_dual", "avx2", 40, &satd_8bit_8x8_dual_avx2);
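Why the registrations are renamed: the shared macro (final hunk of this commit) generates names of the form satd_<n>x<n>_<suffix>, so suffix 8bit_avx2 yields satd_8x8_8bit_avx2 rather than the old satd_8bit_8x8_avx2, and the hand-written 4x4 kernel is renamed to follow the same scheme. Hand-expanding SATD_NxN(8bit_avx2, 8) also shows why the dedicated non-macro 8x8 function could be dropped: for n == 8 both loops run exactly once, leaving only a thin wrapper around the subblock kernel. Roughly (substituting 8 for (n), whitespace adjusted):

static cost_pixel_nxn_func satd_8x8_8bit_avx2;
static unsigned satd_8x8_8bit_avx2(
    const kvz_pixel * const block1,
    const kvz_pixel * const block2)
{
  unsigned sum = 0;
  for (unsigned y = 0; y < 8; y += 8) {     /* single iteration */
    unsigned row = y * 8;                   /* row == 0 */
    for (unsigned x = 0; x < 8; x += 8) {   /* single iteration */
      sum += satd_8x8_subblock_8bit_avx2(&block1[row + x], 8, &block2[row + x], 8);
    }
  }
  return sum >> (KVZ_BIT_DEPTH - 8);
}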
src/strategies/generic/picture-generic.c

@@ -191,7 +191,7 @@ static unsigned satd_4x4_generic(const kvz_pixel *piOrg, const kvz_pixel *piCur)
 /**
  * \brief Calculate SATD between two 8x8 blocks inside bigger arrays.
  */
-unsigned kvz_satd_8x8_general(const kvz_pixel * piOrg, const int32_t iStrideOrg,
+static unsigned satd_8x8_subblock_generic(const kvz_pixel * piOrg, const int32_t iStrideOrg,
                               const kvz_pixel * piCur, const int32_t iStrideCur)
 {
   int32_t k, i, j, jj, sad = 0;
@@ -281,36 +281,11 @@ unsigned kvz_satd_8x8_general(const kvz_pixel * piOrg, const int32_t iStrideOrg,
   return sad;
 }
 
-// Function macro for defining hadamard calculating functions
-// for fixed size blocks. They calculate hadamard for integer
-// multiples of 8x8 with the 8x8 hadamard function.
-#define SATD_NXN(n, pixel_type) \
-static unsigned satd_ ## n ## x ## n ## _generic( \
-  const pixel_type * const block1, const pixel_type * const block2) \
-{ \
-  unsigned x, y; \
-  unsigned sum = 0; \
-  for (y = 0; y < (n); y += 8) { \
-    unsigned row = y * (n); \
-    for (x = 0; x < (n); x += 8) { \
-      sum += kvz_satd_8x8_general(&block1[row + x], (n), &block2[row + x], (n)); \
-    } \
-  } \
-  return sum>>(KVZ_BIT_DEPTH-8); \
-}
-
-// Declare these functions to make sure the signature of the macro matches.
-static cost_pixel_nxn_func satd_4x4_generic;
-static cost_pixel_nxn_func satd_8x8_generic;
-static cost_pixel_nxn_func satd_16x16_generic;
-static cost_pixel_nxn_func satd_32x32_generic;
-static cost_pixel_nxn_func satd_64x64_generic;
-
-// These macros define sadt_16bit_NxN for N = 8, 16, 32, 64
-SATD_NXN(8, kvz_pixel)
-SATD_NXN(16, kvz_pixel)
-SATD_NXN(32, kvz_pixel)
-SATD_NXN(64, kvz_pixel)
+SATD_NxN(generic, 8)
+SATD_NxN(generic, 16)
+SATD_NxN(generic, 32)
+SATD_NxN(generic, 64)
 
 // Declare these functions to make sure the signature of the macro matches.
 static cost_pixel_nxn_multi_func satd_4x4_dual_generic;
@@ -328,7 +303,7 @@ static void satd_ ## n ## x ## n ## _dual_generic( \
   for (y = 0; y < (n); y += 8) { \
     unsigned row = y * (n); \
     for (x = 0; x < (n); x += 8) { \
-      sum += kvz_satd_8x8_general(&preds[0][row + x], (n), &orig[row + x], (n)); \
+      sum += satd_8x8_subblock_generic(&preds[0][row + x], (n), &orig[row + x], (n)); \
     } \
   } \
   costs_out[0] = sum>>(KVZ_BIT_DEPTH-8); \
@@ -337,7 +312,7 @@ static void satd_ ## n ## x ## n ## _dual_generic( \
   for (y = 0; y < (n); y += 8) { \
     unsigned row = y * (n); \
     for (x = 0; x < (n); x += 8) { \
-      sum += kvz_satd_8x8_general(&preds[1][row + x], (n), &orig[row + x], (n)); \
+      sum += satd_8x8_subblock_generic(&preds[1][row + x], (n), &orig[row + x], (n)); \
     } \
   } \
   costs_out[1] = sum>>(KVZ_BIT_DEPTH-8); \
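The two hunks above touch satd_NxN_dual_generic only to rename the callee; the dual variants themselves are unchanged. For orientation, a rough sketch of what a dual function computes, with illustrative names and signature rather than the actual kvazaar API: one original block is scored against two candidate predictions, producing both costs in a single call.

/* Illustrative sketch only; the real satd_16x16_dual_generic is generated
 * by a macro and iterates over the 8x8 subblocks directly. */
static void satd_16x16_dual_sketch(const kvz_pixel *const preds[2],
                                   const kvz_pixel *const orig,
                                   unsigned costs_out[2])
{
  costs_out[0] = satd_16x16_generic(preds[0], orig);  /* cost of prediction 0 */
  costs_out[1] = satd_16x16_generic(preds[1], orig);  /* cost of prediction 1 */
}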
src/strategies/strategies-picture.h

@@ -24,6 +24,29 @@
 typedef kvz_pixel (*pred_buffer)[32 * 32];
 
 
+// Function macro for defining hadamard calculating functions
+// for fixed size blocks. They calculate hadamard for integer
+// multiples of 8x8 with the 8x8 hadamard function.
+#define SATD_NxN(suffix, n) \
+/* Declare the function in advance, hopefully reducing the probability that the
+ * macro expands to something unexpected and silently breaks things. */ \
+static cost_pixel_nxn_func satd_ ## n ## x ## n ## _ ## suffix;\
+static unsigned satd_ ## n ## x ## n ## _ ## suffix ( \
+    const kvz_pixel * const block1, \
+    const kvz_pixel * const block2) \
+{ \
+  unsigned sum = 0; \
+  for (unsigned y = 0; y < (n); y += 8) { \
+    unsigned row = y * (n); \
+    for (unsigned x = 0; x < (n); x += 8) { \
+      sum += satd_8x8_subblock_ ## suffix(&block1[row + x], (n), &block2[row + x], (n)); \
+    } \
+  } \
+  return sum >> (KVZ_BIT_DEPTH - 8); \
+}
+
 
 typedef unsigned(reg_sad_func)(const kvz_pixel *const data1, const kvz_pixel *const data2,
                                const int width, const int height,
                                const unsigned stride1, const unsigned stride2);
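The pre-declaration through cost_pixel_nxn_func is the safety net the macro's comment describes: in C, declaring a function via a function typedef pins its type, so a later definition with a different signature becomes a compile error instead of a silently mismatched function. A self-contained illustration follows; the typedef is modeled on reg_sad_func in the hunk above, and satd_4x4_demo is a hypothetical name, not kvazaar code.

#include <stdio.h>

typedef unsigned char kvz_pixel;
typedef unsigned (cost_pixel_nxn_func)(const kvz_pixel *const block1,
                                       const kvz_pixel *const block2);

/* The declaration pins the function's type... */
static cost_pixel_nxn_func satd_4x4_demo;

/* ...so if this definition drifted from cost_pixel_nxn_func (say, a missing
 * parameter), the compiler would reject the mismatch rather than silently
 * accept whatever the macro expanded to. */
static unsigned satd_4x4_demo(const kvz_pixel *const block1,
                              const kvz_pixel *const block2)
{
  (void)block1; (void)block2;
  return 0;
}

int main(void)
{
  kvz_pixel a[16] = {0}, b[16] = {0};
  printf("%u\n", satd_4x4_demo(a, b));
  return 0;
}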