Merge branch 'intra-sad-strategies'

2024-11-24 02:24:07 +00:00 · 2014-06-16 12:34:37 +03:00 · 2014-06-16 12:34:37 +03:00 · 153b1ee41f
parent fcce6ae823 bb31408d8b
commit 153b1ee41f
11 changed files with 387 additions and 313 deletions
--- a/build/C_Properties.props
+++ b/build/C_Properties.props
@ -14,9 +14,9 @@
      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
      <PreprocessorDefinitions>WIN32_LEAN_AND_MEAN;WIN32;WIN64;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <AdditionalIncludeDirectories>$(SolutionDir)..\..\pthreads.2\include;$(SolutionDir)..\src;$(SolutionDir)..\src\extras;$(SolutionDir)..\;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <DisableSpecificWarnings>4244;4204;4206;4028</DisableSpecificWarnings>
+      <DisableSpecificWarnings>4244;4204;4206;4028;4152</DisableSpecificWarnings>
      <OpenMPSupport>false</OpenMPSupport>
-      <TreatSpecificWarningsAsErrors>4013;4029</TreatSpecificWarningsAsErrors>
+      <TreatSpecificWarningsAsErrors>4013;4029;4047</TreatSpecificWarningsAsErrors>
    </ClCompile>
    <Link>
      <AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
--- a/build/kvazaar_lib/kvazaar_lib.vcxproj
+++ b/build/kvazaar_lib/kvazaar_lib.vcxproj
@ -83,10 +83,6 @@
      <AdditionalLibraryDirectories>$(SolutionDir)..\..\pthreads.2\lib\x64</AdditionalLibraryDirectories>
      <AdditionalDependencies>pthreadVC2.lib</AdditionalDependencies>
    </Lib>
-    <ClCompile>
-      <DisableSpecificWarnings>
-      </DisableSpecificWarnings>
-    </ClCompile>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <YASM>
--- a/src/image.c
+++ b/src/image.c
@ -388,284 +388,6 @@ unsigned image_calc_sad(const image *pic, const image *ref, int pic_x, int pic_y
 }


-/**
- * \brief  Calculate SATD between two 4x4 blocks inside bigger arrays.
- * From HM 13.0
- */
-static unsigned satd_16bit_4x4(const pixel *piOrg, const pixel *piCur)
-{
-  int32_t k, satd = 0, diff[16], m[16], d[16];
-  for( k = 0; k < 16; ++k ) {
-    diff[k] = piOrg[k] - piCur[k];
-  }
-
-  /*===== hadamard transform =====*/
-  m[ 0] = diff[ 0] + diff[12];
-  m[ 1] = diff[ 1] + diff[13];
-  m[ 2] = diff[ 2] + diff[14];
-  m[ 3] = diff[ 3] + diff[15];
-  m[ 4] = diff[ 4] + diff[ 8];
-  m[ 5] = diff[ 5] + diff[ 9];
-  m[ 6] = diff[ 6] + diff[10];
-  m[ 7] = diff[ 7] + diff[11];
-  m[ 8] = diff[ 4] - diff[ 8];
-  m[ 9] = diff[ 5] - diff[ 9];
-  m[10] = diff[ 6] - diff[10];
-  m[11] = diff[ 7] - diff[11];
-  m[12] = diff[ 0] - diff[12];
-  m[13] = diff[ 1] - diff[13];
-  m[14] = diff[ 2] - diff[14];
-  m[15] = diff[ 3] - diff[15];
-
-  d[ 0] = m[ 0] + m[ 4];
-  d[ 1] = m[ 1] + m[ 5];
-  d[ 2] = m[ 2] + m[ 6];
-  d[ 3] = m[ 3] + m[ 7];
-  d[ 4] = m[ 8] + m[12];
-  d[ 5] = m[ 9] + m[13];
-  d[ 6] = m[10] + m[14];
-  d[ 7] = m[11] + m[15];
-  d[ 8] = m[ 0] - m[ 4];
-  d[ 9] = m[ 1] - m[ 5];
-  d[10] = m[ 2] - m[ 6];
-  d[11] = m[ 3] - m[ 7];
-  d[12] = m[12] - m[ 8];
-  d[13] = m[13] - m[ 9];
-  d[14] = m[14] - m[10];
-  d[15] = m[15] - m[11];
-
-  m[ 0] = d[ 0] + d[ 3];
-  m[ 1] = d[ 1] + d[ 2];
-  m[ 2] = d[ 1] - d[ 2];
-  m[ 3] = d[ 0] - d[ 3];
-  m[ 4] = d[ 4] + d[ 7];
-  m[ 5] = d[ 5] + d[ 6];
-  m[ 6] = d[ 5] - d[ 6];
-  m[ 7] = d[ 4] - d[ 7];
-  m[ 8] = d[ 8] + d[11];
-  m[ 9] = d[ 9] + d[10];
-  m[10] = d[ 9] - d[10];
-  m[11] = d[ 8] - d[11];
-  m[12] = d[12] + d[15];
-  m[13] = d[13] + d[14];
-  m[14] = d[13] - d[14];
-  m[15] = d[12] - d[15];
-
-  d[ 0] = m[ 0] + m[ 1];
-  d[ 1] = m[ 0] - m[ 1];
-  d[ 2] = m[ 2] + m[ 3];
-  d[ 3] = m[ 3] - m[ 2];
-  d[ 4] = m[ 4] + m[ 5];
-  d[ 5] = m[ 4] - m[ 5];
-  d[ 6] = m[ 6] + m[ 7];
-  d[ 7] = m[ 7] - m[ 6];
-  d[ 8] = m[ 8] + m[ 9];
-  d[ 9] = m[ 8] - m[ 9];
-  d[10] = m[10] + m[11];
-  d[11] = m[11] - m[10];
-  d[12] = m[12] + m[13];
-  d[13] = m[12] - m[13];
-  d[14] = m[14] + m[15];
-  d[15] = m[15] - m[14];
-
-  for (k=0; k<16; ++k) {
-    satd += abs(d[k]);
-  }
-  satd = ((satd+1)>>1);
-
-  return satd;
-}
-
-/**
- * \brief  Calculate SATD between two 8x8 blocks inside bigger arrays.
- */
-unsigned satd_16bit_8x8_general(const pixel * piOrg, const int32_t iStrideOrg,
-                                       const pixel * piCur, const int32_t iStrideCur)
-{
-  int32_t k, i, j, jj, sad=0;
-  int32_t diff[64], m1[8][8], m2[8][8], m3[8][8];
-
-  for (k = 0; k < 64; k += 8) {
-    diff[k+0] = piOrg[0] - piCur[0];
-    diff[k+1] = piOrg[1] - piCur[1];
-    diff[k+2] = piOrg[2] - piCur[2];
-    diff[k+3] = piOrg[3] - piCur[3];
-    diff[k+4] = piOrg[4] - piCur[4];
-    diff[k+5] = piOrg[5] - piCur[5];
-    diff[k+6] = piOrg[6] - piCur[6];
-    diff[k+7] = piOrg[7] - piCur[7];
-
-    piCur += iStrideCur;
-    piOrg += iStrideOrg;
-  }
-
-  // horizontal
-  for (j = 0; j < 8; ++j) {
-    jj = j << 3;
-    m2[j][0] = diff[jj  ] + diff[jj+4];
-    m2[j][1] = diff[jj+1] + diff[jj+5];
-    m2[j][2] = diff[jj+2] + diff[jj+6];
-    m2[j][3] = diff[jj+3] + diff[jj+7];
-    m2[j][4] = diff[jj  ] - diff[jj+4];
-    m2[j][5] = diff[jj+1] - diff[jj+5];
-    m2[j][6] = diff[jj+2] - diff[jj+6];
-    m2[j][7] = diff[jj+3] - diff[jj+7];
-
-    m1[j][0] = m2[j][0] + m2[j][2];
-    m1[j][1] = m2[j][1] + m2[j][3];
-    m1[j][2] = m2[j][0] - m2[j][2];
-    m1[j][3] = m2[j][1] - m2[j][3];
-    m1[j][4] = m2[j][4] + m2[j][6];
-    m1[j][5] = m2[j][5] + m2[j][7];
-    m1[j][6] = m2[j][4] - m2[j][6];
-    m1[j][7] = m2[j][5] - m2[j][7];
-
-    m2[j][0] = m1[j][0] + m1[j][1];
-    m2[j][1] = m1[j][0] - m1[j][1];
-    m2[j][2] = m1[j][2] + m1[j][3];
-    m2[j][3] = m1[j][2] - m1[j][3];
-    m2[j][4] = m1[j][4] + m1[j][5];
-    m2[j][5] = m1[j][4] - m1[j][5];
-    m2[j][6] = m1[j][6] + m1[j][7];
-    m2[j][7] = m1[j][6] - m1[j][7];
-  }
-
-  // vertical
-  for (i = 0; i < 8; ++i) {
-    m3[0][i] = m2[0][i] + m2[4][i];
-    m3[1][i] = m2[1][i] + m2[5][i];
-    m3[2][i] = m2[2][i] + m2[6][i];
-    m3[3][i] = m2[3][i] + m2[7][i];
-    m3[4][i] = m2[0][i] - m2[4][i];
-    m3[5][i] = m2[1][i] - m2[5][i];
-    m3[6][i] = m2[2][i] - m2[6][i];
-    m3[7][i] = m2[3][i] - m2[7][i];
-
-    m1[0][i] = m3[0][i] + m3[2][i];
-    m1[1][i] = m3[1][i] + m3[3][i];
-    m1[2][i] = m3[0][i] - m3[2][i];
-    m1[3][i] = m3[1][i] - m3[3][i];
-    m1[4][i] = m3[4][i] + m3[6][i];
-    m1[5][i] = m3[5][i] + m3[7][i];
-    m1[6][i] = m3[4][i] - m3[6][i];
-    m1[7][i] = m3[5][i] - m3[7][i];
-
-    m2[0][i] = m1[0][i] + m1[1][i];
-    m2[1][i] = m1[0][i] - m1[1][i];
-    m2[2][i] = m1[2][i] + m1[3][i];
-    m2[3][i] = m1[2][i] - m1[3][i];
-    m2[4][i] = m1[4][i] + m1[5][i];
-    m2[5][i] = m1[4][i] - m1[5][i];
-    m2[6][i] = m1[6][i] + m1[7][i];
-    m2[7][i] = m1[6][i] - m1[7][i];
-  }
-
-  for (i = 0; i < 64; ++i) {
-    sad += abs(((int*)m2)[i]);
-  }
-
-  sad = (sad + 2) >> 2;
-
-  return sad;
-}
-
-// Function macro for defining hadamard calculating functions
-// for fixed size blocks. They calculate hadamard for integer
-// multiples of 8x8 with the 8x8 hadamard function.
-#define SATD_NXN(n, pixel_type, suffix) \
-  static unsigned satd_ ## suffix ## _ ## n ## x ## n( \
-                  const pixel_type * const block1, const pixel_type * const block2) \
-  { \
-    unsigned x, y; \
-    unsigned sum = 0; \
-    for (y = 0; y < (n); y += 8) { \
-      unsigned row = y * (n); \
-      for (x = 0; x < (n); x += 8) { \
-        sum += satd_16bit_8x8_general(&block1[row + x], (n), &block2[row + x], (n)); \
-      } \
-    } \
-    return sum; \
-    }
-
-// These macros define sadt_16bit_NxN for N = 8, 16, 32, 64
-SATD_NXN(8, pixel, 16bit)
-SATD_NXN(16, pixel, 16bit)
-SATD_NXN(32, pixel, 16bit)
-SATD_NXN(64, pixel, 16bit)
-
-// Function macro for defining SAD calculating functions
-// for fixed size blocks.
-#define SAD_NXN(n, pixel_type, suffix) \
-  static unsigned sad_ ## suffix ## _ ##  n ## x ## n( \
-                  const pixel_type * const block1, const pixel_type * const block2) \
-  { \
-    unsigned i; \
-    unsigned sum = 0; \
-    for (i = 0; i < (n)*(n); ++i) { \
-      sum += abs(block1[i] - block2[i]); \
-    } \
-    return sum; \
-  }
-
-// These macros define sad_16bit_nxn functions for n = 4, 8, 16, 32, 64
-// with function signatures of cost_16bit_nxn_func.
-// They are used through get_sad_16bit_nxn_func.
-SAD_NXN(4, pixel, 16bit)
-SAD_NXN(8, pixel, 16bit)
-SAD_NXN(16, pixel, 16bit)
-SAD_NXN(32, pixel, 16bit)
-SAD_NXN(64, pixel, 16bit)
-
-/**
- * \brief  Get a function that calculates SATD for NxN block.
- *
- * \param n  Width of the region for which SATD is calculated.
- *
- * \returns  Pointer to cost_16bit_nxn_func.
- */
-cost_16bit_nxn_func get_satd_16bit_nxn_func(unsigned n)
-{
-  switch (n) {
-  case 4:
-    return &satd_16bit_4x4;
-  case 8:
-    return &satd_16bit_8x8;
-  case 16:
-    return &satd_16bit_16x16;
-  case 32:
-    return &satd_16bit_32x32;
-  case 64:
-    return &satd_16bit_64x64;
-  default:
-    return NULL;
-    }
-  }
-
-/**
- * \brief  Get a function that calculates SAD for NxN block.
- *
- * \param n  Width of the region for which SAD is calculated.
- *
- * \returns  Pointer to cost_16bit_nxn_func.
- */
-cost_16bit_nxn_func get_sad_16bit_nxn_func(unsigned n)
-  {
-  switch (n) {
-  case 4:
-    return &sad_16bit_4x4;
-  case 8:
-    return &sad_16bit_8x8;
-  case 16:
-    return &sad_16bit_16x16;
-  case 32:
-    return &sad_16bit_32x32;
-  case 64:
-    return &sad_16bit_64x64;
-  default:
-    return NULL;
-  }
-}
-
 unsigned pixels_calc_ssd(const pixel *const ref, const pixel *const rec,
                 const int ref_stride, const int rec_stride,
                 const int width)
--- a/src/image.h
+++ b/src/image.h
@ -75,15 +75,6 @@ unsigned image_calc_sad(const image *pic, const image *ref, int pic_x, int pic_y
                        int block_width, int block_height);


-typedef unsigned (*cost_16bit_nxn_func)(const pixel *block1, const pixel *block2);
-
-
-cost_16bit_nxn_func get_satd_16bit_nxn_func(unsigned n);
-cost_16bit_nxn_func get_sad_16bit_nxn_func(unsigned n);
-
-unsigned pixels_satd_16bit_nxn(pixel *block1, pixel *block2, unsigned n);
-unsigned pixels_sad_16bit_nxn(pixel *block1, pixel *block2, unsigned n);
-
 unsigned pixels_calc_ssd(const pixel *const ref, const pixel *const rec,
                  const int ref_stride, const int rec_stride,
                  const int width);
--- a/src/search.c
+++ b/src/search.c
@ -31,6 +31,7 @@
 #include "config.h"
 #include "bitstream.h"
 #include "image.h"
+#include "strategies/strategies-picture.h"
 #include "intra.h"
 #include "inter.h"
 #include "filter.h"
@ -825,7 +826,7 @@ static void search_intra_rough(encoder_state * const encoder_state,
                               int8_t modes[35], uint32_t costs[35])
 {
  int16_t mode;
-  cost_16bit_nxn_func cost_func = get_sad_16bit_nxn_func(width);
+  cost_pixel_nxn_func * cost_func = pixels_get_sad_func(width);

  // Temporary block arrays
  pixel pred[LCU_WIDTH * LCU_WIDTH + 1];
--- a/src/strategies/picture/picture-generic.c
+++ b/src/strategies/picture/picture-generic.c
@ -26,6 +26,7 @@
 #include "strategyselector.h"


+
 /**
 * \brief Calculate Sum of Absolute Differences (SAD)
 *
@ -41,7 +42,7 @@
 * \returns Sum of Absolute Differences
 */
 static unsigned reg_sad_generic(const pixel * const data1, const pixel * const data2,
-                        const int width, const int height, const unsigned stride1, const unsigned stride2)
+                         const int width, const int height, const unsigned stride1, const unsigned stride2)
 {
  int y, x;
  unsigned sad = 0;
@ -55,6 +56,267 @@ static unsigned reg_sad_generic(const pixel * const data1, const pixel * const d
  return sad;
 }

-static int strategy_register_picture_generic(void* opaque) {
-  return strategyselector_register(opaque, "reg_sad", "generic", 0, &reg_sad_generic);
+
+/**
+ * \brief  Calculate SATD between two 4x4 blocks inside bigger arrays.
+ * From HM 13.0
+ */
+static unsigned satd_8bit_4x4_generic(const pixel *piOrg, const pixel *piCur)
+{
+  int32_t k, satd = 0, diff[16], m[16], d[16];
+  for (k = 0; k < 16; ++k) {
+    diff[k] = piOrg[k] - piCur[k];
+  }
+
+  /*===== hadamard transform =====*/
+  m[0] = diff[0] + diff[12];
+  m[1] = diff[1] + diff[13];
+  m[2] = diff[2] + diff[14];
+  m[3] = diff[3] + diff[15];
+  m[4] = diff[4] + diff[8];
+  m[5] = diff[5] + diff[9];
+  m[6] = diff[6] + diff[10];
+  m[7] = diff[7] + diff[11];
+  m[8] = diff[4] - diff[8];
+  m[9] = diff[5] - diff[9];
+  m[10] = diff[6] - diff[10];
+  m[11] = diff[7] - diff[11];
+  m[12] = diff[0] - diff[12];
+  m[13] = diff[1] - diff[13];
+  m[14] = diff[2] - diff[14];
+  m[15] = diff[3] - diff[15];
+
+  d[0] = m[0] + m[4];
+  d[1] = m[1] + m[5];
+  d[2] = m[2] + m[6];
+  d[3] = m[3] + m[7];
+  d[4] = m[8] + m[12];
+  d[5] = m[9] + m[13];
+  d[6] = m[10] + m[14];
+  d[7] = m[11] + m[15];
+  d[8] = m[0] - m[4];
+  d[9] = m[1] - m[5];
+  d[10] = m[2] - m[6];
+  d[11] = m[3] - m[7];
+  d[12] = m[12] - m[8];
+  d[13] = m[13] - m[9];
+  d[14] = m[14] - m[10];
+  d[15] = m[15] - m[11];
+
+  m[0] = d[0] + d[3];
+  m[1] = d[1] + d[2];
+  m[2] = d[1] - d[2];
+  m[3] = d[0] - d[3];
+  m[4] = d[4] + d[7];
+  m[5] = d[5] + d[6];
+  m[6] = d[5] - d[6];
+  m[7] = d[4] - d[7];
+  m[8] = d[8] + d[11];
+  m[9] = d[9] + d[10];
+  m[10] = d[9] - d[10];
+  m[11] = d[8] - d[11];
+  m[12] = d[12] + d[15];
+  m[13] = d[13] + d[14];
+  m[14] = d[13] - d[14];
+  m[15] = d[12] - d[15];
+
+  d[0] = m[0] + m[1];
+  d[1] = m[0] - m[1];
+  d[2] = m[2] + m[3];
+  d[3] = m[3] - m[2];
+  d[4] = m[4] + m[5];
+  d[5] = m[4] - m[5];
+  d[6] = m[6] + m[7];
+  d[7] = m[7] - m[6];
+  d[8] = m[8] + m[9];
+  d[9] = m[8] - m[9];
+  d[10] = m[10] + m[11];
+  d[11] = m[11] - m[10];
+  d[12] = m[12] + m[13];
+  d[13] = m[12] - m[13];
+  d[14] = m[14] + m[15];
+  d[15] = m[15] - m[14];
+
+  for (k = 0; k<16; ++k) {
+    satd += abs(d[k]);
+  }
+  satd = ((satd + 1) >> 1);
+
+  return satd;
+}
+
+/**
+* \brief  Calculate SATD between two 8x8 blocks inside bigger arrays.
+*/
+static unsigned satd_16bit_8x8_general(const pixel * piOrg, const int32_t iStrideOrg,
+  const pixel * piCur, const int32_t iStrideCur)
+{
+  int32_t k, i, j, jj, sad = 0;
+  int32_t diff[64], m1[8][8], m2[8][8], m3[8][8];
+
+  for (k = 0; k < 64; k += 8) {
+    diff[k + 0] = piOrg[0] - piCur[0];
+    diff[k + 1] = piOrg[1] - piCur[1];
+    diff[k + 2] = piOrg[2] - piCur[2];
+    diff[k + 3] = piOrg[3] - piCur[3];
+    diff[k + 4] = piOrg[4] - piCur[4];
+    diff[k + 5] = piOrg[5] - piCur[5];
+    diff[k + 6] = piOrg[6] - piCur[6];
+    diff[k + 7] = piOrg[7] - piCur[7];
+
+    piCur += iStrideCur;
+    piOrg += iStrideOrg;
+  }
+
+  // horizontal
+  for (j = 0; j < 8; ++j) {
+    jj = j << 3;
+    m2[j][0] = diff[jj] + diff[jj + 4];
+    m2[j][1] = diff[jj + 1] + diff[jj + 5];
+    m2[j][2] = diff[jj + 2] + diff[jj + 6];
+    m2[j][3] = diff[jj + 3] + diff[jj + 7];
+    m2[j][4] = diff[jj] - diff[jj + 4];
+    m2[j][5] = diff[jj + 1] - diff[jj + 5];
+    m2[j][6] = diff[jj + 2] - diff[jj + 6];
+    m2[j][7] = diff[jj + 3] - diff[jj + 7];
+
+    m1[j][0] = m2[j][0] + m2[j][2];
+    m1[j][1] = m2[j][1] + m2[j][3];
+    m1[j][2] = m2[j][0] - m2[j][2];
+    m1[j][3] = m2[j][1] - m2[j][3];
+    m1[j][4] = m2[j][4] + m2[j][6];
+    m1[j][5] = m2[j][5] + m2[j][7];
+    m1[j][6] = m2[j][4] - m2[j][6];
+    m1[j][7] = m2[j][5] - m2[j][7];
+
+    m2[j][0] = m1[j][0] + m1[j][1];
+    m2[j][1] = m1[j][0] - m1[j][1];
+    m2[j][2] = m1[j][2] + m1[j][3];
+    m2[j][3] = m1[j][2] - m1[j][3];
+    m2[j][4] = m1[j][4] + m1[j][5];
+    m2[j][5] = m1[j][4] - m1[j][5];
+    m2[j][6] = m1[j][6] + m1[j][7];
+    m2[j][7] = m1[j][6] - m1[j][7];
+  }
+
+  // vertical
+  for (i = 0; i < 8; ++i) {
+    m3[0][i] = m2[0][i] + m2[4][i];
+    m3[1][i] = m2[1][i] + m2[5][i];
+    m3[2][i] = m2[2][i] + m2[6][i];
+    m3[3][i] = m2[3][i] + m2[7][i];
+    m3[4][i] = m2[0][i] - m2[4][i];
+    m3[5][i] = m2[1][i] - m2[5][i];
+    m3[6][i] = m2[2][i] - m2[6][i];
+    m3[7][i] = m2[3][i] - m2[7][i];
+
+    m1[0][i] = m3[0][i] + m3[2][i];
+    m1[1][i] = m3[1][i] + m3[3][i];
+    m1[2][i] = m3[0][i] - m3[2][i];
+    m1[3][i] = m3[1][i] - m3[3][i];
+    m1[4][i] = m3[4][i] + m3[6][i];
+    m1[5][i] = m3[5][i] + m3[7][i];
+    m1[6][i] = m3[4][i] - m3[6][i];
+    m1[7][i] = m3[5][i] - m3[7][i];
+
+    m2[0][i] = m1[0][i] + m1[1][i];
+    m2[1][i] = m1[0][i] - m1[1][i];
+    m2[2][i] = m1[2][i] + m1[3][i];
+    m2[3][i] = m1[2][i] - m1[3][i];
+    m2[4][i] = m1[4][i] + m1[5][i];
+    m2[5][i] = m1[4][i] - m1[5][i];
+    m2[6][i] = m1[6][i] + m1[7][i];
+    m2[7][i] = m1[6][i] - m1[7][i];
+  }
+
+  for (i = 0; i < 64; ++i) {
+    sad += abs(((int*)m2)[i]);
+  }
+
+  sad = (sad + 2) >> 2;
+
+  return sad;
+}
+
+// Function macro for defining hadamard calculating functions
+// for fixed size blocks. They calculate hadamard for integer
+// multiples of 8x8 with the 8x8 hadamard function.
+#define SATD_NXN(n, pixel_type, suffix) \
+static unsigned satd_ ## suffix ## _ ## n ## x ## n ## _generic( \
+  const pixel_type * const block1, const pixel_type * const block2) \
+{ \
+  unsigned x, y; \
+  unsigned sum = 0; \
+  for (y = 0; y < (n); y += 8) { \
+  unsigned row = y * (n); \
+  for (x = 0; x < (n); x += 8) { \
+  sum += satd_16bit_8x8_general(&block1[row + x], (n), &block2[row + x], (n)); \
+  } \
+  } \
+  return sum; \
+}
+
+// Declare these functions to make sure the signature of the macro matches.
+static cost_pixel_nxn_func satd_8bit_4x4_generic;
+static cost_pixel_nxn_func satd_8bit_8x8_generic;
+static cost_pixel_nxn_func satd_8bit_16x16_generic;
+static cost_pixel_nxn_func satd_8bit_32x32_generic;
+static cost_pixel_nxn_func satd_8bit_64x64_generic;
+
+// These macros define satd_8bit_NxN_generic for N = 8, 16, 32, 64
+SATD_NXN(8, pixel, 8bit)
+SATD_NXN(16, pixel, 8bit)
+SATD_NXN(32, pixel, 8bit)
+SATD_NXN(64, pixel, 8bit)
+
+// Function macro for defining SAD calculating functions
+// for fixed size blocks.
+#define SAD_NXN(n, pixel_type, suffix) \
+static unsigned sad_ ## suffix ## _ ##  n ## x ## n ## _generic( \
+  const pixel_type * const block1, const pixel_type * const block2) \
+{ \
+  unsigned i; \
+  unsigned sum = 0; \
+  for (i = 0; i < (n)*(n); ++i) { \
+  sum += abs(block1[i] - block2[i]); \
+  } \
+  return sum; \
+}
+
+// Declare these functions to make sure the signature of the macro matches.
+static cost_pixel_nxn_func sad_8bit_4x4_generic;
+static cost_pixel_nxn_func sad_8bit_8x8_generic;
+static cost_pixel_nxn_func sad_8bit_16x16_generic;
+static cost_pixel_nxn_func sad_8bit_32x32_generic;
+static cost_pixel_nxn_func sad_8bit_64x64_generic;
+
+// These macros define sad_8bit_NxN_generic functions for N = 4, 8, 16, 32, 64
+// with function signatures of cost_pixel_nxn_func.
+// They are registered as strategies and reached through pixels_get_sad_func.
+SAD_NXN(4, pixel, 8bit)
+SAD_NXN(8, pixel, 8bit)
+SAD_NXN(16, pixel, 8bit)
+SAD_NXN(32, pixel, 8bit)
+SAD_NXN(64, pixel, 8bit)
+
+
+static int strategy_register_picture_generic(void* opaque)
+{
+  bool success = true;
+
+  success &= strategyselector_register(opaque, "reg_sad", "generic", 0, &reg_sad_generic);
+
+  success &= strategyselector_register(opaque, "sad_8bit_4x4", "generic", 0, &sad_8bit_4x4_generic);
+  success &= strategyselector_register(opaque, "sad_8bit_8x8", "generic", 0, &sad_8bit_8x8_generic);
+  success &= strategyselector_register(opaque, "sad_8bit_16x16", "generic", 0, &sad_8bit_16x16_generic);
+  success &= strategyselector_register(opaque, "sad_8bit_32x32", "generic", 0, &sad_8bit_32x32_generic);
+  success &= strategyselector_register(opaque, "sad_8bit_64x64", "generic", 0, &sad_8bit_64x64_generic);
+
+  success &= strategyselector_register(opaque, "satd_8bit_4x4", "generic", 0, &satd_8bit_4x4_generic);
+  success &= strategyselector_register(opaque, "satd_8bit_8x8", "generic", 0, &satd_8bit_8x8_generic);
+  success &= strategyselector_register(opaque, "satd_8bit_16x16", "generic", 0, &satd_8bit_16x16_generic);
+  success &= strategyselector_register(opaque, "satd_8bit_32x32", "generic", 0, &satd_8bit_32x32_generic);
+  success &= strategyselector_register(opaque, "satd_8bit_64x64", "generic", 0, &satd_8bit_64x64_generic);
+
+  return success;
 }
--- a/src/strategies/strategies-nal.h
+++ b/src/strategies/strategies-nal.h
@ -39,6 +39,7 @@ extern array_checksum_func array_checksum;
 int strategy_register_nal(void* opaque);


-#define STRATEGIES_NAL_EXPORTS {"array_checksum", (void**) &array_checksum}
+#define STRATEGIES_NAL_EXPORTS \
+  {"array_checksum", (void**) &array_checksum},

 #endif //STRATEGIES_NAL_H_
--- a/src/strategies/strategies-picture.c
+++ b/src/strategies/strategies-picture.c
@ -1,5 +1,22 @@
 #include "strategies-picture.h"

+// Define function pointers.
+reg_sad_func * reg_sad = 0;
+
+cost_pixel_nxn_func * sad_8bit_4x4 = 0;
+cost_pixel_nxn_func * sad_8bit_8x8 = 0;
+cost_pixel_nxn_func * sad_8bit_16x16 = 0;
+cost_pixel_nxn_func * sad_8bit_32x32 = 0;
+cost_pixel_nxn_func * sad_8bit_64x64 = 0;
+
+cost_pixel_nxn_func * satd_8bit_4x4 = 0;
+cost_pixel_nxn_func * satd_8bit_8x8 = 0;
+cost_pixel_nxn_func * satd_8bit_16x16 = 0;
+cost_pixel_nxn_func * satd_8bit_32x32 = 0;
+cost_pixel_nxn_func * satd_8bit_64x64 = 0;
+
+
+// Include inline functions.
 #include "picture/picture-generic.c"
 #if COMPILE_INTEL_SSE2
 #include "picture/picture-sse2.c"
@ -12,9 +29,6 @@
 #endif


-reg_sad_func reg_sad;
-
-
 int strategy_register_picture(void* opaque) {
  if (!strategy_register_picture_generic(opaque)) return 0;
  
@ -40,3 +54,55 @@ int strategy_register_picture(void* opaque) {
 #endif //COMPILE_POWERPC
  return 1;
 }
+
+
+/**
+* \brief  Get a function that calculates SATD for NxN block.
+*
+* \param n  Width of the region for which SATD is calculated.
+*
+* \returns  Pointer to cost_pixel_nxn_func, or NULL if n is not 4, 8, 16, 32 or 64.
+*/
+cost_pixel_nxn_func * pixels_get_satd_func(unsigned n)
+{
+  switch (n) {
+  case 4:
+    return satd_8bit_4x4;
+  case 8:
+    return satd_8bit_8x8;
+  case 16:
+    return satd_8bit_16x16;
+  case 32:
+    return satd_8bit_32x32;
+  case 64:
+    return satd_8bit_64x64;
+  default:
+    return NULL;
+  }
+}
+
+
+/**
+* \brief  Get a function that calculates SAD for NxN block.
+*
+* \param n  Width of the region for which SAD is calculated.
+*
+* \returns  Pointer to cost_pixel_nxn_func, or NULL if n is not 4, 8, 16, 32 or 64.
+*/
+cost_pixel_nxn_func * pixels_get_sad_func(unsigned n)
+{
+  switch (n) {
+  case 4:
+    return sad_8bit_4x4;
+  case 8:
+    return sad_8bit_8x8;
+  case 16:
+    return sad_8bit_16x16;
+  case 32:
+    return sad_8bit_32x32;
+  case 64:
+    return sad_8bit_64x64;
+  default:
+    return NULL;
+  }
+}
--- a/src/strategies/strategies-picture.h
+++ b/src/strategies/strategies-picture.h
@ -21,16 +21,47 @@

 #include "../image.h"

-//Function pointer to reg_sad
-typedef unsigned(*reg_sad_func)(const pixel *const data1, const pixel *const data2,
-                                const int width, const int height,
-                                const unsigned stride1, const unsigned stride2);
-extern reg_sad_func reg_sad;
+
+typedef unsigned(reg_sad_func)(const pixel *const data1, const pixel *const data2,
+  const int width, const int height,
+  const unsigned stride1, const unsigned stride2);
+typedef unsigned (cost_pixel_nxn_func)(const pixel *block1, const pixel *block2);
+
+
+// Declare function pointers.
+extern reg_sad_func * reg_sad;
+
+extern cost_pixel_nxn_func * sad_8bit_4x4;
+extern cost_pixel_nxn_func * sad_8bit_8x8;
+extern cost_pixel_nxn_func * sad_8bit_16x16;
+extern cost_pixel_nxn_func * sad_8bit_32x32;
+extern cost_pixel_nxn_func * sad_8bit_64x64;
+
+extern cost_pixel_nxn_func * satd_8bit_4x4;
+extern cost_pixel_nxn_func * satd_8bit_8x8;
+extern cost_pixel_nxn_func * satd_8bit_16x16;
+extern cost_pixel_nxn_func * satd_8bit_32x32;
+extern cost_pixel_nxn_func * satd_8bit_64x64;


 int strategy_register_picture(void* opaque);
+cost_pixel_nxn_func * pixels_get_satd_func(unsigned n);
+cost_pixel_nxn_func * pixels_get_sad_func(unsigned n);
+
+
+#define STRATEGIES_PICTURE_EXPORTS \
+  {"reg_sad", (void**) &reg_sad}, \
+  {"sad_8bit_4x4", (void**) &sad_8bit_4x4}, \
+  {"sad_8bit_8x8", (void**) &sad_8bit_8x8}, \
+  {"sad_8bit_16x16", (void**) &sad_8bit_16x16}, \
+  {"sad_8bit_32x32", (void**) &sad_8bit_32x32}, \
+  {"sad_8bit_64x64", (void**) &sad_8bit_64x64}, \
+  {"satd_8bit_4x4", (void**) &satd_8bit_4x4}, \
+  {"satd_8bit_8x8", (void**) &satd_8bit_8x8}, \
+  {"satd_8bit_16x16", (void**) &satd_8bit_16x16}, \
+  {"satd_8bit_32x32", (void**) &satd_8bit_32x32}, \
+  {"satd_8bit_64x64", (void**) &satd_8bit_64x64}, \


-#define STRATEGIES_PICTURE_EXPORTS {"reg_sad", (void**) &reg_sad}

 #endif //STRATEGIES_PICTURE_H_
--- a/src/strategyselector.c
+++ b/src/strategyselector.c
@ -100,9 +100,9 @@ int strategyselector_register(void * const opaque, const char * const type, cons
    new_strategy->priority = priority;
    new_strategy->fptr = fptr;
  }
-#ifdef _DEBUG
+#ifdef DEBUG_STRATEGYSELECTOR
  fprintf(stderr, "Registered strategy %s:%s with priority %d (%p)\n", type, strategy_name, priority, fptr);
-#endif //_DEBUG
+#endif //DEBUG_STRATEGYSELECTOR
  
  return 1;
 }
@ -139,7 +139,7 @@ static void* strategyselector_choose_for(const strategy_list * const strategies,
    return NULL;
  }

-#ifdef _DEBUG
+#ifdef DEBUG_STRATEGYSELECTOR
  fprintf(stderr, "Choosing strategy for %s:\n", strategy_type);
  for (i=0; i < strategies->count; ++i) {
    if (strcmp(strategies->strategies[i].type, strategy_type) == 0) {
@ -150,7 +150,7 @@ static void* strategyselector_choose_for(const strategy_list * const strategies,
      }
    }
  }
-#endif //_DEBUG
+#endif //DEBUG_STRATEGYSELECTOR
  
  
  if (max_priority_i == -1) {
--- a/src/strategyselector.h
+++ b/src/strategyselector.h
@ -19,6 +19,10 @@
 * along with Kvazaar.  If not, see <http://www.gnu.org/licenses/>.
 ****************************************************************************/

+#if defined(_DEBUG) && !defined(DEBUG_STRATEGYSELECTOR)
+# define DEBUG_STRATEGYSELECTOR
+#endif
+
 //Hardware data (abstraction of defines). Extend for other compilers

 #if defined(_M_IX86) || defined(__i586__) || defined(__i686__) || defined(_M_X64) || defined(_M_AMD64) || defined(__amd64__) || defined(__x86_64__)
@ -137,8 +141,8 @@ int strategyselector_register(void *opaque, const char *type, const char *strate
 #include "strategies/strategies-picture.h"

 static const strategy_to_select strategies_to_select[] = {
-  STRATEGIES_NAL_EXPORTS,
-  STRATEGIES_PICTURE_EXPORTS,
+  STRATEGIES_NAL_EXPORTS
+  STRATEGIES_PICTURE_EXPORTS
  {NULL, NULL},
 };