Extract macro SATD_NxN.

Combines the definitions of the macros SATD_NXN and SATD_NXN_AVX2 into a
single macro SATD_NxN and moves it to strategies-picture.h.
This commit is contained in:
Arttu Ylä-Outinen 2015-11-23 14:20:44 +02:00
parent 1eebfde0c5
commit 728a6abecc
3 changed files with 41 additions and 73 deletions

View file

@ -137,7 +137,7 @@ static unsigned sad_8bit_64x64_avx2(const kvz_pixel * buf1, const kvz_pixel * bu
return m256i_horizontal_sum(sum0);
}
static unsigned satd_8bit_4x4_avx2(const kvz_pixel *org, const kvz_pixel *cur)
static unsigned satd_4x4_8bit_avx2(const kvz_pixel *org, const kvz_pixel *cur)
{
__m128i original = _mm_cvtepu8_epi16(_mm_loadl_epi64((__m128i*)org));
@ -411,7 +411,7 @@ INLINE static void diff_blocks_and_hor_transform_dual_avx2(__m256i (*row_diff)[8
hor_add_sub_dual_avx2((*row_diff) + 6, (*row_diff) + 7);
}
static unsigned kvz_satd_8bit_8x8_general_avx2(const kvz_pixel * buf1, unsigned stride1, const kvz_pixel * buf2, unsigned stride2)
static unsigned satd_8x8_subblock_8bit_avx2(const kvz_pixel * buf1, unsigned stride1, const kvz_pixel * buf2, unsigned stride2)
{
__m128i temp_hor[8];
__m128i temp_ver[8];
@ -426,42 +426,12 @@ static unsigned kvz_satd_8bit_8x8_general_avx2(const kvz_pixel * buf1, unsigned
return result;
}
// Macro that generates an AVX2 SATD function for an n-by-n block by
// summing the 8x8 Hadamard-transform cost over every 8x8 sub-block.
// Only block sizes that are integer multiples of 8 are supported.
#define SATD_NXN_AVX2(n) \
static unsigned satd_8bit_ ## n ## x ## n ## _avx2( \
  const kvz_pixel * const block1, const kvz_pixel * const block2) \
{ \
  unsigned sum = 0; \
  for (unsigned y = 0; y < (n); y += 8) { \
    const unsigned row = y * (n); \
    for (unsigned x = 0; x < (n); x += 8) { \
      sum += kvz_satd_8bit_8x8_general_avx2(&block1[row + x], (n), &block2[row + x], (n)); \
    } \
  } \
  return sum >> (KVZ_BIT_DEPTH - 8); \
}
/**
 * \brief SATD of a single 8x8 block (hand-expanded from SATD_NXN_AVX2(8)).
 *
 * For n == 8 both loops of the macro run exactly once with x == y == 0,
 * so the whole computation reduces to one call to the 8x8 Hadamard
 * kernel. The stray macro line-continuation '\' that was left on the
 * return statement of the expanded version has been removed.
 *
 * \param block1  First 8x8 block of pixels.
 * \param block2  Second 8x8 block of pixels.
 * \return SATD cost scaled down to 8-bit depth.
 */
static unsigned satd_8bit_8x8_avx2(
const kvz_pixel * const block1, const kvz_pixel * const block2)
{
  unsigned sum = kvz_satd_8bit_8x8_general_avx2(block1, 8, block2, 8);
  // Scale the cost down to an 8-bit-depth equivalent.
  return sum >> (KVZ_BIT_DEPTH - 8);
}
SATD_NxN(8bit_avx2, 8)
SATD_NxN(8bit_avx2, 16)
SATD_NxN(8bit_avx2, 32)
SATD_NxN(8bit_avx2, 64)
//SATD_NXN_AVX2(8) //Use the non-macro version
SATD_NXN_AVX2(16)
SATD_NXN_AVX2(32)
SATD_NXN_AVX2(64)
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
static void kvz_satd_8bit_8x8_general_dual_avx2(const kvz_pixel * buf1, unsigned stride1,
@ -629,11 +599,11 @@ int kvz_strategy_register_picture_avx2(void* opaque, uint8_t bitdepth)
success &= kvz_strategyselector_register(opaque, "sad_32x32", "avx2", 40, &sad_8bit_32x32_avx2);
success &= kvz_strategyselector_register(opaque, "sad_64x64", "avx2", 40, &sad_8bit_64x64_avx2);
success &= kvz_strategyselector_register(opaque, "satd_4x4", "avx2", 40, &satd_8bit_4x4_avx2);
success &= kvz_strategyselector_register(opaque, "satd_8x8", "avx2", 40, &satd_8bit_8x8_avx2);
success &= kvz_strategyselector_register(opaque, "satd_16x16", "avx2", 40, &satd_8bit_16x16_avx2);
success &= kvz_strategyselector_register(opaque, "satd_32x32", "avx2", 40, &satd_8bit_32x32_avx2);
success &= kvz_strategyselector_register(opaque, "satd_64x64", "avx2", 40, &satd_8bit_64x64_avx2);
success &= kvz_strategyselector_register(opaque, "satd_4x4", "avx2", 40, &satd_4x4_8bit_avx2);
success &= kvz_strategyselector_register(opaque, "satd_8x8", "avx2", 40, &satd_8x8_8bit_avx2);
success &= kvz_strategyselector_register(opaque, "satd_16x16", "avx2", 40, &satd_16x16_8bit_avx2);
success &= kvz_strategyselector_register(opaque, "satd_32x32", "avx2", 40, &satd_32x32_8bit_avx2);
success &= kvz_strategyselector_register(opaque, "satd_64x64", "avx2", 40, &satd_64x64_8bit_avx2);
success &= kvz_strategyselector_register(opaque, "satd_4x4_dual", "avx2", 40, &satd_8bit_4x4_dual_avx2);
success &= kvz_strategyselector_register(opaque, "satd_8x8_dual", "avx2", 40, &satd_8bit_8x8_dual_avx2);

View file

@ -191,7 +191,7 @@ static unsigned satd_4x4_generic(const kvz_pixel *piOrg, const kvz_pixel *piCur)
/**
* \brief Calculate SATD between two 8x8 blocks inside bigger arrays.
*/
unsigned kvz_satd_8x8_general(const kvz_pixel * piOrg, const int32_t iStrideOrg,
static unsigned satd_8x8_subblock_generic(const kvz_pixel * piOrg, const int32_t iStrideOrg,
const kvz_pixel * piCur, const int32_t iStrideCur)
{
int32_t k, i, j, jj, sad = 0;
@ -281,36 +281,11 @@ unsigned kvz_satd_8x8_general(const kvz_pixel * piOrg, const int32_t iStrideOrg,
return sad;
}
// Macro that generates a generic (plain C) SATD function for an n-by-n
// block of the given pixel type, built by accumulating the 8x8 Hadamard
// transform over each 8x8 sub-block. Block sizes must be multiples of 8.
#define SATD_NXN(n, pixel_type) \
static unsigned satd_ ## n ## x ## n ## _generic( \
  const pixel_type * const block1, const pixel_type * const block2) \
{ \
  unsigned sum = 0; \
  for (unsigned y = 0; y < (n); y += 8) { \
    const unsigned row = y * (n); \
    for (unsigned x = 0; x < (n); x += 8) { \
      sum += kvz_satd_8x8_general(&block1[row + x], (n), &block2[row + x], (n)); \
    } \
  } \
  return sum >> (KVZ_BIT_DEPTH - 8); \
}
// Declare these functions to make sure the signature of the macro matches.
static cost_pixel_nxn_func satd_4x4_generic;
static cost_pixel_nxn_func satd_8x8_generic;
static cost_pixel_nxn_func satd_16x16_generic;
static cost_pixel_nxn_func satd_32x32_generic;
static cost_pixel_nxn_func satd_64x64_generic;
// These macros define satd_NxN_generic for N = 8, 16, 32, 64
SATD_NXN(8, kvz_pixel)
SATD_NXN(16, kvz_pixel)
SATD_NXN(32, kvz_pixel)
SATD_NXN(64, kvz_pixel)
SATD_NxN(generic, 8)
SATD_NxN(generic, 16)
SATD_NxN(generic, 32)
SATD_NxN(generic, 64)
// Declare these functions to make sure the signature of the macro matches.
static cost_pixel_nxn_multi_func satd_4x4_dual_generic;
@ -328,7 +303,7 @@ static void satd_ ## n ## x ## n ## _dual_generic( \
for (y = 0; y < (n); y += 8) { \
unsigned row = y * (n); \
for (x = 0; x < (n); x += 8) { \
sum += kvz_satd_8x8_general(&preds[0][row + x], (n), &orig[row + x], (n)); \
sum += satd_8x8_subblock_generic(&preds[0][row + x], (n), &orig[row + x], (n)); \
} \
} \
costs_out[0] = sum>>(KVZ_BIT_DEPTH-8); \
@ -337,7 +312,7 @@ static void satd_ ## n ## x ## n ## _dual_generic( \
for (y = 0; y < (n); y += 8) { \
unsigned row = y * (n); \
for (x = 0; x < (n); x += 8) { \
sum += kvz_satd_8x8_general(&preds[1][row + x], (n), &orig[row + x], (n)); \
sum += satd_8x8_subblock_generic(&preds[1][row + x], (n), &orig[row + x], (n)); \
} \
} \
costs_out[1] = sum>>(KVZ_BIT_DEPTH-8); \

View file

@ -24,6 +24,29 @@
typedef kvz_pixel (*pred_buffer)[32 * 32];
// Macro that defines a SATD function for one n-by-n block size by
// accumulating the 8x8 sub-block SATD (satd_8x8_subblock_<suffix>)
// over the whole block. Block sizes must be integer multiples of 8.
#define SATD_NxN(suffix, n) \
/* Forward-declare the function with the strategy-function type so that \
 * any mismatch between this macro's expansion and cost_pixel_nxn_func \
 * is caught at compile time instead of failing silently. */ \
static cost_pixel_nxn_func satd_ ## n ## x ## n ## _ ## suffix;\
static unsigned satd_ ## n ## x ## n ## _ ## suffix ( \
  const kvz_pixel * const block1, \
  const kvz_pixel * const block2) \
{ \
  unsigned sum = 0; \
  for (unsigned y = 0; y < (n); y += 8) { \
    for (unsigned x = 0; x < (n); x += 8) { \
      sum += satd_8x8_subblock_ ## suffix(&block1[y * (n) + x], (n), &block2[y * (n) + x], (n)); \
    } \
  } \
  return sum >> (KVZ_BIT_DEPTH - 8); \
}
typedef unsigned(reg_sad_func)(const kvz_pixel *const data1, const kvz_pixel *const data2,
const int width, const int height,
const unsigned stride1, const unsigned stride2);