Add arbitrary-sized SATD functions.

Adds strategy satd_any_size for generic and AVX2. The satd_any_size
functions are implemented with macro SATD_ANY_SIZE defined in
strategies-picture.h.
This commit is contained in:
Arttu Ylä-Outinen 2015-12-08 12:21:22 +02:00
parent 6bdc08b6eb
commit 056fa09ba5
3 changed files with 30 additions and 0 deletions

View file

@ -431,6 +431,7 @@ SATD_NxN(8bit_avx2, 8)
SATD_NxN(8bit_avx2, 16) SATD_NxN(8bit_avx2, 16)
SATD_NxN(8bit_avx2, 32) SATD_NxN(8bit_avx2, 32)
SATD_NxN(8bit_avx2, 64) SATD_NxN(8bit_avx2, 64)
SATD_ANY_SIZE(8bit_avx2)
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -610,6 +611,7 @@ int kvz_strategy_register_picture_avx2(void* opaque, uint8_t bitdepth)
success &= kvz_strategyselector_register(opaque, "satd_16x16_dual", "avx2", 40, &satd_8bit_16x16_dual_avx2); success &= kvz_strategyselector_register(opaque, "satd_16x16_dual", "avx2", 40, &satd_8bit_16x16_dual_avx2);
success &= kvz_strategyselector_register(opaque, "satd_32x32_dual", "avx2", 40, &satd_8bit_32x32_dual_avx2); success &= kvz_strategyselector_register(opaque, "satd_32x32_dual", "avx2", 40, &satd_8bit_32x32_dual_avx2);
success &= kvz_strategyselector_register(opaque, "satd_64x64_dual", "avx2", 40, &satd_8bit_64x64_dual_avx2); success &= kvz_strategyselector_register(opaque, "satd_64x64_dual", "avx2", 40, &satd_8bit_64x64_dual_avx2);
success &= kvz_strategyselector_register(opaque, "satd_any_size", "avx2", 40, &satd_any_size_8bit_avx2);
success &= kvz_strategyselector_register(opaque, "pixels_blit", "avx2", 40, &kvz_pixels_blit_avx2); success &= kvz_strategyselector_register(opaque, "pixels_blit", "avx2", 40, &kvz_pixels_blit_avx2);
} }

View file

@ -286,6 +286,8 @@ SATD_NxN(generic, 8)
SATD_NxN(generic, 16) SATD_NxN(generic, 16)
SATD_NxN(generic, 32) SATD_NxN(generic, 32)
SATD_NxN(generic, 64) SATD_NxN(generic, 64)
SATD_ANY_SIZE(generic)
// Declare these functions to make sure the signature of the macro matches. // Declare these functions to make sure the signature of the macro matches.
static cost_pixel_nxn_multi_func satd_4x4_dual_generic; static cost_pixel_nxn_multi_func satd_4x4_dual_generic;
@ -471,6 +473,7 @@ int kvz_strategy_register_picture_generic(void* opaque, uint8_t bitdepth)
success &= kvz_strategyselector_register(opaque, "satd_16x16_dual", "generic", 0, &satd_16x16_dual_generic); success &= kvz_strategyselector_register(opaque, "satd_16x16_dual", "generic", 0, &satd_16x16_dual_generic);
success &= kvz_strategyselector_register(opaque, "satd_32x32_dual", "generic", 0, &satd_32x32_dual_generic); success &= kvz_strategyselector_register(opaque, "satd_32x32_dual", "generic", 0, &satd_32x32_dual_generic);
success &= kvz_strategyselector_register(opaque, "satd_64x64_dual", "generic", 0, &satd_64x64_dual_generic); success &= kvz_strategyselector_register(opaque, "satd_64x64_dual", "generic", 0, &satd_64x64_dual_generic);
success &= kvz_strategyselector_register(opaque, "satd_any_size", "generic", 0, &satd_any_size_generic);
success &= kvz_strategyselector_register(opaque, "pixels_blit", "generic", 0, &kvz_pixels_blit_generic); success &= kvz_strategyselector_register(opaque, "pixels_blit", "generic", 0, &kvz_pixels_blit_generic);

View file

@ -47,6 +47,27 @@ static unsigned satd_ ## n ## x ## n ## _ ## suffix ( \
} }
// SATD_ANY_SIZE(suffix) defines a static function satd_any_size_<suffix> that
// computes the Hadamard cost (SATD) between two pixel blocks of arbitrary
// size.
//
// The block is walked in 8x8 tiles starting from the top-left corner, and
// every tile is handed to satd_8x8_subblock_<suffix>, which must be visible
// where the macro is expanded. width and height are expected to be integer
// multiples of 8: the loops advance in steps of 8, so with any other size the
// last tile of a row or column would cover pixels beyond the stated
// dimensions.
//
// The accumulated sum is shifted right by (KVZ_BIT_DEPTH - 8) before being
// returned.
#define SATD_ANY_SIZE(suffix) \
  static unsigned satd_any_size_ ## suffix ( \
      int width, int height, \
      const kvz_pixel *block1, int stride1, \
      const kvz_pixel *block2, int stride2) \
  { \
    unsigned total = 0; \
    int top = 0; \
    while (top < height) { \
      /* First pixel of the current band of 8 rows in each block. */ \
      const kvz_pixel *band1 = &block1[top * stride1]; \
      const kvz_pixel *band2 = &block2[top * stride2]; \
      int left = 0; \
      while (left < width) { \
        total += satd_8x8_subblock_ ## suffix(&band1[left], stride1, \
                                              &band2[left], stride2); \
        left += 8; \
      } \
      top += 8; \
    } \
    return total >> (KVZ_BIT_DEPTH - 8); \
  }
typedef unsigned(reg_sad_func)(const kvz_pixel *const data1, const kvz_pixel *const data2, typedef unsigned(reg_sad_func)(const kvz_pixel *const data1, const kvz_pixel *const data2,
const int width, const int height, const int width, const int height,
const unsigned stride1, const unsigned stride2); const unsigned stride1, const unsigned stride2);
@ -72,6 +93,9 @@ extern cost_pixel_nxn_func * kvz_satd_8x8;
extern cost_pixel_nxn_func * kvz_satd_16x16; extern cost_pixel_nxn_func * kvz_satd_16x16;
extern cost_pixel_nxn_func * kvz_satd_32x32; extern cost_pixel_nxn_func * kvz_satd_32x32;
extern cost_pixel_nxn_func * kvz_satd_64x64; extern cost_pixel_nxn_func * kvz_satd_64x64;
// Function pointer for the "satd_any_size" strategy. Its signature is the same
// as that of the satd_any_size_<suffix> functions generated by SATD_ANY_SIZE:
// block dimensions first, then each block's pixel pointer and stride.
extern unsigned (*kvz_satd_any_size)(int width, int height,
const kvz_pixel *block1, int stride1,
const kvz_pixel *block2, int stride2);
extern cost_pixel_nxn_multi_func * kvz_sad_4x4_dual; extern cost_pixel_nxn_multi_func * kvz_sad_4x4_dual;
extern cost_pixel_nxn_multi_func * kvz_sad_8x8_dual; extern cost_pixel_nxn_multi_func * kvz_sad_8x8_dual;
@ -106,6 +130,7 @@ cost_pixel_nxn_multi_func * kvz_pixels_get_sad_dual_func(unsigned n);
{"satd_16x16", (void**) &kvz_satd_16x16}, \ {"satd_16x16", (void**) &kvz_satd_16x16}, \
{"satd_32x32", (void**) &kvz_satd_32x32}, \ {"satd_32x32", (void**) &kvz_satd_32x32}, \
{"satd_64x64", (void**) &kvz_satd_64x64}, \ {"satd_64x64", (void**) &kvz_satd_64x64}, \
{"satd_any_size", (void**) &kvz_satd_any_size}, \
{"sad_4x4_dual", (void**) &kvz_sad_4x4_dual}, \ {"sad_4x4_dual", (void**) &kvz_sad_4x4_dual}, \
{"sad_8x8_dual", (void**) &kvz_sad_8x8_dual}, \ {"sad_8x8_dual", (void**) &kvz_sad_8x8_dual}, \
{"sad_16x16_dual", (void**) &kvz_sad_16x16_dual}, \ {"sad_16x16_dual", (void**) &kvz_sad_16x16_dual}, \