mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 19:24:06 +00:00
Add arbitrary-sized SATD functions.
Adds the satd_any_size strategy for the generic and AVX2 implementations. The satd_any_size functions are generated by the SATD_ANY_SIZE macro defined in strategies-picture.h.
This commit is contained in:
parent
6bdc08b6eb
commit
056fa09ba5
|
@ -431,6 +431,7 @@ SATD_NxN(8bit_avx2, 8)
|
||||||
SATD_NxN(8bit_avx2, 16)
|
SATD_NxN(8bit_avx2, 16)
|
||||||
SATD_NxN(8bit_avx2, 32)
|
SATD_NxN(8bit_avx2, 32)
|
||||||
SATD_NxN(8bit_avx2, 64)
|
SATD_NxN(8bit_avx2, 64)
|
||||||
|
SATD_ANY_SIZE(8bit_avx2)
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -610,6 +611,7 @@ int kvz_strategy_register_picture_avx2(void* opaque, uint8_t bitdepth)
|
||||||
success &= kvz_strategyselector_register(opaque, "satd_16x16_dual", "avx2", 40, &satd_8bit_16x16_dual_avx2);
|
success &= kvz_strategyselector_register(opaque, "satd_16x16_dual", "avx2", 40, &satd_8bit_16x16_dual_avx2);
|
||||||
success &= kvz_strategyselector_register(opaque, "satd_32x32_dual", "avx2", 40, &satd_8bit_32x32_dual_avx2);
|
success &= kvz_strategyselector_register(opaque, "satd_32x32_dual", "avx2", 40, &satd_8bit_32x32_dual_avx2);
|
||||||
success &= kvz_strategyselector_register(opaque, "satd_64x64_dual", "avx2", 40, &satd_8bit_64x64_dual_avx2);
|
success &= kvz_strategyselector_register(opaque, "satd_64x64_dual", "avx2", 40, &satd_8bit_64x64_dual_avx2);
|
||||||
|
success &= kvz_strategyselector_register(opaque, "satd_any_size", "avx2", 40, &satd_any_size_8bit_avx2);
|
||||||
|
|
||||||
success &= kvz_strategyselector_register(opaque, "pixels_blit", "avx2", 40, &kvz_pixels_blit_avx2);
|
success &= kvz_strategyselector_register(opaque, "pixels_blit", "avx2", 40, &kvz_pixels_blit_avx2);
|
||||||
}
|
}
|
||||||
|
|
|
@ -286,6 +286,8 @@ SATD_NxN(generic, 8)
|
||||||
SATD_NxN(generic, 16)
|
SATD_NxN(generic, 16)
|
||||||
SATD_NxN(generic, 32)
|
SATD_NxN(generic, 32)
|
||||||
SATD_NxN(generic, 64)
|
SATD_NxN(generic, 64)
|
||||||
|
SATD_ANY_SIZE(generic)
|
||||||
|
|
||||||
|
|
||||||
// Declare these functions to make sure the signature of the macro matches.
|
// Declare these functions to make sure the signature of the macro matches.
|
||||||
static cost_pixel_nxn_multi_func satd_4x4_dual_generic;
|
static cost_pixel_nxn_multi_func satd_4x4_dual_generic;
|
||||||
|
@ -471,6 +473,7 @@ int kvz_strategy_register_picture_generic(void* opaque, uint8_t bitdepth)
|
||||||
success &= kvz_strategyselector_register(opaque, "satd_16x16_dual", "generic", 0, &satd_16x16_dual_generic);
|
success &= kvz_strategyselector_register(opaque, "satd_16x16_dual", "generic", 0, &satd_16x16_dual_generic);
|
||||||
success &= kvz_strategyselector_register(opaque, "satd_32x32_dual", "generic", 0, &satd_32x32_dual_generic);
|
success &= kvz_strategyselector_register(opaque, "satd_32x32_dual", "generic", 0, &satd_32x32_dual_generic);
|
||||||
success &= kvz_strategyselector_register(opaque, "satd_64x64_dual", "generic", 0, &satd_64x64_dual_generic);
|
success &= kvz_strategyselector_register(opaque, "satd_64x64_dual", "generic", 0, &satd_64x64_dual_generic);
|
||||||
|
success &= kvz_strategyselector_register(opaque, "satd_any_size", "generic", 0, &satd_any_size_generic);
|
||||||
|
|
||||||
success &= kvz_strategyselector_register(opaque, "pixels_blit", "generic", 0, &kvz_pixels_blit_generic);
|
success &= kvz_strategyselector_register(opaque, "pixels_blit", "generic", 0, &kvz_pixels_blit_generic);
|
||||||
|
|
||||||
|
|
|
@ -47,6 +47,27 @@ static unsigned satd_ ## n ## x ## n ## _ ## suffix ( \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Defines satd_any_size_<suffix>(): a SATD (Hadamard) cost function for blocks
// whose dimensions are only known at run time.
//
// The generated function walks the block in steps of 8 pixels in both
// directions, calls satd_8x8_subblock_<suffix> on every tile and adds up the
// results. The total is shifted right by (KVZ_BIT_DEPTH - 8) before it is
// returned.
//
// width and height are expected to be integer multiples of 8: the loops have
// no special case for a partial tile at the right or bottom edge.
//
// satd_8x8_subblock_<suffix> is called by the expansion, so it has to be
// visible at the point where this macro is used.
#define SATD_ANY_SIZE(suffix) \
  static unsigned satd_any_size_ ## suffix ( \
      int width, int height, \
      const kvz_pixel *block1, int stride1, \
      const kvz_pixel *block2, int stride2) \
  { \
    unsigned total = 0; \
    for (int top = 0; top < height; top += 8) { \
      for (int left = 0; left < width; left += 8) { \
        const kvz_pixel *tile1 = block1 + top * stride1 + left; \
        const kvz_pixel *tile2 = block2 + top * stride2 + left; \
        total += satd_8x8_subblock_ ## suffix(tile1, stride1, tile2, stride2); \
      } \
    } \
    return total >> (KVZ_BIT_DEPTH - 8); \
  }
|
||||||
|
|
||||||
|
|
||||||
typedef unsigned(reg_sad_func)(const kvz_pixel *const data1, const kvz_pixel *const data2,
|
typedef unsigned(reg_sad_func)(const kvz_pixel *const data1, const kvz_pixel *const data2,
|
||||||
const int width, const int height,
|
const int width, const int height,
|
||||||
const unsigned stride1, const unsigned stride2);
|
const unsigned stride1, const unsigned stride2);
|
||||||
|
@ -72,6 +93,9 @@ extern cost_pixel_nxn_func * kvz_satd_8x8;
|
||||||
extern cost_pixel_nxn_func * kvz_satd_16x16;
|
extern cost_pixel_nxn_func * kvz_satd_16x16;
|
||||||
extern cost_pixel_nxn_func * kvz_satd_32x32;
|
extern cost_pixel_nxn_func * kvz_satd_32x32;
|
||||||
extern cost_pixel_nxn_func * kvz_satd_64x64;
|
extern cost_pixel_nxn_func * kvz_satd_64x64;
|
||||||
|
extern unsigned (*kvz_satd_any_size)(int width, int height,
|
||||||
|
const kvz_pixel *block1, int stride1,
|
||||||
|
const kvz_pixel *block2, int stride2);
|
||||||
|
|
||||||
extern cost_pixel_nxn_multi_func * kvz_sad_4x4_dual;
|
extern cost_pixel_nxn_multi_func * kvz_sad_4x4_dual;
|
||||||
extern cost_pixel_nxn_multi_func * kvz_sad_8x8_dual;
|
extern cost_pixel_nxn_multi_func * kvz_sad_8x8_dual;
|
||||||
|
@ -106,6 +130,7 @@ cost_pixel_nxn_multi_func * kvz_pixels_get_sad_dual_func(unsigned n);
|
||||||
{"satd_16x16", (void**) &kvz_satd_16x16}, \
|
{"satd_16x16", (void**) &kvz_satd_16x16}, \
|
||||||
{"satd_32x32", (void**) &kvz_satd_32x32}, \
|
{"satd_32x32", (void**) &kvz_satd_32x32}, \
|
||||||
{"satd_64x64", (void**) &kvz_satd_64x64}, \
|
{"satd_64x64", (void**) &kvz_satd_64x64}, \
|
||||||
|
{"satd_any_size", (void**) &kvz_satd_any_size}, \
|
||||||
{"sad_4x4_dual", (void**) &kvz_sad_4x4_dual}, \
|
{"sad_4x4_dual", (void**) &kvz_sad_4x4_dual}, \
|
||||||
{"sad_8x8_dual", (void**) &kvz_sad_8x8_dual}, \
|
{"sad_8x8_dual", (void**) &kvz_sad_8x8_dual}, \
|
||||||
{"sad_16x16_dual", (void**) &kvz_sad_16x16_dual}, \
|
{"sad_16x16_dual", (void**) &kvz_sad_16x16_dual}, \
|
||||||
|
|
Loading…
Reference in a new issue