From 4122f360890a6b9ce3309b1a8dea5f7bacf0ca72 Mon Sep 17 00:00:00 2001 From: Ari Lemmetti Date: Tue, 11 Aug 2015 17:21:39 +0300 Subject: [PATCH] Prevent the registration of strategies that are incompatible when KVZ_BIT_DEPTH != 8 Remove unnecessary or misleading mentions of "8bit" --- src/strategies/avx2/dct-avx2.c | 2 + src/strategies/avx2/ipol-avx2.c | 2 + src/strategies/avx2/picture-avx2.c | 10 ++-- src/strategies/generic/picture-generic.c | 72 ++++++++++++------------ src/strategies/sse2/picture-sse2.c | 4 +- src/strategies/sse41/picture-sse41.c | 2 + src/strategies/strategies-picture.c | 40 ++++++------- src/strategies/strategies-picture.h | 40 ++++++------- src/strategies/x86_asm/picture-x86-asm.c | 22 ++++---- tests/sad_tests.c | 4 +- 10 files changed, 105 insertions(+), 93 deletions(-) diff --git a/src/strategies/avx2/dct-avx2.c b/src/strategies/avx2/dct-avx2.c index f333fd39..84f240c8 100644 --- a/src/strategies/avx2/dct-avx2.c +++ b/src/strategies/avx2/dct-avx2.c @@ -357,6 +357,7 @@ int strategy_register_dct_avx2(void* opaque) { bool success = true; #if COMPILE_INTEL_AVX2 + #if KVZ_BIT_DEPTH == 8 success &= strategyselector_register(opaque, "fast_forward_dst_4x4", "avx2", 40, &matrix_dst_4x4_avx2); success &= strategyselector_register(opaque, "dct_4x4", "avx2", 40, &matrix_dct_4x4_avx2); @@ -370,6 +371,7 @@ int strategy_register_dct_avx2(void* opaque) success &= strategyselector_register(opaque, "idct_8x8", "avx2", 40, &matrix_idct_8x8_avx2); success &= strategyselector_register(opaque, "idct_16x16", "avx2", 40, &matrix_idct_16x16_avx2); success &= strategyselector_register(opaque, "idct_32x32", "avx2", 40, &matrix_idct_32x32_avx2); + #endif //KVZ_BIT_DEPTH == 8 #endif //COMPILE_INTEL_AVX2 return success; } diff --git a/src/strategies/avx2/ipol-avx2.c b/src/strategies/avx2/ipol-avx2.c index 967e7c40..a4df8317 100644 --- a/src/strategies/avx2/ipol-avx2.c +++ b/src/strategies/avx2/ipol-avx2.c @@ -523,9 +523,11 @@ int strategy_register_ipol_avx2(void* 
opaque) { bool success = true; #if COMPILE_INTEL_AVX2 + #if KVZ_BIT_DEPTH == 8 success &= strategyselector_register(opaque, "filter_inter_quarterpel_luma", "avx2", 40, &filter_inter_quarterpel_luma_avx2); success &= strategyselector_register(opaque, "filter_inter_halfpel_chroma", "avx2", 40, &filter_inter_halfpel_chroma_avx2); success &= strategyselector_register(opaque, "filter_inter_octpel_chroma", "avx2", 40, &filter_inter_octpel_chroma_avx2); + #endif //KVZ_BIT_DEPTH == 8 success &= strategyselector_register(opaque, "extend_borders", "avx2", 40, &extend_borders_avx2); #endif //COMPILE_INTEL_AVX2 return success; diff --git a/src/strategies/avx2/picture-avx2.c b/src/strategies/avx2/picture-avx2.c index 7be80d7c..f9340a7d 100644 --- a/src/strategies/avx2/picture-avx2.c +++ b/src/strategies/avx2/picture-avx2.c @@ -148,10 +148,12 @@ int strategy_register_picture_avx2(void* opaque) // transform skip, but we might again one day and this is some of the // simplest code to look at for anyone interested in doing more // optimizations, so it's worth it to keep this maintained. 
- success &= strategyselector_register(opaque, "sad_8bit_8x8", "avx2", 40, &sad_8bit_8x8_avx2); - success &= strategyselector_register(opaque, "sad_8bit_16x16", "avx2", 40, &sad_8bit_16x16_avx2); - success &= strategyselector_register(opaque, "sad_8bit_32x32", "avx2", 40, &sad_8bit_32x32_avx2); - success &= strategyselector_register(opaque, "sad_8bit_64x64", "avx2", 40, &sad_8bit_64x64_avx2); + #if KVZ_BIT_DEPTH == 8 + success &= strategyselector_register(opaque, "sad_8x8", "avx2", 40, &sad_8bit_8x8_avx2); + success &= strategyselector_register(opaque, "sad_16x16", "avx2", 40, &sad_8bit_16x16_avx2); + success &= strategyselector_register(opaque, "sad_32x32", "avx2", 40, &sad_8bit_32x32_avx2); + success &= strategyselector_register(opaque, "sad_64x64", "avx2", 40, &sad_8bit_64x64_avx2); + #endif //KVZ_BIT_DEPTH == 8 #endif return success; } diff --git a/src/strategies/generic/picture-generic.c b/src/strategies/generic/picture-generic.c index dcfa361b..f2cdb5b8 100644 --- a/src/strategies/generic/picture-generic.c +++ b/src/strategies/generic/picture-generic.c @@ -104,7 +104,7 @@ static unsigned reg_sad_generic(const kvz_pixel * const data1, const kvz_pixel * * \brief Calculate SATD between two 4x4 blocks inside bigger arrays. * From HM 13.0 */ -static unsigned satd_8bit_4x4_generic(const kvz_pixel *piOrg, const kvz_pixel *piCur) +static unsigned satd_4x4_generic(const kvz_pixel *piOrg, const kvz_pixel *piCur) { int32_t k, satd = 0, diff[16], m[16], d[16]; for (k = 0; k < 16; ++k) { @@ -191,7 +191,7 @@ static unsigned satd_8bit_4x4_generic(const kvz_pixel *piOrg, const kvz_pixel *p /** * \brief Calculate SATD between two 8x8 blocks inside bigger arrays. 
*/ -unsigned satd_16bit_8x8_general(const kvz_pixel * piOrg, const int32_t iStrideOrg, +unsigned satd_8x8_general(const kvz_pixel * piOrg, const int32_t iStrideOrg, const kvz_pixel * piCur, const int32_t iStrideCur) { int32_t k, i, j, jj, sad = 0; @@ -284,8 +284,8 @@ unsigned satd_16bit_8x8_general(const kvz_pixel * piOrg, const int32_t iStrideOr // Function macro for defining hadamard calculating functions // for fixed size blocks. They calculate hadamard for integer // multiples of 8x8 with the 8x8 hadamard function. -#define SATD_NXN(n, pixel_type, suffix) \ -unsigned satd_ ## suffix ## _ ## n ## x ## n ## _generic( \ +#define SATD_NXN(n, pixel_type) \ +unsigned satd_ ## n ## x ## n ## _generic( \ const pixel_type * const block1, const pixel_type * const block2) \ { \ unsigned x, y; \ @@ -293,29 +293,29 @@ unsigned satd_ ## suffix ## _ ## n ## x ## n ## _generic( \ for (y = 0; y < (n); y += 8) { \ unsigned row = y * (n); \ for (x = 0; x < (n); x += 8) { \ - sum += satd_16bit_8x8_general(&block1[row + x], (n), &block2[row + x], (n)); \ + sum += satd_8x8_general(&block1[row + x], (n), &block2[row + x], (n)); \ } \ } \ return sum>>(KVZ_BIT_DEPTH-8); \ } // Declare these functions to make sure the signature of the macro matches. 
-cost_pixel_nxn_func satd_8bit_4x4_generic; -cost_pixel_nxn_func satd_8bit_8x8_generic; -cost_pixel_nxn_func satd_8bit_16x16_generic; -cost_pixel_nxn_func satd_8bit_32x32_generic; -cost_pixel_nxn_func satd_8bit_64x64_generic; +cost_pixel_nxn_func satd_4x4_generic; +cost_pixel_nxn_func satd_8x8_generic; +cost_pixel_nxn_func satd_16x16_generic; +cost_pixel_nxn_func satd_32x32_generic; +cost_pixel_nxn_func satd_64x64_generic; // These macros define sadt_16bit_NxN for N = 8, 16, 32, 64 -SATD_NXN(8, kvz_pixel, 8bit) -SATD_NXN(16, kvz_pixel, 8bit) -SATD_NXN(32, kvz_pixel, 8bit) -SATD_NXN(64, kvz_pixel, 8bit) +SATD_NXN(8, kvz_pixel) +SATD_NXN(16, kvz_pixel) +SATD_NXN(32, kvz_pixel) +SATD_NXN(64, kvz_pixel) // Function macro for defining SAD calculating functions // for fixed size blocks. -#define SAD_NXN(n, pixel_type, suffix) \ -static unsigned sad_ ## suffix ## _ ## n ## x ## n ## _generic( \ +#define SAD_NXN(n, pixel_type) \ +static unsigned sad_ ## n ## x ## n ## _generic( \ const pixel_type * const block1, const pixel_type * const block2) \ { \ unsigned i; \ @@ -327,20 +327,20 @@ static unsigned sad_ ## suffix ## _ ## n ## x ## n ## _generic( \ } // Declare these functions to make sure the signature of the macro matches. -static cost_pixel_nxn_func sad_8bit_4x4_generic; -static cost_pixel_nxn_func sad_8bit_8x8_generic; -static cost_pixel_nxn_func sad_8bit_16x16_generic; -static cost_pixel_nxn_func sad_8bit_32x32_generic; -static cost_pixel_nxn_func sad_8bit_64x64_generic; +static cost_pixel_nxn_func sad_4x4_generic; +static cost_pixel_nxn_func sad_8x8_generic; +static cost_pixel_nxn_func sad_16x16_generic; +static cost_pixel_nxn_func sad_32x32_generic; +static cost_pixel_nxn_func sad_64x64_generic; // These macros define sad_16bit_nxn functions for n = 4, 8, 16, 32, 64 // with function signatures of cost_16bit_nxn_func. // They are used through get_pixel_sad_func. 
-SAD_NXN(4, kvz_pixel, 8bit) -SAD_NXN(8, kvz_pixel, 8bit) -SAD_NXN(16, kvz_pixel, 8bit) -SAD_NXN(32, kvz_pixel, 8bit) -SAD_NXN(64, kvz_pixel, 8bit) +SAD_NXN(4, kvz_pixel) +SAD_NXN(8, kvz_pixel) +SAD_NXN(16, kvz_pixel) +SAD_NXN(32, kvz_pixel) +SAD_NXN(64, kvz_pixel) int strategy_register_picture_generic(void* opaque) @@ -349,17 +349,17 @@ int strategy_register_picture_generic(void* opaque) success &= strategyselector_register(opaque, "reg_sad", "generic", 0, &reg_sad_generic); - success &= strategyselector_register(opaque, "sad_8bit_4x4", "generic", 0, &sad_8bit_4x4_generic); - success &= strategyselector_register(opaque, "sad_8bit_8x8", "generic", 0, &sad_8bit_8x8_generic); - success &= strategyselector_register(opaque, "sad_8bit_16x16", "generic", 0, &sad_8bit_16x16_generic); - success &= strategyselector_register(opaque, "sad_8bit_32x32", "generic", 0, &sad_8bit_32x32_generic); - success &= strategyselector_register(opaque, "sad_8bit_64x64", "generic", 0, &sad_8bit_64x64_generic); + success &= strategyselector_register(opaque, "sad_4x4", "generic", 0, &sad_4x4_generic); + success &= strategyselector_register(opaque, "sad_8x8", "generic", 0, &sad_8x8_generic); + success &= strategyselector_register(opaque, "sad_16x16", "generic", 0, &sad_16x16_generic); + success &= strategyselector_register(opaque, "sad_32x32", "generic", 0, &sad_32x32_generic); + success &= strategyselector_register(opaque, "sad_64x64", "generic", 0, &sad_64x64_generic); - success &= strategyselector_register(opaque, "satd_8bit_4x4", "generic", 0, &satd_8bit_4x4_generic); - success &= strategyselector_register(opaque, "satd_8bit_8x8", "generic", 0, &satd_8bit_8x8_generic); - success &= strategyselector_register(opaque, "satd_8bit_16x16", "generic", 0, &satd_8bit_16x16_generic); - success &= strategyselector_register(opaque, "satd_8bit_32x32", "generic", 0, &satd_8bit_32x32_generic); - success &= strategyselector_register(opaque, "satd_8bit_64x64", "generic", 0, &satd_8bit_64x64_generic); + success
&= strategyselector_register(opaque, "satd_4x4", "generic", 0, &satd_4x4_generic); + success &= strategyselector_register(opaque, "satd_8x8", "generic", 0, &satd_8x8_generic); + success &= strategyselector_register(opaque, "satd_16x16", "generic", 0, &satd_16x16_generic); + success &= strategyselector_register(opaque, "satd_32x32", "generic", 0, &satd_32x32_generic); + success &= strategyselector_register(opaque, "satd_64x64", "generic", 0, &satd_64x64_generic); return success; } diff --git a/src/strategies/sse2/picture-sse2.c b/src/strategies/sse2/picture-sse2.c index 457b9c2b..269a3b69 100644 --- a/src/strategies/sse2/picture-sse2.c +++ b/src/strategies/sse2/picture-sse2.c @@ -73,8 +73,10 @@ static unsigned sad_8bit_4x4_sse2(const kvz_pixel *buf1, const kvz_pixel *buf2) int strategy_register_picture_sse2(void* opaque) { bool success = true; #if COMPILE_INTEL_SSE2 + #if KVZ_BIT_DEPTH == 8 success &= strategyselector_register(opaque, "reg_sad", "sse2", 10, &reg_sad_sse2); - success &= strategyselector_register(opaque, "sad_8bit_4x4", "sse2", 10, &sad_8bit_4x4_sse2); + success &= strategyselector_register(opaque, "sad_4x4", "sse2", 10, &sad_8bit_4x4_sse2); + #endif //KVZ_BIT_DEPTH == 8 #endif return success; } diff --git a/src/strategies/sse41/picture-sse41.c b/src/strategies/sse41/picture-sse41.c index 280e9731..e893a24a 100644 --- a/src/strategies/sse41/picture-sse41.c +++ b/src/strategies/sse41/picture-sse41.c @@ -96,7 +96,9 @@ static unsigned reg_sad_sse41(const kvz_pixel * const data1, const kvz_pixel * c int strategy_register_picture_sse41(void* opaque) { bool success = true; #if COMPILE_INTEL_SSE41 + #if KVZ_BIT_DEPTH == 8 success &= strategyselector_register(opaque, "reg_sad", "sse41", 20, &reg_sad_sse41); + #endif //KVZ_BIT_DEPTH == 8 #endif return success; } diff --git a/src/strategies/strategies-picture.c b/src/strategies/strategies-picture.c index 519b1363..56fb8fcf 100644 --- a/src/strategies/strategies-picture.c +++ b/src/strategies/strategies-picture.c @@
-24,17 +24,17 @@ // Define function pointers. reg_sad_func * reg_sad = 0; -cost_pixel_nxn_func * sad_8bit_4x4 = 0; -cost_pixel_nxn_func * sad_8bit_8x8 = 0; -cost_pixel_nxn_func * sad_8bit_16x16 = 0; -cost_pixel_nxn_func * sad_8bit_32x32 = 0; -cost_pixel_nxn_func * sad_8bit_64x64 = 0; +cost_pixel_nxn_func * sad_4x4 = 0; +cost_pixel_nxn_func * sad_8x8 = 0; +cost_pixel_nxn_func * sad_16x16 = 0; +cost_pixel_nxn_func * sad_32x32 = 0; +cost_pixel_nxn_func * sad_64x64 = 0; -cost_pixel_nxn_func * satd_8bit_4x4 = 0; -cost_pixel_nxn_func * satd_8bit_8x8 = 0; -cost_pixel_nxn_func * satd_8bit_16x16 = 0; -cost_pixel_nxn_func * satd_8bit_32x32 = 0; -cost_pixel_nxn_func * satd_8bit_64x64 = 0; +cost_pixel_nxn_func * satd_4x4 = 0; +cost_pixel_nxn_func * satd_8x8 = 0; +cost_pixel_nxn_func * satd_16x16 = 0; +cost_pixel_nxn_func * satd_32x32 = 0; +cost_pixel_nxn_func * satd_64x64 = 0; // Headers for platform optimizations. @@ -82,15 +82,15 @@ cost_pixel_nxn_func * pixels_get_satd_func(unsigned n) { switch (n) { case 4: - return satd_8bit_4x4; + return satd_4x4; case 8: - return satd_8bit_8x8; + return satd_8x8; case 16: - return satd_8bit_16x16; + return satd_16x16; case 32: - return satd_8bit_32x32; + return satd_32x32; case 64: - return satd_8bit_64x64; + return satd_64x64; default: return NULL; } @@ -108,15 +108,15 @@ cost_pixel_nxn_func * pixels_get_sad_func(unsigned n) { switch (n) { case 4: - return sad_8bit_4x4; + return sad_4x4; case 8: - return sad_8bit_8x8; + return sad_8x8; case 16: - return sad_8bit_16x16; + return sad_16x16; case 32: - return sad_8bit_32x32; + return sad_32x32; case 64: - return sad_8bit_64x64; + return sad_64x64; default: return NULL; } diff --git a/src/strategies/strategies-picture.h b/src/strategies/strategies-picture.h index a2afd042..01288b1a 100644 --- a/src/strategies/strategies-picture.h +++ b/src/strategies/strategies-picture.h @@ -32,17 +32,17 @@ typedef unsigned (cost_pixel_nxn_func)(const kvz_pixel *block1, const kvz_pixel // Declare function 
pointers. extern reg_sad_func * reg_sad; -extern cost_pixel_nxn_func * sad_8bit_4x4; -extern cost_pixel_nxn_func * sad_8bit_8x8; -extern cost_pixel_nxn_func * sad_8bit_16x16; -extern cost_pixel_nxn_func * sad_8bit_32x32; -extern cost_pixel_nxn_func * sad_8bit_64x64; +extern cost_pixel_nxn_func * sad_4x4; +extern cost_pixel_nxn_func * sad_8x8; +extern cost_pixel_nxn_func * sad_16x16; +extern cost_pixel_nxn_func * sad_32x32; +extern cost_pixel_nxn_func * sad_64x64; -extern cost_pixel_nxn_func * satd_8bit_4x4; -extern cost_pixel_nxn_func * satd_8bit_8x8; -extern cost_pixel_nxn_func * satd_8bit_16x16; -extern cost_pixel_nxn_func * satd_8bit_32x32; -extern cost_pixel_nxn_func * satd_8bit_64x64; +extern cost_pixel_nxn_func * satd_4x4; +extern cost_pixel_nxn_func * satd_8x8; +extern cost_pixel_nxn_func * satd_16x16; +extern cost_pixel_nxn_func * satd_32x32; +extern cost_pixel_nxn_func * satd_64x64; int strategy_register_picture(void* opaque); @@ -52,16 +52,16 @@ cost_pixel_nxn_func * pixels_get_sad_func(unsigned n); #define STRATEGIES_PICTURE_EXPORTS \ {"reg_sad", (void**) &reg_sad}, \ - {"sad_8bit_4x4", (void**) &sad_8bit_4x4}, \ - {"sad_8bit_8x8", (void**) &sad_8bit_8x8}, \ - {"sad_8bit_16x16", (void**) &sad_8bit_16x16}, \ - {"sad_8bit_32x32", (void**) &sad_8bit_32x32}, \ - {"sad_8bit_64x64", (void**) &sad_8bit_64x64}, \ - {"satd_8bit_4x4", (void**) &satd_8bit_4x4}, \ - {"satd_8bit_8x8", (void**) &satd_8bit_8x8}, \ - {"satd_8bit_16x16", (void**) &satd_8bit_16x16}, \ - {"satd_8bit_32x32", (void**) &satd_8bit_32x32}, \ - {"satd_8bit_64x64", (void**) &satd_8bit_64x64}, \ + {"sad_4x4", (void**) &sad_4x4}, \ + {"sad_8x8", (void**) &sad_8x8}, \ + {"sad_16x16", (void**) &sad_16x16}, \ + {"sad_32x32", (void**) &sad_32x32}, \ + {"sad_64x64", (void**) &sad_64x64}, \ + {"satd_4x4", (void**) &satd_4x4}, \ + {"satd_8x8", (void**) &satd_8x8}, \ + {"satd_16x16", (void**) &satd_16x16}, \ + {"satd_32x32", (void**) &satd_32x32}, \ + {"satd_64x64", (void**) &satd_64x64}, \ diff --git
a/src/strategies/x86_asm/picture-x86-asm.c b/src/strategies/x86_asm/picture-x86-asm.c index 89403997..bdc59e28 100644 --- a/src/strategies/x86_asm/picture-x86-asm.c +++ b/src/strategies/x86_asm/picture-x86-asm.c @@ -107,19 +107,21 @@ const int width, const int height, const unsigned stride1, const unsigned stride int strategy_register_picture_x86_asm_avx(void* opaque) { bool success = true; #if defined(KVZ_COMPILE_ASM) + #if KVZ_BIT_DEPTH == 8 success &= strategyselector_register(opaque, "reg_sad", "x86_asm_avx", 30, &reg_sad_x86_asm); - success &= strategyselector_register(opaque, "sad_8bit_4x4", "x86_asm_avx", 30, &kvz_sad_4x4_avx); - success &= strategyselector_register(opaque, "sad_8bit_8x8", "x86_asm_avx", 30, &kvz_sad_8x8_avx); - success &= strategyselector_register(opaque, "sad_8bit_16x16", "x86_asm_avx", 30, &kvz_sad_16x16_avx); - success &= strategyselector_register(opaque, "sad_8bit_32x32", "x86_asm_avx", 30, &kvz_sad_32x32_avx); - success &= strategyselector_register(opaque, "sad_8bit_64x64", "x86_asm_avx", 30, &kvz_sad_64x64_avx); + success &= strategyselector_register(opaque, "sad_4x4", "x86_asm_avx", 30, &kvz_sad_4x4_avx); + success &= strategyselector_register(opaque, "sad_8x8", "x86_asm_avx", 30, &kvz_sad_8x8_avx); + success &= strategyselector_register(opaque, "sad_16x16", "x86_asm_avx", 30, &kvz_sad_16x16_avx); + success &= strategyselector_register(opaque, "sad_32x32", "x86_asm_avx", 30, &kvz_sad_32x32_avx); + success &= strategyselector_register(opaque, "sad_64x64", "x86_asm_avx", 30, &kvz_sad_64x64_avx); - success &= strategyselector_register(opaque, "satd_8bit_4x4", "x86_asm_avx", 30, &kvz_satd_4x4_avx); - success &= strategyselector_register(opaque, "satd_8bit_8x8", "x86_asm_avx", 30, &kvz_satd_8x8_avx); - success &= strategyselector_register(opaque, "satd_8bit_16x16", "x86_asm_avx", 30, &kvz_satd_16x16_avx); - success &= strategyselector_register(opaque, "satd_8bit_32x32", "x86_asm_avx", 30, &kvz_satd_32x32_avx); - success &=
strategyselector_register(opaque, "satd_8bit_64x64", "x86_asm_avx", 30, &kvz_satd_64x64_avx); + success &= strategyselector_register(opaque, "satd_4x4", "x86_asm_avx", 30, &kvz_satd_4x4_avx); + success &= strategyselector_register(opaque, "satd_8x8", "x86_asm_avx", 30, &kvz_satd_8x8_avx); + success &= strategyselector_register(opaque, "satd_16x16", "x86_asm_avx", 30, &kvz_satd_16x16_avx); + success &= strategyselector_register(opaque, "satd_32x32", "x86_asm_avx", 30, &kvz_satd_32x32_avx); + success &= strategyselector_register(opaque, "satd_64x64", "x86_asm_avx", 30, &kvz_satd_64x64_avx); + #endif //KVZ_BIT_DEPTH == 8 #endif //!defined(KVZ_COMPILE_ASM) return success; } diff --git a/tests/sad_tests.c b/tests/sad_tests.c index 14aeed4a..a727f931 100644 --- a/tests/sad_tests.c +++ b/tests/sad_tests.c @@ -35,7 +35,7 @@ ////////////////////////////////////////////////////////////////////////// // GLOBALS -static const uint8_t ref_data[64] = { +static const kvz_pixel ref_data[64] = { 1,2,2,2,2,2,2,3, 4,5,5,5,5,5,5,6, 4,5,5,5,5,5,5,6, @@ -46,7 +46,7 @@ static const uint8_t ref_data[64] = { 7,8,8,8,8,8,8,9 }; -static const uint8_t pic_data[64] = { +static const kvz_pixel pic_data[64] = { 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,