diff --git a/src/strategies/avx2/avx2_common_functions.h b/src/strategies/avx2/avx2_common_functions.h
index 3b6063ad..701f3ee9 100644
--- a/src/strategies/avx2/avx2_common_functions.h
+++ b/src/strategies/avx2/avx2_common_functions.h
@@ -15,6 +15,21 @@
   #define FIX_W32
 #endif
 
+// Non-inline functions defined in this header are likely to trigger a
+// warning for each module including this header that does NOT use them,
+// at least on unix-ish platforms (GCC/Clang both on native Unix and MinGW).
+// Tell 'em we actually want to do that, it's not an accident.
+// Other compilers (e.g. MSVC) do not understand the attribute, so there
+// FIX_UNUSED must expand to nothing rather than be left undefined.
+#if defined __GNUC__ || defined __clang__ || defined __MINGW32__ || defined __MINGW64__
+  #define FIX_UNUSED __attribute__((unused))
+#else
+  #define FIX_UNUSED
+#endif
+
+// Attribute bundle for static helpers in this header that are not marked INLINE; it does not itself forbid inlining.
+#define FIX_NOINLINE FIX_W32 FIX_UNUSED
+
 /*
  * Reorder coefficients from raster to scan order
  * Fun fact: Once upon a time, doing this in a loop looked like this:
@@ -123,8 +138,7 @@ static INLINE void get_first_last_nz_int16(__m256i ints, int32_t *first, int32_t
   *last = (31 - (int32_t)_lzcnt_u32(nonzero_bytes)) >> 1;
 }
 
-/* MOVED TO SAO-AVX2.C WHERE THIS IS USED
-int32_t FIX_W32 kvz_hsum_8x32b(const __m256i v)
+static int32_t FIX_NOINLINE hsum_8x32b(const __m256i v)
 {
   __m256i sum1 = v;
   __m256i sum2 = _mm256_permute4x64_epi64(sum1, _MM_SHUFFLE(1, 0, 3, 2));
@@ -138,5 +152,5 @@ int32_t FIX_W32 kvz_hsum_8x32b(const __m256i v)
   int32_t sum9 = _mm_cvtsi128_si32 (sum8);
   return sum9;
 }
-*/
+
 #endif
diff --git a/src/strategies/avx2/sao-avx2.c b/src/strategies/avx2/sao-avx2.c
index 5bd2dd91..e42f911a 100644
--- a/src/strategies/avx2/sao-avx2.c
+++ b/src/strategies/avx2/sao-avx2.c
@@ -271,21 +271,6 @@ static INLINE __m256i FIX_W32 do_one_edge_ymm(const __m256i a,
   return calc_diff_off_delta(diff_lo, diff_hi, offset, orig);
 }
 
-int32_t FIX_W32 kvz_hsum_8x32b(const __m256i v)
-{
-  __m256i sum1 = v;
-  __m256i sum2 = _mm256_permute4x64_epi64(sum1, _MM_SHUFFLE(1, 0, 3, 2));
-  __m256i sum3 = _mm256_add_epi32(sum1, sum2);
-  __m256i sum4 = _mm256_shuffle_epi32(sum3, _MM_SHUFFLE(1, 0, 3, 2));
-  __m256i sum5 = _mm256_add_epi32(sum3, sum4);
-  __m256i sum6 = _mm256_shuffle_epi32(sum5, _MM_SHUFFLE(2, 3, 0, 1));
-  __m256i sum7 = _mm256_add_epi32(sum5, sum6);
-
-  __m128i sum8 = _mm256_castsi256_si128(sum7);
-  int32_t sum9 = _mm_cvtsi128_si32(sum8);
-  return sum9;
-}
-
 static int32_t sao_edge_ddistortion_avx2(const kvz_pixel *orig_data,
                                          const kvz_pixel *rec_data,
                                          int32_t block_width,
@@ -384,7 +369,7 @@ static int32_t sao_edge_ddistortion_avx2(const kvz_pixel *orig_data,
       sum = _mm256_add_epi32(sum, curr);
     }
   }
-  return kvz_hsum_8x32b(sum);
+  return hsum_8x32b(sum);
 }
 
 static void FIX_W32 calc_edge_dir_one_ymm(const __m256i a,
@@ -499,7 +484,7 @@ static void calc_sao_edge_dir_avx2(const kvz_pixel *orig_data,
     }
   }
   for (uint32_t i = 0; i < 5; i++) {
-    int32_t sum = kvz_hsum_8x32b(diff_accum[i]);
+    int32_t sum = hsum_8x32b(diff_accum[i]);
     diff_sum[i] += sum;
   }
 }
@@ -892,7 +877,7 @@ static int32_t sao_band_ddistortion_avx2(const encoder_state_t *state,
       sum = _mm256_add_epi32 (sum, curr_sum);
     }
   }
-  return kvz_hsum_8x32b(sum);
+  return hsum_8x32b(sum);
 
 use_generic:
   return sao_band_ddistortion_generic(state, orig_data, rec_data, block_width,