Work around the ancient Win32 calling convention hassle

See if this'll work now
This commit is contained in:
Pauli Oikkonen 2019-09-06 12:53:45 +03:00
parent c5ca18950c
commit 99597b828a

View file

@ -34,12 +34,23 @@
#include "sao.h" #include "sao.h"
#include "strategyselector.h" #include "strategyselector.h"
// The calling convention used by MSVC on 32-bit builds essentially
// prevents functions from taking more than 3 XMM/YMM parameters, because
// it does not provide more than 8-byte parameter alignment, and only the
// first three vector parameters are carried in SIMD registers. The
// vectorcall convention could probably be problematic in globally visible
// functions, but likely not in static ones.
// FIX_W32 is a calling-convention specifier placed between the return type
// and the name of a function: __vectorcall when compiling with MSVC for
// 32-bit Windows, and empty for every other compiler/target.
#if defined(_MSC_VER) && defined(_WIN32) && !defined(_WIN64)
#define FIX_W32 __vectorcall
#else
#define FIX_W32
#endif
// These optimizations are based heavily on sao-generic.c. // These optimizations are based heavily on sao-generic.c.
// Might be useful to check that if (when) this file // Might be useful to check that if (when) this file
// is difficult to understand. // is difficult to understand.
static int32_t hsum_8x32b(const __m256i v) static int32_t FIX_W32 hsum_8x32b(const __m256i v)
{ {
__m256i sum1 = v; __m256i sum1 = v;
__m256i sum2 = _mm256_permute4x64_epi64(sum1, _MM_SHUFFLE(1, 0, 3, 2)); __m256i sum2 = _mm256_permute4x64_epi64(sum1, _MM_SHUFFLE(1, 0, 3, 2));
@ -69,7 +80,7 @@ static INLINE __m256i sign3_diff_epu8(const __m256i a, const __m256i b)
} }
// Mapping of edge_idx values to eo-classes, 32x8b at once // Mapping of edge_idx values to eo-classes, 32x8b at once
static __m256i calc_eo_cat(const __m256i a, static __m256i FIX_W32 calc_eo_cat(const __m256i a,
const __m256i b, const __m256i b,
const __m256i c) const __m256i c)
{ {
@ -222,7 +233,7 @@ static INLINE __m256i broadcast_xmm2ymm(const __m128i v)
} }
// Used for edge_ddistortion and band_ddistortion // Used for edge_ddistortion and band_ddistortion
static __m256i calc_diff_off_delta(const __m256i diff_lo, static __m256i FIX_W32 calc_diff_off_delta(const __m256i diff_lo,
const __m256i diff_hi, const __m256i diff_hi,
const __m256i offsets, const __m256i offsets,
const __m256i orig) const __m256i orig)
@ -266,7 +277,7 @@ static __m256i calc_diff_off_delta(const __m256i diff_lo,
return _mm256_add_epi32 (sum0, sum1); return _mm256_add_epi32 (sum0, sum1);
} }
static INLINE __m256i do_one_edge_ymm(const __m256i a, static INLINE __m256i FIX_W32 do_one_edge_ymm(const __m256i a,
const __m256i b, const __m256i b,
const __m256i c, const __m256i c,
const __m256i orig, const __m256i orig,
@ -387,7 +398,7 @@ static int32_t sao_edge_ddistortion_avx2(const kvz_pixel *orig_data,
return hsum_8x32b(sum); return hsum_8x32b(sum);
} }
static void calc_edge_dir_one_ymm(const __m256i a, static void FIX_W32 calc_edge_dir_one_ymm(const __m256i a,
const __m256i b, const __m256i b,
const __m256i c, const __m256i c,
const __m256i orig, const __m256i orig,
@ -684,7 +695,7 @@ static INLINE void reconstruct_color_band(const encoder_control_t *encoder,
} }
} }
static __m256i do_one_nonband_ymm(const __m256i a, static __m256i FIX_W32 do_one_nonband_ymm(const __m256i a,
const __m256i b, const __m256i b,
const __m256i c, const __m256i c,
const __m256i sao_offs) const __m256i sao_offs)