mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-12-18 03:04:06 +00:00
Work around the ancient Win32 calling convention hassle
See if this'll work now
This commit is contained in:
parent
c5ca18950c
commit
99597b828a
|
@ -34,12 +34,23 @@
|
|||
#include "sao.h"
|
||||
#include "strategyselector.h"
|
||||
|
||||
// The calling convention used by MSVC on 32-bit builds will essentially
|
||||
// prevent functions from having more than 3 XMM/YMM parameters, because it
|
||||
// will not provide more than 8-byte param alignment, and only the first
|
||||
// three vector params will be carried in SIMD registers. Now the
|
||||
// vectorcall convention could probably be problematic in globally visible
|
||||
// functions, but likely not in static ones.
|
||||
#if defined _MSC_VER && defined _WIN32 && !defined _WIN64
|
||||
#define FIX_W32 __vectorcall
|
||||
#else
|
||||
#define FIX_W32
|
||||
#endif
|
||||
|
||||
// These optimizations are based heavily on sao-generic.c.
|
||||
// Might be useful to check that if (when) this file
|
||||
// is difficult to understand.
|
||||
|
||||
static int32_t hsum_8x32b(const __m256i v)
|
||||
static int32_t FIX_W32 hsum_8x32b(const __m256i v)
|
||||
{
|
||||
__m256i sum1 = v;
|
||||
__m256i sum2 = _mm256_permute4x64_epi64(sum1, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
|
@ -69,7 +80,7 @@ static INLINE __m256i sign3_diff_epu8(const __m256i a, const __m256i b)
|
|||
}
|
||||
|
||||
// Mapping of edge_idx values to eo-classes, 32x8b at once
|
||||
static __m256i calc_eo_cat(const __m256i a,
|
||||
static __m256i FIX_W32 calc_eo_cat(const __m256i a,
|
||||
const __m256i b,
|
||||
const __m256i c)
|
||||
{
|
||||
|
@ -222,7 +233,7 @@ static INLINE __m256i broadcast_xmm2ymm(const __m128i v)
|
|||
}
|
||||
|
||||
// Used for edge_ddistortion and band_ddistortion
|
||||
static __m256i calc_diff_off_delta(const __m256i diff_lo,
|
||||
static __m256i FIX_W32 calc_diff_off_delta(const __m256i diff_lo,
|
||||
const __m256i diff_hi,
|
||||
const __m256i offsets,
|
||||
const __m256i orig)
|
||||
|
@ -266,7 +277,7 @@ static __m256i calc_diff_off_delta(const __m256i diff_lo,
|
|||
return _mm256_add_epi32 (sum0, sum1);
|
||||
}
|
||||
|
||||
static INLINE __m256i do_one_edge_ymm(const __m256i a,
|
||||
static INLINE __m256i FIX_W32 do_one_edge_ymm(const __m256i a,
|
||||
const __m256i b,
|
||||
const __m256i c,
|
||||
const __m256i orig,
|
||||
|
@ -387,7 +398,7 @@ static int32_t sao_edge_ddistortion_avx2(const kvz_pixel *orig_data,
|
|||
return hsum_8x32b(sum);
|
||||
}
|
||||
|
||||
static void calc_edge_dir_one_ymm(const __m256i a,
|
||||
static void FIX_W32 calc_edge_dir_one_ymm(const __m256i a,
|
||||
const __m256i b,
|
||||
const __m256i c,
|
||||
const __m256i orig,
|
||||
|
@ -684,7 +695,7 @@ static INLINE void reconstruct_color_band(const encoder_control_t *encoder,
|
|||
}
|
||||
}
|
||||
|
||||
static __m256i do_one_nonband_ymm(const __m256i a,
|
||||
static __m256i FIX_W32 do_one_nonband_ymm(const __m256i a,
|
||||
const __m256i b,
|
||||
const __m256i c,
|
||||
const __m256i sao_offs)
|
||||
|
|
Loading…
Reference in a new issue