Work around the ancient Win32 calling convention hassle

See if this'll work now
2024-12-18 03:04:06 +00:00 · 2019-09-06 12:53:45 +03:00 · 2019-09-06 12:53:45 +03:00 · 99597b828a
parent c5ca18950c
commit 99597b828a
1 changed files with 36 additions and 25 deletions
--- a/src/strategies/avx2/sao-avx2.c
+++ b/src/strategies/avx2/sao-avx2.c
@ -34,12 +34,23 @@
 #include "sao.h"
 #include "strategyselector.h"
 // The calling convention used by MSVC on 32-bit builds will essentially
 // prevent functions from having more than 3 XMM/YMM parameters, because it
 // will not provide more than 8-byte param alignment, and only the first
 // three vector params will be carried in SIMD registers. Now the
 // vectorcall convention could probably be problematic in globally visible
 // functions, but likely not in static ones.
 #if defined _MSC_VER && defined _WIN32 && !defined _WIN64
  #define FIX_W32 __vectorcall
 #else
  #define FIX_W32
 #endif
 // These optimizations are based heavily on sao-generic.c.
 // Might be useful to check that if (when) this file
 // is difficult to understand.
-static int32_t hsum_8x32b(const __m256i v)
+static int32_t FIX_W32 hsum_8x32b(const __m256i v)
 {
  __m256i sum1 = v;
  __m256i sum2 = _mm256_permute4x64_epi64(sum1, _MM_SHUFFLE(1, 0, 3, 2));
@ -69,9 +80,9 @@ static INLINE __m256i sign3_diff_epu8(const __m256i a, const __m256i b)
 }
 // Mapping of edge_idx values to eo-classes, 32x8b at once
-static __m256i calc_eo_cat(const __m256i a,
+static __m256i FIX_W32 calc_eo_cat(const __m256i a,
-                           const __m256i b,
+                                   const __m256i b,
-                           const __m256i c)
+                                   const __m256i c)
 {
  const __m256i twos       = _mm256_set1_epi8  (0x02);
  const __m256i idx_to_cat = _mm256_setr_epi64x(0x0403000201, 0,
@ -222,10 +233,10 @@ static INLINE __m256i broadcast_xmm2ymm(const __m128i v)
 }
 // Used for edge_ddistortion and band_ddistortion
-static __m256i calc_diff_off_delta(const __m256i diff_lo,
+static __m256i FIX_W32 calc_diff_off_delta(const __m256i diff_lo,
-                                   const __m256i diff_hi,
+                                           const __m256i diff_hi,
-                                   const __m256i offsets,
+                                           const __m256i offsets,
-                                   const __m256i orig)
+                                           const __m256i orig)
 {
  const __m256i zero          = _mm256_setzero_si256();
  const __m256i negate_hiword = _mm256_set1_epi32(0xffff0001);
@ -266,12 +277,12 @@ static __m256i calc_diff_off_delta(const __m256i diff_lo,
  return                 _mm256_add_epi32     (sum0,         sum1);
 }
-static INLINE __m256i do_one_edge_ymm(const __m256i a,
+static INLINE __m256i FIX_W32 do_one_edge_ymm(const __m256i a,
-                                      const __m256i b,
+                                              const __m256i b,
-                                      const __m256i c,
+                                              const __m256i c,
-                                      const __m256i orig,
+                                              const __m256i orig,
-                                      const __m256i badbyte_mask,
+                                              const __m256i badbyte_mask,
-                                      const __m256i offsets_256)
+                                              const __m256i offsets_256)
 {
  __m256i eo_cat = calc_eo_cat(a, b, c);
          eo_cat = _mm256_or_si256    (eo_cat,      badbyte_mask);
@ -387,13 +398,13 @@ static int32_t sao_edge_ddistortion_avx2(const kvz_pixel *orig_data,
  return hsum_8x32b(sum);
 }
-static void calc_edge_dir_one_ymm(const __m256i  a,
+static void FIX_W32 calc_edge_dir_one_ymm(const __m256i  a,
-                                  const __m256i  b,
+                                          const __m256i  b,
-                                  const __m256i  c,
+                                          const __m256i  c,
-                                  const __m256i  orig,
+                                          const __m256i  orig,
-                                  const __m256i  badbyte_mask,
+                                          const __m256i  badbyte_mask,
-                                        __m256i *diff_accum,
+                                                __m256i *diff_accum,
-                                        int32_t *hit_cnt)
+                                                int32_t *hit_cnt)
 {
  const __m256i ones_16 = _mm256_set1_epi16(1);
        __m256i eo_cat  = calc_eo_cat      (a, b, c);
@ -684,10 +695,10 @@ static INLINE void reconstruct_color_band(const encoder_control_t *encoder,
  }
 }
-static __m256i do_one_nonband_ymm(const __m256i a,
+static __m256i FIX_W32 do_one_nonband_ymm(const __m256i a,
-                                  const __m256i b,
+                                          const __m256i b,
-                                  const __m256i c,
+                                          const __m256i c,
-                                  const __m256i sao_offs)
+                                          const __m256i sao_offs)
 {
  const __m256i zero = _mm256_setzero_si256();