Inline fixed-width vectorized SAD functions

This commit is contained in:
Pauli Oikkonen 2019-01-16 19:57:53 +02:00
parent 6504145cce
commit a711ce3df5
2 changed files with 8 additions and 8 deletions

View file

@@ -4,7 +4,7 @@
#include "strategies/sse41/reg_sad_pow2_widths-sse41.h" #include "strategies/sse41/reg_sad_pow2_widths-sse41.h"
#include "kvazaar.h" #include "kvazaar.h"
static uint32_t reg_sad_w32(const kvz_pixel * const data1, const kvz_pixel * const data2, static INLINE uint32_t reg_sad_w32(const kvz_pixel * const data1, const kvz_pixel * const data2,
const int32_t height, const uint32_t stride1, const int32_t height, const uint32_t stride1,
const uint32_t stride2) const uint32_t stride2)
{ {
@@ -28,7 +28,7 @@ static uint32_t reg_sad_w32(const kvz_pixel * const data1, const kvz_pixel * con
return _mm_cvtsi128_si32(sad); return _mm_cvtsi128_si32(sad);
} }
static uint32_t reg_sad_w64(const kvz_pixel * const data1, const kvz_pixel * const data2, static INLINE uint32_t reg_sad_w64(const kvz_pixel * const data1, const kvz_pixel * const data2,
const int32_t height, const uint32_t stride1, const int32_t height, const uint32_t stride1,
const uint32_t stride2) const uint32_t stride2)
{ {

View file

@@ -4,7 +4,7 @@
#include <immintrin.h> #include <immintrin.h>
#include "kvazaar.h" #include "kvazaar.h"
static uint32_t reg_sad_w4(const kvz_pixel * const data1, const kvz_pixel * const data2, static INLINE uint32_t reg_sad_w4(const kvz_pixel * const data1, const kvz_pixel * const data2,
const int32_t height, const uint32_t stride1, const int32_t height, const uint32_t stride1,
const uint32_t stride2) const uint32_t stride2)
{ {
@@ -43,7 +43,7 @@ static uint32_t reg_sad_w4(const kvz_pixel * const data1, const kvz_pixel * cons
return _mm_cvtsi128_si32(sad); return _mm_cvtsi128_si32(sad);
} }
static uint32_t reg_sad_w8(const kvz_pixel * const data1, const kvz_pixel * const data2, static INLINE uint32_t reg_sad_w8(const kvz_pixel * const data1, const kvz_pixel * const data2,
const int32_t height, const uint32_t stride1, const int32_t height, const uint32_t stride1,
const uint32_t stride2) const uint32_t stride2)
{ {
@@ -82,7 +82,7 @@ static uint32_t reg_sad_w8(const kvz_pixel * const data1, const kvz_pixel * cons
return result; return result;
} }
static uint32_t reg_sad_w12(const kvz_pixel * const data1, const kvz_pixel * const data2, static INLINE uint32_t reg_sad_w12(const kvz_pixel * const data1, const kvz_pixel * const data2,
const int32_t height, const uint32_t stride1, const int32_t height, const uint32_t stride1,
const uint32_t stride2) const uint32_t stride2)
{ {
@@ -101,7 +101,7 @@ static uint32_t reg_sad_w12(const kvz_pixel * const data1, const kvz_pixel * con
return _mm_cvtsi128_si32(sad); return _mm_cvtsi128_si32(sad);
} }
static uint32_t reg_sad_w16(const kvz_pixel * const data1, const kvz_pixel * const data2, static INLINE uint32_t reg_sad_w16(const kvz_pixel * const data1, const kvz_pixel * const data2,
const int32_t height, const uint32_t stride1, const int32_t height, const uint32_t stride1,
const uint32_t stride2) const uint32_t stride2)
{ {
@@ -118,7 +118,7 @@ static uint32_t reg_sad_w16(const kvz_pixel * const data1, const kvz_pixel * con
return _mm_cvtsi128_si32(sad); return _mm_cvtsi128_si32(sad);
} }
static uint32_t reg_sad_w24(const kvz_pixel * const data1, const kvz_pixel * const data2, static INLINE uint32_t reg_sad_w24(const kvz_pixel * const data1, const kvz_pixel * const data2,
const int32_t height, const uint32_t stride1, const int32_t height, const uint32_t stride1,
const uint32_t stride2) const uint32_t stride2)
{ {
@@ -170,7 +170,7 @@ static uint32_t reg_sad_w24(const kvz_pixel * const data1, const kvz_pixel * con
return _mm_cvtsi128_si32(sad); return _mm_cvtsi128_si32(sad);
} }
static uint32_t reg_sad_arbitrary(const kvz_pixel * const data1, const kvz_pixel * const data2, static INLINE uint32_t reg_sad_arbitrary(const kvz_pixel * const data1, const kvz_pixel * const data2,
const int32_t width, const int32_t height, const uint32_t stride1, const int32_t width, const int32_t height, const uint32_t stride1,
const uint32_t stride2) const uint32_t stride2)
{ {