Move function hsum to the file where it is used to avoid errors.

This commit is contained in:
siivonek 2020-04-02 14:03:06 +02:00
parent 58be514e2a
commit 566680af7b
2 changed files with 17 additions and 1 deletions

View file

@ -123,6 +123,7 @@ static INLINE void get_first_last_nz_int16(__m256i ints, int32_t *first, int32_t
*last = (31 - (int32_t)_lzcnt_u32(nonzero_bytes)) >> 1; *last = (31 - (int32_t)_lzcnt_u32(nonzero_bytes)) >> 1;
} }
/* MOVED TO SAO-AVX2.C WHERE THIS IS USED
int32_t FIX_W32 kvz_hsum_8x32b(const __m256i v) int32_t FIX_W32 kvz_hsum_8x32b(const __m256i v)
{ {
__m256i sum1 = v; __m256i sum1 = v;
@ -137,5 +138,5 @@ int32_t FIX_W32 kvz_hsum_8x32b(const __m256i v)
int32_t sum9 = _mm_cvtsi128_si32 (sum8); int32_t sum9 = _mm_cvtsi128_si32 (sum8);
return sum9; return sum9;
} }
*/
#endif #endif

View file

@ -271,6 +271,21 @@ static INLINE __m256i FIX_W32 do_one_edge_ymm(const __m256i a,
return calc_diff_off_delta(diff_lo, diff_hi, offset, orig); return calc_diff_off_delta(diff_lo, diff_hi, offset, orig);
} }
/**
 * \brief Horizontal sum of the eight 32-bit lanes of a 256-bit vector.
 *
 * The reduction folds the vector onto itself three times (256 -> 128 -> 64
 * -> 32 bits), adding after each fold, so that every lane ends up holding
 * the total. Additions are lane-wise 32-bit adds and therefore wrap on
 * overflow.
 *
 * \param v  Vector whose eight 32-bit lanes are summed.
 * \return   Lowest 32-bit lane of the fully folded vector, i.e. the sum.
 */
int32_t FIX_W32 kvz_hsum_8x32b(const __m256i v)
{
  // Fold 1: exchange the two 128-bit halves, then add.
  const __m256i swapped_halves = _mm256_permute4x64_epi64(v, _MM_SHUFFLE(1, 0, 3, 2));
  const __m256i fold_128       = _mm256_add_epi32(v, swapped_halves);

  // Fold 2: exchange the 64-bit pairs inside each 128-bit half, then add.
  const __m256i swapped_pairs  = _mm256_shuffle_epi32(fold_128, _MM_SHUFFLE(1, 0, 3, 2));
  const __m256i fold_64        = _mm256_add_epi32(fold_128, swapped_pairs);

  // Fold 3: exchange neighbouring 32-bit lanes, then add.
  const __m256i swapped_lanes  = _mm256_shuffle_epi32(fold_64, _MM_SHUFFLE(2, 3, 0, 1));
  const __m256i total          = _mm256_add_epi32(fold_64, swapped_lanes);

  // Every lane now carries the same total; read it out of lane 0.
  return _mm_cvtsi128_si32(_mm256_castsi256_si128(total));
}
static int32_t sao_edge_ddistortion_avx2(const kvz_pixel *orig_data, static int32_t sao_edge_ddistortion_avx2(const kvz_pixel *orig_data,
const kvz_pixel *rec_data, const kvz_pixel *rec_data,
int32_t block_width, int32_t block_width,