mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-12-03 21:44:06 +00:00
02cd17b427
Add implementations for these functions that process the image line by line instead of using the 16x16 function to process block by block. The 32x32 is around 30% faster, and 64x64 is around 15% faster, on Haswell. PASS inter_sad: 28.744M x reg_sad(32x32):x86_asm_avx (1014 ticks, 1.014 sec) PASS inter_sad: 7.882M x reg_sad(64x64):x86_asm_avx (1014 ticks, 1.014 sec) to PASS inter_sad: 37.828M x reg_sad(32x32):x86_asm_avx (1014 ticks, 1.014 sec) PASS inter_sad: 9.081M x reg_sad(64x64):x86_asm_avx (1014 ticks, 1.014 sec)
44 lines
1.8 KiB
C
#ifndef _PICTURE_X86_ASM_SAD_H_
#define _PICTURE_X86_ASM_SAD_H_
/*****************************************************************************
 * This file is part of Kvazaar HEVC encoder.
 *
 * Copyright (C) 2013-2015 Tampere University of Technology and others (see
 * COPYING file).
 *
 * Kvazaar is free software: you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the
 * Free Software Foundation; either version 2.1 of the License, or (at your
 * option) any later version.
 *
 * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with Kvazaar.  If not, see <http://www.gnu.org/licenses/>.
 ****************************************************************************/

/**
 * \ingroup Optimization
 * \file
 * Optimizations for AVX, utilizing ASM implementations.
 */

#include "global.h" // IWYU pragma: keep

// Sum of absolute differences between two contiguous (densely packed)
// NxN pixel blocks; both blocks are assumed to be laid out row after row
// with no gap between rows.  Implemented in hand-written AVX assembly.
unsigned kvz_sad_4x4_avx(const kvz_pixel *data1, const kvz_pixel *data2);
unsigned kvz_sad_8x8_avx(const kvz_pixel *data1, const kvz_pixel *data2);
unsigned kvz_sad_16x16_avx(const kvz_pixel *data1, const kvz_pixel *data2);

// Strided variants: both blocks are read from larger images whose row
// pitch is `stride` pixels, so rows need not be contiguous.
// NOTE(review): presumably the same stride applies to both inputs --
// confirm against the ASM implementations before relying on it.
unsigned kvz_sad_4x4_stride_avx(const kvz_pixel *data1, const kvz_pixel *data2, unsigned stride);
unsigned kvz_sad_8x8_stride_avx(const kvz_pixel *data1, const kvz_pixel *data2, unsigned stride);
unsigned kvz_sad_16x16_stride_avx(const kvz_pixel *data1, const kvz_pixel *data2, unsigned stride);
unsigned kvz_sad_32x32_stride_avx(const kvz_pixel *data1, const kvz_pixel *data2, unsigned stride);
unsigned kvz_sad_64x64_stride_avx(const kvz_pixel *data1, const kvz_pixel *data2, unsigned stride);

#endif