Merge branch 'avx2-fix'

This commit is contained in:
Ari Koivula 2016-01-20 18:49:51 +02:00
commit ff3dea7385
4 changed files with 130 additions and 29 deletions

15
.gitignore vendored
View file

@ -33,21 +33,20 @@ Makefile.in
/stamp-h1 /stamp-h1
# Other files # Other files
*.exe *.a
*.o
*.lo
*.d *.d
*.dll
*.dylib
*.exe
*.la
*.lo
*.o
*.log *.log
.kdev4 .kdev4
configure.scan configure.scan
src/kvazaar src/kvazaar
src/libkvazaar.a
src/libkvazaar.la
src/libkvazaar.so.* src/libkvazaar.so.*
src/kvazaar.dll
src/libkvazaar.dll.a
src/libkvazaar.*.dylib
src/kvazaar.pc src/kvazaar.pc
tests/kvazaar_tests tests/kvazaar_tests
tests/kvazaar_tests.trs tests/kvazaar_tests.trs

View file

@ -39,6 +39,14 @@ AM_PROG_AR
LT_INIT([win32-dll]) LT_INIT([win32-dll])
AX_CHECK_COMPILE_FLAG(-mavx2, [flag_avx2="true"])
AX_CHECK_COMPILE_FLAG(-msse4.1, [flag_sse4_1="true"])
AX_CHECK_COMPILE_FLAG(-msse2, [flag_sse2="true"])
AM_CONDITIONAL([HAVE_AVX2], [test x"$flag_avx2" = x"true"])
AM_CONDITIONAL([HAVE_SSE4_1], [test x"$flag_sse4_1" = x"true"])
AM_CONDITIONAL([HAVE_SSE2], [test x"$flag_sse2" = x"true"])
AX_PTHREAD AX_PTHREAD
CFLAGS="-Wall -Wtype-limits -Wvla -I$srcdir/src -ftree-vectorize -fvisibility=hidden $PTHREAD_CFLAGS $CFLAGS" CFLAGS="-Wall -Wtype-limits -Wvla -I$srcdir/src -ftree-vectorize -fvisibility=hidden $PTHREAD_CFLAGS $CFLAGS"
CPPFLAGS="-DKVZ_DLL_EXPORTS $CPPFLAGS" CPPFLAGS="-DKVZ_DLL_EXPORTS $CPPFLAGS"

View file

@ -0,0 +1,74 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT])
#
# DESCRIPTION
#
# Check whether the given FLAG works with the current language's compiler
# or gives an error. (Warnings, however, are ignored)
#
# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on
# success/failure.
#
# If EXTRA-FLAGS is defined, it is added to the current language's default
# flags (e.g. CFLAGS) when the check is done. The check is thus made with
# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to
# force the compiler to issue an error when a bad flag is given.
#
# INPUT gives an alternative input source to AC_COMPILE_IFELSE.
#
# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this
# macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG.
#
# LICENSE
#
# Copyright (c) 2008 Guido U. Draheim <guidod@gmx.de>
# Copyright (c) 2011 Maarten Bosmans <mkbosmans@gmail.com>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 4
# AX_CHECK_COMPILE_FLAG(FLAG, ACTION-SUCCESS, ACTION-FAILURE, EXTRA-FLAGS, INPUT)
#
# Probe whether the compiler for the current Autoconf language accepts FLAG.
#
# How the body works, step by step:
#   1. Requires Autoconf 2.64 or newer.
#   2. Builds a cache variable name from the language abbreviation,
#      EXTRA-FLAGS and FLAG, so each distinct combination gets its own
#      cached yes/no result.
#   3. Inside the cache check: saves the current language FLAGS variable
#      (e.g. CFLAGS), appends EXTRA-FLAGS and FLAG to it, and tries to
#      compile INPUT. When INPUT is empty an empty default program is
#      compiled instead.
#   4. Records yes or no in the cache variable and restores the saved FLAGS
#      value, so the probe leaves the compiler flags as it found them.
#   5. Runs ACTION-SUCCESS when the cached result is yes, otherwise
#      ACTION-FAILURE. Either action defaults to the shell no-op ":" when
#      omitted.
#
# The macro itself never adds FLAG to any flags variable; callers that want
# the flag applied must do so in ACTION-SUCCESS.
AC_DEFUN([AX_CHECK_COMPILE_FLAG],
[AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF
AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl
AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [
ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
_AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1"
AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])],
[AS_VAR_SET(CACHEVAR,[yes])],
[AS_VAR_SET(CACHEVAR,[no])])
_AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])
AS_VAR_IF(CACHEVAR,yes,
[m4_default([$2], :)],
[m4_default([$3], :)])
AS_VAR_POPDEF([CACHEVAR])dnl
])dnl AX_CHECK_COMPILE_FLAGS

View file

@ -1,4 +1,3 @@
bin_PROGRAMS = kvazaar bin_PROGRAMS = kvazaar
lib_LTLIBRARIES = libkvazaar.la lib_LTLIBRARIES = libkvazaar.la
@ -18,6 +17,11 @@ include_HEADERS = \
noinst_HEADERS = \ noinst_HEADERS = \
extras/x86inc.asm extras/x86inc.asm
noinst_LTLIBRARIES = \
libaltivec.la \
libavx2.la \
libsse2.la \
libsse41.la
kvazaar_SOURCES = \ kvazaar_SOURCES = \
encmain.c \ encmain.c \
@ -91,8 +95,6 @@ libkvazaar_la_SOURCES = \
transform.h \ transform.h \
videoframe.c \ videoframe.c \
videoframe.h \ videoframe.h \
strategies/altivec/picture-altivec.c \
strategies/altivec/picture-altivec.h \
strategies/generic/dct-generic.c \ strategies/generic/dct-generic.c \
strategies/generic/dct-generic.h \ strategies/generic/dct-generic.h \
strategies/generic/intra-generic.c \ strategies/generic/intra-generic.c \
@ -105,10 +107,6 @@ libkvazaar_la_SOURCES = \
strategies/generic/picture-generic.h \ strategies/generic/picture-generic.h \
strategies/generic/quant-generic.c \ strategies/generic/quant-generic.c \
strategies/generic/quant-generic.h \ strategies/generic/quant-generic.h \
strategies/sse2/picture-sse2.c \
strategies/sse2/picture-sse2.h \
strategies/sse41/picture-sse41.c \
strategies/sse41/picture-sse41.h \
strategies/strategies-common.h \ strategies/strategies-common.h \
strategies/strategies-dct.c \ strategies/strategies-dct.c \
strategies/strategies-dct.h \ strategies/strategies-dct.h \
@ -127,7 +125,20 @@ libkvazaar_la_SOURCES = \
strategyselector.c \ strategyselector.c \
strategyselector.h strategyselector.h
avx2_sources = \ libkvazaar_la_LIBADD = \
libaltivec.la \
libavx2.la \
libsse2.la \
libsse41.la
libkvazaar_la_LDFLAGS = $(AM_LDFLAGS) -no-undefined -version-number $(KVZ_API_VERSION)
libaltivec_la_SOURCES = \
strategies/altivec/picture-altivec.c \
strategies/altivec/picture-altivec.h
libavx2_la_SOURCES = \
strategies/avx2/dct-avx2.c \ strategies/avx2/dct-avx2.c \
strategies/avx2/dct-avx2.h \ strategies/avx2/dct-avx2.h \
strategies/avx2/intra-avx2.c \ strategies/avx2/intra-avx2.c \
@ -139,36 +150,45 @@ avx2_sources = \
strategies/avx2/quant-avx2.c \ strategies/avx2/quant-avx2.c \
strategies/avx2/quant-avx2.h strategies/avx2/quant-avx2.h
libkvazaar_la_SOURCES += $(avx2_sources) libsse2_la_SOURCES = \
libkvazaar_la_LDFLAGS = $(AM_LDFLAGS) -no-undefined -version-number $(KVZ_API_VERSION) strategies/sse2/picture-sse2.c \
strategies/sse2/picture-sse2.h
libsse41_la_SOURCES = \
strategies/sse41/picture-sse41.c \
strategies/sse41/picture-sse41.h
if HAVE_PPC if HAVE_PPC
libaltivec_la_CFLAGS = -maltivec
strategies/altivec/picture-altivec.lo: CFLAGS += -maltivec endif
else #HAVE_PPC
if HAVE_X86 if HAVE_X86
strategies/sse2/picture-sse2.lo: CFLAGS += -msse2
strategies/sse41/picture-sse41.lo: CFLAGS += -msse4.1 if HAVE_AVX2
$(avx2_sources): CFLAGS += -mavx2 libavx2_la_CFLAGS = -mavx2
endif
if HAVE_SSE4_1
libsse41_la_CFLAGS = -msse4.1
endif
if HAVE_SSE2
libsse2_la_CFLAGS = -msse2
endif
if ENABLE_ASM if ENABLE_ASM
libkvazaar_la_SOURCES += \ noinst_LTLIBRARIES += libasm.la
libkvazaar_la_LIBADD += libasm.la
libasm_la_SOURCES = \
strategies/x86_asm/picture-x86-asm-sad.asm \ strategies/x86_asm/picture-x86-asm-sad.asm \
strategies/x86_asm/picture-x86-asm-sad.h \ strategies/x86_asm/picture-x86-asm-sad.h \
strategies/x86_asm/picture-x86-asm-satd.asm \ strategies/x86_asm/picture-x86-asm-satd.asm \
strategies/x86_asm/picture-x86-asm-satd.h strategies/x86_asm/picture-x86-asm-satd.h
libasm_la_CFLAGS = -DKVZ_COMPILE_ASM
strategies/x86_asm/picture-x86-asm.lo: CFLAGS += -DKVZ_COMPILE_ASM
strategies/x86_asm/picture-x86-asm-sad.lo: strategies/x86_asm/picture-x86-asm-sad.asm strategies/x86_asm/picture-x86-asm-sad.lo: strategies/x86_asm/picture-x86-asm-sad.asm
strategies/x86_asm/picture-x86-asm-satd.lo: strategies/x86_asm/picture-x86-asm-satd.asm strategies/x86_asm/picture-x86-asm-satd.lo: strategies/x86_asm/picture-x86-asm-satd.asm
endif #ENABLE_ASM endif #ENABLE_ASM
endif #HAVE_X86 endif #HAVE_X86
endif #HAVE_PPC
yasm_verbose = $(yasm_verbose_@AM_V@) yasm_verbose = $(yasm_verbose_@AM_V@)
yasm_verbose_ = $(yasm_verbose_@AM_DEFAULT_V@) yasm_verbose_ = $(yasm_verbose_@AM_DEFAULT_V@)