diff --git a/src/Makefile b/src/Makefile index e6ff36d0..fb86b2d5 100644 --- a/src/Makefile +++ b/src/Makefile @@ -50,7 +50,7 @@ LDFLAGS += -lm LD = gcc -fopenmp YASM = yasm ASMOBJS = cpu.o -OBJS = interface_main.o encmain.o bitstream.o cabac.o config.o context.o encoder.o filter.o inter.o intra.o nal.o picture.o rdo.o sao.o scalinglist.o search.o tables.o transform.o +OBJS = interface_main.o encmain.o bitstream.o cabac.o config.o context.o encoder.o filter.o inter.o intra.o nal.o picture.o rdo.o sao.o scalinglist.o search.o strategyselector.o tables.o transform.o PROG = ./kvazaar PROGS = $(PROG) diff --git a/src/encmain.c b/src/encmain.c index 39adff3f..9ef1da5b 100644 --- a/src/encmain.c +++ b/src/encmain.c @@ -42,9 +42,7 @@ #include "picture.h" #include "transform.h" #include "scalinglist.h" - -// Assembly optimization headers -#include "x86/cpu.h" +#include "strategyselector.h" /** * \brief Program main function. @@ -54,9 +52,6 @@ */ int main(int argc, char *argv[]) { - int ecx = 0,edx =0; - /* CPU feature bits */ - enum { BIT_SSE3 = 0,BIT_SSSE3 = 9, BIT_SSE41 = 19, BIT_SSE42 = 20, BIT_MMX = 24, BIT_SSE = 25, BIT_SSE2 = 26, BIT_AVX = 28}; config *cfg = NULL; //!< Global configuration FILE *input = NULL; //!< input file (YUV) FILE *output = NULL; //!< output file (HEVC NAL stream) @@ -77,6 +72,13 @@ int main(int argc, char *argv[]) _setmode( _fileno( stderr ), _O_TEXT ); #endif + //Initialize strategies + if (!strategyselector_init()) { + fprintf(stderr, "Failed to initialize strategies.\n"); + return EXIT_FAILURE; + } + + // Handle configuration cfg = config_alloc(); @@ -179,21 +181,6 @@ int main(int argc, char *argv[]) goto exit_failure; } - // Dig CPU features with cpuid - kvz_cpu_cpuid(&ecx,&edx); - fprintf(stderr, "CPU features enabled: "); - // EDX - if (edx & (1<input, "-")) { input = stdin; @@ -390,6 +377,8 @@ int main(int argc, char *argv[]) encoder_control_finalize(&encoder); free_exp_golomb(); + + strategyselector_free(); return EXIT_SUCCESS; @@ -398,5 +387,6 @@ exit_failure: if (input) fclose(input); if (output) fclose(output); if (recout) fclose(recout); + strategyselector_free(); return EXIT_FAILURE; } diff --git a/src/picture.c b/src/picture.c index 8603a9db..09922db3 100644 --- a/src/picture.c +++ b/src/picture.c @@ -22,6 +22,7 @@ */ #include "picture.h" +#include "strategyselector.h" #include #include @@ -769,42 +770,6 @@ static unsigned hor_sad(const pixel *pic_data, const pixel *ref_data, } -#if defined(__SSE2__) -#include "inline-optimizations/picture-sse2.c" -#elif defined(__ALTIVEC__) -#include "picture-altivec.c" -#else -//Generic implementations -/** - * \brief Calculate Sum of Absolute Differences (SAD) - * - * Calculate Sum of Absolute Differences (SAD) between two rectangular regions - * located in arbitrary points in the picture. - * - * \param data1 Starting point of the first picture. - * \param data2 Starting point of the second picture. - * \param width Width of the region for which SAD is calculated. - * \param height Height of the region for which SAD is calculated. - * \param stride Width of the pixel array. - * - * \returns Sum of Absolute Differences - */ -static unsigned reg_sad(const pixel * const data1, const pixel * const data2, - const int width, const int height, const unsigned stride1, const unsigned stride2) -{ - int y, x; - unsigned sad = 0; - - for (y = 0; y < height; ++y) { - for (x = 0; x < width; ++x) { - sad += abs(data1[y * stride1 + x] - data2[y * stride2 + x]); - } - } - - return sad; -} -#endif - /** * \brief Handle special cases of comparing blocks that are not completely * inside the frame. diff --git a/src/strategies/picture-generic.c b/src/strategies/picture-generic.c new file mode 100644 index 00000000..028f9a00 --- /dev/null +++ b/src/strategies/picture-generic.c @@ -0,0 +1,60 @@ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2014 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Kvazaar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Kvazaar. If not, see . + ****************************************************************************/ + +/* + * \file + */ + +#include + +#include "../strategyselector.h" + + +/** + * \brief Calculate Sum of Absolute Differences (SAD) + * + * Calculate Sum of Absolute Differences (SAD) between two rectangular regions + * located in arbitrary points in the picture. + * + * \param data1 Starting point of the first picture. + * \param data2 Starting point of the second picture. + * \param width Width of the region for which SAD is calculated. + * \param height Height of the region for which SAD is calculated. + * \param stride Width of the pixel array. + * + * \returns Sum of Absolute Differences + */ +static unsigned reg_sad_generic(const pixel * const data1, const pixel * const data2, + const int width, const int height, const unsigned stride1, const unsigned stride2) +{ + int y, x; + unsigned sad = 0; + + for (y = 0; y < height; ++y) { + for (x = 0; x < width; ++x) { + sad += abs(data1[y * stride1 + x] - data2[y * stride2 + x]); + } + } + + return sad; +} + +static int strategy_register_picture_generic(void* opaque) { + return strategyselector_register(opaque, "reg_sad", "generic", 0, ®_sad_generic); +} diff --git a/src/strategies/picture-sse2.c b/src/strategies/picture-sse2.c new file mode 100644 index 00000000..f9bf92ec --- /dev/null +++ b/src/strategies/picture-sse2.c @@ -0,0 +1,56 @@ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2014 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Kvazaar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Kvazaar. If not, see . + ****************************************************************************/ + +/* + * \file + */ +#include "../strategyselector.h" +#include "../picture.h" +#include +#include + +__attribute__ ((__target__ ("sse2"))) +static unsigned reg_sad_sse2(const pixel * const data1, const pixel * const data2, + const int width, const int height, const unsigned stride1, const unsigned stride2) +{ + int y, x; + unsigned sad = 0; + __m128i sse_inc = _mm_setzero_si128 (); + long long int sse_inc_array[2]; + + for (y = 0; y < height; ++y) { + for (x = 0; x <= width-16; x+=16) { + const __m128i a = _mm_loadu_si128((__m128i const*) &data1[y * stride1 + x]); + const __m128i b = _mm_loadu_si128((__m128i const*) &data2[y * stride2 + x]); + sse_inc = _mm_add_epi32(sse_inc, _mm_sad_epu8(a,b)); + } + + for (; x < width; ++x) { + sad += abs(data1[y * stride1 + x] - data2[y * stride2 + x]); + } + } + _mm_storeu_si128((__m128i*) sse_inc_array, sse_inc); + sad += sse_inc_array[0] + sse_inc_array[1]; + + return sad; +} + +static int strategy_register_picture_sse2(void* opaque) { + return strategyselector_register(opaque, "reg_sad", "sse2", 10, ®_sad_sse2); +} diff --git a/src/inline-optimizations/picture-sse2.c b/src/strategies/picture-sse41.c similarity index 89% rename from src/inline-optimizations/picture-sse2.c rename to src/strategies/picture-sse41.c index 8de61f92..e1b0300d 100644 --- a/src/inline-optimizations/picture-sse2.c +++ b/src/strategies/picture-sse41.c @@ -20,12 +20,13 @@ /* * \file */ - +#include "../strategyselector.h" #include "../picture.h" #include +#include -#ifdef __SSE2__ -static unsigned reg_sad(const pixel * const data1, const pixel * const data2, +__attribute__ ((__target__ ("sse2,sse4.1"))) +static unsigned reg_sad_sse41(const pixel * const data1, const pixel * const data2, const int width, const int height, const unsigned stride1, const unsigned stride2) { int y, x; @@ -40,7 +41,6 @@ static unsigned reg_sad(const pixel * const data1, const pixel * const data2, sse_inc = _mm_add_epi32(sse_inc, _mm_sad_epu8(a,b)); } -#ifdef __SSE4_1__ { const __m128i a = _mm_loadu_si128((__m128i const*) &data1[y * stride1 + x]); const __m128i b = _mm_loadu_si128((__m128i const*) &data2[y * stride2 + x]); @@ -74,7 +74,6 @@ static unsigned reg_sad(const pixel * const data1, const pixel * const data2, } x = (width - (width%2)); } -#endif //__SSE4_1__ for (; x < width; ++x) { sad += abs(data1[y * stride1 + x] - data2[y * stride2 + x]); @@ -85,6 +84,7 @@ static unsigned reg_sad(const pixel * const data1, const pixel * const data2, return sad; } -#else -#error picture-sse2.c requires __SSE2__ -#endif //__SSE2__ + +static int strategy_register_picture_sse41(void* opaque) { + return strategyselector_register(opaque, "reg_sad", "sse41", 20, ®_sad_sse41); +} diff --git a/src/strategies/picture.c b/src/strategies/picture.c new file mode 100644 index 00000000..900310f6 --- /dev/null +++ b/src/strategies/picture.c @@ -0,0 +1,27 @@ +#include "picture-generic.c" +#if COMPILE_INTEL_SSE2 +#include "picture-sse2.c" +#endif +#if COMPILE_INTEL_SSE2 && COMPILE_INTEL_SSE41 +#include "picture-sse41.c" +#endif + +unsigned (*reg_sad)(const pixel * const data1, const pixel * const data2, + const int width, const int height, const unsigned stride1, const unsigned stride2); + + +static int strategy_register_picture(void* opaque) { + if (!strategy_register_picture_generic(opaque)) return 0; + + if (g_hardware_flags.intel_flags.sse2) { +#if COMPILE_INTEL_SSE2 + if (!strategy_register_picture_sse2(opaque)) return 0; +#endif + if (g_hardware_flags.intel_flags.sse41) { +#if COMPILE_INTEL_SSE2 && COMPILE_INTEL_SSE41 + if (!strategy_register_picture_sse41(opaque)) return 0; +#endif + } + } + return 1; +} diff --git a/src/strategies/picture.h b/src/strategies/picture.h new file mode 100644 index 00000000..2c80295e --- /dev/null +++ b/src/strategies/picture.h @@ -0,0 +1,30 @@ +#ifndef STRATEGIES_PICTURE_H_ +#define STRATEGIES_PICTURE_H_ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2014 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Kvazaar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Kvazaar. If not, see . + ****************************************************************************/ + +#include "../picture.h" + +//Function pointer to reg_sad +extern unsigned (*reg_sad)(const pixel * const data1, const pixel * const data2, + const int width, const int height, const unsigned stride1, const unsigned stride2); + +#define STRATEGIES_PICTURE_EXPORTS {"reg_sad", (void**) ®_sad} + +#endif //STRATEGIES_PICTURE_H_ diff --git a/src/strategyselector.c b/src/strategyselector.c new file mode 100644 index 00000000..6e768674 --- /dev/null +++ b/src/strategyselector.c @@ -0,0 +1,220 @@ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2014 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Kvazaar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Kvazaar. If not, see . + ****************************************************************************/ + +/* + * \file + */ + +#include + +#include "strategyselector.h" + +hardware_flags g_hardware_flags; + +static void set_hardware_flags(); +static void* strategyselector_choose_for(const strategy_list * const strategies, const char * const strategy_type); + +//Strategies to include (add new file here) +#include "strategies/picture.c" + +//Returns 1 if successful +int strategyselector_init() { + const strategy_to_select *cur_strategy_to_select = strategies_to_select; + strategy_list strategies; + + strategies.allocated = 0; + strategies.count = 0; + strategies.strategies = NULL; + + set_hardware_flags(); + + //Add new register function here + if (!strategy_register_picture(&strategies)) { + fprintf(stderr, "strategy_register_picture failed!\n"); + return 0; + } + + while(cur_strategy_to_select->fptr) { + *(cur_strategy_to_select->fptr) = strategyselector_choose_for(&strategies, cur_strategy_to_select->strategy_type); + + if (!(*(cur_strategy_to_select->fptr))) { + fprintf(stderr, "Could not find a strategy for %s!\n", cur_strategy_to_select->strategy_type); + return 0; + } + ++cur_strategy_to_select; + } + + //We can free the structure now, as all strategies are statically set to pointers + if (strategies.allocated) { + free(strategies.strategies); + } + + return 1; +} + +void strategyselector_free() { + //Do nothing (yet) +} + +//Returns 1 if successful, 0 otherwise +int strategyselector_register(void * const opaque, const char * const type, const char * const strategy_name, int priority, void * const fptr) { + strategy_list * const strategies = opaque; + + if (strategies->allocated == strategies->count) { + strategy* new_strategies = realloc(strategies->strategies, sizeof(strategy) * (strategies->allocated + STRATEGY_LIST_ALLOC_SIZE)); + if (!new_strategies) { + fprintf(stderr, "Could not increase strategies list size!\n"); + return 0; + } + strategies->strategies = new_strategies; + strategies->allocated += STRATEGY_LIST_ALLOC_SIZE; + } + + { + strategy *new_strategy = &strategies->strategies[strategies->count++]; + new_strategy->type = type; + new_strategy->strategy_name = strategy_name; + new_strategy->priority = priority; + new_strategy->fptr = fptr; + } +#ifdef _DEBUG + fprintf(stderr, "Registered strategy %s:%s with priority %d (%p)\n", type, strategy_name, priority, fptr); +#endif //_DEBUG + + return 1; +} + +static void* strategyselector_choose_for(const strategy_list * const strategies, const char * const strategy_type) { + unsigned int max_priority = 0; + int max_priority_i = -1; + char buffer[256]; + char *override = NULL; + int i = 0; + + snprintf(buffer, 255, "KVAZAAR_OVERRIDE_%s", strategy_type); + override = getenv(buffer); + + for (i=0; i < strategies->count; ++i) { + if (strcmp(strategies->strategies[i].type, strategy_type) == 0) { + if (override && strcmp(strategies->strategies[i].strategy_name, override) == 0) { + fprintf(stderr, "%s environment variable present, choosing %s:%s\n", buffer, strategy_type, strategies->strategies[i].strategy_name); + return strategies->strategies[i].fptr; + } + if (strategies->strategies[i].priority >= max_priority) { + max_priority_i = i; + max_priority = strategies->strategies[i].priority; + } + } + } + + if (override) { + fprintf(stderr, "%s environment variable present, but no strategy %s was found!\n", buffer, override); + return NULL; + } + +#ifdef _DEBUG + fprintf(stderr, "Choosing strategy for %s:\n", strategy_type); + for (i=0; i < strategies->count; ++i) { + if (strcmp(strategies->strategies[i].type, strategy_type) == 0) { + if (i != max_priority_i) { + fprintf(stderr, "- %s (%d, %p)\n", strategies->strategies[i].strategy_name, strategies->strategies[i].priority, strategies->strategies[i].fptr); + } else { + fprintf(stderr, "> %s (%d, %p)\n", strategies->strategies[i].strategy_name, strategies->strategies[i].priority, strategies->strategies[i].fptr); + } + } + } +#endif //_DEBUG + + + if (max_priority_i == -1) { + return NULL; + } + + return strategies->strategies[max_priority_i].fptr; +} + +#if COMPILE_INTEL +#include "x86/cpu.h" +#endif + +static void set_hardware_flags() { + memset(&g_hardware_flags, 0, sizeof(g_hardware_flags)); + + g_hardware_flags.arm = COMPILE_ARM; + g_hardware_flags.intel = COMPILE_INTEL; + g_hardware_flags.powerpc = COMPILE_POWERPC; + +#if COMPILE_INTEL + { + int ecx = 0,edx =0; + /* CPU feature bits */ + enum { BIT_SSE3 = 0,BIT_SSSE3 = 9, BIT_SSE41 = 19, BIT_SSE42 = 20, BIT_MMX = 24, BIT_SSE = 25, BIT_SSE2 = 26, BIT_AVX = 28}; + + // Dig CPU features with cpuid + kvz_cpu_cpuid(&ecx,&edx); + + // EDX + if (edx & (1<. + ****************************************************************************/ + +//Hardware data (abstraction of defines). Extend for other compilers + +#if defined(_M_IX86) || defined(__i586__) || defined(__i686__) || defined(_M_X64) || defined(_M_AMD64) || defined(__amd64__) || defined(__x86_64__) +#define COMPILE_INTEL 1 + +#if defined(__MMX__) +#define COMPILE_INTEL_MMX 1 +#endif + +#if defined(__SSE__) +#define COMPILE_INTEL_SSE 1 +#endif + +#if defined(__SSE2__) +#define COMPILE_INTEL_SSE2 1 +#endif + +#if defined(__SSE3__) +#define COMPILE_INTEL_SSE3 1 +#endif + +#if defined(__SSSE3__) +#define COMPILE_INTEL_SSSE3 1 +#endif + +#if defined(__SSE4_1__) +#define COMPILE_INTEL_SSE41 1 +#endif + +#if defined(__SSE4_2__) +#define COMPILE_INTEL_SSE42 1 +#endif + +#if defined(__AVX__) +#define COMPILE_INTEL_AVX 1 +#endif + +#else +#define COMPILE_INTEL 0 +#endif + +#if defined (_M_PPC) || defined(__powerpc64__) || defined(__powerpc__) +#define COMPILE_POWERPC 1 +#else +#define COMPILE_POWERPC 0 +#endif + +#if defined (_M_ARM) || defined(__arm__) || defined(__thumb__) +#define COMPILE_ARM 1 +#else +#define COMPILE_ARM 0 +#endif + + + +typedef struct { + const char *type; //Type of the function, usually its name + const char *strategy_name; //Name of the strategy (e.g. sse2) + unsigned int priority; //Priority. 0 = lowest (default strategy) + void *fptr; //Pointer to the function +} strategy; + +typedef struct { + unsigned int count; + unsigned int allocated; + strategy* strategies; +} strategy_list; + +#define STRATEGY_LIST_ALLOC_SIZE 16 + +typedef struct { + const char *strategy_type; + void **fptr; +} strategy_to_select; + +typedef struct { + int intel; + struct { + int mmx; + int sse; + int sse2; + int sse3; + int ssse3; + int sse41; + int sse42; + int avx; + } intel_flags; + + int powerpc; + struct { + int altivec; + } powerpc_flags; + + int arm; + struct { + int neon; + } arm_flags; +} hardware_flags; + +extern hardware_flags g_hardware_flags; + + +int strategyselector_init(); +void strategyselector_free(); +int strategyselector_register(void *opaque, const char *type, const char *strategy_name, int priority, void *fptr); + + +//Strategy to include +#include "strategies/picture.h" + +static const strategy_to_select strategies_to_select[] = { + STRATEGIES_PICTURE_EXPORTS, + {NULL, NULL}, +}; + + + + + +#endif //STRATEGYSELECTOR_H_