Strategies and runtime detection/choice of best algorithm

This commit is contained in:
Laurent Fasnacht 2014-04-29 10:14:42 +02:00
parent 2a17e9a7aa
commit bf7e755cf7
10 changed files with 555 additions and 66 deletions

View file

@ -50,7 +50,7 @@ LDFLAGS += -lm
LD = gcc -fopenmp
YASM = yasm
ASMOBJS = cpu.o
OBJS = interface_main.o encmain.o bitstream.o cabac.o config.o context.o encoder.o filter.o inter.o intra.o nal.o picture.o rdo.o sao.o scalinglist.o search.o tables.o transform.o
OBJS = interface_main.o encmain.o bitstream.o cabac.o config.o context.o encoder.o filter.o inter.o intra.o nal.o picture.o rdo.o sao.o scalinglist.o search.o strategyselector.o tables.o transform.o
PROG = ./kvazaar
PROGS = $(PROG)

View file

@ -42,9 +42,7 @@
#include "picture.h"
#include "transform.h"
#include "scalinglist.h"
// Assembly optimization headers
#include "x86/cpu.h"
#include "strategyselector.h"
/**
* \brief Program main function.
@ -54,9 +52,6 @@
*/
int main(int argc, char *argv[])
{
int ecx = 0,edx =0;
/* CPU feature bits */
enum { BIT_SSE3 = 0,BIT_SSSE3 = 9, BIT_SSE41 = 19, BIT_SSE42 = 20, BIT_MMX = 24, BIT_SSE = 25, BIT_SSE2 = 26, BIT_AVX = 28};
config *cfg = NULL; //!< Global configuration
FILE *input = NULL; //!< input file (YUV)
FILE *output = NULL; //!< output file (HEVC NAL stream)
@ -77,6 +72,13 @@ int main(int argc, char *argv[])
_setmode( _fileno( stderr ), _O_TEXT );
#endif
//Initialize strategies
if (!strategyselector_init()) {
fprintf(stderr, "Failed to initialize strategies.\n");
return EXIT_FAILURE;
}
// Handle configuration
cfg = config_alloc();
@ -179,21 +181,6 @@ int main(int argc, char *argv[])
goto exit_failure;
}
// Dig CPU features with cpuid
kvz_cpu_cpuid(&ecx,&edx);
fprintf(stderr, "CPU features enabled: ");
// EDX
if (edx & (1<<BIT_MMX)) fprintf(stderr, "MMX ");
if (edx & (1<<BIT_SSE)) fprintf(stderr, "SSE ");
if (edx & (1<<BIT_SSE2)) fprintf(stderr, "SSE2 ");
// ECX
if (ecx & (1<<BIT_SSE3)) fprintf(stderr, "SSE3 ");
if (ecx & (1<<BIT_SSSE3)) fprintf(stderr, "SSSE3 ");
if (ecx & (1<<BIT_SSE41)) fprintf(stderr, "SSE4.1 ");
if (ecx & (1<<BIT_SSE42)) fprintf(stderr, "SSE4.2 ");
if (ecx & (1<<BIT_AVX)) fprintf(stderr, "AVX ");
fprintf(stderr, "\n");
// Check if the input file name is a dash, this means stdin
if (!strcmp(cfg->input, "-")) {
input = stdin;
@ -390,6 +377,8 @@ int main(int argc, char *argv[])
encoder_control_finalize(&encoder);
free_exp_golomb();
strategyselector_free();
return EXIT_SUCCESS;
@ -398,5 +387,6 @@ exit_failure:
if (input) fclose(input);
if (output) fclose(output);
if (recout) fclose(recout);
strategyselector_free();
return EXIT_FAILURE;
}

View file

@ -22,6 +22,7 @@
*/
#include "picture.h"
#include "strategyselector.h"
#include <string.h>
#include <stdio.h>
@ -769,42 +770,6 @@ static unsigned hor_sad(const pixel *pic_data, const pixel *ref_data,
}
#if defined(__SSE2__)
#include "inline-optimizations/picture-sse2.c"
#elif defined(__ALTIVEC__)
#include "picture-altivec.c"
#else
//Generic implementations
/**
 * \brief Calculate Sum of Absolute Differences (SAD)
 *
 * Sums |data1 - data2| over a width x height rectangle. The two regions may
 * live in buffers with different row pitches.
 *
 * \param data1   Top-left sample of the first region.
 * \param data2   Top-left sample of the second region.
 * \param width   Number of samples per row to compare.
 * \param height  Number of rows to compare.
 * \param stride1 Distance, in samples, between consecutive rows of data1.
 * \param stride2 Distance, in samples, between consecutive rows of data2.
 *
 * \returns Sum of Absolute Differences
 */
static unsigned reg_sad(const pixel * const data1, const pixel * const data2,
                        const int width, const int height, const unsigned stride1, const unsigned stride2)
{
  unsigned accumulated = 0;
  int row, col;

  for (row = 0; row < height; ++row) {
    for (col = 0; col < width; ++col) {
      const int delta = data1[row * stride1 + col] - data2[row * stride2 + col];
      accumulated += abs(delta);
    }
  }

  return accumulated;
}
#endif
/**
* \brief Handle special cases of comparing blocks that are not completely
* inside the frame.

View file

@ -0,0 +1,60 @@
/*****************************************************************************
* This file is part of Kvazaar HEVC encoder.
*
* Copyright (C) 2013-2014 Tampere University of Technology and others (see
* COPYING file).
*
* Kvazaar is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*
* Kvazaar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
****************************************************************************/
/*
* \file
*/
#include <stdlib.h>
#include "../strategyselector.h"
/**
 * \brief Calculate Sum of Absolute Differences (SAD), portable C version.
 *
 * Walks a width x height rectangle and accumulates the absolute difference
 * of every pair of co-located samples. Each input has its own row pitch, so
 * the two regions can come from differently sized buffers.
 *
 * \param data1   Top-left sample of the first region.
 * \param data2   Top-left sample of the second region.
 * \param width   Number of samples per row to compare.
 * \param height  Number of rows to compare.
 * \param stride1 Distance, in samples, between consecutive rows of data1.
 * \param stride2 Distance, in samples, between consecutive rows of data2.
 *
 * \returns Sum of Absolute Differences
 */
static unsigned reg_sad_generic(const pixel * const data1, const pixel * const data2,
                                const int width, const int height, const unsigned stride1, const unsigned stride2)
{
  unsigned total = 0;
  int row = 0;

  while (row < height) {
    int col;
    for (col = 0; col < width; ++col) {
      const int diff = data1[row * stride1 + col] - data2[row * stride2 + col];
      total += abs(diff);
    }
    ++row;
  }

  return total;
}
/**
 * \brief Register the portable reg_sad implementation.
 *
 * \param opaque Registration context, passed through to
 *               strategyselector_register() untouched.
 * \returns Whatever strategyselector_register() returns (1 on success, 0 on failure).
 */
static int strategy_register_picture_generic(void* opaque) {
  // Registered with priority 0.
  const int registered = strategyselector_register(opaque, "reg_sad", "generic", 0, &reg_sad_generic);

  return registered;
}

View file

@ -0,0 +1,56 @@
/*****************************************************************************
* This file is part of Kvazaar HEVC encoder.
*
* Copyright (C) 2013-2014 Tampere University of Technology and others (see
* COPYING file).
*
* Kvazaar is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*
* Kvazaar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
****************************************************************************/
/*
* \file
*/
#include "../strategyselector.h"
#include "../picture.h"
#include <immintrin.h>
#include <assert.h>
/**
 * \brief Calculate Sum of Absolute Differences (SAD) using SSE2.
 *
 * Each row is processed 16 bytes at a time with _mm_sad_epu8; whatever is
 * left at the end of the row (fewer than 16 samples) is handled by a scalar
 * loop.
 *
 * NOTE(review): the 16-byte loads are treated as 16 samples, so this assumes
 * pixel is an 8-bit type -- confirm against the pixel typedef.
 *
 * \param data1   Top-left sample of the first region.
 * \param data2   Top-left sample of the second region.
 * \param width   Number of samples per row to compare.
 * \param height  Number of rows to compare.
 * \param stride1 Distance, in samples, between consecutive rows of data1.
 * \param stride2 Distance, in samples, between consecutive rows of data2.
 *
 * \returns Sum of Absolute Differences
 */
__attribute__ ((__target__ ("sse2")))
static unsigned reg_sad_sse2(const pixel * const data1, const pixel * const data2,
                             const int width, const int height, const unsigned stride1, const unsigned stride2)
{
  __m128i vector_sum = _mm_setzero_si128();
  long long int lanes[2];
  unsigned result = 0;
  int row;

  for (row = 0; row < height; ++row) {
    int col = 0;

    // Vector part: unaligned loads, so no alignment requirement on the inputs.
    while (col <= width - 16) {
      const __m128i block1 = _mm_loadu_si128((__m128i const*) &data1[row * stride1 + col]);
      const __m128i block2 = _mm_loadu_si128((__m128i const*) &data2[row * stride2 + col]);
      vector_sum = _mm_add_epi32(vector_sum, _mm_sad_epu8(block1, block2));
      col += 16;
    }

    // Scalar tail for the remaining samples of this row.
    while (col < width) {
      result += abs(data1[row * stride1 + col] - data2[row * stride2 + col]);
      ++col;
    }
  }

  // _mm_sad_epu8 leaves one partial sum in each 64-bit half; fold both in.
  _mm_storeu_si128((__m128i*) lanes, vector_sum);
  result += lanes[0] + lanes[1];

  return result;
}
/**
 * \brief Register the SSE2 reg_sad implementation.
 *
 * \param opaque Registration context, passed through to
 *               strategyselector_register() untouched.
 * \returns Whatever strategyselector_register() returns (1 on success, 0 on failure).
 */
static int strategy_register_picture_sse2(void* opaque) {
  // Registered with priority 10.
  const int registered = strategyselector_register(opaque, "reg_sad", "sse2", 10, &reg_sad_sse2);

  return registered;
}

View file

@ -20,12 +20,13 @@
/*
* \file
*/
#include "../strategyselector.h"
#include "../picture.h"
#include <immintrin.h>
#include <assert.h>
#ifdef __SSE2__
static unsigned reg_sad(const pixel * const data1, const pixel * const data2,
__attribute__ ((__target__ ("sse2,sse4.1")))
static unsigned reg_sad_sse41(const pixel * const data1, const pixel * const data2,
const int width, const int height, const unsigned stride1, const unsigned stride2)
{
int y, x;
@ -40,7 +41,6 @@ static unsigned reg_sad(const pixel * const data1, const pixel * const data2,
sse_inc = _mm_add_epi32(sse_inc, _mm_sad_epu8(a,b));
}
#ifdef __SSE4_1__
{
const __m128i a = _mm_loadu_si128((__m128i const*) &data1[y * stride1 + x]);
const __m128i b = _mm_loadu_si128((__m128i const*) &data2[y * stride2 + x]);
@ -74,7 +74,6 @@ static unsigned reg_sad(const pixel * const data1, const pixel * const data2,
}
x = (width - (width%2));
}
#endif //__SSE4_1__
for (; x < width; ++x) {
sad += abs(data1[y * stride1 + x] - data2[y * stride2 + x]);
@ -85,6 +84,7 @@ static unsigned reg_sad(const pixel * const data1, const pixel * const data2,
return sad;
}
#else
#error picture-sse2.c requires __SSE2__
#endif //__SSE2__
/**
 * \brief Register the SSE4.1 reg_sad implementation.
 *
 * \param opaque Registration context, passed through to
 *               strategyselector_register() untouched.
 * \returns Whatever strategyselector_register() returns (1 on success, 0 on failure).
 */
static int strategy_register_picture_sse41(void* opaque) {
  // Registered with priority 20.
  const int registered = strategyselector_register(opaque, "reg_sad", "sse41", 20, &reg_sad_sse41);

  return registered;
}

27
src/strategies/picture.c Normal file
View file

@ -0,0 +1,27 @@
#include "picture-generic.c"
#if COMPILE_INTEL_SSE2
#include "picture-sse2.c"
#endif
#if COMPILE_INTEL_SSE2 && COMPILE_INTEL_SSE41
#include "picture-sse41.c"
#endif
/**
 * \brief Function pointer through which callers invoke the selected SAD
 *        implementation.
 *
 * Defined here without an initializer; the candidate implementations are
 * registered under the type name "reg_sad" by strategy_register_picture().
 */
unsigned (*reg_sad)(const pixel * const data1, const pixel * const data2,
const int width, const int height, const unsigned stride1, const unsigned stride2);
/**
 * \brief Register every reg_sad implementation usable on this machine.
 *
 * The generic version is always registered. The SSE2 version is added when
 * it was compiled in and g_hardware_flags reports SSE2; the SSE4.1 version
 * additionally needs SSE4.1 to be compiled in and reported.
 *
 * \param opaque Registration context, forwarded to the per-implementation
 *               register functions.
 * \returns 1 if every attempted registration succeeded, 0 otherwise.
 */
static int strategy_register_picture(void* opaque) {
  if (!strategy_register_picture_generic(opaque)) {
    return 0;
  }

#if COMPILE_INTEL_SSE2
  if (g_hardware_flags.intel_flags.sse2) {
    if (!strategy_register_picture_sse2(opaque)) {
      return 0;
    }
#if COMPILE_INTEL_SSE41
    // SSE4.1 is only considered when SSE2 is available as well.
    if (g_hardware_flags.intel_flags.sse41 && !strategy_register_picture_sse41(opaque)) {
      return 0;
    }
#endif
  }
#endif

  return 1;
}

30
src/strategies/picture.h Normal file
View file

@ -0,0 +1,30 @@
#ifndef STRATEGIES_PICTURE_H_
#define STRATEGIES_PICTURE_H_
/*****************************************************************************
* This file is part of Kvazaar HEVC encoder.
*
* Copyright (C) 2013-2014 Tampere University of Technology and others (see
* COPYING file).
*
* Kvazaar is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*
* Kvazaar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
****************************************************************************/
#include "../picture.h"
//Function pointer to the reg_sad (Sum of Absolute Differences) implementation.
//Declared here, defined in strategies/picture.c.
extern unsigned (*reg_sad)(const pixel * const data1, const pixel * const data2,
const int width, const int height, const unsigned stride1, const unsigned stride2);
//Initializer for one strategy_to_select entry: the strategy type name and the
//address of the function pointer that should receive the chosen implementation.
#define STRATEGIES_PICTURE_EXPORTS {"reg_sad", (void**) &reg_sad}
#endif //STRATEGIES_PICTURE_H_

220
src/strategyselector.c Normal file
View file

@ -0,0 +1,220 @@
/*****************************************************************************
* This file is part of Kvazaar HEVC encoder.
*
* Copyright (C) 2013-2014 Tampere University of Technology and others (see
* COPYING file).
*
* Kvazaar is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*
* Kvazaar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
****************************************************************************/
/*
* \file
*/
#include <string.h>
#include "strategyselector.h"
//Global description of the hardware; filled in by set_hardware_flags().
hardware_flags g_hardware_flags;
//Forward declarations: both are defined further down but are needed by
//strategyselector_init() and by the included strategy files.
static void set_hardware_flags();
static void* strategyselector_choose_for(const strategy_list * const strategies, const char * const strategy_type);
//Strategies to include (add new file here)
#include "strategies/picture.c"
/**
 * \brief Detect the hardware, register all strategies and bind every
 *        exported function pointer to the best available implementation.
 *
 * The list of registered strategies only lives for the duration of this
 * call: once the function pointers in strategies_to_select have been set,
 * the list is released. It is released on the failure paths as well, so a
 * failed initialisation does not leak it.
 *
 * \returns 1 if successful, 0 otherwise (a message is printed to stderr).
 */
int strategyselector_init() {
  const strategy_to_select *cur_strategy_to_select = strategies_to_select;
  strategy_list strategies;
  int success = 0;

  strategies.allocated = 0;
  strategies.count = 0;
  strategies.strategies = NULL;

  set_hardware_flags();

  //Add new register function here
  if (!strategy_register_picture(&strategies)) {
    fprintf(stderr, "strategy_register_picture failed!\n");
    goto cleanup;
  }

  //The table is terminated by an entry whose fptr is NULL
  while (cur_strategy_to_select->fptr) {
    *(cur_strategy_to_select->fptr) = strategyselector_choose_for(&strategies, cur_strategy_to_select->strategy_type);

    if (!(*(cur_strategy_to_select->fptr))) {
      fprintf(stderr, "Could not find a strategy for %s!\n", cur_strategy_to_select->strategy_type);
      goto cleanup;
    }
    ++cur_strategy_to_select;
  }

  success = 1;

cleanup:
  //We can free the structure now, as all strategies are statically set to pointers.
  //free(NULL) is a no-op, so no guard is needed when nothing was registered.
  free(strategies.strategies);

  return success;
}
/**
 * \brief Counterpart of strategyselector_init().
 *
 * Currently a no-op: strategyselector_init() releases its temporary strategy
 * list before returning, so nothing is left to free here.
 */
void strategyselector_free() {
//Do nothing (yet)
}
//Returns 1 if successful, 0 otherwise
int strategyselector_register(void * const opaque, const char * const type, const char * const strategy_name, int priority, void * const fptr) {
strategy_list * const strategies = opaque;
if (strategies->allocated == strategies->count) {
strategy* new_strategies = realloc(strategies->strategies, sizeof(strategy) * (strategies->allocated + STRATEGY_LIST_ALLOC_SIZE));
if (!new_strategies) {
fprintf(stderr, "Could not increase strategies list size!\n");
return 0;
}
strategies->strategies = new_strategies;
strategies->allocated += STRATEGY_LIST_ALLOC_SIZE;
}
{
strategy *new_strategy = &strategies->strategies[strategies->count++];
new_strategy->type = type;
new_strategy->strategy_name = strategy_name;
new_strategy->priority = priority;
new_strategy->fptr = fptr;
}
#ifdef _DEBUG
fprintf(stderr, "Registered strategy %s:%s with priority %d (%p)\n", type, strategy_name, priority, fptr);
#endif //_DEBUG
return 1;
}
static void* strategyselector_choose_for(const strategy_list * const strategies, const char * const strategy_type) {
unsigned int max_priority = 0;
int max_priority_i = -1;
char buffer[256];
char *override = NULL;
int i = 0;
snprintf(buffer, 255, "KVAZAAR_OVERRIDE_%s", strategy_type);
override = getenv(buffer);
for (i=0; i < strategies->count; ++i) {
if (strcmp(strategies->strategies[i].type, strategy_type) == 0) {
if (override && strcmp(strategies->strategies[i].strategy_name, override) == 0) {
fprintf(stderr, "%s environment variable present, choosing %s:%s\n", buffer, strategy_type, strategies->strategies[i].strategy_name);
return strategies->strategies[i].fptr;
}
if (strategies->strategies[i].priority >= max_priority) {
max_priority_i = i;
max_priority = strategies->strategies[i].priority;
}
}
}
if (override) {
fprintf(stderr, "%s environment variable present, but no strategy %s was found!\n", buffer, override);
return NULL;
}
#ifdef _DEBUG
fprintf(stderr, "Choosing strategy for %s:\n", strategy_type);
for (i=0; i < strategies->count; ++i) {
if (strcmp(strategies->strategies[i].type, strategy_type) == 0) {
if (i != max_priority_i) {
fprintf(stderr, "- %s (%d, %p)\n", strategies->strategies[i].strategy_name, strategies->strategies[i].priority, strategies->strategies[i].fptr);
} else {
fprintf(stderr, "> %s (%d, %p)\n", strategies->strategies[i].strategy_name, strategies->strategies[i].priority, strategies->strategies[i].fptr);
}
}
}
#endif //_DEBUG
if (max_priority_i == -1) {
return NULL;
}
return strategies->strategies[max_priority_i].fptr;
}
#if COMPILE_INTEL
#include "x86/cpu.h"
#endif
/**
 * \brief Fill g_hardware_flags and report the result on stderr.
 *
 * The architecture fields (arm, intel, powerpc) come from the COMPILE_*
 * macros. On x86 builds the intel_flags are then set from the feature words
 * returned by kvz_cpu_cpuid(), and two lines are printed: the instruction
 * sets the binary was compiled for and the ones the running CPU reports.
 */
static void set_hardware_flags() {
  memset(&g_hardware_flags, 0, sizeof(g_hardware_flags));

  g_hardware_flags.arm = COMPILE_ARM;
  g_hardware_flags.intel = COMPILE_INTEL;
  g_hardware_flags.powerpc = COMPILE_POWERPC;

#if COMPILE_INTEL
  {
    int ecx = 0, edx = 0;
    /* CPU feature bits of CPUID leaf 1.
     * MMX is EDX bit 23; bit 24 is FXSR, not MMX. */
    enum { BIT_SSE3 = 0, BIT_SSSE3 = 9, BIT_SSE41 = 19, BIT_SSE42 = 20, BIT_MMX = 23, BIT_SSE = 25, BIT_SSE2 = 26, BIT_AVX = 28 };

    // Dig CPU features with cpuid
    // NOTE(review): assumes kvz_cpu_cpuid() returns the ECX/EDX feature words
    // of CPUID leaf 1 -- confirm against x86/cpu.h.
    kvz_cpu_cpuid(&ecx, &edx);

    // EDX
    g_hardware_flags.intel_flags.mmx   = (edx & (1 << BIT_MMX))   != 0;
    g_hardware_flags.intel_flags.sse   = (edx & (1 << BIT_SSE))   != 0;
    g_hardware_flags.intel_flags.sse2  = (edx & (1 << BIT_SSE2))  != 0;
    // ECX
    g_hardware_flags.intel_flags.sse3  = (ecx & (1 << BIT_SSE3))  != 0;
    g_hardware_flags.intel_flags.ssse3 = (ecx & (1 << BIT_SSSE3)) != 0;
    g_hardware_flags.intel_flags.sse41 = (ecx & (1 << BIT_SSE41)) != 0;
    g_hardware_flags.intel_flags.sse42 = (ecx & (1 << BIT_SSE42)) != 0;
    // NOTE(review): this only says the CPU implements AVX. Before an AVX
    // strategy is added, OS support (OSXSAVE + XGETBV) must be checked too.
    g_hardware_flags.intel_flags.avx   = (ecx & (1 << BIT_AVX))   != 0;

    fprintf(stderr, "Compiled: INTEL, flags:");
#if COMPILE_INTEL_MMX
    fprintf(stderr, " MMX");
#endif
#if COMPILE_INTEL_SSE
    fprintf(stderr, " SSE");
#endif
#if COMPILE_INTEL_SSE2
    fprintf(stderr, " SSE2");
#endif
#if COMPILE_INTEL_SSE3
    fprintf(stderr, " SSE3");
#endif
#if COMPILE_INTEL_SSSE3
    fprintf(stderr, " SSSE3");
#endif
#if COMPILE_INTEL_SSE41
    fprintf(stderr, " SSE41");
#endif
#if COMPILE_INTEL_SSE42
    fprintf(stderr, " SSE42");
#endif
#if COMPILE_INTEL_AVX
    fprintf(stderr, " AVX");
#endif

    fprintf(stderr, "\nRun on : INTEL, flags:");
    if (g_hardware_flags.intel_flags.mmx) fprintf(stderr, " MMX");
    if (g_hardware_flags.intel_flags.sse) fprintf(stderr, " SSE");
    if (g_hardware_flags.intel_flags.sse2) fprintf(stderr, " SSE2");
    if (g_hardware_flags.intel_flags.sse3) fprintf(stderr, " SSE3");
    if (g_hardware_flags.intel_flags.ssse3) fprintf(stderr, " SSSE3");
    if (g_hardware_flags.intel_flags.sse41) fprintf(stderr, " SSE41");
    if (g_hardware_flags.intel_flags.sse42) fprintf(stderr, " SSE42");
    if (g_hardware_flags.intel_flags.avx) fprintf(stderr, " AVX");
    fprintf(stderr, "\n");
  }
#endif //COMPILE_INTEL
}

141
src/strategyselector.h Normal file
View file

@ -0,0 +1,141 @@
#ifndef STRATEGYSELECTOR_H_
#define STRATEGYSELECTOR_H_
/*****************************************************************************
* This file is part of Kvazaar HEVC encoder.
*
* Copyright (C) 2013-2014 Tampere University of Technology and others (see
* COPYING file).
*
* Kvazaar is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*
* Kvazaar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
****************************************************************************/
//Hardware data (abstraction of defines). Extend for other compilers
//
//COMPILE_INTEL is always defined: 1 when one of the x86/x86-64 compiler
//macros below is present, 0 otherwise. The COMPILE_INTEL_<feature> macros are
//defined (as 1) only when the compiler predefines the matching feature macro;
//otherwise they stay undefined and evaluate to 0 in an #if.
//NOTE(review): __i386__ is not in the list below; a 32-bit GCC build that
//defines neither __i586__ nor __i686__ would get COMPILE_INTEL 0 -- confirm
//whether that is intended.
#if defined(_M_IX86) || defined(__i586__) || defined(__i686__) || defined(_M_X64) || defined(_M_AMD64) || defined(__amd64__) || defined(__x86_64__)
#define COMPILE_INTEL 1
#if defined(__MMX__)
#define COMPILE_INTEL_MMX 1
#endif
#if defined(__SSE__)
#define COMPILE_INTEL_SSE 1
#endif
#if defined(__SSE2__)
#define COMPILE_INTEL_SSE2 1
#endif
#if defined(__SSE3__)
#define COMPILE_INTEL_SSE3 1
#endif
#if defined(__SSSE3__)
#define COMPILE_INTEL_SSSE3 1
#endif
#if defined(__SSE4_1__)
#define COMPILE_INTEL_SSE41 1
#endif
#if defined(__SSE4_2__)
#define COMPILE_INTEL_SSE42 1
#endif
#if defined(__AVX__)
#define COMPILE_INTEL_AVX 1
#endif
#else
#define COMPILE_INTEL 0
#endif
//COMPILE_POWERPC: 1 when compiling for PowerPC, 0 otherwise (always defined)
#if defined (_M_PPC) || defined(__powerpc64__) || defined(__powerpc__)
#define COMPILE_POWERPC 1
#else
#define COMPILE_POWERPC 0
#endif
//COMPILE_ARM: 1 when compiling for ARM, 0 otherwise (always defined)
#if defined (_M_ARM) || defined(__arm__) || defined(__thumb__)
#define COMPILE_ARM 1
#else
#define COMPILE_ARM 0
#endif
//One registered implementation of a strategy type.
//The two strings are stored as pointers, not copied.
typedef struct {
const char *type; //Type of the function, usually its name
const char *strategy_name; //Name of the strategy (e.g. sse2)
unsigned int priority; //Priority. 0 = lowest (default strategy)
void *fptr; //Pointer to the function
} strategy;
//Growable array of registered strategies
typedef struct {
unsigned int count; //Number of entries in use
unsigned int allocated; //Number of entries the array has room for
strategy* strategies; //The array itself (NULL while nothing is allocated)
} strategy_list;
//Number of entries added to a strategy_list each time it has to grow
#define STRATEGY_LIST_ALLOC_SIZE 16
//Describes one function pointer that strategyselector_init() has to set
typedef struct {
const char *strategy_type; //Strategy type to look up among the registered strategies
void **fptr; //Where to store the chosen implementation; NULL terminates a table
} strategy_to_select;
//Description of the hardware: one int per architecture (set from the
//COMPILE_* macros) plus a group of per-feature flags for each architecture.
//A flag is non-zero when the feature is available.
typedef struct {
int intel;
struct {
int mmx;
int sse;
int sse2;
int sse3;
int ssse3;
int sse41;
int sse42;
int avx;
} intel_flags;
int powerpc;
struct {
int altivec;
} powerpc_flags;
int arm;
struct {
int neon;
} arm_flags;
} hardware_flags;
//Hardware description, defined in strategyselector.c
extern hardware_flags g_hardware_flags;
//Detects the hardware and sets all strategy function pointers.
//Returns 1 if successful, 0 otherwise.
int strategyselector_init();
//Counterpart of strategyselector_init()
void strategyselector_free();
//Adds one implementation (fptr) of the given strategy type to the list passed
//as opaque. Returns 1 if successful, 0 otherwise.
int strategyselector_register(void *opaque, const char *type, const char *strategy_name, int priority, void *fptr);
//Strategy to include
#include "strategies/picture.h"
//Table of all function pointers to set, one *_EXPORTS entry per strategy.
//The {NULL, NULL} entry marks the end of the table and must stay last.
static const strategy_to_select strategies_to_select[] = {
STRATEGIES_PICTURE_EXPORTS,
{NULL, NULL},
};
#endif //STRATEGYSELECTOR_H_