mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-24 02:24:07 +00:00
Strategies and runtime detection/choice of best algorithm
This commit is contained in:
parent
2a17e9a7aa
commit
bf7e755cf7
|
@ -50,7 +50,7 @@ LDFLAGS += -lm
|
||||||
LD = gcc -fopenmp
|
LD = gcc -fopenmp
|
||||||
YASM = yasm
|
YASM = yasm
|
||||||
ASMOBJS = cpu.o
|
ASMOBJS = cpu.o
|
||||||
OBJS = interface_main.o encmain.o bitstream.o cabac.o config.o context.o encoder.o filter.o inter.o intra.o nal.o picture.o rdo.o sao.o scalinglist.o search.o tables.o transform.o
|
OBJS = interface_main.o encmain.o bitstream.o cabac.o config.o context.o encoder.o filter.o inter.o intra.o nal.o picture.o rdo.o sao.o scalinglist.o search.o strategyselector.o tables.o transform.o
|
||||||
PROG = ./kvazaar
|
PROG = ./kvazaar
|
||||||
PROGS = $(PROG)
|
PROGS = $(PROG)
|
||||||
|
|
||||||
|
|
|
@ -42,9 +42,7 @@
|
||||||
#include "picture.h"
|
#include "picture.h"
|
||||||
#include "transform.h"
|
#include "transform.h"
|
||||||
#include "scalinglist.h"
|
#include "scalinglist.h"
|
||||||
|
#include "strategyselector.h"
|
||||||
// Assembly optimization headers
|
|
||||||
#include "x86/cpu.h"
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Program main function.
|
* \brief Program main function.
|
||||||
|
@ -54,9 +52,6 @@
|
||||||
*/
|
*/
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
int ecx = 0,edx =0;
|
|
||||||
/* CPU feature bits */
|
|
||||||
enum { BIT_SSE3 = 0,BIT_SSSE3 = 9, BIT_SSE41 = 19, BIT_SSE42 = 20, BIT_MMX = 24, BIT_SSE = 25, BIT_SSE2 = 26, BIT_AVX = 28};
|
|
||||||
config *cfg = NULL; //!< Global configuration
|
config *cfg = NULL; //!< Global configuration
|
||||||
FILE *input = NULL; //!< input file (YUV)
|
FILE *input = NULL; //!< input file (YUV)
|
||||||
FILE *output = NULL; //!< output file (HEVC NAL stream)
|
FILE *output = NULL; //!< output file (HEVC NAL stream)
|
||||||
|
@ -77,6 +72,13 @@ int main(int argc, char *argv[])
|
||||||
_setmode( _fileno( stderr ), _O_TEXT );
|
_setmode( _fileno( stderr ), _O_TEXT );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
//Initialize strategies
|
||||||
|
if (!strategyselector_init()) {
|
||||||
|
fprintf(stderr, "Failed to initialize strategies.\n");
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Handle configuration
|
// Handle configuration
|
||||||
cfg = config_alloc();
|
cfg = config_alloc();
|
||||||
|
|
||||||
|
@ -179,21 +181,6 @@ int main(int argc, char *argv[])
|
||||||
goto exit_failure;
|
goto exit_failure;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Dig CPU features with cpuid
|
|
||||||
kvz_cpu_cpuid(&ecx,&edx);
|
|
||||||
fprintf(stderr, "CPU features enabled: ");
|
|
||||||
// EDX
|
|
||||||
if (edx & (1<<BIT_MMX)) fprintf(stderr, "MMX ");
|
|
||||||
if (edx & (1<<BIT_SSE)) fprintf(stderr, "SSE ");
|
|
||||||
if (edx & (1<<BIT_SSE2)) fprintf(stderr, "SSE2 ");
|
|
||||||
// ECX
|
|
||||||
if (ecx & (1<<BIT_SSE3)) fprintf(stderr, "SSE3 ");
|
|
||||||
if (ecx & (1<<BIT_SSSE3)) fprintf(stderr, "SSSE3 ");
|
|
||||||
if (ecx & (1<<BIT_SSE41)) fprintf(stderr, "SSE4.1 ");
|
|
||||||
if (ecx & (1<<BIT_SSE42)) fprintf(stderr, "SSE4.2 ");
|
|
||||||
if (ecx & (1<<BIT_AVX)) fprintf(stderr, "AVX ");
|
|
||||||
fprintf(stderr, "\n");
|
|
||||||
|
|
||||||
// Check if the input file name is a dash, this means stdin
|
// Check if the input file name is a dash, this means stdin
|
||||||
if (!strcmp(cfg->input, "-")) {
|
if (!strcmp(cfg->input, "-")) {
|
||||||
input = stdin;
|
input = stdin;
|
||||||
|
@ -390,6 +377,8 @@ int main(int argc, char *argv[])
|
||||||
encoder_control_finalize(&encoder);
|
encoder_control_finalize(&encoder);
|
||||||
|
|
||||||
free_exp_golomb();
|
free_exp_golomb();
|
||||||
|
|
||||||
|
strategyselector_free();
|
||||||
|
|
||||||
return EXIT_SUCCESS;
|
return EXIT_SUCCESS;
|
||||||
|
|
||||||
|
@ -398,5 +387,6 @@ exit_failure:
|
||||||
if (input) fclose(input);
|
if (input) fclose(input);
|
||||||
if (output) fclose(output);
|
if (output) fclose(output);
|
||||||
if (recout) fclose(recout);
|
if (recout) fclose(recout);
|
||||||
|
strategyselector_free();
|
||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "picture.h"
|
#include "picture.h"
|
||||||
|
#include "strategyselector.h"
|
||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
@ -769,42 +770,6 @@ static unsigned hor_sad(const pixel *pic_data, const pixel *ref_data,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#if defined(__SSE2__)
|
|
||||||
#include "inline-optimizations/picture-sse2.c"
|
|
||||||
#elif defined(__ALTIVEC__)
|
|
||||||
#include "picture-altivec.c"
|
|
||||||
#else
|
|
||||||
//Generic implementations
|
|
||||||
/**
|
|
||||||
* \brief Calculate Sum of Absolute Differences (SAD)
|
|
||||||
*
|
|
||||||
* Calculate Sum of Absolute Differences (SAD) between two rectangular regions
|
|
||||||
* located in arbitrary points in the picture.
|
|
||||||
*
|
|
||||||
* \param data1 Starting point of the first picture.
|
|
||||||
* \param data2 Starting point of the second picture.
|
|
||||||
* \param width Width of the region for which SAD is calculated.
|
|
||||||
* \param height Height of the region for which SAD is calculated.
|
|
||||||
* \param stride Width of the pixel array.
|
|
||||||
*
|
|
||||||
* \returns Sum of Absolute Differences
|
|
||||||
*/
|
|
||||||
static unsigned reg_sad(const pixel * const data1, const pixel * const data2,
|
|
||||||
const int width, const int height, const unsigned stride1, const unsigned stride2)
|
|
||||||
{
|
|
||||||
int y, x;
|
|
||||||
unsigned sad = 0;
|
|
||||||
|
|
||||||
for (y = 0; y < height; ++y) {
|
|
||||||
for (x = 0; x < width; ++x) {
|
|
||||||
sad += abs(data1[y * stride1 + x] - data2[y * stride2 + x]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return sad;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Handle special cases of comparing blocks that are not completely
|
* \brief Handle special cases of comparing blocks that are not completely
|
||||||
* inside the frame.
|
* inside the frame.
|
||||||
|
|
60
src/strategies/picture-generic.c
Normal file
60
src/strategies/picture-generic.c
Normal file
|
@ -0,0 +1,60 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
* This file is part of Kvazaar HEVC encoder.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2013-2014 Tampere University of Technology and others (see
|
||||||
|
* COPYING file).
|
||||||
|
*
|
||||||
|
* Kvazaar is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License version 2 as published
|
||||||
|
* by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* Kvazaar is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* \file
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include "../strategyselector.h"
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Calculate Sum of Absolute Differences (SAD)
|
||||||
|
*
|
||||||
|
* Calculate Sum of Absolute Differences (SAD) between two rectangular regions
|
||||||
|
* located in arbitrary points in the picture.
|
||||||
|
*
|
||||||
|
* \param data1 Starting point of the first picture.
|
||||||
|
* \param data2 Starting point of the second picture.
|
||||||
|
* \param width Width of the region for which SAD is calculated.
|
||||||
|
* \param height Height of the region for which SAD is calculated.
|
||||||
|
* \param stride Width of the pixel array.
|
||||||
|
*
|
||||||
|
* \returns Sum of Absolute Differences
|
||||||
|
*/
|
||||||
|
static unsigned reg_sad_generic(const pixel * const data1, const pixel * const data2,
|
||||||
|
const int width, const int height, const unsigned stride1, const unsigned stride2)
|
||||||
|
{
|
||||||
|
int y, x;
|
||||||
|
unsigned sad = 0;
|
||||||
|
|
||||||
|
for (y = 0; y < height; ++y) {
|
||||||
|
for (x = 0; x < width; ++x) {
|
||||||
|
sad += abs(data1[y * stride1 + x] - data2[y * stride2 + x]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return sad;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int strategy_register_picture_generic(void* opaque) {
|
||||||
|
return strategyselector_register(opaque, "reg_sad", "generic", 0, ®_sad_generic);
|
||||||
|
}
|
56
src/strategies/picture-sse2.c
Normal file
56
src/strategies/picture-sse2.c
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
* This file is part of Kvazaar HEVC encoder.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2013-2014 Tampere University of Technology and others (see
|
||||||
|
* COPYING file).
|
||||||
|
*
|
||||||
|
* Kvazaar is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License version 2 as published
|
||||||
|
* by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* Kvazaar is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* \file
|
||||||
|
*/
|
||||||
|
#include "../strategyselector.h"
|
||||||
|
#include "../picture.h"
|
||||||
|
#include <immintrin.h>
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
__attribute__ ((__target__ ("sse2")))
|
||||||
|
static unsigned reg_sad_sse2(const pixel * const data1, const pixel * const data2,
|
||||||
|
const int width, const int height, const unsigned stride1, const unsigned stride2)
|
||||||
|
{
|
||||||
|
int y, x;
|
||||||
|
unsigned sad = 0;
|
||||||
|
__m128i sse_inc = _mm_setzero_si128 ();
|
||||||
|
long long int sse_inc_array[2];
|
||||||
|
|
||||||
|
for (y = 0; y < height; ++y) {
|
||||||
|
for (x = 0; x <= width-16; x+=16) {
|
||||||
|
const __m128i a = _mm_loadu_si128((__m128i const*) &data1[y * stride1 + x]);
|
||||||
|
const __m128i b = _mm_loadu_si128((__m128i const*) &data2[y * stride2 + x]);
|
||||||
|
sse_inc = _mm_add_epi32(sse_inc, _mm_sad_epu8(a,b));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (; x < width; ++x) {
|
||||||
|
sad += abs(data1[y * stride1 + x] - data2[y * stride2 + x]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_mm_storeu_si128((__m128i*) sse_inc_array, sse_inc);
|
||||||
|
sad += sse_inc_array[0] + sse_inc_array[1];
|
||||||
|
|
||||||
|
return sad;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int strategy_register_picture_sse2(void* opaque) {
|
||||||
|
return strategyselector_register(opaque, "reg_sad", "sse2", 10, ®_sad_sse2);
|
||||||
|
}
|
|
@ -20,12 +20,13 @@
|
||||||
/*
|
/*
|
||||||
* \file
|
* \file
|
||||||
*/
|
*/
|
||||||
|
#include "../strategyselector.h"
|
||||||
#include "../picture.h"
|
#include "../picture.h"
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
#ifdef __SSE2__
|
__attribute__ ((__target__ ("sse2,sse4.1")))
|
||||||
static unsigned reg_sad(const pixel * const data1, const pixel * const data2,
|
static unsigned reg_sad_sse41(const pixel * const data1, const pixel * const data2,
|
||||||
const int width, const int height, const unsigned stride1, const unsigned stride2)
|
const int width, const int height, const unsigned stride1, const unsigned stride2)
|
||||||
{
|
{
|
||||||
int y, x;
|
int y, x;
|
||||||
|
@ -40,7 +41,6 @@ static unsigned reg_sad(const pixel * const data1, const pixel * const data2,
|
||||||
sse_inc = _mm_add_epi32(sse_inc, _mm_sad_epu8(a,b));
|
sse_inc = _mm_add_epi32(sse_inc, _mm_sad_epu8(a,b));
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __SSE4_1__
|
|
||||||
{
|
{
|
||||||
const __m128i a = _mm_loadu_si128((__m128i const*) &data1[y * stride1 + x]);
|
const __m128i a = _mm_loadu_si128((__m128i const*) &data1[y * stride1 + x]);
|
||||||
const __m128i b = _mm_loadu_si128((__m128i const*) &data2[y * stride2 + x]);
|
const __m128i b = _mm_loadu_si128((__m128i const*) &data2[y * stride2 + x]);
|
||||||
|
@ -74,7 +74,6 @@ static unsigned reg_sad(const pixel * const data1, const pixel * const data2,
|
||||||
}
|
}
|
||||||
x = (width - (width%2));
|
x = (width - (width%2));
|
||||||
}
|
}
|
||||||
#endif //__SSE4_1__
|
|
||||||
|
|
||||||
for (; x < width; ++x) {
|
for (; x < width; ++x) {
|
||||||
sad += abs(data1[y * stride1 + x] - data2[y * stride2 + x]);
|
sad += abs(data1[y * stride1 + x] - data2[y * stride2 + x]);
|
||||||
|
@ -85,6 +84,7 @@ static unsigned reg_sad(const pixel * const data1, const pixel * const data2,
|
||||||
|
|
||||||
return sad;
|
return sad;
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
#error picture-sse2.c requires __SSE2__
|
static int strategy_register_picture_sse41(void* opaque) {
|
||||||
#endif //__SSE2__
|
return strategyselector_register(opaque, "reg_sad", "sse41", 20, &reg_sad_sse41);
|
||||||
|
}
|
27
src/strategies/picture.c
Normal file
27
src/strategies/picture.c
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
#include "picture-generic.c"
|
||||||
|
#if COMPILE_INTEL_SSE2
|
||||||
|
#include "picture-sse2.c"
|
||||||
|
#endif
|
||||||
|
#if COMPILE_INTEL_SSE2 && COMPILE_INTEL_SSE41
|
||||||
|
#include "picture-sse41.c"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
unsigned (*reg_sad)(const pixel * const data1, const pixel * const data2,
|
||||||
|
const int width, const int height, const unsigned stride1, const unsigned stride2);
|
||||||
|
|
||||||
|
|
||||||
|
static int strategy_register_picture(void* opaque) {
|
||||||
|
if (!strategy_register_picture_generic(opaque)) return 0;
|
||||||
|
|
||||||
|
if (g_hardware_flags.intel_flags.sse2) {
|
||||||
|
#if COMPILE_INTEL_SSE2
|
||||||
|
if (!strategy_register_picture_sse2(opaque)) return 0;
|
||||||
|
#endif
|
||||||
|
if (g_hardware_flags.intel_flags.sse41) {
|
||||||
|
#if COMPILE_INTEL_SSE2 && COMPILE_INTEL_SSE41
|
||||||
|
if (!strategy_register_picture_sse41(opaque)) return 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
30
src/strategies/picture.h
Normal file
30
src/strategies/picture.h
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
#ifndef STRATEGIES_PICTURE_H_
|
||||||
|
#define STRATEGIES_PICTURE_H_
|
||||||
|
/*****************************************************************************
|
||||||
|
* This file is part of Kvazaar HEVC encoder.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2013-2014 Tampere University of Technology and others (see
|
||||||
|
* COPYING file).
|
||||||
|
*
|
||||||
|
* Kvazaar is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License version 2 as published
|
||||||
|
* by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* Kvazaar is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
#include "../picture.h"
|
||||||
|
|
||||||
|
//Function pointer to reg_sad
|
||||||
|
extern unsigned (*reg_sad)(const pixel * const data1, const pixel * const data2,
|
||||||
|
const int width, const int height, const unsigned stride1, const unsigned stride2);
|
||||||
|
|
||||||
|
//Strategy type name paired with the address of the reg_sad function pointer
#define STRATEGIES_PICTURE_EXPORTS {"reg_sad", (void**) &reg_sad}
|
||||||
|
|
||||||
|
#endif //STRATEGIES_PICTURE_H_
|
220
src/strategyselector.c
Normal file
220
src/strategyselector.c
Normal file
|
@ -0,0 +1,220 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
* This file is part of Kvazaar HEVC encoder.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2013-2014 Tampere University of Technology and others (see
|
||||||
|
* COPYING file).
|
||||||
|
*
|
||||||
|
* Kvazaar is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License version 2 as published
|
||||||
|
* by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* Kvazaar is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* \file
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "strategyselector.h"
|
||||||
|
|
||||||
|
hardware_flags g_hardware_flags;
|
||||||
|
|
||||||
|
static void set_hardware_flags();
|
||||||
|
static void* strategyselector_choose_for(const strategy_list * const strategies, const char * const strategy_type);
|
||||||
|
|
||||||
|
//Strategies to include (add new file here)
|
||||||
|
#include "strategies/picture.c"
|
||||||
|
|
||||||
|
//Returns 1 if successful
|
||||||
|
int strategyselector_init() {
|
||||||
|
const strategy_to_select *cur_strategy_to_select = strategies_to_select;
|
||||||
|
strategy_list strategies;
|
||||||
|
|
||||||
|
strategies.allocated = 0;
|
||||||
|
strategies.count = 0;
|
||||||
|
strategies.strategies = NULL;
|
||||||
|
|
||||||
|
set_hardware_flags();
|
||||||
|
|
||||||
|
//Add new register function here
|
||||||
|
if (!strategy_register_picture(&strategies)) {
|
||||||
|
fprintf(stderr, "strategy_register_picture failed!\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
while(cur_strategy_to_select->fptr) {
|
||||||
|
*(cur_strategy_to_select->fptr) = strategyselector_choose_for(&strategies, cur_strategy_to_select->strategy_type);
|
||||||
|
|
||||||
|
if (!(*(cur_strategy_to_select->fptr))) {
|
||||||
|
fprintf(stderr, "Could not find a strategy for %s!\n", cur_strategy_to_select->strategy_type);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
++cur_strategy_to_select;
|
||||||
|
}
|
||||||
|
|
||||||
|
//We can free the structure now, as all strategies are statically set to pointers
|
||||||
|
if (strategies.allocated) {
|
||||||
|
free(strategies.strategies);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * \brief Release resources held by the strategy selector.
 *
 * Currently there is nothing to release: strategyselector_init() frees
 * its temporary list itself.
 */
void strategyselector_free(void) {
  //Do nothing (yet)
}
|
||||||
|
|
||||||
|
//Returns 1 if successful, 0 otherwise
|
||||||
|
int strategyselector_register(void * const opaque, const char * const type, const char * const strategy_name, int priority, void * const fptr) {
|
||||||
|
strategy_list * const strategies = opaque;
|
||||||
|
|
||||||
|
if (strategies->allocated == strategies->count) {
|
||||||
|
strategy* new_strategies = realloc(strategies->strategies, sizeof(strategy) * (strategies->allocated + STRATEGY_LIST_ALLOC_SIZE));
|
||||||
|
if (!new_strategies) {
|
||||||
|
fprintf(stderr, "Could not increase strategies list size!\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
strategies->strategies = new_strategies;
|
||||||
|
strategies->allocated += STRATEGY_LIST_ALLOC_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
strategy *new_strategy = &strategies->strategies[strategies->count++];
|
||||||
|
new_strategy->type = type;
|
||||||
|
new_strategy->strategy_name = strategy_name;
|
||||||
|
new_strategy->priority = priority;
|
||||||
|
new_strategy->fptr = fptr;
|
||||||
|
}
|
||||||
|
#ifdef _DEBUG
|
||||||
|
fprintf(stderr, "Registered strategy %s:%s with priority %d (%p)\n", type, strategy_name, priority, fptr);
|
||||||
|
#endif //_DEBUG
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void* strategyselector_choose_for(const strategy_list * const strategies, const char * const strategy_type) {
|
||||||
|
unsigned int max_priority = 0;
|
||||||
|
int max_priority_i = -1;
|
||||||
|
char buffer[256];
|
||||||
|
char *override = NULL;
|
||||||
|
int i = 0;
|
||||||
|
|
||||||
|
snprintf(buffer, 255, "KVAZAAR_OVERRIDE_%s", strategy_type);
|
||||||
|
override = getenv(buffer);
|
||||||
|
|
||||||
|
for (i=0; i < strategies->count; ++i) {
|
||||||
|
if (strcmp(strategies->strategies[i].type, strategy_type) == 0) {
|
||||||
|
if (override && strcmp(strategies->strategies[i].strategy_name, override) == 0) {
|
||||||
|
fprintf(stderr, "%s environment variable present, choosing %s:%s\n", buffer, strategy_type, strategies->strategies[i].strategy_name);
|
||||||
|
return strategies->strategies[i].fptr;
|
||||||
|
}
|
||||||
|
if (strategies->strategies[i].priority >= max_priority) {
|
||||||
|
max_priority_i = i;
|
||||||
|
max_priority = strategies->strategies[i].priority;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (override) {
|
||||||
|
fprintf(stderr, "%s environment variable present, but no strategy %s was found!\n", buffer, override);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef _DEBUG
|
||||||
|
fprintf(stderr, "Choosing strategy for %s:\n", strategy_type);
|
||||||
|
for (i=0; i < strategies->count; ++i) {
|
||||||
|
if (strcmp(strategies->strategies[i].type, strategy_type) == 0) {
|
||||||
|
if (i != max_priority_i) {
|
||||||
|
fprintf(stderr, "- %s (%d, %p)\n", strategies->strategies[i].strategy_name, strategies->strategies[i].priority, strategies->strategies[i].fptr);
|
||||||
|
} else {
|
||||||
|
fprintf(stderr, "> %s (%d, %p)\n", strategies->strategies[i].strategy_name, strategies->strategies[i].priority, strategies->strategies[i].fptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif //_DEBUG
|
||||||
|
|
||||||
|
|
||||||
|
if (max_priority_i == -1) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return strategies->strategies[max_priority_i].fptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if COMPILE_INTEL
|
||||||
|
#include "x86/cpu.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static void set_hardware_flags() {
|
||||||
|
memset(&g_hardware_flags, 0, sizeof(g_hardware_flags));
|
||||||
|
|
||||||
|
g_hardware_flags.arm = COMPILE_ARM;
|
||||||
|
g_hardware_flags.intel = COMPILE_INTEL;
|
||||||
|
g_hardware_flags.powerpc = COMPILE_POWERPC;
|
||||||
|
|
||||||
|
#if COMPILE_INTEL
|
||||||
|
{
|
||||||
|
int ecx = 0,edx =0;
|
||||||
|
/* CPU feature bits */
|
||||||
|
enum { BIT_SSE3 = 0,BIT_SSSE3 = 9, BIT_SSE41 = 19, BIT_SSE42 = 20, BIT_MMX = 24, BIT_SSE = 25, BIT_SSE2 = 26, BIT_AVX = 28};
|
||||||
|
|
||||||
|
// Dig CPU features with cpuid
|
||||||
|
kvz_cpu_cpuid(&ecx,&edx);
|
||||||
|
|
||||||
|
// EDX
|
||||||
|
if (edx & (1<<BIT_MMX)) g_hardware_flags.intel_flags.mmx = 1;
|
||||||
|
if (edx & (1<<BIT_SSE)) g_hardware_flags.intel_flags.sse = 1;
|
||||||
|
if (edx & (1<<BIT_SSE2)) g_hardware_flags.intel_flags.sse2 = 1;
|
||||||
|
// ECX
|
||||||
|
if (ecx & (1<<BIT_SSE3)) g_hardware_flags.intel_flags.sse3 = 1;;
|
||||||
|
if (ecx & (1<<BIT_SSSE3)) g_hardware_flags.intel_flags.ssse3 = 1;
|
||||||
|
if (ecx & (1<<BIT_SSE41)) g_hardware_flags.intel_flags.sse41 = 1;
|
||||||
|
if (ecx & (1<<BIT_SSE42)) g_hardware_flags.intel_flags.sse42 = 1;
|
||||||
|
if (ecx & (1<<BIT_AVX)) g_hardware_flags.intel_flags.avx = 1;
|
||||||
|
|
||||||
|
fprintf(stderr, "Compiled: INTEL, flags:");
|
||||||
|
#if COMPILE_INTEL_MMX
|
||||||
|
fprintf(stderr, " MMX");
|
||||||
|
#endif
|
||||||
|
#if COMPILE_INTEL_SSE
|
||||||
|
fprintf(stderr, " SSE");
|
||||||
|
#endif
|
||||||
|
#if COMPILE_INTEL_SSE2
|
||||||
|
fprintf(stderr, " SSE2");
|
||||||
|
#endif
|
||||||
|
#if COMPILE_INTEL_SSE3
|
||||||
|
fprintf(stderr, " SSE3");
|
||||||
|
#endif
|
||||||
|
#if COMPILE_INTEL_SSSE3
|
||||||
|
fprintf(stderr, " SSSE3");
|
||||||
|
#endif
|
||||||
|
#if COMPILE_INTEL_SSE41
|
||||||
|
fprintf(stderr, " SSE41");
|
||||||
|
#endif
|
||||||
|
#if COMPILE_INTEL_SSE42
|
||||||
|
fprintf(stderr, " SSE42");
|
||||||
|
#endif
|
||||||
|
#if COMPILE_INTEL_AVX
|
||||||
|
fprintf(stderr, " AVX");
|
||||||
|
#endif
|
||||||
|
fprintf(stderr, "\nRun on : INTEL, flags:");
|
||||||
|
if (g_hardware_flags.intel_flags.mmx) fprintf(stderr, " MMX");
|
||||||
|
if (g_hardware_flags.intel_flags.sse) fprintf(stderr, " SSE");
|
||||||
|
if (g_hardware_flags.intel_flags.sse2) fprintf(stderr, " SSE2");
|
||||||
|
if (g_hardware_flags.intel_flags.sse3) fprintf(stderr, " SSE3");
|
||||||
|
if (g_hardware_flags.intel_flags.ssse3) fprintf(stderr, " SSSE3");
|
||||||
|
if (g_hardware_flags.intel_flags.sse41) fprintf(stderr, " SSE41");
|
||||||
|
if (g_hardware_flags.intel_flags.sse42) fprintf(stderr, " SSE42");
|
||||||
|
if (g_hardware_flags.intel_flags.avx) fprintf(stderr, " AVX");
|
||||||
|
fprintf(stderr, "\n");
|
||||||
|
}
|
||||||
|
#endif //COMPILE_INTEL
|
||||||
|
}
|
141
src/strategyselector.h
Normal file
141
src/strategyselector.h
Normal file
|
@ -0,0 +1,141 @@
|
||||||
|
#ifndef STRATEGYSELECTOR_H_
|
||||||
|
#define STRATEGYSELECTOR_H_
|
||||||
|
/*****************************************************************************
|
||||||
|
* This file is part of Kvazaar HEVC encoder.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2013-2014 Tampere University of Technology and others (see
|
||||||
|
* COPYING file).
|
||||||
|
*
|
||||||
|
* Kvazaar is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License version 2 as published
|
||||||
|
* by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* Kvazaar is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
//Hardware data (abstraction of defines). Extend for other compilers
|
||||||
|
|
||||||
|
#if defined(_M_IX86) || defined(__i586__) || defined(__i686__) || defined(_M_X64) || defined(_M_AMD64) || defined(__amd64__) || defined(__x86_64__)
|
||||||
|
#define COMPILE_INTEL 1
|
||||||
|
|
||||||
|
#if defined(__MMX__)
|
||||||
|
#define COMPILE_INTEL_MMX 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__SSE__)
|
||||||
|
#define COMPILE_INTEL_SSE 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__SSE2__)
|
||||||
|
#define COMPILE_INTEL_SSE2 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__SSE3__)
|
||||||
|
#define COMPILE_INTEL_SSE3 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__SSSE3__)
|
||||||
|
#define COMPILE_INTEL_SSSE3 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__SSE4_1__)
|
||||||
|
#define COMPILE_INTEL_SSE41 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__SSE4_2__)
|
||||||
|
#define COMPILE_INTEL_SSE42 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__AVX__)
|
||||||
|
#define COMPILE_INTEL_AVX 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#else
|
||||||
|
#define COMPILE_INTEL 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined (_M_PPC) || defined(__powerpc64__) || defined(__powerpc__)
|
||||||
|
#define COMPILE_POWERPC 1
|
||||||
|
#else
|
||||||
|
#define COMPILE_POWERPC 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined (_M_ARM) || defined(__arm__) || defined(__thumb__)
|
||||||
|
#define COMPILE_ARM 1
|
||||||
|
#else
|
||||||
|
#define COMPILE_ARM 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
const char *type; //Type of the function, usually its name
|
||||||
|
const char *strategy_name; //Name of the strategy (e.g. sse2)
|
||||||
|
unsigned int priority; //Priority. 0 = lowest (default strategy)
|
||||||
|
void *fptr; //Pointer to the function
|
||||||
|
} strategy;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
unsigned int count;
|
||||||
|
unsigned int allocated;
|
||||||
|
strategy* strategies;
|
||||||
|
} strategy_list;
|
||||||
|
|
||||||
|
#define STRATEGY_LIST_ALLOC_SIZE 16
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
const char *strategy_type;
|
||||||
|
void **fptr;
|
||||||
|
} strategy_to_select;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
int intel;
|
||||||
|
struct {
|
||||||
|
int mmx;
|
||||||
|
int sse;
|
||||||
|
int sse2;
|
||||||
|
int sse3;
|
||||||
|
int ssse3;
|
||||||
|
int sse41;
|
||||||
|
int sse42;
|
||||||
|
int avx;
|
||||||
|
} intel_flags;
|
||||||
|
|
||||||
|
int powerpc;
|
||||||
|
struct {
|
||||||
|
int altivec;
|
||||||
|
} powerpc_flags;
|
||||||
|
|
||||||
|
int arm;
|
||||||
|
struct {
|
||||||
|
int neon;
|
||||||
|
} arm_flags;
|
||||||
|
} hardware_flags;
|
||||||
|
|
||||||
|
extern hardware_flags g_hardware_flags;
|
||||||
|
|
||||||
|
|
||||||
|
int strategyselector_init();
|
||||||
|
void strategyselector_free();
|
||||||
|
int strategyselector_register(void *opaque, const char *type, const char *strategy_name, int priority, void *fptr);
|
||||||
|
|
||||||
|
|
||||||
|
//Strategy to include
|
||||||
|
#include "strategies/picture.h"
|
||||||
|
|
||||||
|
static const strategy_to_select strategies_to_select[] = {
|
||||||
|
STRATEGIES_PICTURE_EXPORTS,
|
||||||
|
{NULL, NULL},
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif //STRATEGYSELECTOR_H_
|
Loading…
Reference in a new issue