mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-24 02:24:07 +00:00
Strategies and runtime detection/choice of best algorithm
This commit is contained in:
parent
2a17e9a7aa
commit
bf7e755cf7
|
@ -50,7 +50,7 @@ LDFLAGS += -lm
|
|||
LD = gcc -fopenmp
|
||||
YASM = yasm
|
||||
ASMOBJS = cpu.o
|
||||
OBJS = interface_main.o encmain.o bitstream.o cabac.o config.o context.o encoder.o filter.o inter.o intra.o nal.o picture.o rdo.o sao.o scalinglist.o search.o tables.o transform.o
|
||||
OBJS = interface_main.o encmain.o bitstream.o cabac.o config.o context.o encoder.o filter.o inter.o intra.o nal.o picture.o rdo.o sao.o scalinglist.o search.o strategyselector.o tables.o transform.o
|
||||
PROG = ./kvazaar
|
||||
PROGS = $(PROG)
|
||||
|
||||
|
|
|
@ -42,9 +42,7 @@
|
|||
#include "picture.h"
|
||||
#include "transform.h"
|
||||
#include "scalinglist.h"
|
||||
|
||||
// Assembly optimization headers
|
||||
#include "x86/cpu.h"
|
||||
#include "strategyselector.h"
|
||||
|
||||
/**
|
||||
* \brief Program main function.
|
||||
|
@ -54,9 +52,6 @@
|
|||
*/
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int ecx = 0,edx =0;
|
||||
/* CPU feature bits */
|
||||
enum { BIT_SSE3 = 0,BIT_SSSE3 = 9, BIT_SSE41 = 19, BIT_SSE42 = 20, BIT_MMX = 24, BIT_SSE = 25, BIT_SSE2 = 26, BIT_AVX = 28};
|
||||
config *cfg = NULL; //!< Global configuration
|
||||
FILE *input = NULL; //!< input file (YUV)
|
||||
FILE *output = NULL; //!< output file (HEVC NAL stream)
|
||||
|
@ -77,6 +72,13 @@ int main(int argc, char *argv[])
|
|||
_setmode( _fileno( stderr ), _O_TEXT );
|
||||
#endif
|
||||
|
||||
//Initialize strategies
|
||||
if (!strategyselector_init()) {
|
||||
fprintf(stderr, "Failed to initialize strategies.\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
|
||||
// Handle configuration
|
||||
cfg = config_alloc();
|
||||
|
||||
|
@ -179,21 +181,6 @@ int main(int argc, char *argv[])
|
|||
goto exit_failure;
|
||||
}
|
||||
|
||||
// Dig CPU features with cpuid
|
||||
kvz_cpu_cpuid(&ecx,&edx);
|
||||
fprintf(stderr, "CPU features enabled: ");
|
||||
// EDX
|
||||
if (edx & (1<<BIT_MMX)) fprintf(stderr, "MMX ");
|
||||
if (edx & (1<<BIT_SSE)) fprintf(stderr, "SSE ");
|
||||
if (edx & (1<<BIT_SSE2)) fprintf(stderr, "SSE2 ");
|
||||
// ECX
|
||||
if (ecx & (1<<BIT_SSE3)) fprintf(stderr, "SSE3 ");
|
||||
if (ecx & (1<<BIT_SSSE3)) fprintf(stderr, "SSSE3 ");
|
||||
if (ecx & (1<<BIT_SSE41)) fprintf(stderr, "SSE4.1 ");
|
||||
if (ecx & (1<<BIT_SSE42)) fprintf(stderr, "SSE4.2 ");
|
||||
if (ecx & (1<<BIT_AVX)) fprintf(stderr, "AVX ");
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
// Check if the input file name is a dash, this means stdin
|
||||
if (!strcmp(cfg->input, "-")) {
|
||||
input = stdin;
|
||||
|
@ -391,6 +378,8 @@ int main(int argc, char *argv[])
|
|||
|
||||
free_exp_golomb();
|
||||
|
||||
strategyselector_free();
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
|
||||
exit_failure:
|
||||
|
@ -398,5 +387,6 @@ exit_failure:
|
|||
if (input) fclose(input);
|
||||
if (output) fclose(output);
|
||||
if (recout) fclose(recout);
|
||||
strategyselector_free();
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
*/
|
||||
|
||||
#include "picture.h"
|
||||
#include "strategyselector.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
@ -769,42 +770,6 @@ static unsigned hor_sad(const pixel *pic_data, const pixel *ref_data,
|
|||
}
|
||||
|
||||
|
||||
#if defined(__SSE2__)
|
||||
#include "inline-optimizations/picture-sse2.c"
|
||||
#elif defined(__ALTIVEC__)
|
||||
#include "picture-altivec.c"
|
||||
#else
|
||||
//Generic implementations
|
||||
/**
|
||||
* \brief Calculate Sum of Absolute Differences (SAD)
|
||||
*
|
||||
* Calculate Sum of Absolute Differences (SAD) between two rectangular regions
|
||||
* located in arbitrary points in the picture.
|
||||
*
|
||||
* \param data1 Starting point of the first picture.
|
||||
* \param data2 Starting point of the second picture.
|
||||
* \param width Width of the region for which SAD is calculated.
|
||||
* \param height Height of the region for which SAD is calculated.
|
||||
* \param stride Width of the pixel array.
|
||||
*
|
||||
* \returns Sum of Absolute Differences
|
||||
*/
|
||||
static unsigned reg_sad(const pixel * const data1, const pixel * const data2,
|
||||
const int width, const int height, const unsigned stride1, const unsigned stride2)
|
||||
{
|
||||
int y, x;
|
||||
unsigned sad = 0;
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
for (x = 0; x < width; ++x) {
|
||||
sad += abs(data1[y * stride1 + x] - data2[y * stride2 + x]);
|
||||
}
|
||||
}
|
||||
|
||||
return sad;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \brief Handle special cases of comparing blocks that are not completely
|
||||
* inside the frame.
|
||||
|
|
60
src/strategies/picture-generic.c
Normal file
60
src/strategies/picture-generic.c
Normal file
|
@ -0,0 +1,60 @@
|
|||
/*****************************************************************************
|
||||
* This file is part of Kvazaar HEVC encoder.
|
||||
*
|
||||
* Copyright (C) 2013-2014 Tampere University of Technology and others (see
|
||||
* COPYING file).
|
||||
*
|
||||
* Kvazaar is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as published
|
||||
* by the Free Software Foundation.
|
||||
*
|
||||
* Kvazaar is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
|
||||
****************************************************************************/
|
||||
|
||||
/*
|
||||
* \file
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "../strategyselector.h"
|
||||
|
||||
|
||||
/**
|
||||
* \brief Calculate Sum of Absolute Differences (SAD)
|
||||
*
|
||||
* Calculate Sum of Absolute Differences (SAD) between two rectangular regions
|
||||
* located in arbitrary points in the picture.
|
||||
*
|
||||
* \param data1 Starting point of the first picture.
|
||||
* \param data2 Starting point of the second picture.
|
||||
* \param width Width of the region for which SAD is calculated.
|
||||
* \param height Height of the region for which SAD is calculated.
|
||||
* \param stride Width of the pixel array.
|
||||
*
|
||||
* \returns Sum of Absolute Differences
|
||||
*/
|
||||
static unsigned reg_sad_generic(const pixel * const data1, const pixel * const data2,
|
||||
const int width, const int height, const unsigned stride1, const unsigned stride2)
|
||||
{
|
||||
int y, x;
|
||||
unsigned sad = 0;
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
for (x = 0; x < width; ++x) {
|
||||
sad += abs(data1[y * stride1 + x] - data2[y * stride2 + x]);
|
||||
}
|
||||
}
|
||||
|
||||
return sad;
|
||||
}
|
||||
|
||||
static int strategy_register_picture_generic(void* opaque) {
|
||||
return strategyselector_register(opaque, "reg_sad", "generic", 0, ®_sad_generic);
|
||||
}
|
56
src/strategies/picture-sse2.c
Normal file
56
src/strategies/picture-sse2.c
Normal file
|
@ -0,0 +1,56 @@
|
|||
/*****************************************************************************
|
||||
* This file is part of Kvazaar HEVC encoder.
|
||||
*
|
||||
* Copyright (C) 2013-2014 Tampere University of Technology and others (see
|
||||
* COPYING file).
|
||||
*
|
||||
* Kvazaar is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as published
|
||||
* by the Free Software Foundation.
|
||||
*
|
||||
* Kvazaar is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
|
||||
****************************************************************************/
|
||||
|
||||
/*
|
||||
* \file
|
||||
*/
|
||||
#include "../strategyselector.h"
|
||||
#include "../picture.h"
|
||||
#include <immintrin.h>
|
||||
#include <assert.h>
|
||||
|
||||
__attribute__ ((__target__ ("sse2")))
|
||||
static unsigned reg_sad_sse2(const pixel * const data1, const pixel * const data2,
|
||||
const int width, const int height, const unsigned stride1, const unsigned stride2)
|
||||
{
|
||||
int y, x;
|
||||
unsigned sad = 0;
|
||||
__m128i sse_inc = _mm_setzero_si128 ();
|
||||
long long int sse_inc_array[2];
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
for (x = 0; x <= width-16; x+=16) {
|
||||
const __m128i a = _mm_loadu_si128((__m128i const*) &data1[y * stride1 + x]);
|
||||
const __m128i b = _mm_loadu_si128((__m128i const*) &data2[y * stride2 + x]);
|
||||
sse_inc = _mm_add_epi32(sse_inc, _mm_sad_epu8(a,b));
|
||||
}
|
||||
|
||||
for (; x < width; ++x) {
|
||||
sad += abs(data1[y * stride1 + x] - data2[y * stride2 + x]);
|
||||
}
|
||||
}
|
||||
_mm_storeu_si128((__m128i*) sse_inc_array, sse_inc);
|
||||
sad += sse_inc_array[0] + sse_inc_array[1];
|
||||
|
||||
return sad;
|
||||
}
|
||||
|
||||
static int strategy_register_picture_sse2(void* opaque) {
|
||||
return strategyselector_register(opaque, "reg_sad", "sse2", 10, ®_sad_sse2);
|
||||
}
|
|
@ -20,12 +20,13 @@
|
|||
/*
|
||||
* \file
|
||||
*/
|
||||
|
||||
#include "../strategyselector.h"
|
||||
#include "../picture.h"
|
||||
#include <immintrin.h>
|
||||
#include <assert.h>
|
||||
|
||||
#ifdef __SSE2__
|
||||
static unsigned reg_sad(const pixel * const data1, const pixel * const data2,
|
||||
__attribute__ ((__target__ ("sse2,sse4.1")))
|
||||
static unsigned reg_sad_sse41(const pixel * const data1, const pixel * const data2,
|
||||
const int width, const int height, const unsigned stride1, const unsigned stride2)
|
||||
{
|
||||
int y, x;
|
||||
|
@ -40,7 +41,6 @@ static unsigned reg_sad(const pixel * const data1, const pixel * const data2,
|
|||
sse_inc = _mm_add_epi32(sse_inc, _mm_sad_epu8(a,b));
|
||||
}
|
||||
|
||||
#ifdef __SSE4_1__
|
||||
{
|
||||
const __m128i a = _mm_loadu_si128((__m128i const*) &data1[y * stride1 + x]);
|
||||
const __m128i b = _mm_loadu_si128((__m128i const*) &data2[y * stride2 + x]);
|
||||
|
@ -74,7 +74,6 @@ static unsigned reg_sad(const pixel * const data1, const pixel * const data2,
|
|||
}
|
||||
x = (width - (width%2));
|
||||
}
|
||||
#endif //__SSE4_1__
|
||||
|
||||
for (; x < width; ++x) {
|
||||
sad += abs(data1[y * stride1 + x] - data2[y * stride2 + x]);
|
||||
|
@ -85,6 +84,7 @@ static unsigned reg_sad(const pixel * const data1, const pixel * const data2,
|
|||
|
||||
return sad;
|
||||
}
|
||||
#else
|
||||
#error picture-sse2.c requires __SSE2__
|
||||
#endif //__SSE2__
|
||||
|
||||
static int strategy_register_picture_sse41(void* opaque) {
|
||||
return strategyselector_register(opaque, "reg_sad", "sse41", 20, ®_sad_sse41);
|
||||
}
|
27
src/strategies/picture.c
Normal file
27
src/strategies/picture.c
Normal file
|
@ -0,0 +1,27 @@
|
|||
#include "picture-generic.c"
|
||||
#if COMPILE_INTEL_SSE2
|
||||
#include "picture-sse2.c"
|
||||
#endif
|
||||
#if COMPILE_INTEL_SSE2 && COMPILE_INTEL_SSE41
|
||||
#include "picture-sse41.c"
|
||||
#endif
|
||||
|
||||
//Storage for the reg_sad function pointer that strategies/picture.h declares
//as extern. strategyselector_init() stores the selected implementation here.
unsigned (*reg_sad)(const pixel * const data1, const pixel * const data2,
|
||||
const int width, const int height, const unsigned stride1, const unsigned stride2);
|
||||
|
||||
|
||||
/**
 * \brief Register every picture strategy usable on this machine.
 *
 * The generic strategy is always registered. The SSE2 strategy is added when
 * it was compiled in and g_hardware_flags reports SSE2; the SSE4.1 strategy
 * additionally needs to be compiled in and SSE4.1 to be reported.
 *
 * \param opaque  Passed unchanged to the individual register functions.
 *
 * \returns 1 if every attempted registration succeeded, 0 otherwise.
 */
static int strategy_register_picture(void* opaque) {
  if (!strategy_register_picture_generic(opaque)) {
    return 0;
  }

#if COMPILE_INTEL_SSE2
  // Without SSE2 at run time neither SIMD variant is offered.
  if (!g_hardware_flags.intel_flags.sse2) {
    return 1;
  }
  if (!strategy_register_picture_sse2(opaque)) {
    return 0;
  }
#if COMPILE_INTEL_SSE41
  if (g_hardware_flags.intel_flags.sse41 && !strategy_register_picture_sse41(opaque)) {
    return 0;
  }
#endif
#endif

  return 1;
}
|
30
src/strategies/picture.h
Normal file
30
src/strategies/picture.h
Normal file
|
@ -0,0 +1,30 @@
|
|||
#ifndef STRATEGIES_PICTURE_H_
|
||||
#define STRATEGIES_PICTURE_H_
|
||||
/*****************************************************************************
|
||||
* This file is part of Kvazaar HEVC encoder.
|
||||
*
|
||||
* Copyright (C) 2013-2014 Tampere University of Technology and others (see
|
||||
* COPYING file).
|
||||
*
|
||||
* Kvazaar is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as published
|
||||
* by the Free Software Foundation.
|
||||
*
|
||||
* Kvazaar is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
|
||||
****************************************************************************/
|
||||
|
||||
#include "../picture.h"
|
||||
|
||||
//Function pointer to the reg_sad (Sum of Absolute Differences) implementation
//in use. The pointer itself is defined in strategies/picture.c.
|
||||
extern unsigned (*reg_sad)(const pixel * const data1, const pixel * const data2,
|
||||
const int width, const int height, const unsigned stride1, const unsigned stride2);
|
||||
|
||||
//Initializer for one strategy_to_select entry: the strategy type name and the
//address of the function pointer that receives the selected implementation.
#define STRATEGIES_PICTURE_EXPORTS {"reg_sad", (void**) &reg_sad}
|
||||
|
||||
#endif //STRATEGIES_PICTURE_H_
|
220
src/strategyselector.c
Normal file
220
src/strategyselector.c
Normal file
|
@ -0,0 +1,220 @@
|
|||
/*****************************************************************************
|
||||
* This file is part of Kvazaar HEVC encoder.
|
||||
*
|
||||
* Copyright (C) 2013-2014 Tampere University of Technology and others (see
|
||||
* COPYING file).
|
||||
*
|
||||
* Kvazaar is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as published
|
||||
* by the Free Software Foundation.
|
||||
*
|
||||
* Kvazaar is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
|
||||
****************************************************************************/
|
||||
|
||||
/*
|
||||
* \file
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "strategyselector.h"
|
||||
|
||||
//Global description of the build target and detected CPU features; it is
//reset and filled in by set_hardware_flags().
hardware_flags g_hardware_flags;
|
||||
|
||||
//Forward declarations of the file-local helpers defined further down.
static void set_hardware_flags();
|
||||
static void* strategyselector_choose_for(const strategy_list * const strategies, const char * const strategy_type);
|
||||
|
||||
//Strategies to include (add new file here)
|
||||
#include "strategies/picture.c"
|
||||
|
||||
//Returns 1 if successful
|
||||
int strategyselector_init() {
|
||||
const strategy_to_select *cur_strategy_to_select = strategies_to_select;
|
||||
strategy_list strategies;
|
||||
|
||||
strategies.allocated = 0;
|
||||
strategies.count = 0;
|
||||
strategies.strategies = NULL;
|
||||
|
||||
set_hardware_flags();
|
||||
|
||||
//Add new register function here
|
||||
if (!strategy_register_picture(&strategies)) {
|
||||
fprintf(stderr, "strategy_register_picture failed!\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
while(cur_strategy_to_select->fptr) {
|
||||
*(cur_strategy_to_select->fptr) = strategyselector_choose_for(&strategies, cur_strategy_to_select->strategy_type);
|
||||
|
||||
if (!(*(cur_strategy_to_select->fptr))) {
|
||||
fprintf(stderr, "Could not find a strategy for %s!\n", cur_strategy_to_select->strategy_type);
|
||||
return 0;
|
||||
}
|
||||
++cur_strategy_to_select;
|
||||
}
|
||||
|
||||
//We can free the structure now, as all strategies are statically set to pointers
|
||||
if (strategies.allocated) {
|
||||
free(strategies.strategies);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
 * \brief Counterpart of strategyselector_init().
 *
 * Intentionally empty for now: this function releases nothing.
 */
void strategyselector_free() {
  //Do nothing (yet)
}
|
||||
|
||||
//Returns 1 if successful, 0 otherwise
|
||||
int strategyselector_register(void * const opaque, const char * const type, const char * const strategy_name, int priority, void * const fptr) {
|
||||
strategy_list * const strategies = opaque;
|
||||
|
||||
if (strategies->allocated == strategies->count) {
|
||||
strategy* new_strategies = realloc(strategies->strategies, sizeof(strategy) * (strategies->allocated + STRATEGY_LIST_ALLOC_SIZE));
|
||||
if (!new_strategies) {
|
||||
fprintf(stderr, "Could not increase strategies list size!\n");
|
||||
return 0;
|
||||
}
|
||||
strategies->strategies = new_strategies;
|
||||
strategies->allocated += STRATEGY_LIST_ALLOC_SIZE;
|
||||
}
|
||||
|
||||
{
|
||||
strategy *new_strategy = &strategies->strategies[strategies->count++];
|
||||
new_strategy->type = type;
|
||||
new_strategy->strategy_name = strategy_name;
|
||||
new_strategy->priority = priority;
|
||||
new_strategy->fptr = fptr;
|
||||
}
|
||||
#ifdef _DEBUG
|
||||
fprintf(stderr, "Registered strategy %s:%s with priority %d (%p)\n", type, strategy_name, priority, fptr);
|
||||
#endif //_DEBUG
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void* strategyselector_choose_for(const strategy_list * const strategies, const char * const strategy_type) {
|
||||
unsigned int max_priority = 0;
|
||||
int max_priority_i = -1;
|
||||
char buffer[256];
|
||||
char *override = NULL;
|
||||
int i = 0;
|
||||
|
||||
snprintf(buffer, 255, "KVAZAAR_OVERRIDE_%s", strategy_type);
|
||||
override = getenv(buffer);
|
||||
|
||||
for (i=0; i < strategies->count; ++i) {
|
||||
if (strcmp(strategies->strategies[i].type, strategy_type) == 0) {
|
||||
if (override && strcmp(strategies->strategies[i].strategy_name, override) == 0) {
|
||||
fprintf(stderr, "%s environment variable present, choosing %s:%s\n", buffer, strategy_type, strategies->strategies[i].strategy_name);
|
||||
return strategies->strategies[i].fptr;
|
||||
}
|
||||
if (strategies->strategies[i].priority >= max_priority) {
|
||||
max_priority_i = i;
|
||||
max_priority = strategies->strategies[i].priority;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (override) {
|
||||
fprintf(stderr, "%s environment variable present, but no strategy %s was found!\n", buffer, override);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#ifdef _DEBUG
|
||||
fprintf(stderr, "Choosing strategy for %s:\n", strategy_type);
|
||||
for (i=0; i < strategies->count; ++i) {
|
||||
if (strcmp(strategies->strategies[i].type, strategy_type) == 0) {
|
||||
if (i != max_priority_i) {
|
||||
fprintf(stderr, "- %s (%d, %p)\n", strategies->strategies[i].strategy_name, strategies->strategies[i].priority, strategies->strategies[i].fptr);
|
||||
} else {
|
||||
fprintf(stderr, "> %s (%d, %p)\n", strategies->strategies[i].strategy_name, strategies->strategies[i].priority, strategies->strategies[i].fptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif //_DEBUG
|
||||
|
||||
|
||||
if (max_priority_i == -1) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return strategies->strategies[max_priority_i].fptr;
|
||||
}
|
||||
|
||||
#if COMPILE_INTEL
|
||||
#include "x86/cpu.h"
|
||||
#endif
|
||||
|
||||
static void set_hardware_flags() {
|
||||
memset(&g_hardware_flags, 0, sizeof(g_hardware_flags));
|
||||
|
||||
g_hardware_flags.arm = COMPILE_ARM;
|
||||
g_hardware_flags.intel = COMPILE_INTEL;
|
||||
g_hardware_flags.powerpc = COMPILE_POWERPC;
|
||||
|
||||
#if COMPILE_INTEL
|
||||
{
|
||||
int ecx = 0,edx =0;
|
||||
/* CPU feature bits */
|
||||
enum { BIT_SSE3 = 0,BIT_SSSE3 = 9, BIT_SSE41 = 19, BIT_SSE42 = 20, BIT_MMX = 24, BIT_SSE = 25, BIT_SSE2 = 26, BIT_AVX = 28};
|
||||
|
||||
// Dig CPU features with cpuid
|
||||
kvz_cpu_cpuid(&ecx,&edx);
|
||||
|
||||
// EDX
|
||||
if (edx & (1<<BIT_MMX)) g_hardware_flags.intel_flags.mmx = 1;
|
||||
if (edx & (1<<BIT_SSE)) g_hardware_flags.intel_flags.sse = 1;
|
||||
if (edx & (1<<BIT_SSE2)) g_hardware_flags.intel_flags.sse2 = 1;
|
||||
// ECX
|
||||
if (ecx & (1<<BIT_SSE3)) g_hardware_flags.intel_flags.sse3 = 1;;
|
||||
if (ecx & (1<<BIT_SSSE3)) g_hardware_flags.intel_flags.ssse3 = 1;
|
||||
if (ecx & (1<<BIT_SSE41)) g_hardware_flags.intel_flags.sse41 = 1;
|
||||
if (ecx & (1<<BIT_SSE42)) g_hardware_flags.intel_flags.sse42 = 1;
|
||||
if (ecx & (1<<BIT_AVX)) g_hardware_flags.intel_flags.avx = 1;
|
||||
|
||||
fprintf(stderr, "Compiled: INTEL, flags:");
|
||||
#if COMPILE_INTEL_MMX
|
||||
fprintf(stderr, " MMX");
|
||||
#endif
|
||||
#if COMPILE_INTEL_SSE
|
||||
fprintf(stderr, " SSE");
|
||||
#endif
|
||||
#if COMPILE_INTEL_SSE2
|
||||
fprintf(stderr, " SSE2");
|
||||
#endif
|
||||
#if COMPILE_INTEL_SSE3
|
||||
fprintf(stderr, " SSE3");
|
||||
#endif
|
||||
#if COMPILE_INTEL_SSSE3
|
||||
fprintf(stderr, " SSSE3");
|
||||
#endif
|
||||
#if COMPILE_INTEL_SSE41
|
||||
fprintf(stderr, " SSE41");
|
||||
#endif
|
||||
#if COMPILE_INTEL_SSE42
|
||||
fprintf(stderr, " SSE42");
|
||||
#endif
|
||||
#if COMPILE_INTEL_AVX
|
||||
fprintf(stderr, " AVX");
|
||||
#endif
|
||||
fprintf(stderr, "\nRun on : INTEL, flags:");
|
||||
if (g_hardware_flags.intel_flags.mmx) fprintf(stderr, " MMX");
|
||||
if (g_hardware_flags.intel_flags.sse) fprintf(stderr, " SSE");
|
||||
if (g_hardware_flags.intel_flags.sse2) fprintf(stderr, " SSE2");
|
||||
if (g_hardware_flags.intel_flags.sse3) fprintf(stderr, " SSE3");
|
||||
if (g_hardware_flags.intel_flags.ssse3) fprintf(stderr, " SSSE3");
|
||||
if (g_hardware_flags.intel_flags.sse41) fprintf(stderr, " SSE41");
|
||||
if (g_hardware_flags.intel_flags.sse42) fprintf(stderr, " SSE42");
|
||||
if (g_hardware_flags.intel_flags.avx) fprintf(stderr, " AVX");
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
#endif //COMPILE_INTEL
|
||||
}
|
141
src/strategyselector.h
Normal file
141
src/strategyselector.h
Normal file
|
@ -0,0 +1,141 @@
|
|||
#ifndef STRATEGYSELECTOR_H_
|
||||
#define STRATEGYSELECTOR_H_
|
||||
/*****************************************************************************
|
||||
* This file is part of Kvazaar HEVC encoder.
|
||||
*
|
||||
* Copyright (C) 2013-2014 Tampere University of Technology and others (see
|
||||
* COPYING file).
|
||||
*
|
||||
* Kvazaar is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as published
|
||||
* by the Free Software Foundation.
|
||||
*
|
||||
* Kvazaar is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
|
||||
****************************************************************************/
|
||||
|
||||
//Hardware data (abstraction of defines). Extend for other compilers

//x86 and x86-64. __i386__ is tested explicitly: GCC and Clang define it for
//every 32-bit x86 target, while __i586__/__i686__ are only defined when the
//selected -march is at least that CPU.
#if defined(_M_IX86) || defined(__i386__) || defined(__i586__) || defined(__i686__) || defined(_M_X64) || defined(_M_AMD64) || defined(__amd64__) || defined(__x86_64__)
#define COMPILE_INTEL 1

//Instruction-set extensions enabled for this build. Each macro is defined
//(to 1) only when the matching compiler macro is present, so test them with
//#if, which treats an undefined macro as 0.
#if defined(__MMX__)
#define COMPILE_INTEL_MMX 1
#endif

#if defined(__SSE__)
#define COMPILE_INTEL_SSE 1
#endif

#if defined(__SSE2__)
#define COMPILE_INTEL_SSE2 1
#endif

#if defined(__SSE3__)
#define COMPILE_INTEL_SSE3 1
#endif

#if defined(__SSSE3__)
#define COMPILE_INTEL_SSSE3 1
#endif

#if defined(__SSE4_1__)
#define COMPILE_INTEL_SSE41 1
#endif

#if defined(__SSE4_2__)
#define COMPILE_INTEL_SSE42 1
#endif

#if defined(__AVX__)
#define COMPILE_INTEL_AVX 1
#endif

#else
#define COMPILE_INTEL 0
#endif

//PowerPC
#if defined (_M_PPC) || defined(__powerpc64__) || defined(__powerpc__)
#define COMPILE_POWERPC 1
#else
#define COMPILE_POWERPC 0
#endif

//ARM
#if defined (_M_ARM) || defined(__arm__) || defined(__thumb__)
#define COMPILE_ARM 1
#else
#define COMPILE_ARM 0
#endif
|
||||
|
||||
|
||||
|
||||
//One registered implementation of a selectable function.
typedef struct {
|
||||
const char *type; //Type of the function, usually its name
|
||||
const char *strategy_name; //Name of the strategy (e.g. sse2)
|
||||
unsigned int priority; //Priority. 0 = lowest (default strategy); the highest priority is selected
|
||||
void *fptr; //Pointer to the function
|
||||
} strategy;
|
||||
|
||||
//Growable array of registered strategies.
typedef struct {
|
||||
unsigned int count; //Number of entries in use
|
||||
unsigned int allocated; //Number of entries the array has room for
|
||||
strategy* strategies; //Heap-allocated array, NULL while nothing is allocated
|
||||
} strategy_list;
|
||||
|
||||
#define STRATEGY_LIST_ALLOC_SIZE 16
|
||||
|
||||
//One function pointer that has to be resolved during initialization.
typedef struct {
|
||||
const char *strategy_type; //Strategy type to look up among the registered strategies
|
||||
void **fptr; //Address of the function pointer that receives the selected implementation
|
||||
} strategy_to_select;
|
||||
|
||||
//Build target and detected CPU features. The intel/powerpc/arm members hold
//the value of the matching COMPILE_* macro; the nested *_flags members are 1
//for every detected extension and 0 otherwise.
typedef struct {
|
||||
int intel;
|
||||
struct {
|
||||
int mmx;
|
||||
int sse;
|
||||
int sse2;
|
||||
int sse3;
|
||||
int ssse3;
|
||||
int sse41;
|
||||
int sse42;
|
||||
int avx;
|
||||
} intel_flags;
|
||||
|
||||
int powerpc;
|
||||
struct {
|
||||
int altivec;
|
||||
} powerpc_flags;
|
||||
|
||||
int arm;
|
||||
struct {
|
||||
int neon;
|
||||
} arm_flags;
|
||||
} hardware_flags;
|
||||
|
||||
extern hardware_flags g_hardware_flags;
|
||||
|
||||
|
||||
//Detects the hardware and selects an implementation for every entry of
//strategies_to_select. Returns 1 if successful, 0 otherwise.
int strategyselector_init();
|
||||
//Counterpart of strategyselector_init(); currently does nothing.
void strategyselector_free();
|
||||
//Appends a strategy to the strategy_list passed as opaque.
//Returns 1 if successful, 0 otherwise.
int strategyselector_register(void *opaque, const char *type, const char *strategy_name, int priority, void *fptr);
|
||||
|
||||
|
||||
//Strategy to include
|
||||
#include "strategies/picture.h"
|
||||
|
||||
//Table of every function pointer that strategyselector_init() must resolve.
//The {NULL, NULL} entry terminates the table and must stay last.
static const strategy_to_select strategies_to_select[] = {
|
||||
STRATEGIES_PICTURE_EXPORTS,
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif //STRATEGYSELECTOR_H_
|
Loading…
Reference in a new issue