mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-12-18 03:04:06 +00:00
9ea7bfd19a
Sometimes the tests overrun their time limit by varying amounts. Return calls per second based on the amount of time actually spent in the loop instead of how much time we tried to spend in the loop.
391 lines
11 KiB
C
391 lines
11 KiB
C
/*****************************************************************************
|
|
* This file is part of Kvazaar HEVC encoder.
|
|
*
|
|
* Copyright (C) 2013-2015 Tampere University of Technology and others (see
|
|
* COPYING file).
|
|
*
|
|
* Kvazaar is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU Lesser General Public License version 2.1 as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* Kvazaar is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
|
|
****************************************************************************/
|
|
|
|
#include "greatest/greatest.h"

#include "test_strategies.h"

#include "src/image.h"
#include "src/threads.h"

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// MACROS
|
|
// Number of independent pixel buffers the benchmarks cycle through.
#define NUM_TESTS      113
// Number of 64x64 pixel chunks stored in each buffer.
#define NUM_CHUNKS     36
// Log2 LCU width limits. Not referenced by the code visible in this file.
#define LCU_MAX_LOG_W  6
#define LCU_MIN_LOG_W  2

// Time budget for each benchmarked function, in seconds.
#define TIME_PER_TEST  1.0
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// GLOBALS
|
|
static kvz_pixel * bufs[NUM_TESTS]; // SIMD aligned pointers.
|
|
static kvz_pixel * actual_bufs[NUM_TESTS]; // pointers returned by malloc.
|
|
|
|
static struct test_env_t {
|
|
int log_width; // for selecting dim from bufs
|
|
void * tested_func;
|
|
const strategy_t * strategy;
|
|
char msg[1024];
|
|
} test_env;
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// SETUP, TEARDOWN AND HELPER FUNCTIONS
|
|
// Fill a width x width block with a radial gradient.
//
// Every pixel is set to slope times its Euclidean distance from the point
// (x_px, y_px), rounded by adding 0.5 and truncating, then clipped to 0..255.
// Rows are stored contiguously, so width is also the row stride of buf.
static void init_gradient(int x_px, int y_px, int width, int slope, kvz_pixel *buf)
{
  for (int row = 0; row < width; ++row) {
    const int dy = y_px - row;
    kvz_pixel *const line = &buf[row * width];

    for (int col = 0; col < width; ++col) {
      const int dx = x_px - col;
      const int val = slope * sqrt(dx * dx + dy * dy) + 0.5;
      line[col] = CLIP(0, 255, val);
    }
  }
}
|
|
|
|
|
|
static void setup_tests()
|
|
{
|
|
for (int test = 0; test < NUM_TESTS; ++test) {
|
|
unsigned size = NUM_CHUNKS * 64 * 64;
|
|
|
|
actual_bufs[test] = malloc(size * sizeof(kvz_pixel) + SIMD_ALIGNMENT);
|
|
bufs[test] = ALIGNED_POINTER(actual_bufs[test], SIMD_ALIGNMENT);
|
|
}
|
|
|
|
for (int test = 0; test < NUM_TESTS; ++test) {
|
|
for (int chunk = 0; chunk < NUM_CHUNKS; ++chunk) {
|
|
const int width = 64;
|
|
int x = (test + chunk) % width;
|
|
int y = (test + chunk) / width;
|
|
init_gradient(width - x, y, width, 255 / width, &bufs[test][chunk * 64*64]);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void tear_down_tests()
|
|
{
|
|
for (int test = 0; test < NUM_TESTS; ++test) {
|
|
free(actual_bufs[test]);
|
|
}
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// TESTS
|
|
|
|
TEST test_intra_speed(const int width)
|
|
{
|
|
const int size = width * width;
|
|
uint64_t call_cnt = 0;
|
|
KVZ_CLOCK_T clock_now;
|
|
KVZ_GET_TIME(&clock_now);
|
|
double test_end = KVZ_CLOCK_T_AS_DOUBLE(clock_now) + TIME_PER_TEST;
|
|
|
|
// Loop until time allocated for test has passed.
|
|
for (unsigned i = 0;
|
|
test_end > KVZ_CLOCK_T_AS_DOUBLE(clock_now);
|
|
++i)
|
|
{
|
|
int test = i % NUM_TESTS;
|
|
uint64_t sum = 0;
|
|
for (int offset = 0; offset < NUM_CHUNKS * 64 * 64; offset += NUM_CHUNKS * size) {
|
|
// Compare the first chunk against the 35 other chunks to simulate real usage.
|
|
kvz_pixel * buf1 = &bufs[test][offset];
|
|
for (int chunk = 1; chunk < NUM_CHUNKS; ++chunk) {
|
|
kvz_pixel * buf2 = &bufs[test][chunk * size + offset];
|
|
|
|
cost_pixel_nxn_func *tested_func = test_env.tested_func;
|
|
sum += tested_func(buf1, buf2);
|
|
++call_cnt;
|
|
}
|
|
}
|
|
|
|
ASSERT(sum > 0);
|
|
KVZ_GET_TIME(&clock_now)
|
|
}
|
|
|
|
double test_time = TIME_PER_TEST + KVZ_CLOCK_T_AS_DOUBLE(clock_now) - test_end;
|
|
sprintf(test_env.msg, "%.3fM x %s:%s",
|
|
(double)call_cnt / 1000000.0 / test_time,
|
|
test_env.strategy->type,
|
|
test_env.strategy->strategy_name);
|
|
PASSm(test_env.msg);
|
|
}
|
|
|
|
|
|
TEST test_intra_dual_speed(const int width)
|
|
{
|
|
const int size = width * width;
|
|
uint64_t call_cnt = 0;
|
|
KVZ_CLOCK_T clock_now;
|
|
KVZ_GET_TIME(&clock_now);
|
|
double test_end = KVZ_CLOCK_T_AS_DOUBLE(clock_now) + TIME_PER_TEST;
|
|
|
|
// Loop until time allocated for test has passed.
|
|
for (unsigned i = 0;
|
|
test_end > KVZ_CLOCK_T_AS_DOUBLE(clock_now);
|
|
++i)
|
|
{
|
|
int test = i % NUM_TESTS;
|
|
uint64_t sum = 0;
|
|
for (int offset = 0; offset < NUM_CHUNKS * 64 * 64; offset += NUM_CHUNKS * size) {
|
|
// Compare the first chunk against the 35 other chunks to simulate real usage.
|
|
kvz_pixel * buf1 = &bufs[test][offset];
|
|
for (int chunk = 0; chunk < NUM_CHUNKS; chunk += 2) {
|
|
cost_pixel_nxn_multi_func *tested_func = test_env.tested_func;
|
|
const kvz_pixel *buf_pair[2] = { &bufs[test][chunk * size + offset], &bufs[test][(chunk + 1) * size + offset] };
|
|
unsigned costs[2] = { 0, 0 };
|
|
tested_func((pred_buffer)buf_pair, buf1, 2, costs);
|
|
sum += costs[0] + costs[1];
|
|
++call_cnt;
|
|
}
|
|
}
|
|
|
|
ASSERT(sum > 0);
|
|
KVZ_GET_TIME(&clock_now)
|
|
}
|
|
|
|
double test_time = TIME_PER_TEST + KVZ_CLOCK_T_AS_DOUBLE(clock_now) - test_end;
|
|
sprintf(test_env.msg, "%.3fM x %s:%s",
|
|
(double)call_cnt / 1000000.0 / test_time,
|
|
test_env.strategy->type,
|
|
test_env.strategy->strategy_name);
|
|
PASSm(test_env.msg);
|
|
}
|
|
|
|
|
|
TEST test_inter_speed(const int width)
|
|
{
|
|
const int size = width * width;
|
|
unsigned call_cnt = 0;
|
|
KVZ_CLOCK_T clock_now;
|
|
KVZ_GET_TIME(&clock_now);
|
|
double test_end = KVZ_CLOCK_T_AS_DOUBLE(clock_now) + TIME_PER_TEST;
|
|
|
|
// Loop until time allocated for test has passed.
|
|
for (unsigned i = 0;
|
|
test_end > KVZ_CLOCK_T_AS_DOUBLE(clock_now);
|
|
++i)
|
|
{
|
|
int test = i % NUM_TESTS;
|
|
uint64_t sum = 0;
|
|
for (int offset = 0; offset < NUM_CHUNKS * 64 * 64; offset += NUM_CHUNKS * size) {
|
|
// Treat 4 consecutive chunks as one chunk with double width and height,
|
|
// and do a 8x8 grid search against the first chunk to simulate real usage.
|
|
kvz_pixel * buf1 = &bufs[test][offset];
|
|
for (int chunk = 0; chunk < NUM_CHUNKS; chunk += 4) {
|
|
kvz_pixel * buf2 = &bufs[test][chunk * size + offset];
|
|
for (int y = 0; y < 8; ++y) {
|
|
for (int x = 0; x < 8; ++x) {
|
|
const int stride1 = 2 * 64;
|
|
const int stride2 = 2 * 64;
|
|
reg_sad_func *tested_func = test_env.tested_func;
|
|
sum += tested_func(buf1, &buf2[y * stride2 + x], width, width, stride1, stride2);
|
|
++call_cnt;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
ASSERT(sum > 0);
|
|
KVZ_GET_TIME(&clock_now)
|
|
}
|
|
|
|
double test_time = TIME_PER_TEST + KVZ_CLOCK_T_AS_DOUBLE(clock_now) - test_end;
|
|
sprintf(test_env.msg, "%.3fM x %s(%ix%i):%s",
|
|
(double)call_cnt / 1000000.0 / test_time,
|
|
test_env.strategy->type,
|
|
width,
|
|
width,
|
|
test_env.strategy->strategy_name);
|
|
PASSm(test_env.msg);
|
|
}
|
|
|
|
|
|
TEST dct_speed(const int width)
|
|
{
|
|
const int size = width * width;
|
|
uint64_t call_cnt = 0;
|
|
dct_func * tested_func = test_env.strategy->fptr;
|
|
|
|
KVZ_CLOCK_T clock_now;
|
|
KVZ_GET_TIME(&clock_now);
|
|
double test_end = KVZ_CLOCK_T_AS_DOUBLE(clock_now) + TIME_PER_TEST;
|
|
|
|
int16_t _tmp_residual[32 * 32 + SIMD_ALIGNMENT];
|
|
int16_t _tmp_coeffs[32 * 32 + SIMD_ALIGNMENT];
|
|
int16_t *tmp_residual = ALIGNED_POINTER(_tmp_residual, SIMD_ALIGNMENT);
|
|
int16_t *tmp_coeffs = ALIGNED_POINTER(_tmp_coeffs, SIMD_ALIGNMENT);
|
|
|
|
// Loop until time allocated for test has passed.
|
|
for (unsigned i = 0;
|
|
test_end > KVZ_CLOCK_T_AS_DOUBLE(clock_now);
|
|
++i)
|
|
{
|
|
int test = i % NUM_TESTS;
|
|
uint64_t sum = 0;
|
|
for (int offset = 0; offset < NUM_CHUNKS * 64 * 64; offset += NUM_CHUNKS * size) {
|
|
// Compare the first chunk against the 35 other chunks to simulate real usage.
|
|
for (int chunk = 0; chunk < NUM_CHUNKS; ++chunk) {
|
|
kvz_pixel * buf1 = &bufs[test][offset];
|
|
kvz_pixel * buf2 = &bufs[test][chunk * size + offset];
|
|
for (int p = 0; p < size; ++p) {
|
|
tmp_residual[p] = (int16_t)(buf1[p] - buf2[p]);
|
|
}
|
|
|
|
tested_func(8, tmp_residual, tmp_coeffs);
|
|
++call_cnt;
|
|
sum += tmp_coeffs[0];
|
|
}
|
|
}
|
|
|
|
ASSERT(sum > 0);
|
|
KVZ_GET_TIME(&clock_now)
|
|
}
|
|
|
|
double test_time = TIME_PER_TEST + KVZ_CLOCK_T_AS_DOUBLE(clock_now) - test_end;
|
|
sprintf(test_env.msg, "%.3fM x %s:%s",
|
|
(double)call_cnt / 1000000.0 / test_time,
|
|
test_env.strategy->type,
|
|
test_env.strategy->strategy_name);
|
|
PASSm(test_env.msg);
|
|
}
|
|
|
|
|
|
TEST intra_sad(void)
|
|
{
|
|
const int width = 1 << test_env.log_width;
|
|
return test_intra_speed(width);
|
|
}
|
|
|
|
|
|
TEST intra_sad_dual(void)
|
|
{
|
|
const int width = 1 << test_env.log_width;
|
|
return test_intra_dual_speed(width);
|
|
}
|
|
|
|
|
|
TEST intra_satd(void)
|
|
{
|
|
const int width = 1 << test_env.log_width;
|
|
return test_intra_speed(width);
|
|
}
|
|
|
|
|
|
TEST intra_satd_dual(void)
|
|
{
|
|
const int width = 1 << test_env.log_width;
|
|
return test_intra_dual_speed(width);
|
|
}
|
|
|
|
|
|
TEST inter_sad(void)
|
|
{
|
|
const int width = 1 << test_env.log_width;
|
|
return test_inter_speed(width);
|
|
}
|
|
|
|
|
|
TEST fdct(void)
|
|
{
|
|
const int width = 1 << test_env.log_width;
|
|
return dct_speed(width);
|
|
}
|
|
|
|
|
|
TEST idct(void)
|
|
{
|
|
const int width = 1 << test_env.log_width;
|
|
return dct_speed(width);
|
|
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// TEST FIXTURES
|
|
SUITE(speed_tests)
|
|
{
|
|
//SET_SETUP(sad_setup);
|
|
//SET_TEARDOWN(sad_teardown);
|
|
|
|
setup_tests();
|
|
|
|
// Loop through all strategies picking out the intra sad ones and run
|
|
// selectec strategies though all tests
|
|
for (unsigned i = 0; i < strategies.count; ++i) {
|
|
const strategy_t * strategy = &strategies.strategies[i];
|
|
|
|
// Select buffer width according to function name.
|
|
if (strstr(strategy->type, "_4x4")) {
|
|
test_env.log_width = 2;
|
|
} else if (strstr(strategy->type, "_8x8")) {
|
|
test_env.log_width = 3;
|
|
} else if (strstr(strategy->type, "_16x16")) {
|
|
test_env.log_width = 4;
|
|
} else if (strstr(strategy->type, "_32x32")) {
|
|
test_env.log_width = 5;
|
|
} else if (strstr(strategy->type, "_64x64")) {
|
|
test_env.log_width = 6;
|
|
} else {
|
|
test_env.log_width = 0;
|
|
}
|
|
|
|
test_env.tested_func = strategies.strategies[i].fptr;
|
|
test_env.strategy = strategy;
|
|
|
|
// Call different tests depending on type of function.
|
|
// This allows for selecting a subset of tests with -t parameter.
|
|
if (strncmp(strategy->type, "satd_", 5) == 0 && strcmp(strategy->type, "satd_any_size") != 0) {
|
|
if (strlen(strategy->type) <= 10) {
|
|
RUN_TEST(intra_satd);
|
|
} else if (strstr(strategy->type, "_dual")) {
|
|
RUN_TEST(intra_satd_dual);
|
|
}
|
|
} else if (strncmp(strategy->type, "sad_", 4) == 0) {
|
|
if (strlen(strategy->type) <= 9) {
|
|
RUN_TEST(intra_sad);
|
|
} else if (strstr(strategy->type, "_dual")) {
|
|
RUN_TEST(intra_sad_dual);
|
|
}
|
|
} else if (strcmp(strategy->type, "reg_sad") == 0) {
|
|
// Call reg_sad with all the sizes it is actually called with.
|
|
for (int width = 3; width <= 6; ++width) {
|
|
test_env.log_width = width;
|
|
RUN_TEST(inter_sad);
|
|
}
|
|
} else if (strncmp(strategy->type, "dct_", 4) == 0 ||
|
|
strcmp(strategy->type, "fast_forward_dst_4x4") == 0)
|
|
{
|
|
RUN_TEST(fdct);
|
|
} else if (strncmp(strategy->type, "idct_", 4) == 0 ||
|
|
strcmp(strategy->type, "fast_inverse_dst_4x4") == 0)
|
|
{
|
|
RUN_TEST(idct);
|
|
}
|
|
}
|
|
|
|
tear_down_tests();
|
|
}
|