Generate and use static tables

This commit is contained in:
Laurent Fasnacht 2014-04-16 08:09:10 +02:00
parent a982800e1b
commit 9112cbb58c
6 changed files with 372 additions and 246 deletions

View file

@ -50,7 +50,7 @@ LDFLAGS += -lm
LD = gcc LD = gcc
YASM = yasm YASM = yasm
ASMOBJS = cpu.o ASMOBJS = cpu.o
OBJS = interface_main.o encmain.o bitstream.o cabac.o config.o context.o encoder.o filter.o inter.o intra.o nal.o picture.o rdo.o sao.o search.o transform.o extras/getopt.o OBJS = interface_main.o encmain.o bitstream.o cabac.o config.o context.o encoder.o filter.o inter.o intra.o nal.o picture.o rdo.o sao.o search.o tables.o transform.o extras/getopt.o
PROG = ./kvazaar PROG = ./kvazaar
PROGS = $(PROG) PROGS = $(PROG)
@ -71,6 +71,13 @@ cpu.o: x86/cpu.asm
%.d: %.c Makefile %.d: %.c Makefile
$(CC) $(CCFLAGS) -MF"$@" -MG -MM -MP -MT"$@" -MT"$(<:.c=.o)" "$<" $(CC) $(CCFLAGS) -MF"$@" -MG -MM -MP -MT"$@" -MT"$(<:.c=.o)" "$<"
generate_tables: tables.c global.h
$(CC) $(CCFLAGS) -DMAKE_GENERATE_TABLES=1 $< -o $@
tables_generated.h: generate_tables
./generate_tables > $@
clean: clean:
rm -f $(OBJS) $(PROGS) $(ASMOBJS) $(DEPS) rm -f $(OBJS) $(PROGS) $(ASMOBJS) $(DEPS)

View file

@ -29,6 +29,7 @@
#include <string.h> #include <string.h>
#include <assert.h> #include <assert.h>
#include "tables.h"
#include "config.h" #include "config.h"
#include "cabac.h" #include "cabac.h"
#include "picture.h" #include "picture.h"
@ -44,8 +45,6 @@
double g_lambda_cost[55]; double g_lambda_cost[55];
double g_cur_lambda_cost; double g_cur_lambda_cost;
const uint32_t* g_sig_last_scan[3][5];
int8_t g_convert_to_bit[LCU_WIDTH + 1];
int8_t g_bitdepth = 8; int8_t g_bitdepth = 8;
/* Local functions. */ /* Local functions. */
@ -56,163 +55,6 @@ static void encode_sao(const encoder_control * const encoder,
unsigned x_lcu, uint16_t y_lcu, unsigned x_lcu, uint16_t y_lcu,
sao_info *sao_luma, sao_info *sao_chroma); sao_info *sao_luma, sao_info *sao_chroma);
/**
* Initialize g_sig_last_scan with scan positions for a transform block of
* size width x height.
*/
static void init_sig_last_scan(uint32_t *buff_d, uint32_t *buff_h,
uint32_t *buff_v,
int32_t width, int32_t height)
{
uint32_t num_scan_pos = width * width;
uint32_t next_scan_pos = 0;
int32_t xx, yy, x, y;
uint32_t scan_line;
uint32_t blk_y, blk_x;
uint32_t blk;
uint32_t cnt = 0;
assert(width == height && width <= 32);
if (width <= 4) {
uint32_t *buff_tmp = buff_d;
for (scan_line = 0; next_scan_pos < num_scan_pos; scan_line++) {
int primary_dim = scan_line;
int second_dim = 0;
while (primary_dim >= width) {
second_dim++;
primary_dim--;
}
while (primary_dim >= 0 && second_dim < width) {
buff_tmp[next_scan_pos] = primary_dim * width + second_dim ;
next_scan_pos++;
second_dim++;
primary_dim--;
}
}
}
if (width > 4 && width <= 32) {
uint32_t num_blk_side = width >> 2;
uint32_t num_blks = num_blk_side * num_blk_side;
uint32_t log2_width = g_to_bits[width];
for (blk = 0; blk < num_blks; blk++) {
uint32_t init_blk_pos = g_sig_last_scan_cg[log2_width][SCAN_DIAG][blk];
next_scan_pos = 0;
{
uint32_t offset_y = init_blk_pos / num_blk_side;
uint32_t offset_x = init_blk_pos - offset_y * num_blk_side;
uint32_t offset_d = 4 * (offset_x + offset_y * width);
uint32_t offset_scan = 16 * blk;
for (scan_line = 0; next_scan_pos < 16; scan_line++) {
int primary_dim = scan_line;
int second_dim = 0;
//TODO: optimize
while (primary_dim >= 4) {
second_dim++;
primary_dim--;
}
while (primary_dim >= 0 && second_dim < 4) {
buff_d[next_scan_pos + offset_scan] = primary_dim * width +
second_dim + offset_d;
next_scan_pos++;
second_dim++;
primary_dim--;
}
}
}
}
}
if (width > 2) {
uint32_t num_blk_side = width >> 2;
for (blk_y = 0; blk_y < num_blk_side; blk_y++) {
for (blk_x = 0; blk_x < num_blk_side; blk_x++) {
uint32_t offset = blk_y * 4 * width + blk_x * 4;
for (y = 0; y < 4; y++) {
for (x = 0; x < 4; x++) {
buff_h[cnt] = y * width + x + offset;
cnt ++;
}
}
}
}
cnt = 0;
for (blk_x = 0; blk_x < num_blk_side; blk_x++) {
for (blk_y = 0; blk_y < num_blk_side; blk_y++) {
uint32_t offset = blk_y * 4 * width + blk_x * 4;
for (x = 0; x < 4; x++) {
for (y = 0; y < 4; y++) {
buff_v[cnt] = y * width + x + offset;
cnt ++;
}
}
}
}
} else {
for (yy = 0; yy < height; yy++) {
for (xx = 0; xx < width; xx++) {
buff_h[cnt] = yy * width + xx;
cnt ++;
}
}
cnt = 0;
for (xx = 0; xx < width; xx++) {
for (yy = 0; yy < height; yy++) {
buff_v[cnt] = yy * width + xx;
cnt ++;
}
}
}
}
void init_tables(void)
{
int i;
int c = 0;
memset( g_convert_to_bit,-1, sizeof( g_convert_to_bit ) );
for (i = 4; i < LCU_WIDTH; i *= 2) {
g_convert_to_bit[i] = (int8_t)c;
c++;
}
g_convert_to_bit[i] = (int8_t)c;
c = 2;
for (i = 0; i < 5; i++) {
uint32_t *sls0, *sls1, *sls2;
sls0 = (uint32_t*)malloc(c*c*sizeof(uint32_t));
sls1 = (uint32_t*)malloc(c*c*sizeof(uint32_t));
sls2 = (uint32_t*)malloc(c*c*sizeof(uint32_t));
init_sig_last_scan(sls0, sls1, sls2, c, c);
g_sig_last_scan[0][i] = sls0;
g_sig_last_scan[1][i] = sls1;
g_sig_last_scan[2][i] = sls2;
c <<= 1;
}
}
/*! /*!
\brief Initializes lambda-value for current QP \brief Initializes lambda-value for current QP
@ -243,15 +85,6 @@ void init_lambda(const encoder_control * const encoder)
g_cur_lambda_cost = lambda; g_cur_lambda_cost = lambda;
} }
void free_tables(void)
{
int i;
for (i = 0; i < 5; i++) {
FREE_POINTER(g_sig_last_scan[0][i]);
FREE_POINTER(g_sig_last_scan[1][i]);
FREE_POINTER(g_sig_last_scan[2][i]);
}
}
encoder_control *init_encoder_control(config *cfg) encoder_control *init_encoder_control(config *cfg)
{ {
encoder_control *enc_c = NULL; encoder_control *enc_c = NULL;

View file

@ -30,6 +30,7 @@
#include "bitstream.h" #include "bitstream.h"
#include "cabac.h" #include "cabac.h"
#include "config.h" #include "config.h"
#include "tables.h"
typedef struct { typedef struct {
int32_t scaling_list_dc [SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM]; int32_t scaling_list_dc [SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM];
@ -112,9 +113,7 @@ typedef struct
scaling_list scaling_list; scaling_list scaling_list;
} encoder_control; } encoder_control;
void init_tables(void);
void init_lambda(const encoder_control * const encoder); void init_lambda(const encoder_control * const encoder);
void free_tables(void);
encoder_control *init_encoder_control(config *cfg); encoder_control *init_encoder_control(config *cfg);
void init_encoder_input(encoder_input *input, FILE* inputfile, void init_encoder_input(encoder_input *input, FILE* inputfile,
int32_t width, int32_t height); int32_t width, int32_t height);
@ -144,7 +143,6 @@ void encode_block_residual(const encoder_control * const encoder,
extern double g_lambda_cost[55]; extern double g_lambda_cost[55];
extern double g_cur_lambda_cost; extern double g_cur_lambda_cost;
extern int8_t g_convert_to_bit[LCU_WIDTH + 1];
extern int8_t g_bitdepth; extern int8_t g_bitdepth;
static const uint8_t g_group_idx[32] = { static const uint8_t g_group_idx[32] = {
@ -156,86 +154,12 @@ static const uint8_t g_group_idx[32] = {
static const uint8_t g_min_in_group[10] = { static const uint8_t g_min_in_group[10] = {
0, 1, 2, 3, 4, 6, 8, 12, 16, 24 }; 0, 1, 2, 3, 4, 6, 8, 12, 16, 24 };
/**
* List of mappings for coefficients within a transform block.
* First index: scan pattern 0 = diagonal, 1 = horizontal, 2 = vertical
* Second index: (log2 - 1) size of transform block. 2x2 .. 32x32
*/
extern const uint32_t* g_sig_last_scan[3][5];
/***
* List of indices for 4x4 coefficient groups within 8x8 transform block.
* First index: 0 = diagonal, 1 = vertical, 2 horizontal scan pattern.
* Second index: (log2 - 2) size of transform block. 4x4 .. 32x32
*/
static const uint32_t g_sig_last_scan_8x8[3][4] =
{ {0, 2, 1, 3},
{0, 1, 2, 3},
{0, 2, 1, 3}
};
/***
* List of indices for 4x4 coefficient groups within 16x16 transform block.
*/
static const uint32_t g_sig_last_scan_16x16[16] = {
0, 4, 1, 8,
5, 2, 12, 9,
6, 3, 13, 10,
7, 14, 11, 15
};
/***
* List of indices for 4x4 coefficient groups within 32x32 transform block.
*/
static const uint32_t g_sig_last_scan_32x32[64] = {
0, 8, 1, 16, 9, 2, 24, 17,
10, 3, 32, 25, 18, 11, 4, 40,
33, 26, 19, 12, 5, 48, 41, 34,
27, 20, 13, 6, 56, 49, 42, 35,
28, 21, 14, 7, 57, 50, 43, 36,
29, 22, 15, 58, 51, 44, 37, 30,
23, 59, 52, 45, 38, 31, 60, 53,
46, 39, 61, 54, 47, 62, 55, 63
};
/**
* List of pointers to coefficient group mappings.
* First index: (log2 - 2) of transform block size
* Second index: scan pattern 0 = diagonal, 1 = horizontal, 2 = vertical
*/
static const uint32_t *const g_sig_last_scan_cg[4][3] = {
{ g_sig_last_scan_8x8[0], g_sig_last_scan_8x8[1], g_sig_last_scan_8x8[2] }, // 4x4, only first element is used
{ g_sig_last_scan_8x8[0], g_sig_last_scan_8x8[1], g_sig_last_scan_8x8[2] },
{ g_sig_last_scan_16x16, 0, 0 },
{ g_sig_last_scan_32x32, 0, 0 }
};
//4 8 16 32 64 128
//0 1 2 3 4 5
static const uint8_t g_to_bits[129] =
{
0,
0,0,0,0,
0,0,0,1,
0,0,0,0,0,0,0,2,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5
};
#define TOBITS(len) g_to_bits[len]
#define C1FLAG_NUMBER 8 // maximum number of largerThan1 flag coded in one chunk #define C1FLAG_NUMBER 8 // maximum number of largerThan1 flag coded in one chunk
#define C2FLAG_NUMBER 1 // maximum number of largerThan2 flag coded in one chunk #define C2FLAG_NUMBER 1 // maximum number of largerThan2 flag coded in one chunk
enum COEFF_SCAN_TYPE
{
SCAN_DIAG = 0, // up-right diagonal scan
SCAN_HOR, // horizontal first scan
SCAN_VER // vertical first scan
};
#endif #endif

252
src/tables.c Normal file
View file

@ -0,0 +1,252 @@
#if MAKE_GENERATE_TABLES
#define DONT_USE_GENERATED_TABLES 1
#endif
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "global.h"
#include "tables.h"
#ifndef USING_GENERATED_TABLES
const uint32_t* g_sig_last_scan[3][5];
int8_t g_convert_to_bit[LCU_WIDTH + 1];
/**
* Initialize g_sig_last_scan with scan positions for a transform block of
* size width x height.
*/
static void init_sig_last_scan(uint32_t *buff_d, uint32_t *buff_h,
uint32_t *buff_v,
int32_t width, int32_t height)
{
uint32_t num_scan_pos = width * width;
uint32_t next_scan_pos = 0;
int32_t xx, yy, x, y;
uint32_t scan_line;
uint32_t blk_y, blk_x;
uint32_t blk;
uint32_t cnt = 0;
assert(width == height && width <= 32);
if (width <= 4) {
uint32_t *buff_tmp = buff_d;
for (scan_line = 0; next_scan_pos < num_scan_pos; scan_line++) {
int primary_dim = scan_line;
int second_dim = 0;
while (primary_dim >= width) {
second_dim++;
primary_dim--;
}
while (primary_dim >= 0 && second_dim < width) {
buff_tmp[next_scan_pos] = primary_dim * width + second_dim ;
next_scan_pos++;
second_dim++;
primary_dim--;
}
}
}
if (width > 4 && width <= 32) {
uint32_t num_blk_side = width >> 2;
uint32_t num_blks = num_blk_side * num_blk_side;
uint32_t log2_width = g_to_bits[width];
for (blk = 0; blk < num_blks; blk++) {
uint32_t init_blk_pos = g_sig_last_scan_cg[log2_width][SCAN_DIAG][blk];
next_scan_pos = 0;
{
uint32_t offset_y = init_blk_pos / num_blk_side;
uint32_t offset_x = init_blk_pos - offset_y * num_blk_side;
uint32_t offset_d = 4 * (offset_x + offset_y * width);
uint32_t offset_scan = 16 * blk;
for (scan_line = 0; next_scan_pos < 16; scan_line++) {
int primary_dim = scan_line;
int second_dim = 0;
//TODO: optimize
while (primary_dim >= 4) {
second_dim++;
primary_dim--;
}
while (primary_dim >= 0 && second_dim < 4) {
buff_d[next_scan_pos + offset_scan] = primary_dim * width +
second_dim + offset_d;
next_scan_pos++;
second_dim++;
primary_dim--;
}
}
}
}
}
if (width > 2) {
uint32_t num_blk_side = width >> 2;
for (blk_y = 0; blk_y < num_blk_side; blk_y++) {
for (blk_x = 0; blk_x < num_blk_side; blk_x++) {
uint32_t offset = blk_y * 4 * width + blk_x * 4;
for (y = 0; y < 4; y++) {
for (x = 0; x < 4; x++) {
buff_h[cnt] = y * width + x + offset;
cnt ++;
}
}
}
}
cnt = 0;
for (blk_x = 0; blk_x < num_blk_side; blk_x++) {
for (blk_y = 0; blk_y < num_blk_side; blk_y++) {
uint32_t offset = blk_y * 4 * width + blk_x * 4;
for (x = 0; x < 4; x++) {
for (y = 0; y < 4; y++) {
buff_v[cnt] = y * width + x + offset;
cnt ++;
}
}
}
}
} else {
for (yy = 0; yy < height; yy++) {
for (xx = 0; xx < width; xx++) {
buff_h[cnt] = yy * width + xx;
cnt ++;
}
}
cnt = 0;
for (xx = 0; xx < width; xx++) {
for (yy = 0; yy < height; yy++) {
buff_v[cnt] = yy * width + xx;
cnt ++;
}
}
}
}
void init_tables(void)
{
int i;
int c = 0;
memset( g_convert_to_bit,-1, sizeof( g_convert_to_bit ) );
for (i = 4; i < LCU_WIDTH; i *= 2) {
g_convert_to_bit[i] = (int8_t)c;
c++;
}
g_convert_to_bit[i] = (int8_t)c;
c = 2;
for (i = 0; i < 5; i++) {
uint32_t *sls0, *sls1, *sls2;
sls0 = (uint32_t*)malloc(c*c*sizeof(uint32_t));
sls1 = (uint32_t*)malloc(c*c*sizeof(uint32_t));
sls2 = (uint32_t*)malloc(c*c*sizeof(uint32_t));
init_sig_last_scan(sls0, sls1, sls2, c, c);
g_sig_last_scan[0][i] = sls0;
g_sig_last_scan[1][i] = sls1;
g_sig_last_scan[2][i] = sls2;
c <<= 1;
}
}
void free_tables(void)
{
int i;
for (i = 0; i < 5; i++) {
FREE_POINTER(g_sig_last_scan[0][i]);
FREE_POINTER(g_sig_last_scan[1][i]);
FREE_POINTER(g_sig_last_scan[2][i]);
}
}
#else //USING_GENERATED_TABLES
//do nothing (precomputed)
void init_tables(void) {}
void free_tables(void) {}
#endif
#if MAKE_GENERATE_TABLES == 1 && DONT_USE_GENERATED_TABLES == 1
int main() {
int i, c, j, h;
printf("//This file is automatically generated by generate_tables, do not edit.\n\n");
printf("#ifndef TABLES_GENERATED_H_\n");
printf("#define TABLES_GENERATED_H_\n\n");
printf("#include \"global.h\"\n\n");
printf("#if LCU_WIDTH==%d && !DONT_USE_GENERATED_TABLES\n", LCU_WIDTH);
printf("#define USING_GENERATED_TABLES\n\n");
init_tables();
printf("static const int8_t g_convert_to_bit[LCU_WIDTH + 1] = {");
for (i=0; i < LCU_WIDTH + 1; ++i) {
if (i!=LCU_WIDTH) {
printf("%d, ", g_convert_to_bit[i]);
} else {
printf("%d", g_convert_to_bit[i]);
}
}
printf("};\n\n");
c = 2;
for (i = 0; i < 5; i++) {
for (h = 0; h < 3; h++) {
printf("static const uint32_t g_sig_last_scan_%d_%d[%d] = {",h,i,c*c);
for (j = 0; j < c*c; ++j) {
if (j!=c*c-1) {
printf("%u, ", g_sig_last_scan[h][i][j]);
} else {
printf("%u", g_sig_last_scan[h][i][j]);
}
}
printf("};\n");
}
printf("\n");
c <<= 1;
}
printf("static const uint32_t* const g_sig_last_scan[3][5] = {\n");
for (h = 0; h < 3; h++) {
printf(" {");
for (i = 0; i < 5; i++) {
if (i!=4) {
printf("g_sig_last_scan_%d_%d, ", h, i);
} else {
printf("g_sig_last_scan_%d_%d", h, i);
}
}
if (h<2) {
printf("},\n");
} else {
printf("}\n");
}
}
printf("};\n");
printf("#endif //LCU_WIDTH==%d\n", LCU_WIDTH);
printf("#endif //TABLES_GENERATED_H_\n");
return 0;
}
#endif

109
src/tables.h Normal file
View file

@ -0,0 +1,109 @@
#ifndef TABLES_H_
#define TABLES_H_
/*****************************************************************************
* This file is part of Kvazaar HEVC encoder.
*
* Copyright (C) 2013-2014 Tampere University of Technology and others (see
* COPYING file).
*
* Kvazaar is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*
* Kvazaar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
****************************************************************************/
#include "global.h"
#include "tables_generated.h"
void init_tables(void);
void free_tables(void);
//4 8 16 32 64 128
//0 1 2 3 4 5
static const uint8_t g_to_bits[129] =
{
0,
0,0,0,0,
0,0,0,1,
0,0,0,0,0,0,0,2,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5
};
#define TOBITS(len) g_to_bits[len]
/***
* List of indices for 4x4 coefficient groups within 8x8 transform block.
* First index: 0 = diagonal, 1 = vertical, 2 horizontal scan pattern.
* Second index: (log2 - 2) size of transform block. 4x4 .. 32x32
*/
static const uint32_t g_sig_last_scan_8x8[3][4] =
{ {0, 2, 1, 3},
{0, 1, 2, 3},
{0, 2, 1, 3}
};
/***
* List of indices for 4x4 coefficient groups within 16x16 transform block.
*/
static const uint32_t g_sig_last_scan_16x16[16] = {
0, 4, 1, 8,
5, 2, 12, 9,
6, 3, 13, 10,
7, 14, 11, 15
};
/***
* List of indices for 4x4 coefficient groups within 32x32 transform block.
*/
static const uint32_t g_sig_last_scan_32x32[64] = {
0, 8, 1, 16, 9, 2, 24, 17,
10, 3, 32, 25, 18, 11, 4, 40,
33, 26, 19, 12, 5, 48, 41, 34,
27, 20, 13, 6, 56, 49, 42, 35,
28, 21, 14, 7, 57, 50, 43, 36,
29, 22, 15, 58, 51, 44, 37, 30,
23, 59, 52, 45, 38, 31, 60, 53,
46, 39, 61, 54, 47, 62, 55, 63
};
/**
* List of pointers to coefficient group mappings.
* First index: (log2 - 2) of transform block size
* Second index: scan pattern 0 = diagonal, 1 = horizontal, 2 = vertical
*/
static const uint32_t *const g_sig_last_scan_cg[4][3] = {
{ g_sig_last_scan_8x8[0], g_sig_last_scan_8x8[1], g_sig_last_scan_8x8[2] }, // 4x4, only first element is used
{ g_sig_last_scan_8x8[0], g_sig_last_scan_8x8[1], g_sig_last_scan_8x8[2] },
{ g_sig_last_scan_16x16, 0, 0 },
{ g_sig_last_scan_32x32, 0, 0 }
};
enum COEFF_SCAN_TYPE
{
SCAN_DIAG = 0, // up-right diagonal scan
SCAN_HOR, // horizontal first scan
SCAN_VER // vertical first scan
};
#ifndef USING_GENERATED_TABLES
/**
* List of mappings for coefficients within a transform block.
* First index: scan pattern 0 = diagonal, 1 = horizontal, 2 = vertical
* Second index: (log2 - 1) size of transform block. 2x2 .. 32x32
*/
extern const uint32_t* g_sig_last_scan[3][5];
extern int8_t g_convert_to_bit[LCU_WIDTH + 1];
#endif
#endif //TABLES_H_

1
src/tables_generated.h Normal file
View file

@ -0,0 +1 @@
//This file will be autogenerated by generate_tables