From 9112cbb58cfb91d854b6ce44b6f16f25f499e75f Mon Sep 17 00:00:00 2001 From: Laurent Fasnacht Date: Wed, 16 Apr 2014 08:09:10 +0200 Subject: [PATCH] Generate and use static tables --- src/Makefile | 9 +- src/encoder.c | 169 +-------------------------- src/encoder.h | 78 +------------ src/tables.c | 252 +++++++++++++++++++++++++++++++++++++++++ src/tables.h | 109 ++++++++++++++++++ src/tables_generated.h | 1 + 6 files changed, 372 insertions(+), 246 deletions(-) create mode 100644 src/tables.c create mode 100644 src/tables.h create mode 100644 src/tables_generated.h diff --git a/src/Makefile b/src/Makefile index f4b75625..a7dd0d28 100644 --- a/src/Makefile +++ b/src/Makefile @@ -50,7 +50,7 @@ LDFLAGS += -lm LD = gcc YASM = yasm ASMOBJS = cpu.o -OBJS = interface_main.o encmain.o bitstream.o cabac.o config.o context.o encoder.o filter.o inter.o intra.o nal.o picture.o rdo.o sao.o search.o transform.o extras/getopt.o +OBJS = interface_main.o encmain.o bitstream.o cabac.o config.o context.o encoder.o filter.o inter.o intra.o nal.o picture.o rdo.o sao.o search.o tables.o transform.o extras/getopt.o PROG = ./kvazaar PROGS = $(PROG) @@ -71,6 +71,13 @@ cpu.o: x86/cpu.asm %.d: %.c Makefile $(CC) $(CCFLAGS) -MF"$@" -MG -MM -MP -MT"$@" -MT"$(<:.c=.o)" "$<" +generate_tables: tables.c global.h + $(CC) $(CCFLAGS) -DMAKE_GENERATE_TABLES=1 $< -o $@ + +tables_generated.h: generate_tables + ./generate_tables > $@ + + clean: rm -f $(OBJS) $(PROGS) $(ASMOBJS) $(DEPS) diff --git a/src/encoder.c b/src/encoder.c index d3d7ec95..1c237611 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -29,6 +29,7 @@ #include #include +#include "tables.h" #include "config.h" #include "cabac.h" #include "picture.h" @@ -44,8 +45,6 @@ double g_lambda_cost[55]; double g_cur_lambda_cost; -const uint32_t* g_sig_last_scan[3][5]; -int8_t g_convert_to_bit[LCU_WIDTH + 1]; int8_t g_bitdepth = 8; /* Local functions. 
*/ @@ -56,163 +55,6 @@ static void encode_sao(const encoder_control * const encoder, unsigned x_lcu, uint16_t y_lcu, sao_info *sao_luma, sao_info *sao_chroma); -/** - * Initialize g_sig_last_scan with scan positions for a transform block of - * size width x height. - */ -static void init_sig_last_scan(uint32_t *buff_d, uint32_t *buff_h, - uint32_t *buff_v, - int32_t width, int32_t height) -{ - uint32_t num_scan_pos = width * width; - uint32_t next_scan_pos = 0; - int32_t xx, yy, x, y; - uint32_t scan_line; - uint32_t blk_y, blk_x; - uint32_t blk; - uint32_t cnt = 0; - - assert(width == height && width <= 32); - - if (width <= 4) { - uint32_t *buff_tmp = buff_d; - - for (scan_line = 0; next_scan_pos < num_scan_pos; scan_line++) { - int primary_dim = scan_line; - int second_dim = 0; - - while (primary_dim >= width) { - second_dim++; - primary_dim--; - } - - while (primary_dim >= 0 && second_dim < width) { - buff_tmp[next_scan_pos] = primary_dim * width + second_dim ; - next_scan_pos++; - second_dim++; - primary_dim--; - } - } - } - - if (width > 4 && width <= 32) { - uint32_t num_blk_side = width >> 2; - uint32_t num_blks = num_blk_side * num_blk_side; - uint32_t log2_width = g_to_bits[width]; - - for (blk = 0; blk < num_blks; blk++) { - uint32_t init_blk_pos = g_sig_last_scan_cg[log2_width][SCAN_DIAG][blk]; - next_scan_pos = 0; - - { - uint32_t offset_y = init_blk_pos / num_blk_side; - uint32_t offset_x = init_blk_pos - offset_y * num_blk_side; - uint32_t offset_d = 4 * (offset_x + offset_y * width); - uint32_t offset_scan = 16 * blk; - - for (scan_line = 0; next_scan_pos < 16; scan_line++) { - int primary_dim = scan_line; - int second_dim = 0; - - //TODO: optimize - while (primary_dim >= 4) { - second_dim++; - primary_dim--; - } - - while (primary_dim >= 0 && second_dim < 4) { - buff_d[next_scan_pos + offset_scan] = primary_dim * width + - second_dim + offset_d; - next_scan_pos++; - second_dim++; - primary_dim--; - } - } - } - } - } - - if (width > 2) { - uint32_t 
num_blk_side = width >> 2; - - for (blk_y = 0; blk_y < num_blk_side; blk_y++) { - for (blk_x = 0; blk_x < num_blk_side; blk_x++) { - uint32_t offset = blk_y * 4 * width + blk_x * 4; - - for (y = 0; y < 4; y++) { - for (x = 0; x < 4; x++) { - buff_h[cnt] = y * width + x + offset; - cnt ++; - } - } - } - } - - cnt = 0; - - for (blk_x = 0; blk_x < num_blk_side; blk_x++) { - for (blk_y = 0; blk_y < num_blk_side; blk_y++) { - uint32_t offset = blk_y * 4 * width + blk_x * 4; - - for (x = 0; x < 4; x++) { - for (y = 0; y < 4; y++) { - buff_v[cnt] = y * width + x + offset; - cnt ++; - } - } - } - } - } else { - for (yy = 0; yy < height; yy++) { - for (xx = 0; xx < width; xx++) { - buff_h[cnt] = yy * width + xx; - cnt ++; - } - } - - cnt = 0; - - for (xx = 0; xx < width; xx++) { - for (yy = 0; yy < height; yy++) { - buff_v[cnt] = yy * width + xx; - cnt ++; - } - } - } -} - - -void init_tables(void) -{ - int i; - int c = 0; - - memset( g_convert_to_bit,-1, sizeof( g_convert_to_bit ) ); - - for (i = 4; i < LCU_WIDTH; i *= 2) { - g_convert_to_bit[i] = (int8_t)c; - c++; - } - - g_convert_to_bit[i] = (int8_t)c; - - c = 2; - for (i = 0; i < 5; i++) { - uint32_t *sls0, *sls1, *sls2; - sls0 = (uint32_t*)malloc(c*c*sizeof(uint32_t)); - sls1 = (uint32_t*)malloc(c*c*sizeof(uint32_t)); - sls2 = (uint32_t*)malloc(c*c*sizeof(uint32_t)); - - init_sig_last_scan(sls0, sls1, sls2, c, c); - - g_sig_last_scan[0][i] = sls0; - g_sig_last_scan[1][i] = sls1; - g_sig_last_scan[2][i] = sls2; - - c <<= 1; - } -} - /*! 
\brief Initializes lambda-value for current QP @@ -243,15 +85,6 @@ void init_lambda(const encoder_control * const encoder) g_cur_lambda_cost = lambda; } -void free_tables(void) -{ - int i; - for (i = 0; i < 5; i++) { - FREE_POINTER(g_sig_last_scan[0][i]); - FREE_POINTER(g_sig_last_scan[1][i]); - FREE_POINTER(g_sig_last_scan[2][i]); - } -} encoder_control *init_encoder_control(config *cfg) { encoder_control *enc_c = NULL; diff --git a/src/encoder.h b/src/encoder.h index 83ab31a9..c219677d 100644 --- a/src/encoder.h +++ b/src/encoder.h @@ -30,6 +30,7 @@ #include "bitstream.h" #include "cabac.h" #include "config.h" +#include "tables.h" typedef struct { int32_t scaling_list_dc [SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM]; @@ -112,9 +113,7 @@ typedef struct scaling_list scaling_list; } encoder_control; -void init_tables(void); void init_lambda(const encoder_control * const encoder); -void free_tables(void); encoder_control *init_encoder_control(config *cfg); void init_encoder_input(encoder_input *input, FILE* inputfile, int32_t width, int32_t height); @@ -144,7 +143,6 @@ void encode_block_residual(const encoder_control * const encoder, extern double g_lambda_cost[55]; extern double g_cur_lambda_cost; -extern int8_t g_convert_to_bit[LCU_WIDTH + 1]; extern int8_t g_bitdepth; static const uint8_t g_group_idx[32] = { @@ -156,86 +154,12 @@ static const uint8_t g_group_idx[32] = { static const uint8_t g_min_in_group[10] = { 0, 1, 2, 3, 4, 6, 8, 12, 16, 24 }; -/** - * List of mappings for coefficients within a transform block. - * First index: scan pattern 0 = diagonal, 1 = horizontal, 2 = vertical - * Second index: (log2 - 1) size of transform block. 2x2 .. 32x32 - */ -extern const uint32_t* g_sig_last_scan[3][5]; -/*** - * List of indices for 4x4 coefficient groups within 8x8 transform block. - * First index: 0 = diagonal, 1 = vertical, 2 horizontal scan pattern. - * Second index: (log2 - 2) size of transform block. 4x4 .. 
32x32 - */ -static const uint32_t g_sig_last_scan_8x8[3][4] = -{ {0, 2, 1, 3}, - {0, 1, 2, 3}, - {0, 2, 1, 3} -}; - -/*** - * List of indices for 4x4 coefficient groups within 16x16 transform block. - */ -static const uint32_t g_sig_last_scan_16x16[16] = { - 0, 4, 1, 8, - 5, 2, 12, 9, - 6, 3, 13, 10, - 7, 14, 11, 15 -}; - -/*** - * List of indices for 4x4 coefficient groups within 32x32 transform block. - */ -static const uint32_t g_sig_last_scan_32x32[64] = { - 0, 8, 1, 16, 9, 2, 24, 17, - 10, 3, 32, 25, 18, 11, 4, 40, - 33, 26, 19, 12, 5, 48, 41, 34, - 27, 20, 13, 6, 56, 49, 42, 35, - 28, 21, 14, 7, 57, 50, 43, 36, - 29, 22, 15, 58, 51, 44, 37, 30, - 23, 59, 52, 45, 38, 31, 60, 53, - 46, 39, 61, 54, 47, 62, 55, 63 -}; - -/** - * List of pointers to coefficient group mappings. - * First index: (log2 - 2) of transform block size - * Second index: scan pattern 0 = diagonal, 1 = horizontal, 2 = vertical - */ -static const uint32_t *const g_sig_last_scan_cg[4][3] = { - { g_sig_last_scan_8x8[0], g_sig_last_scan_8x8[1], g_sig_last_scan_8x8[2] }, // 4x4, only first element is used - { g_sig_last_scan_8x8[0], g_sig_last_scan_8x8[1], g_sig_last_scan_8x8[2] }, - { g_sig_last_scan_16x16, 0, 0 }, - { g_sig_last_scan_32x32, 0, 0 } -}; - - -//4 8 16 32 64 128 -//0 1 2 3 4 5 -static const uint8_t g_to_bits[129] = -{ - 0, - 0,0,0,0, - 0,0,0,1, - 0,0,0,0,0,0,0,2, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5 -}; -#define TOBITS(len) g_to_bits[len] #define C1FLAG_NUMBER 8 // maximum number of largerThan1 flag coded in one chunk #define C2FLAG_NUMBER 1 // maximum number of largerThan2 flag coded in one chunk -enum COEFF_SCAN_TYPE -{ - SCAN_DIAG = 0, // up-right diagonal scan - SCAN_HOR, // horizontal first scan - SCAN_VER // vertical first scan -}; #endif diff --git a/src/tables.c b/src/tables.c 
new file mode 100644 index 00000000..1bf303bb --- /dev/null +++ b/src/tables.c @@ -0,0 +1,252 @@ +#if MAKE_GENERATE_TABLES +#define DONT_USE_GENERATED_TABLES 1 +#endif + +#include +#include +#include + +#include "global.h" +#include "tables.h" + +#ifndef USING_GENERATED_TABLES +const uint32_t* g_sig_last_scan[3][5]; +int8_t g_convert_to_bit[LCU_WIDTH + 1]; + +/** + * Initialize g_sig_last_scan with scan positions for a transform block of + * size width x height. + */ +static void init_sig_last_scan(uint32_t *buff_d, uint32_t *buff_h, + uint32_t *buff_v, + int32_t width, int32_t height) +{ + uint32_t num_scan_pos = width * width; + uint32_t next_scan_pos = 0; + int32_t xx, yy, x, y; + uint32_t scan_line; + uint32_t blk_y, blk_x; + uint32_t blk; + uint32_t cnt = 0; + + assert(width == height && width <= 32); + + if (width <= 4) { + uint32_t *buff_tmp = buff_d; + + for (scan_line = 0; next_scan_pos < num_scan_pos; scan_line++) { + int primary_dim = scan_line; + int second_dim = 0; + + while (primary_dim >= width) { + second_dim++; + primary_dim--; + } + + while (primary_dim >= 0 && second_dim < width) { + buff_tmp[next_scan_pos] = primary_dim * width + second_dim ; + next_scan_pos++; + second_dim++; + primary_dim--; + } + } + } + + if (width > 4 && width <= 32) { + uint32_t num_blk_side = width >> 2; + uint32_t num_blks = num_blk_side * num_blk_side; + uint32_t log2_width = g_to_bits[width]; + + for (blk = 0; blk < num_blks; blk++) { + uint32_t init_blk_pos = g_sig_last_scan_cg[log2_width][SCAN_DIAG][blk]; + next_scan_pos = 0; + + { + uint32_t offset_y = init_blk_pos / num_blk_side; + uint32_t offset_x = init_blk_pos - offset_y * num_blk_side; + uint32_t offset_d = 4 * (offset_x + offset_y * width); + uint32_t offset_scan = 16 * blk; + + for (scan_line = 0; next_scan_pos < 16; scan_line++) { + int primary_dim = scan_line; + int second_dim = 0; + + //TODO: optimize + while (primary_dim >= 4) { + second_dim++; + primary_dim--; + } + + while (primary_dim >= 0 && 
second_dim < 4) { + buff_d[next_scan_pos + offset_scan] = primary_dim * width + + second_dim + offset_d; + next_scan_pos++; + second_dim++; + primary_dim--; + } + } + } + } + } + + if (width > 2) { + uint32_t num_blk_side = width >> 2; + + for (blk_y = 0; blk_y < num_blk_side; blk_y++) { + for (blk_x = 0; blk_x < num_blk_side; blk_x++) { + uint32_t offset = blk_y * 4 * width + blk_x * 4; + + for (y = 0; y < 4; y++) { + for (x = 0; x < 4; x++) { + buff_h[cnt] = y * width + x + offset; + cnt ++; + } + } + } + } + + cnt = 0; + + for (blk_x = 0; blk_x < num_blk_side; blk_x++) { + for (blk_y = 0; blk_y < num_blk_side; blk_y++) { + uint32_t offset = blk_y * 4 * width + blk_x * 4; + + for (x = 0; x < 4; x++) { + for (y = 0; y < 4; y++) { + buff_v[cnt] = y * width + x + offset; + cnt ++; + } + } + } + } + } else { + for (yy = 0; yy < height; yy++) { + for (xx = 0; xx < width; xx++) { + buff_h[cnt] = yy * width + xx; + cnt ++; + } + } + + cnt = 0; + + for (xx = 0; xx < width; xx++) { + for (yy = 0; yy < height; yy++) { + buff_v[cnt] = yy * width + xx; + cnt ++; + } + } + } +} + + +void init_tables(void) +{ + int i; + int c = 0; + + memset( g_convert_to_bit,-1, sizeof( g_convert_to_bit ) ); + + for (i = 4; i < LCU_WIDTH; i *= 2) { + g_convert_to_bit[i] = (int8_t)c; + c++; + } + + g_convert_to_bit[i] = (int8_t)c; + + c = 2; + for (i = 0; i < 5; i++) { + uint32_t *sls0, *sls1, *sls2; + sls0 = (uint32_t*)malloc(c*c*sizeof(uint32_t)); + sls1 = (uint32_t*)malloc(c*c*sizeof(uint32_t)); + sls2 = (uint32_t*)malloc(c*c*sizeof(uint32_t)); + + init_sig_last_scan(sls0, sls1, sls2, c, c); + + g_sig_last_scan[0][i] = sls0; + g_sig_last_scan[1][i] = sls1; + g_sig_last_scan[2][i] = sls2; + + c <<= 1; + } +} + +void free_tables(void) +{ + int i; + for (i = 0; i < 5; i++) { + FREE_POINTER(g_sig_last_scan[0][i]); + FREE_POINTER(g_sig_last_scan[1][i]); + FREE_POINTER(g_sig_last_scan[2][i]); + } +} + +#else //USING_GENERATED_TABLES +//do nothing (precomputed) +void init_tables(void) {} +void 
free_tables(void) {} +#endif + + +#if MAKE_GENERATE_TABLES == 1 && DONT_USE_GENERATED_TABLES == 1 +int main() { + int i, c, j, h; + printf("//This file is automatically generated by generate_tables, do not edit.\n\n"); + printf("#ifndef TABLES_GENERATED_H_\n"); + printf("#define TABLES_GENERATED_H_\n\n"); + printf("#include \"global.h\"\n\n"); + printf("#if LCU_WIDTH==%d && !DONT_USE_GENERATED_TABLES\n", LCU_WIDTH); + printf("#define USING_GENERATED_TABLES\n\n"); + + init_tables(); + + printf("static const int8_t g_convert_to_bit[LCU_WIDTH + 1] = {"); + for (i=0; i < LCU_WIDTH + 1; ++i) { + if (i!=LCU_WIDTH) { + printf("%d, ", g_convert_to_bit[i]); + } else { + printf("%d", g_convert_to_bit[i]); + } + } + printf("};\n\n"); + + c = 2; + for (i = 0; i < 5; i++) { + for (h = 0; h < 3; h++) { + printf("static const uint32_t g_sig_last_scan_%d_%d[%d] = {",h,i,c*c); + + for (j = 0; j < c*c; ++j) { + if (j!=c*c-1) { + printf("%u, ", g_sig_last_scan[h][i][j]); + } else { + printf("%u", g_sig_last_scan[h][i][j]); + } + } + printf("};\n"); + } + printf("\n"); + c <<= 1; + } + + printf("static const uint32_t* const g_sig_last_scan[3][5] = {\n"); + for (h = 0; h < 3; h++) { + printf(" {"); + for (i = 0; i < 5; i++) { + if (i!=4) { + printf("g_sig_last_scan_%d_%d, ", h, i); + } else { + printf("g_sig_last_scan_%d_%d", h, i); + } + } + if (h<2) { + printf("},\n"); + } else { + printf("}\n"); + } + } + printf("};\n"); + + printf("#endif //LCU_WIDTH==%d\n", LCU_WIDTH); + printf("#endif //TABLES_GENERATED_H_\n"); + return 0; +} +#endif diff --git a/src/tables.h b/src/tables.h new file mode 100644 index 00000000..29e9023e --- /dev/null +++ b/src/tables.h @@ -0,0 +1,109 @@ +#ifndef TABLES_H_ +#define TABLES_H_ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2014 Tampere University of Technology and others (see + * COPYING file). 
+ * + * Kvazaar is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Kvazaar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +#include "global.h" +#include "tables_generated.h" + +void init_tables(void); +void free_tables(void); + +//4 8 16 32 64 128 +//0 1 2 3 4 5 +static const uint8_t g_to_bits[129] = +{ + 0, + 0,0,0,0, + 0,0,0,1, + 0,0,0,0,0,0,0,2, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5 +}; +#define TOBITS(len) g_to_bits[len] + +/*** + * List of indices for 4x4 coefficient groups within 8x8 transform block. + * First index: scan pattern 0 = diagonal, 1 = horizontal, 2 = vertical + * Second index: position in scan order (0 .. 3) + */ +static const uint32_t g_sig_last_scan_8x8[3][4] = +{ {0, 2, 1, 3}, + {0, 1, 2, 3}, + {0, 2, 1, 3} +}; + +/*** + * List of indices for 4x4 coefficient groups within 16x16 transform block. + */ +static const uint32_t g_sig_last_scan_16x16[16] = { + 0, 4, 1, 8, + 5, 2, 12, 9, + 6, 3, 13, 10, + 7, 14, 11, 15 +}; + +/*** + * List of indices for 4x4 coefficient groups within 32x32 transform block.
+ */ +static const uint32_t g_sig_last_scan_32x32[64] = { + 0, 8, 1, 16, 9, 2, 24, 17, + 10, 3, 32, 25, 18, 11, 4, 40, + 33, 26, 19, 12, 5, 48, 41, 34, + 27, 20, 13, 6, 56, 49, 42, 35, + 28, 21, 14, 7, 57, 50, 43, 36, + 29, 22, 15, 58, 51, 44, 37, 30, + 23, 59, 52, 45, 38, 31, 60, 53, + 46, 39, 61, 54, 47, 62, 55, 63 +}; + +/** + * List of pointers to coefficient group mappings. + * First index: (log2 - 2) of transform block size + * Second index: scan pattern 0 = diagonal, 1 = horizontal, 2 = vertical + */ +static const uint32_t *const g_sig_last_scan_cg[4][3] = { + { g_sig_last_scan_8x8[0], g_sig_last_scan_8x8[1], g_sig_last_scan_8x8[2] }, // 4x4, only first element is used + { g_sig_last_scan_8x8[0], g_sig_last_scan_8x8[1], g_sig_last_scan_8x8[2] }, + { g_sig_last_scan_16x16, 0, 0 }, + { g_sig_last_scan_32x32, 0, 0 } +}; + + +enum COEFF_SCAN_TYPE +{ + SCAN_DIAG = 0, // up-right diagonal scan + SCAN_HOR, // horizontal first scan + SCAN_VER // vertical first scan +}; + + +#ifndef USING_GENERATED_TABLES +/** + * List of mappings for coefficients within a transform block. + * First index: scan pattern 0 = diagonal, 1 = horizontal, 2 = vertical + * Second index: (log2 - 1) size of transform block. 2x2 .. 32x32 + */ +extern const uint32_t* g_sig_last_scan[3][5]; + +extern int8_t g_convert_to_bit[LCU_WIDTH + 1]; +#endif +#endif //TABLES_H_ diff --git a/src/tables_generated.h b/src/tables_generated.h new file mode 100644 index 00000000..dc916030 --- /dev/null +++ b/src/tables_generated.h @@ -0,0 +1 @@ +//This file will be autogenerated by generate_tables