mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-23 18:14:06 +00:00
Merge branch 'improve-intra-search'
This commit is contained in:
commit
153afc6739
19
README.md
19
README.md
|
@ -145,11 +145,20 @@ Video structure:
|
|||
- frametile: Constrain within the tile.
|
||||
- frametilemargin: Constrain even more.
|
||||
--roi <filename> : Use a delta QP map for region of interest.
|
||||
Reads an array of delta QP values from a text
|
||||
file. The file format is: width and height of
|
||||
the QP delta map followed by width*height delta
|
||||
QP values in raster order. The map can be of any
|
||||
size and will be scaled to the video size.
|
||||
Reads an array of delta QP values from a file.
|
||||
Text and binary files are supported and detected
|
||||
from the file extension (.txt/.bin). If a known
|
||||
extension is not found, the file is treated as
|
||||
a text file. The file can include one or many
|
||||
ROI frames each in the following format:
|
||||
width and height of the QP delta map followed
|
||||
by width * height delta QP values in raster
|
||||
order. In binary format, width and height are
|
||||
32-bit integers whereas the delta QP values are
|
||||
signed 8-bit values. The map can be of any size
|
||||
and will be scaled to the video size. The file
|
||||
reading will loop if end of the file is reached.
|
||||
See roi.txt in the examples folder.
|
||||
--set-qp-in-cu : Set QP at CU level keeping pic_init_qp_minus26.
|
||||
in PPS and slice_qp_delta in slice header zero.
|
||||
--(no-)erp-aqp : Use adaptive QP for 360 degree video with
|
||||
|
|
19
doc/uvg266.1
19
doc/uvg266.1
|
@ -164,11 +164,20 @@ Constrain movement vectors. [none]
|
|||
.TP
|
||||
\fB\-\-roi <filename>
|
||||
Use a delta QP map for region of interest.
|
||||
Reads an array of delta QP values from a text
|
||||
file. The file format is: width and height of
|
||||
the QP delta map followed by width*height delta
|
||||
QP values in raster order. The map can be of any
|
||||
size and will be scaled to the video size.
|
||||
Reads an array of delta QP values from a file.
|
||||
Text and binary files are supported and detected
|
||||
from the file extension (.txt/.bin). If a known
|
||||
extension is not found, the file is treated as
|
||||
a text file. The file can include one or many
|
||||
ROI frames each in the following format:
|
||||
width and height of the QP delta map followed
|
||||
by width * height delta QP values in raster
|
||||
order. In binary format, width and height are
|
||||
32\-bit integers whereas the delta QP values are
|
||||
signed 8\-bit values. The map can be of any size
|
||||
and will be scaled to the video size. The file
|
||||
reading will loop if end of the file is reached.
|
||||
See roi.txt in the examples folder.
|
||||
.TP
|
||||
\fB\-\-set\-qp\-in\-cu
|
||||
Set QP at CU level keeping pic_init_qp_minus26.
|
||||
|
|
|
@ -1236,19 +1236,19 @@ static void code_alf_ctu_filter_index(encoder_state_t * const state,
|
|||
assert(filter_set_idx < num_available_filt_sets); //"temporal non-latest set"
|
||||
if (num_aps > 1)
|
||||
{
|
||||
uvg_cabac_encode_trunc_bin(cabac, filter_set_idx - ALF_NUM_FIXED_FILTER_SETS, num_available_filt_sets - ALF_NUM_FIXED_FILTER_SETS);
|
||||
uvg_cabac_encode_trunc_bin(cabac, filter_set_idx - ALF_NUM_FIXED_FILTER_SETS, num_available_filt_sets - ALF_NUM_FIXED_FILTER_SETS, NULL);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(filter_set_idx < ALF_NUM_FIXED_FILTER_SETS); //"fixed set larger than temporal"
|
||||
uvg_cabac_encode_trunc_bin(cabac, filter_set_idx, ALF_NUM_FIXED_FILTER_SETS);
|
||||
uvg_cabac_encode_trunc_bin(cabac, filter_set_idx, ALF_NUM_FIXED_FILTER_SETS, NULL);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(filter_set_idx < ALF_NUM_FIXED_FILTER_SETS); //Fixed set numavail < num_fixed
|
||||
uvg_cabac_encode_trunc_bin(cabac, filter_set_idx, ALF_NUM_FIXED_FILTER_SETS);
|
||||
uvg_cabac_encode_trunc_bin(cabac, filter_set_idx, ALF_NUM_FIXED_FILTER_SETS, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -33,6 +33,7 @@
|
|||
#include "bitstream.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
|
|
22
src/cabac.c
22
src/cabac.c
|
@ -70,6 +70,7 @@ void uvg_cabac_start(cabac_data_t * const data)
|
|||
data->num_buffered_bytes = 0;
|
||||
data->buffered_byte = 0xff;
|
||||
data->only_count = 0; // By default, write bits out
|
||||
data->update = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -199,7 +200,7 @@ void uvg_cabac_encode_bin_trm(cabac_data_t * const data, const uint8_t bin_value
|
|||
/**
|
||||
* \brief encode truncated binary code
|
||||
*/
|
||||
void uvg_cabac_encode_trunc_bin(cabac_data_t * const data, const uint32_t bin_value, const uint32_t max_value) {
|
||||
void uvg_cabac_encode_trunc_bin(cabac_data_t * const data, const uint32_t bin_value, const uint32_t max_value, double* bits_out) {
|
||||
int thresh;
|
||||
int symbol = bin_value;
|
||||
if (max_value > 256) {
|
||||
|
@ -219,9 +220,11 @@ void uvg_cabac_encode_trunc_bin(cabac_data_t * const data, const uint32_t bin_va
|
|||
int b = max_value - val;
|
||||
if (symbol < val - b) {
|
||||
CABAC_BINS_EP(data, symbol, thresh, "TruncSymbols");
|
||||
if (bits_out) *bits_out += thresh;
|
||||
} else {
|
||||
symbol += val - b;
|
||||
CABAC_BINS_EP(data, symbol, thresh + 1, "TruncSymbols");
|
||||
if (bits_out) *bits_out += thresh + 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -349,7 +352,12 @@ void uvg_cabac_write_coeff_remain(cabac_data_t * const cabac, const uint32_t rem
|
|||
/**
|
||||
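* \brief Code symbol as a unary bin string capped at max_symbol: the first bin uses ctx, later bins use ctx[offset], and a terminating 0 bin is coded only when symbol < max_symbol.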
|
||||
*/
|
||||
void uvg_cabac_write_unary_max_symbol(cabac_data_t * const data, cabac_ctx_t * const ctx, uint32_t symbol, const int32_t offset, const uint32_t max_symbol)
|
||||
void uvg_cabac_write_unary_max_symbol(cabac_data_t * const data,
|
||||
cabac_ctx_t * const ctx,
|
||||
uint32_t symbol,
|
||||
const int32_t offset,
|
||||
const uint32_t max_symbol,
|
||||
double* bits_out)
|
||||
{
|
||||
int8_t code_last = max_symbol > symbol;
|
||||
|
||||
|
@ -357,18 +365,17 @@ void uvg_cabac_write_unary_max_symbol(cabac_data_t * const data, cabac_ctx_t * c
|
|||
|
||||
if (!max_symbol) return;
|
||||
|
||||
data->cur_ctx = ctx;
|
||||
CABAC_BIN(data, symbol, "ums");
|
||||
CABAC_FBITS_UPDATE(data, ctx, symbol, *bits_out, "ums");
|
||||
|
||||
if (!symbol) return;
|
||||
|
||||
data->cur_ctx = &ctx[offset];
|
||||
|
||||
while (--symbol) {
|
||||
CABAC_BIN(data, 1, "ums");
|
||||
CABAC_FBITS_UPDATE(data, &ctx[offset], 1, *bits_out, "ums");
|
||||
}
|
||||
if (code_last) {
|
||||
CABAC_BIN(data, 0, "ums");
|
||||
CABAC_FBITS_UPDATE(data, &ctx[offset], 0,*bits_out, "ums");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -405,7 +412,7 @@ void uvg_cabac_write_unary_max_symbol_ep(cabac_data_t * const data, unsigned int
|
|||
/**
|
||||
* \brief
|
||||
*/
|
||||
void uvg_cabac_write_ep_ex_golomb(encoder_state_t * const state,
|
||||
uint32_t uvg_cabac_write_ep_ex_golomb(encoder_state_t * const state,
|
||||
cabac_data_t * const data,
|
||||
uint32_t symbol,
|
||||
uint32_t count)
|
||||
|
@ -426,4 +433,5 @@ void uvg_cabac_write_ep_ex_golomb(encoder_state_t * const state,
|
|||
num_bins += count;
|
||||
|
||||
CABAC_BINS_EP(data, bins, num_bins, "ep_ex_golomb");
|
||||
return num_bins;
|
||||
}
|
||||
|
|
39
src/cabac.h
39
src/cabac.h
|
@ -59,7 +59,8 @@ typedef struct
|
|||
uint32_t buffered_byte;
|
||||
int32_t num_buffered_bytes;
|
||||
int32_t bits_left;
|
||||
int8_t only_count;
|
||||
int8_t only_count : 4;
|
||||
int8_t update : 4;
|
||||
bitstream_t *stream;
|
||||
|
||||
// CONTEXTS
|
||||
|
@ -133,18 +134,18 @@ extern const uint8_t uvg_g_auc_renorm_table[32];
|
|||
void uvg_cabac_start(cabac_data_t *data);
|
||||
void uvg_cabac_encode_bin(cabac_data_t *data, uint32_t bin_value);
|
||||
void uvg_cabac_encode_bin_ep(cabac_data_t *data, uint32_t bin_value);
|
||||
void uvg_cabac_encode_trunc_bin(cabac_data_t *data, uint32_t bin_value, uint32_t max_value);
|
||||
void uvg_cabac_encode_trunc_bin(cabac_data_t *data, uint32_t bin_value, uint32_t max_value, double* bits_out);
|
||||
void uvg_cabac_encode_bins_ep(cabac_data_t *data, uint32_t bin_values, int num_bins);
|
||||
void uvg_cabac_encode_bin_trm(cabac_data_t *data, uint8_t bin_value);
|
||||
void uvg_cabac_write(cabac_data_t *data);
|
||||
void uvg_cabac_finish(cabac_data_t *data);
|
||||
void uvg_cabac_write_coeff_remain(cabac_data_t *cabac, uint32_t symbol,
|
||||
uint32_t r_param, const unsigned int cutoff);
|
||||
void uvg_cabac_write_ep_ex_golomb(struct encoder_state_t * const state, cabac_data_t *data,
|
||||
uint32_t uvg_cabac_write_ep_ex_golomb(struct encoder_state_t * const state, cabac_data_t *data,
|
||||
uint32_t symbol, uint32_t count);
|
||||
void uvg_cabac_write_unary_max_symbol(cabac_data_t *data, cabac_ctx_t *ctx,
|
||||
uint32_t symbol, int32_t offset,
|
||||
uint32_t max_symbol);
|
||||
uint32_t symbol, int32_t offset,
|
||||
uint32_t max_symbol, double* bits_out);
|
||||
void uvg_cabac_write_unary_max_symbol_ep(cabac_data_t *data, unsigned int symbol, unsigned int max_symbol);
|
||||
|
||||
#define CTX_PROB_BITS 15
|
||||
|
@ -153,6 +154,18 @@ void uvg_cabac_write_unary_max_symbol_ep(cabac_data_t *data, unsigned int symbol
|
|||
#define CTX_MASK_0 (~(~0u << CTX_PROB_BITS_0) << (CTX_PROB_BITS - CTX_PROB_BITS_0))
|
||||
#define CTX_MASK_1 (~(~0u << CTX_PROB_BITS_1) << (CTX_PROB_BITS - CTX_PROB_BITS_1))
|
||||
|
||||
// Floating point fractional bits, derived from kvz_entropy_bits
|
||||
extern const float uvg_f_entropy_bits[512];
|
||||
#define CTX_ENTROPY_FBITS(ctx, val) uvg_f_entropy_bits[(CTX_STATE(ctx)<<1) ^ (val)]
|
||||
|
||||
/*
 * CABAC_FBITS_UPDATE(cabac, ctx, val, bits, name)
 *
 * Combined "estimate cost" / "encode bin" helper for one context-coded bin.
 *  - If (cabac)->only_count is nonzero, the fractional bit cost looked up in
 *    uvg_f_entropy_bits for the current state of ctx and the bin value val
 *    is added to bits. The bits expression is expanded only inside this
 *    branch, so it is not evaluated when only_count is zero.
 *  - If (cabac)->update is nonzero, ctx is stored in (cabac)->cur_ctx and the
 *    bin is passed to CABAC_BIN.
 * The two conditions are independent: both, either, or neither may apply.
 *
 * cabac, ctx and val are expanded more than once, so arguments must be free
 * of side effects. ctx is assigned to cur_ctx without parentheses; pass a
 * simple pointer expression.
 * Wrapped in do { } while(0), so an invocation needs a trailing semicolon.
 */
#define CABAC_FBITS_UPDATE(cabac, ctx, val, bits, name) do { \
|
||||
if((cabac)->only_count) (bits) += uvg_f_entropy_bits[(CTX_STATE(ctx)<<1) ^ (val)]; \
|
||||
if((cabac)->update) {\
|
||||
(cabac)->cur_ctx = ctx;\
|
||||
CABAC_BIN((cabac), (val), (name));\
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
// Macros
|
||||
#define CTX_GET_STATE(ctx) ( (ctx)->state[0]+(ctx)->state[1] )
|
||||
#define CTX_STATE(ctx) ( CTX_GET_STATE(ctx)>>8 )
|
||||
|
@ -185,23 +198,23 @@ extern uint32_t uvg_cabac_bins_count;
|
|||
extern bool uvg_cabac_bins_verbose;
|
||||
#define CABAC_BIN(data, value, name) { \
|
||||
uint32_t prev_state = CTX_STATE(data->cur_ctx); \
|
||||
if(uvg_cabac_bins_verbose && !data->only_count) {printf("%d %d [%d:%d] %s = %u, range = %u LPS = %u state = %u -> ", \
|
||||
uvg_cabac_bins_count++, (data)->range, (data)->range-CTX_LPS(data->cur_ctx,(data)->range), CTX_LPS(data->cur_ctx,(data)->range), (name), (uint32_t)(value), (data)->range, CTX_LPS(data->cur_ctx,(data)->range), prev_state); }\
|
||||
if(uvg_cabac_bins_verbose && !(data)->only_count) {printf("%d %d [%d:%d] %s = %u, range = %u LPS = %u state = %u -> ", \
|
||||
uvg_cabac_bins_count++, (data)->range, (data)->range-CTX_LPS((data)->cur_ctx,(data)->range), CTX_LPS((data)->cur_ctx,(data)->range), (name), (uint32_t)(value), (data)->range, CTX_LPS((data)->cur_ctx,(data)->range), prev_state); }\
|
||||
uvg_cabac_encode_bin((data), (value)); \
|
||||
if(uvg_cabac_bins_verbose && !data->only_count) printf("%u\n", CTX_STATE(data->cur_ctx)); }
|
||||
if(uvg_cabac_bins_verbose && !(data)->only_count) printf("%u\n", CTX_STATE((data)->cur_ctx)); }
|
||||
|
||||
|
||||
#define CABAC_BINS_EP(data, value, bins, name) { \
|
||||
uint32_t prev_state = CTX_STATE(data->cur_ctx); \
|
||||
uint32_t prev_state = (!(data)->only_count) ? CTX_STATE(data->cur_ctx) : 0; \
|
||||
uvg_cabac_encode_bins_ep((data), (value), (bins)); \
|
||||
if(uvg_cabac_bins_verbose && !data->only_count) { printf("%d %s = %u(%u bins), state = %u -> %u\n", \
|
||||
uvg_cabac_bins_count, (name), (uint32_t)(value), (bins), prev_state, CTX_STATE(data->cur_ctx)); uvg_cabac_bins_count+=bins;}}
|
||||
uvg_cabac_bins_count, (name), (uint32_t)(value), (bins), prev_state, CTX_STATE((data)->cur_ctx)); uvg_cabac_bins_count+=(bins);}}
|
||||
|
||||
#define CABAC_BIN_EP(data, value, name) { \
|
||||
uint32_t prev_state = CTX_STATE(data->cur_ctx); \
|
||||
uint32_t prev_state = (!(data)->only_count) ? CTX_STATE((data)->cur_ctx) : 0;; \
|
||||
uvg_cabac_encode_bin_ep((data), (value)); \
|
||||
if(uvg_cabac_bins_verbose && !data->only_count) {printf("%d %s = %u, state = %u -> %u\n", \
|
||||
uvg_cabac_bins_count++, (name), (uint32_t)(value), prev_state, CTX_STATE(data->cur_ctx)); }}
|
||||
if(uvg_cabac_bins_verbose && !(data)->only_count) {printf("%d %s = %u, state = %u -> %u\n", \
|
||||
uvg_cabac_bins_count++, (name), (uint32_t)(value), prev_state, CTX_STATE((data)->cur_ctx)); }}
|
||||
#else
|
||||
#define CABAC_BIN(data, value, name) \
|
||||
uvg_cabac_encode_bin((data), (value));
|
||||
|
|
84
src/cfg.c
84
src/cfg.c
|
@ -147,9 +147,9 @@ int uvg_config_init(uvg_config *cfg)
|
|||
cfg->gop_lp_definition.t = 1;
|
||||
cfg->open_gop = true;
|
||||
|
||||
cfg->roi.width = 0;
|
||||
cfg->roi.height = 0;
|
||||
cfg->roi.dqps = NULL;
|
||||
cfg->roi.file_path = NULL;
|
||||
cfg->roi.format = UVG_ROI_TXT;
|
||||
|
||||
cfg->set_qp_in_cu = false;
|
||||
|
||||
cfg->erp_aqp = false;
|
||||
|
@ -212,6 +212,9 @@ int uvg_config_init(uvg_config *cfg)
|
|||
|
||||
cfg->cclm = 0;
|
||||
|
||||
|
||||
cfg->combine_intra_cus = 1;
|
||||
cfg->force_inter = 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -219,11 +222,11 @@ int uvg_config_destroy(uvg_config *cfg)
|
|||
{
|
||||
if (cfg) {
|
||||
FREE_POINTER(cfg->cqmfile);
|
||||
FREE_POINTER(cfg->roi.file_path);
|
||||
FREE_POINTER(cfg->fast_coeff_table_fn);
|
||||
FREE_POINTER(cfg->tiles_width_split);
|
||||
FREE_POINTER(cfg->tiles_height_split);
|
||||
FREE_POINTER(cfg->slice_addresses_in_ts);
|
||||
FREE_POINTER(cfg->roi.dqps);
|
||||
FREE_POINTER(cfg->fastrd_learning_outdir_fn);
|
||||
}
|
||||
free(cfg);
|
||||
|
@ -1269,60 +1272,29 @@ int uvg_config_parse(uvg_config *cfg, const char *name, const char *value)
|
|||
}
|
||||
else if OPT("implicit-rdpcm")
|
||||
cfg->implicit_rdpcm = (bool)atobool(value);
|
||||
|
||||
else if OPT("roi") {
|
||||
// The ROI description is as follows:
|
||||
// First number is width, second number is height,
|
||||
// then follows width * height number of dqp values.
|
||||
FILE* f = fopen(value, "rb");
|
||||
if (!f) {
|
||||
fprintf(stderr, "Could not open ROI file.\n");
|
||||
static enum uvg_roi_format const formats[] = { UVG_ROI_TXT, UVG_ROI_BIN };
|
||||
static const char * const format_names[] = { "txt", "bin", NULL };
|
||||
|
||||
char *roi_file = strdup(value);
|
||||
if (!roi_file) {
|
||||
fprintf(stderr, "Failed to allocate memory for ROI file name.\n");
|
||||
return 0;
|
||||
}
|
||||
FREE_POINTER(cfg->roi.file_path);
|
||||
cfg->roi.file_path = roi_file;
|
||||
|
||||
int width = 0;
|
||||
int height = 0;
|
||||
if (!fscanf(f, "%d", &width) || !fscanf(f, "%d", &height)) {
|
||||
fprintf(stderr, "Failed to read ROI size.\n");
|
||||
fclose(f);
|
||||
return 0;
|
||||
// Get file extension or the substring after the last dot
|
||||
char *maybe_extension = strrchr(cfg->roi.file_path, '.');
|
||||
if (!maybe_extension) {
|
||||
cfg->roi.format = UVG_ROI_TXT;
|
||||
} else {
|
||||
maybe_extension++;
|
||||
int8_t format;
|
||||
bool unknown_format = !parse_enum(maybe_extension, format_names, &format);
|
||||
cfg->roi.format = unknown_format ? UVG_ROI_TXT : formats[format];
|
||||
}
|
||||
|
||||
if (width <= 0 || height <= 0) {
|
||||
fprintf(stderr, "Invalid ROI size: %dx%d.\n", width, height);
|
||||
fclose(f);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (width > 10000 || height > 10000) {
|
||||
fprintf(stderr, "ROI dimensions exceed arbitrary value of 10000.\n");
|
||||
fclose(f);
|
||||
return 0;
|
||||
}
|
||||
|
||||
const unsigned size = width * height;
|
||||
int8_t *dqp_array = calloc((size_t)size, sizeof(cfg->roi.dqps[0]));
|
||||
if (!dqp_array) {
|
||||
fprintf(stderr, "Failed to allocate memory for ROI table.\n");
|
||||
fclose(f);
|
||||
return 0;
|
||||
}
|
||||
|
||||
FREE_POINTER(cfg->roi.dqps);
|
||||
cfg->roi.dqps = dqp_array;
|
||||
cfg->roi.width = width;
|
||||
cfg->roi.height = height;
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
int number; // Need a pointer to int for fscanf
|
||||
if (fscanf(f, "%d", &number) != 1) {
|
||||
fprintf(stderr, "Reading ROI file failed.\n");
|
||||
fclose(f);
|
||||
return 0;
|
||||
}
|
||||
dqp_array[i] = CLIP(-51, 51, number);
|
||||
}
|
||||
|
||||
fclose(f);
|
||||
}
|
||||
else if OPT("set-qp-in-cu") {
|
||||
cfg->set_qp_in_cu = (bool)atobool(value);
|
||||
|
@ -1476,6 +1448,12 @@ int uvg_config_parse(uvg_config *cfg, const char *name, const char *value)
|
|||
else if OPT("cclm") {
|
||||
cfg->cclm = (bool)atobool(value);
|
||||
}
|
||||
else if OPT("combine-intra-cus") {
|
||||
cfg->combine_intra_cus = atobool(value);
|
||||
}
|
||||
else if OPT("force-inter") {
|
||||
cfg->force_inter = atobool(value);
|
||||
}
|
||||
else {
|
||||
return 0;
|
||||
}
|
||||
|
|
34
src/cli.c
34
src/cli.c
|
@ -141,6 +141,7 @@ static const struct option long_options[] = {
|
|||
{ "force-level", required_argument, NULL, 0 },
|
||||
{ "high-tier", no_argument, NULL, 0 },
|
||||
{ "me-steps", required_argument, NULL, 0 },
|
||||
{ "roi-file", required_argument, NULL, 0 },
|
||||
{ "fast-residual-cost", required_argument, NULL, 0 },
|
||||
{ "set-qp-in-cu", no_argument, NULL, 0 },
|
||||
{ "open-gop", no_argument, NULL, 0 },
|
||||
|
@ -179,6 +180,10 @@ static const struct option long_options[] = {
|
|||
{ "no-amvr", no_argument, NULL, 0 },
|
||||
{ "cclm", no_argument, NULL, 0 },
|
||||
{ "no-cclm", no_argument, NULL, 0 },
|
||||
{ "combine-intra-cus", no_argument, NULL, 0 },
|
||||
{ "no-combine-intra-cus", no_argument, NULL, 0 },
|
||||
{ "force-inter", no_argument, NULL, 0 },
|
||||
{ "no-force-inter", no_argument, NULL, 0 },
|
||||
{0, 0, 0, 0}
|
||||
};
|
||||
|
||||
|
@ -499,11 +504,20 @@ void print_help(void)
|
|||
" - frametile: Constrain within the tile.\n"
|
||||
" - frametilemargin: Constrain even more.\n"
|
||||
" --roi <filename> : Use a delta QP map for region of interest.\n"
|
||||
" Reads an array of delta QP values from a text\n"
|
||||
" file. The file format is: width and height of\n"
|
||||
" the QP delta map followed by width*height delta\n"
|
||||
" QP values in raster order. The map can be of any\n"
|
||||
" size and will be scaled to the video size.\n"
|
||||
" Reads an array of delta QP values from a file.\n"
|
||||
" Text and binary files are supported and detected\n"
|
||||
" from the file extension (.txt/.bin). If a known\n"
|
||||
" extension is not found, the file is treated as\n"
|
||||
" a text file. The file can include one or many\n"
|
||||
" ROI frames each in the following format:\n"
|
||||
" width and height of the QP delta map followed\n"
|
||||
" by width * height delta QP values in raster\n"
|
||||
" order. In binary format, width and height are\n"
|
||||
" 32-bit integers whereas the delta QP values are\n"
|
||||
" signed 8-bit values. The map can be of any size\n"
|
||||
" and will be scaled to the video size. The file\n"
|
||||
" reading will loop if end of the file is reached.\n"
|
||||
" See roi.txt in the examples folder.\n"
|
||||
" --set-qp-in-cu : Set QP at CU level keeping pic_init_qp_minus26.\n"
|
||||
" in PPS and slice_qp_delta in slize header zero.\n"
|
||||
" --(no-)erp-aqp : Use adaptive QP for 360 degree video with\n"
|
||||
|
@ -587,6 +601,16 @@ void print_help(void)
|
|||
" --ml-pu-depth-intra : Predict the pu-depth-intra using machine\n"
|
||||
" learning trees, overrides the\n"
|
||||
" --pu-depth-intra parameter. [disabled]\n"
|
||||
" --(no-)combine-intra-cus: Whether the encoder tries to code a cu\n"
|
||||
" on lower depth even when search is not\n"
|
||||
" performed on said depth. Should only\n"
|
||||
" be disabled if cus absolutely must not\n"
|
||||
" be larger than limited by the search.\n"
|
||||
" [enabled]"
|
||||
" --force-inter : Force the encoder to use inter always.\n"
|
||||
" This is mostly for debugging and is not\n"
|
||||
" guaranteed to produce sensible bitstream or\n"
|
||||
" work at all. [disabled]"
|
||||
" --tr-depth-intra <int> : Transform split depth for intra blocks [0]\n"
|
||||
" --(no-)bipred : Bi-prediction [disabled]\n"
|
||||
" --cu-split-termination <string> : CU split search termination [zero]\n"
|
||||
|
|
12
src/cu.h
12
src/cu.h
|
@ -148,7 +148,7 @@ typedef struct
|
|||
uint8_t merge_idx : 3; //!< \brief merge index
|
||||
uint8_t tr_skip : 1; //!< \brief transform skip flag
|
||||
uint8_t tr_idx : 3; //!< \brief transform index
|
||||
uint8_t joint_cb_cr : 2; //!< \brief joint chroma residual coding
|
||||
uint8_t joint_cb_cr : 3; //!< \brief joint chroma residual coding
|
||||
|
||||
uint16_t cbf;
|
||||
|
||||
|
@ -183,6 +183,16 @@ typedef struct
|
|||
};
|
||||
} cu_info_t;
|
||||
|
||||
/**
 * \brief Position and size record for a block.
 *
 * NOTE(review): field meanings are inferred from their names only (x/y as a
 * position, width/height and chroma_width/chroma_height as luma and chroma
 * dimensions). Confirm units and coordinate origin against the code that
 * fills this struct.
 *
 * The four size fields are int8_t, so values above 127 cannot be
 * represented; x and y are int16_t.
 */
typedef struct {
|
||||
int16_t x;
|
||||
int16_t y;
|
||||
int8_t width;
|
||||
int8_t height;
|
||||
int8_t chroma_width;
|
||||
int8_t chroma_height;
|
||||
} cu_loc_t;
|
||||
|
||||
|
||||
#define CU_GET_MV_CAND(cu_info_ptr, reflist) \
|
||||
(((reflist) == 0) ? (cu_info_ptr)->inter.mv_cand0 : (cu_info_ptr)->inter.mv_cand1)
|
||||
|
||||
|
|
|
@ -441,6 +441,7 @@ int main(int argc, char *argv[])
|
|||
FILE *input = NULL; //!< input file (YUV)
|
||||
FILE *output = NULL; //!< output file (HEVC NAL stream)
|
||||
FILE *recout = NULL; //!< reconstructed YUV output, --debug
|
||||
FILE *roifile = NULL;
|
||||
clock_t start_time = clock();
|
||||
clock_t encoding_start_cpu_time;
|
||||
UVG_CLOCK_T encoding_start_real_time;
|
||||
|
@ -587,7 +588,7 @@ int main(int argc, char *argv[])
|
|||
// Give arguments via struct to the input thread
|
||||
input_handler_args in_args = {
|
||||
.available_input_slots = available_input_slots,
|
||||
.filled_input_slots = filled_input_slots,
|
||||
.filled_input_slots = filled_input_slots,
|
||||
|
||||
.input = input,
|
||||
.api = api,
|
||||
|
@ -828,6 +829,7 @@ done:
|
|||
if (input) fclose(input);
|
||||
if (output) fclose(output);
|
||||
if (recout) fclose(recout);
|
||||
if (roifile) fclose(roifile);
|
||||
|
||||
DBG_YUVIEW_CLEANUP();
|
||||
CHECKPOINTS_FINALIZE();
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -56,7 +56,33 @@ void uvg_encode_ts_residual(encoder_state_t* const state,
|
|||
void uvg_encode_mvd(encoder_state_t * const state,
|
||||
cabac_data_t *cabac,
|
||||
int32_t mvd_hor,
|
||||
int32_t mvd_ver);
|
||||
int32_t mvd_ver,
|
||||
double* bits_out);
|
||||
|
||||
double uvg_mock_encode_coding_unit(
|
||||
encoder_state_t* const state,
|
||||
cabac_data_t* cabac,
|
||||
int x, int y, int depth,
|
||||
lcu_t* lcu, cu_info_t* cur_cu);
|
||||
|
||||
int uvg_encode_inter_prediction_unit(encoder_state_t* const state,
|
||||
cabac_data_t* const cabac,
|
||||
const cu_info_t* const cur_cu,
|
||||
int x, int y, int width, int height,
|
||||
int depth,
|
||||
lcu_t* lcu,
|
||||
double* bits_out);
|
||||
|
||||
void uvg_encode_intra_luma_coding_unit(const encoder_state_t* const state,
|
||||
cabac_data_t* const cabac,
|
||||
const cu_info_t* const cur_cu,
|
||||
int x, int y, int depth, const lcu_t* lcu, double* bits_out);
|
||||
|
||||
|
||||
bool uvg_write_split_flag(const encoder_state_t* const state, cabac_data_t* cabac,
|
||||
const cu_info_t* left_cu, const cu_info_t* above_cu,
|
||||
uint8_t split_flag,
|
||||
int depth, int cu_width, int x, int y, double* bits_out);
|
||||
|
||||
void uvg_encode_last_significant_xy(cabac_data_t * const cabac,
|
||||
uint8_t lastpos_x, uint8_t lastpos_y,
|
||||
|
|
112
src/encoder.c
112
src/encoder.c
|
@ -32,7 +32,6 @@
|
|||
|
||||
#include "encoder.h"
|
||||
|
||||
// This define is required for M_PI on Windows.
|
||||
#define _USE_MATH_DEFINES
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
|
@ -45,14 +44,6 @@
|
|||
#include "uvg_math.h"
|
||||
#include "fast_coeff_cost.h"
|
||||
|
||||
/**
|
||||
* \brief Strength of QP adjustments when using adaptive QP for 360 video.
|
||||
*
|
||||
* Determined empirically.
|
||||
*/
|
||||
static const double ERP_AQP_STRENGTH = 3.0;
|
||||
|
||||
|
||||
static int encoder_control_init_gop_layer_weights(encoder_control_t * const);
|
||||
|
||||
static unsigned cfg_num_threads(void)
|
||||
|
@ -136,22 +127,6 @@ static int get_max_parallelism(const encoder_control_t *const encoder)
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Return weight for 360 degree ERP video
|
||||
*
|
||||
* Returns the scaling factor of area from equirectangular projection to
|
||||
* spherical surface.
|
||||
*
|
||||
* \param y y-coordinate of the pixel
|
||||
* \param h height of the picture
|
||||
*/
|
||||
static double ws_weight(int y, int h)
|
||||
{
|
||||
return cos((y - 0.5 * h + 0.5) * (M_PI / h));
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* \brief Update ROI QPs for 360 video with equirectangular projection.
|
||||
*
|
||||
|
@ -162,55 +137,6 @@ static double ws_weight(int y, int h)
|
|||
* \param orig_width width of orig_roi
|
||||
* \param orig_height height of orig_roi
|
||||
*/
|
||||
static void init_erp_aqp_roi(encoder_control_t* encoder,
|
||||
int8_t *orig_roi,
|
||||
int32_t orig_width,
|
||||
int32_t orig_height)
|
||||
{
|
||||
// Update ROI with WS-PSNR delta QPs.
|
||||
int height = encoder->in.height_in_lcu;
|
||||
int width = orig_roi ? orig_width : 1;
|
||||
|
||||
int frame_height = encoder->in.real_height;
|
||||
|
||||
encoder->cfg.roi.width = width;
|
||||
encoder->cfg.roi.height = height;
|
||||
encoder->cfg.roi.dqps = calloc(width * height, sizeof(orig_roi[0]));
|
||||
|
||||
double total_weight = 0.0;
|
||||
for (int y = 0; y < frame_height; y++) {
|
||||
total_weight += ws_weight(y, frame_height);
|
||||
}
|
||||
|
||||
for (int y_lcu = 0; y_lcu < height; y_lcu++) {
|
||||
int y_orig = LCU_WIDTH * y_lcu;
|
||||
int lcu_height = MIN(LCU_WIDTH, frame_height - y_orig);
|
||||
|
||||
double lcu_weight = 0.0;
|
||||
for (int y = y_orig; y < y_orig + lcu_height; y++) {
|
||||
lcu_weight += ws_weight(y, frame_height);
|
||||
}
|
||||
// Normalize.
|
||||
lcu_weight = (lcu_weight * frame_height) / (total_weight * lcu_height);
|
||||
|
||||
int8_t qp_delta = round(-ERP_AQP_STRENGTH * log2(lcu_weight));
|
||||
|
||||
if (orig_roi) {
|
||||
// If a ROI array already exists, we copy the existing values to the
|
||||
// new array while adding qp_delta to each.
|
||||
int y_roi = y_lcu * orig_height / height;
|
||||
for (int x = 0; x < width; x++) {
|
||||
encoder->cfg.roi.dqps[x + y_lcu * width] =
|
||||
CLIP(-51, 51, orig_roi[x + y_roi * width] + qp_delta);
|
||||
}
|
||||
|
||||
} else {
|
||||
// Otherwise, simply write qp_delta to the ROI array.
|
||||
encoder->cfg.roi.dqps[y_lcu] = qp_delta;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static int8_t* derive_chroma_QP_mapping_table(const uvg_config* const cfg, int i)
|
||||
{
|
||||
|
@ -394,6 +320,16 @@ encoder_control_t* uvg_encoder_control_init(const uvg_config *const cfg)
|
|||
encoder->scaling_list.use_default_list = 1;
|
||||
}
|
||||
|
||||
// ROI / delta QP
|
||||
if (cfg->roi.file_path) {
|
||||
const char *mode[2] = { "r", "rb" };
|
||||
encoder->roi_file = fopen(cfg->roi.file_path, mode[cfg->roi.format]);
|
||||
if (!encoder->roi_file) {
|
||||
fprintf(stderr, "Could not open ROI file.\n");
|
||||
goto init_failed;
|
||||
}
|
||||
}
|
||||
|
||||
if (cfg->fast_coeff_table_fn) {
|
||||
FILE *fast_coeff_table_f = fopen(cfg->fast_coeff_table_fn, "rb");
|
||||
if (fast_coeff_table_f == NULL) {
|
||||
|
@ -435,32 +371,10 @@ encoder_control_t* uvg_encoder_control_init(const uvg_config *const cfg)
|
|||
goto init_failed;
|
||||
}
|
||||
|
||||
if (cfg->erp_aqp) {
|
||||
init_erp_aqp_roi(encoder,
|
||||
cfg->roi.dqps,
|
||||
cfg->roi.width,
|
||||
cfg->roi.height);
|
||||
|
||||
} else if (cfg->roi.dqps) {
|
||||
// Copy delta QP array for ROI coding.
|
||||
const size_t roi_size = encoder->cfg.roi.width * encoder->cfg.roi.height;
|
||||
encoder->cfg.roi.dqps = calloc(roi_size, sizeof(cfg->roi.dqps[0]));
|
||||
memcpy(encoder->cfg.roi.dqps,
|
||||
cfg->roi.dqps,
|
||||
roi_size * sizeof(*cfg->roi.dqps));
|
||||
|
||||
}
|
||||
|
||||
// NOTE: When tr_depth_inter is equal to 0, the transform is still split
|
||||
// for SMP and AMP partition units.
|
||||
encoder->tr_depth_inter = 0;
|
||||
|
||||
if (encoder->cfg.target_bitrate > 0 || encoder->cfg.roi.dqps || encoder->cfg.set_qp_in_cu || encoder->cfg.vaq) {
|
||||
encoder->max_qp_delta_depth = 0;
|
||||
} else {
|
||||
encoder->max_qp_delta_depth = -1;
|
||||
}
|
||||
|
||||
//Tiles
|
||||
encoder->tiles_enable = encoder->cfg.tiles_width_count > 1 ||
|
||||
encoder->cfg.tiles_height_count > 1;
|
||||
|
@ -761,7 +675,7 @@ void uvg_encoder_control_free(encoder_control_t *const encoder)
|
|||
|
||||
FREE_POINTER(encoder->tiles_tile_id);
|
||||
|
||||
FREE_POINTER(encoder->cfg.roi.dqps);
|
||||
FREE_POINTER(encoder->cfg.roi.file_path);
|
||||
|
||||
uvg_scalinglist_destroy(&encoder->scaling_list);
|
||||
|
||||
|
@ -773,6 +687,10 @@ void uvg_encoder_control_free(encoder_control_t *const encoder)
|
|||
|
||||
uvg_close_rdcost_outfiles();
|
||||
|
||||
if (encoder->roi_file) {
|
||||
fclose(encoder->roi_file);
|
||||
}
|
||||
|
||||
free(encoder);
|
||||
}
|
||||
|
||||
|
|
|
@ -130,7 +130,7 @@ typedef struct encoder_control_t
|
|||
//! Picture weights when GOP is used.
|
||||
double gop_layer_weights[MAX_GOP_LAYERS];
|
||||
|
||||
int8_t max_qp_delta_depth;
|
||||
FILE *roi_file;
|
||||
|
||||
int tr_depth_inter;
|
||||
|
||||
|
|
|
@ -805,7 +805,7 @@ static void encoder_state_write_bitstream_pic_parameter_set(bitstream_t* stream,
|
|||
WRITE_U(stream, 0, 1, "pps_ref_wraparound_enabled_flag");
|
||||
|
||||
WRITE_SE(stream, ((int8_t)encoder->cfg.qp) - 26, "pps_init_qp_minus26");
|
||||
WRITE_U(stream, encoder->max_qp_delta_depth >= 0 ? 1:0, 1, "pps_cu_qp_delta_enabled_flag");
|
||||
WRITE_U(stream, state->frame->max_qp_delta_depth >= 0 ? 1:0, 1, "pps_cu_qp_delta_enabled_flag");
|
||||
|
||||
WRITE_U(stream, 0,1, "pps_chroma_tool_offsets_present_flag");
|
||||
/* // If chroma_tool_offsets_present
|
||||
|
@ -1037,8 +1037,8 @@ static void uvg_encoder_state_write_bitstream_picture_header(
|
|||
const int poc_lsb = state->frame->poc & ((1 << encoder->poc_lsb_bits) - 1);
|
||||
WRITE_U(stream, poc_lsb, encoder->poc_lsb_bits, "ph_pic_order_cnt_lsb");
|
||||
|
||||
if (encoder->max_qp_delta_depth >= 0) {
|
||||
WRITE_UE(stream, encoder->max_qp_delta_depth, "ph_cu_qp_delta_subdiv_intra_slice");
|
||||
if (state->frame->max_qp_delta_depth >= 0) {
|
||||
WRITE_UE(stream, state->frame->max_qp_delta_depth, "ph_cu_qp_delta_subdiv_intra_slice");
|
||||
}
|
||||
|
||||
// alf enable flags and aps IDs
|
||||
|
@ -1118,8 +1118,8 @@ static void uvg_encoder_state_write_bitstream_picture_header(
|
|||
|| state->frame->pictype == UVG_NAL_IDR_N_LP) {
|
||||
}
|
||||
else {
|
||||
if (encoder->max_qp_delta_depth >= 0) {
|
||||
WRITE_UE(stream, encoder->max_qp_delta_depth, "ph_cu_qp_delta_subdiv_inter_slice");
|
||||
if (state->frame->max_qp_delta_depth >= 0) {
|
||||
WRITE_UE(stream, state->frame->max_qp_delta_depth, "ph_cu_qp_delta_subdiv_inter_slice");
|
||||
}
|
||||
if (state->encoder_control->cfg.tmvp_enable) {
|
||||
WRITE_U(stream, state->encoder_control->cfg.tmvp_enable, 1, "ph_pic_temporal_mvp_enabled_flag");
|
||||
|
@ -1128,7 +1128,7 @@ static void uvg_encoder_state_write_bitstream_picture_header(
|
|||
}
|
||||
|
||||
if (encoder->cfg.jccr) {
|
||||
WRITE_U(stream, 0, 1, "ph_joint_cbcr_sign_flag");
|
||||
WRITE_U(stream, state->frame->jccr_sign, 1, "ph_joint_cbcr_sign_flag");
|
||||
}
|
||||
// END PICTURE HEADER
|
||||
|
||||
|
|
|
@ -32,6 +32,9 @@
|
|||
|
||||
#include "encoderstate.h"
|
||||
|
||||
// This define is required for M_PI on Windows.
|
||||
#define _USE_MATH_DEFINES
|
||||
#include <ctype.h>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
@ -53,6 +56,12 @@
|
|||
|
||||
#include "strategies/strategies-picture.h"
|
||||
|
||||
/**
|
||||
* \brief Strength of QP adjustments when using adaptive QP for 360 video.
|
||||
*
|
||||
* Determined empirically.
|
||||
*/
|
||||
static const double ERP_AQP_STRENGTH = 3.0;
|
||||
|
||||
int uvg_encoder_state_match_children_of_previous_frame(encoder_state_t * const state) {
|
||||
int i;
|
||||
|
@ -572,7 +581,7 @@ static void set_cu_qps(encoder_state_t *state, int x, int y, int depth, int *las
|
|||
cu_info_t *cu = uvg_cu_array_at(state->tile->frame->cu_array, x, y);
|
||||
const int cu_width = LCU_WIDTH >> depth;
|
||||
|
||||
if (depth <= state->encoder_control->max_qp_delta_depth) {
|
||||
if (depth <= state->frame->max_qp_delta_depth) {
|
||||
*prev_qp = -1;
|
||||
}
|
||||
|
||||
|
@ -624,6 +633,38 @@ static void set_cu_qps(encoder_state_t *state, int x, int y, int depth, int *las
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
static void set_joint_cb_cr_modes(encoder_state_t* state, uvg_picture* pic)
|
||||
{
|
||||
bool sgnFlag = true;
|
||||
|
||||
if (state->encoder_control->chroma_format != UVG_CSP_400)
|
||||
{
|
||||
const int x1 = pic->width / 2 - 1;
|
||||
const int y1 = pic->height / 2 - 1;
|
||||
const int cbs = pic->stride / 2;
|
||||
const int crs = pic->stride / 2;
|
||||
const uvg_pixel* p_cb = pic->u + 1 * cbs;
|
||||
const uvg_pixel* p_cr = pic->v + 1 * crs;
|
||||
int64_t sum_cb_cr = 0;
|
||||
|
||||
// determine inter-chroma transform sign from correlation between high-pass filtered (i.e., zero-mean) Cb and Cr planes
|
||||
for (int y = 1; y < y1; y++, p_cb += cbs, p_cr += crs)
|
||||
{
|
||||
for (int x = 1; x < x1; x++)
|
||||
{
|
||||
int cb = (12 * (int)p_cb[x] - 2 * ((int)p_cb[x - 1] + (int)p_cb[x + 1] + (int)p_cb[x - cbs] + (int)p_cb[x + cbs]) - ((int)p_cb[x - 1 - cbs] + (int)p_cb[x + 1 - cbs] + (int)p_cb[x - 1 + cbs] + (int)p_cb[x + 1 + cbs]));
|
||||
int cr = (12 * (int)p_cr[x] - 2 * ((int)p_cr[x - 1] + (int)p_cr[x + 1] + (int)p_cr[x - crs] + (int)p_cr[x + crs]) - ((int)p_cr[x - 1 - crs] + (int)p_cr[x + 1 - crs] + (int)p_cr[x - 1 + crs] + (int)p_cr[x + 1 + crs]));
|
||||
sum_cb_cr += cb * cr;
|
||||
}
|
||||
}
|
||||
|
||||
sgnFlag = (sum_cb_cr < 0);
|
||||
}
|
||||
|
||||
state->frame->jccr_sign = sgnFlag;
|
||||
}
|
||||
|
||||
static void encoder_state_worker_encode_lcu_bitstream(void* opaque);
|
||||
|
||||
static void encoder_state_worker_encode_lcu_search(void * opaque)
|
||||
|
@ -665,7 +706,7 @@ static void encoder_state_worker_encode_lcu_search(void * opaque)
|
|||
|
||||
encoder_state_recdata_to_bufs(state, lcu, state->tile->hor_buf_search, state->tile->ver_buf_search);
|
||||
|
||||
if (encoder->max_qp_delta_depth >= 0) {
|
||||
if (state->frame->max_qp_delta_depth >= 0) {
|
||||
int last_qp = state->last_qp;
|
||||
int prev_qp = -1;
|
||||
set_cu_qps(state, lcu->position_px.x, lcu->position_px.y, 0, &last_qp, &prev_qp);
|
||||
|
@ -716,6 +757,7 @@ static void encoder_state_worker_encode_lcu_bitstream(void * opaque)
|
|||
const uint64_t existing_bits = uvg_bitstream_tell(&state->stream);
|
||||
|
||||
//Encode SAO
|
||||
state->cabac.update = 1;
|
||||
if (encoder->cfg.sao_type) {
|
||||
encode_sao(state, lcu->position.x, lcu->position.y, &frame->sao_luma[lcu->position.y * frame->width_in_lcu + lcu->position.x], &frame->sao_chroma[lcu->position.y * frame->width_in_lcu + lcu->position.x]);
|
||||
}
|
||||
|
@ -771,6 +813,7 @@ static void encoder_state_worker_encode_lcu_bitstream(void * opaque)
|
|||
uvg_cabac_start(&state->cabac);
|
||||
}
|
||||
}
|
||||
state->cabac.update = 0;
|
||||
|
||||
|
||||
pthread_mutex_lock(&state->frame->rc_lock);
|
||||
|
@ -1421,6 +1464,154 @@ static bool edge_lcu(int id, int lcus_x, int lcus_y, bool xdiv64, bool ydiv64)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Return weight for 360 degree ERP video
|
||||
*
|
||||
* Returns the scaling factor of area from equirectangular projection to
|
||||
* spherical surface.
|
||||
*
|
||||
* \param y y-coordinate of the pixel
|
||||
* \param h height of the picture
|
||||
*/
|
||||
static double ws_weight(int y, int h)
|
||||
{
|
||||
return cos((y - 0.5 * h + 0.5) * (M_PI / h));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Update ROI QPs for 360 video with equirectangular projection.
|
||||
*
|
||||
* Updates the ROI parameters in frame->roi.
|
||||
*
|
||||
* \param encoder encoder control
|
||||
* \param frame frame that will have the ROI map
|
||||
*/
|
||||
static void init_erp_aqp_roi(const encoder_control_t *encoder, uvg_picture *frame)
|
||||
{
|
||||
int8_t *orig_roi = frame->roi.roi_array;
|
||||
int32_t orig_width = frame->roi.width;
|
||||
int32_t orig_height = frame->roi.height;
|
||||
|
||||
// Update ROI with WS-PSNR delta QPs.
|
||||
int new_height = encoder->in.height_in_lcu;
|
||||
int new_width = orig_roi ? orig_width : 1;
|
||||
int8_t *new_array = calloc(new_width * new_height, sizeof(orig_roi[0]));
|
||||
|
||||
int frame_height = encoder->in.real_height;
|
||||
|
||||
double total_weight = 0.0;
|
||||
for (int y = 0; y < frame_height; y++) {
|
||||
total_weight += ws_weight(y, frame_height);
|
||||
}
|
||||
|
||||
for (int y_lcu = 0; y_lcu < new_height; y_lcu++) {
|
||||
int y_orig = LCU_WIDTH * y_lcu;
|
||||
int lcu_height = MIN(LCU_WIDTH, frame_height - y_orig);
|
||||
|
||||
double lcu_weight = 0.0;
|
||||
for (int y = y_orig; y < y_orig + lcu_height; y++) {
|
||||
lcu_weight += ws_weight(y, frame_height);
|
||||
}
|
||||
// Normalize.
|
||||
lcu_weight = (lcu_weight * frame_height) / (total_weight * lcu_height);
|
||||
|
||||
int8_t qp_delta = round(-ERP_AQP_STRENGTH * log2(lcu_weight));
|
||||
|
||||
if (orig_roi) {
|
||||
// If a ROI array already exists, we copy the existing values to the
|
||||
// new array while adding qp_delta to each.
|
||||
int y_roi = y_lcu * orig_height / new_height;
|
||||
for (int x = 0; x < new_width; x++) {
|
||||
new_array[x + y_lcu * new_width] =
|
||||
CLIP(-51, 51, orig_roi[x + y_roi * new_width] + qp_delta);
|
||||
}
|
||||
|
||||
} else {
|
||||
// Otherwise, simply write qp_delta to the ROI array.
|
||||
new_array[y_lcu] = qp_delta;
|
||||
}
|
||||
}
|
||||
|
||||
// Update new values
|
||||
frame->roi.width = new_width;
|
||||
frame->roi.height = new_height;
|
||||
frame->roi.roi_array = new_array;
|
||||
FREE_POINTER(orig_roi);
|
||||
}
|
||||
|
||||
|
||||
/**
 * \brief Read the next ROI (delta QP) frame from a ROI file into a picture.
 *
 * A ROI frame consists of the map width, the map height and then
 * width * height delta QP values. In the text format every value is a
 * decimal integer and the delta QPs are clipped to [-51, 51]. In the binary
 * format the size is read as two 4-byte integers and the delta QPs as one
 * byte each, stored as read.
 *
 * On success the frame's previous ROI array is freed and replaced. On any
 * failure an error is printed, assert(0) fires and, in builds where asserts
 * are disabled, the function returns leaving the frame's ROI untouched.
 *
 * \param frame   picture whose roi.width, roi.height and roi.roi_array are set
 * \param file    open ROI file positioned at the start of a ROI frame
 * \param format  UVG_ROI_TXT or UVG_ROI_BIN
 */
static void next_roi_frame_from_file(uvg_picture *frame, FILE *file, enum uvg_roi_format format) {
  // Rewind the (seekable) ROI file when end of file is reached.
  // Allows a single ROI frame to be used for a whole sequence
  // and looping with --loop-input. Skips possible whitespace.
  if (ftell(file) != -1L) {
    int c = fgetc(file);
    while (format == UVG_ROI_TXT && isspace(c)) c = fgetc(file);
    ungetc(c, file);
    if (c == EOF) rewind(file);
  }

  int width = 0;
  int height = 0;
  bool failed = false;

  if (format == UVG_ROI_TXT) {
    // fscanf returns EOF (a negative value) when input ends before any
    // conversion, so success must be tested as "exactly one item read".
    failed = fscanf(file, "%d", &width) != 1 || fscanf(file, "%d", &height) != 1;
  } else if (format == UVG_ROI_BIN) {
    // Read into explicit 32-bit temporaries instead of relying on the
    // memory layout of frame->roi.
    int32_t dims[2] = { 0, 0 };
    failed = fread(dims, sizeof(dims[0]), 2, file) != 2;
    width = dims[0];
    height = dims[1];
  }

  if (failed) {
    fprintf(stderr, "Failed to read ROI size.\n");
    fclose(file);
    assert(0);
    return;
  }

  if (width <= 0 || height <= 0) {
    fprintf(stderr, "Invalid ROI size: %dx%d.\n", width, height);
    fclose(file);
    assert(0);
    return;
  }

  if (width > 10000 || height > 10000) {
    fprintf(stderr, "ROI dimensions exceed arbitrary value of 10000.\n");
    fclose(file);
    assert(0);
    return;
  }

  // Both factors are in [1, 10000], so the product fits easily.
  const size_t size = (size_t)width * (size_t)height;
  int8_t *dqp_array = calloc(size, sizeof(*dqp_array));
  if (!dqp_array) {
    fprintf(stderr, "Failed to allocate memory for ROI table.\n");
    fclose(file);
    assert(0);
    return;
  }

  if (format == UVG_ROI_TXT) {
    for (size_t i = 0; i < size; ++i) {
      int number; // Need a pointer to int for fscanf
      if (fscanf(file, "%d", &number) != 1) {
        fprintf(stderr, "Reading ROI file failed.\n");
        fclose(file);
        assert(0);
        free(dqp_array);
        return;
      }
      dqp_array[i] = CLIP(-51, 51, number);
    }
  } else if (format == UVG_ROI_BIN) {
    if (fread(dqp_array, 1, size, file) != size) {
      fprintf(stderr, "Reading ROI file failed.\n");
      assert(0);
      free(dqp_array);
      return;
    }
  }

  // Commit the new map only after it has been read completely.
  FREE_POINTER(frame->roi.roi_array);
  frame->roi.width = width;
  frame->roi.height = height;
  frame->roi.roi_array = dqp_array;
}
|
||||
|
||||
static void encoder_state_init_new_frame(encoder_state_t * const state, uvg_picture* frame) {
|
||||
assert(state->type == ENCODER_STATE_TYPE_MAIN);
|
||||
|
||||
|
@ -1437,6 +1628,21 @@ static void encoder_state_init_new_frame(encoder_state_t * const state, uvg_pict
|
|||
memset(state->tile->frame->hmvp_size, 0, sizeof(uint8_t) * state->tile->frame->height_in_lcu);
|
||||
}
|
||||
|
||||
// ROI / delta QP maps
|
||||
if (frame->roi.roi_array && cfg->roi.file_path) {
|
||||
assert(0 && "Conflict: Other ROI data was supplied when a ROI file was specified.");
|
||||
}
|
||||
|
||||
// Read frame from the file. If no file is specified,
|
||||
// ROI data should be already set by the application.
|
||||
if (cfg->roi.file_path) {
|
||||
next_roi_frame_from_file(frame, state->encoder_control->roi_file, cfg->roi.format);
|
||||
}
|
||||
|
||||
if (cfg->erp_aqp) {
|
||||
init_erp_aqp_roi(state->encoder_control, state->tile->frame->source);
|
||||
}
|
||||
|
||||
// Variance adaptive quantization
|
||||
if (cfg->vaq) {
|
||||
const bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400;
|
||||
|
@ -1523,6 +1729,12 @@ static void encoder_state_init_new_frame(encoder_state_t * const state, uvg_pict
|
|||
}
|
||||
// Variance adaptive quantization - END
|
||||
|
||||
if (cfg->target_bitrate > 0 || frame->roi.roi_array || cfg->set_qp_in_cu || cfg->vaq) {
|
||||
state->frame->max_qp_delta_depth = 0;
|
||||
} else {
|
||||
state->frame->max_qp_delta_depth = -1;
|
||||
}
|
||||
|
||||
// Use this flag to handle closed gop irap picture selection.
|
||||
// If set to true, irap is already set and we avoid
|
||||
// setting it based on the intra period
|
||||
|
@ -1689,6 +1901,7 @@ void uvg_encode_one_frame(encoder_state_t * const state, uvg_picture* frame)
|
|||
|
||||
|
||||
encoder_state_init_new_frame(state, frame);
|
||||
if(state->encoder_control->cfg.jccr) set_joint_cb_cr_modes(state, frame);
|
||||
|
||||
// Create a separate job for ALF done after everything else, and only then do final bitstream writing (for ALF parameters)
|
||||
if (state->encoder_control->cfg.alf_type && state->encoder_control->cfg.wpp) {
|
||||
|
@ -1834,10 +2047,9 @@ lcu_stats_t* uvg_get_lcu_stats(encoder_state_t *state, int lcu_x, int lcu_y)
|
|||
|
||||
int uvg_get_cu_ref_qp(const encoder_state_t *state, int x, int y, int last_qp)
|
||||
{
|
||||
const encoder_control_t *ctrl = state->encoder_control;
|
||||
const cu_array_t *cua = state->tile->frame->cu_array;
|
||||
// Quantization group width
|
||||
const int qg_width = LCU_WIDTH >> MIN(ctrl->max_qp_delta_depth, uvg_cu_array_at_const(cua, x, y)->depth);
|
||||
const int qg_width = LCU_WIDTH >> MIN(state->frame->max_qp_delta_depth, uvg_cu_array_at_const(cua, x, y)->depth);
|
||||
|
||||
// Coordinates of the top-left corner of the quantization group
|
||||
const int x_qg = x & ~(qg_width - 1);
|
||||
|
|
|
@ -179,6 +179,8 @@ typedef struct encoder_state_config_frame_t {
|
|||
*/
|
||||
double *aq_offsets;
|
||||
|
||||
int8_t max_qp_delta_depth;
|
||||
|
||||
/**
|
||||
* \brief Whether next NAL is the first NAL in the access unit.
|
||||
*/
|
||||
|
@ -193,6 +195,7 @@ typedef struct encoder_state_config_frame_t {
|
|||
|
||||
cu_info_t* hmvp_lut; //!< \brief Look-up table for HMVP, one for each LCU row
|
||||
uint8_t* hmvp_size; //!< \brief HMVP LUT size
|
||||
bool jccr_sign;
|
||||
|
||||
} encoder_state_config_frame_t;
|
||||
|
||||
|
@ -320,6 +323,7 @@ typedef struct encoder_state_t {
|
|||
|
||||
bitstream_t stream;
|
||||
cabac_data_t cabac;
|
||||
cabac_data_t search_cabac;
|
||||
|
||||
uint32_t stats_bitstream_length; //Bitstream length written in bytes
|
||||
|
||||
|
@ -402,10 +406,10 @@ static INLINE bool encoder_state_must_write_vps(const encoder_state_t *state)
|
|||
*/
|
||||
static INLINE bool is_last_cu_in_qg(const encoder_state_t *state, int x, int y, int depth)
|
||||
{
|
||||
if (state->encoder_control->max_qp_delta_depth < 0) return false;
|
||||
if (state->frame->max_qp_delta_depth < 0) return false;
|
||||
|
||||
const int cu_width = LCU_WIDTH >> depth;
|
||||
const int qg_width = LCU_WIDTH >> state->encoder_control->max_qp_delta_depth;
|
||||
const int qg_width = LCU_WIDTH >> state->frame->max_qp_delta_depth;
|
||||
const int right = x + cu_width;
|
||||
const int bottom = y + cu_width;
|
||||
return (right % qg_width == 0 || right >= state->tile->frame->width) &&
|
||||
|
|
|
@ -40,7 +40,7 @@ static uint16_t to_q88(float f)
|
|||
return (uint16_t)(f * 256.0f + 0.5f);
|
||||
}
|
||||
|
||||
static uint64_t to_4xq88(const float f[4])
|
||||
static uint64_t to_4xq88(const double f[4])
|
||||
{
|
||||
int i;
|
||||
uint64_t result = 0;
|
||||
|
@ -58,9 +58,9 @@ int uvg_fast_coeff_table_parse(fast_coeff_table_t *fast_coeff_table, FILE *fast_
|
|||
uint64_t *wts_by_qp = fast_coeff_table->wts_by_qp;
|
||||
|
||||
for (i = 0; i < MAX_FAST_COEFF_COST_QP; i++) {
|
||||
float curr_wts[4];
|
||||
double curr_wts[4];
|
||||
|
||||
if (fscanf(fast_coeff_table_f, "%f %f %f %f\n", curr_wts + 0,
|
||||
if (fscanf(fast_coeff_table_f, "%lf %lf %lf %lf\n", curr_wts + 0,
|
||||
curr_wts + 1,
|
||||
curr_wts + 2,
|
||||
curr_wts + 3) != 4) {
|
||||
|
|
|
@ -45,7 +45,7 @@ typedef struct {
|
|||
|
||||
// Weights for 4 buckets (coeff 0, coeff 1, coeff 2, coeff >= 3), for QPs from
|
||||
// 0 to MAX_FAST_COEFF_COST_QP
|
||||
static const float default_fast_coeff_cost_wts[][4] = {
|
||||
static const double default_fast_coeff_cost_wts[][4] = {
|
||||
// Just extend it by stretching the first actual values..
|
||||
{0.164240f, 4.161530f, 3.509033f, 6.928047f},
|
||||
{0.164240f, 4.161530f, 3.509033f, 6.928047f},
|
||||
|
|
|
@ -339,7 +339,7 @@ static bool is_on_8x8_grid(int x, int y, edge_dir dir)
|
|||
|
||||
static int8_t get_qp_y_pred(const encoder_state_t* state, int x, int y, edge_dir dir)
|
||||
{
|
||||
if (state->encoder_control->max_qp_delta_depth < 0) {
|
||||
if (state->frame->max_qp_delta_depth < 0) {
|
||||
return state->qp;
|
||||
}
|
||||
|
||||
|
|
|
@ -106,6 +106,10 @@ uvg_picture * uvg_image_alloc(enum uvg_chroma_format chroma_format, const int32_
|
|||
|
||||
im->interlacing = UVG_INTERLACING_NONE;
|
||||
|
||||
im->roi.roi_array = NULL;
|
||||
im->roi.width = 0;
|
||||
im->roi.height = 0;
|
||||
|
||||
return im;
|
||||
}
|
||||
|
||||
|
@ -132,6 +136,7 @@ void uvg_image_free(uvg_picture *const im)
|
|||
uvg_image_free(im->base_image);
|
||||
} else {
|
||||
free(im->fulldata_buf);
|
||||
if (im->roi.roi_array) FREE_POINTER(im->roi.roi_array);
|
||||
}
|
||||
|
||||
// Make sure freed data won't be used.
|
||||
|
@ -192,6 +197,8 @@ uvg_picture *uvg_image_make_subimage(uvg_picture *const orig_image,
|
|||
im->pts = 0;
|
||||
im->dts = 0;
|
||||
|
||||
im->roi = orig_image->roi;
|
||||
|
||||
return im;
|
||||
}
|
||||
|
||||
|
|
14
src/inter.c
14
src/inter.c
|
@ -624,7 +624,9 @@ void uvg_inter_pred_pu(const encoder_state_t * const state,
|
|||
int i_pu)
|
||||
|
||||
{
|
||||
cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y));
|
||||
const int x_scu = SUB_SCU(x);
|
||||
const int y_scu = SUB_SCU(y);
|
||||
cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, x_scu, y_scu);
|
||||
const int pu_x = PU_GET_X(cu->part_size, width, x, i_pu);
|
||||
const int pu_y = PU_GET_Y(cu->part_size, width, y, i_pu);
|
||||
const int pu_w = PU_GET_W(cu->part_size, width, i_pu);
|
||||
|
@ -673,6 +675,12 @@ void uvg_inter_pred_pu(const encoder_state_t * const state,
|
|||
NULL,
|
||||
predict_luma, predict_chroma);
|
||||
}
|
||||
|
||||
if (predict_chroma && state->encoder_control->cfg.jccr) {
|
||||
const int offset = x_scu / 2 + y_scu / 2 * LCU_WIDTH_C;
|
||||
uvg_pixels_blit(lcu->rec.u + offset, lcu->rec.joint_u + offset, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C);
|
||||
uvg_pixels_blit(lcu->rec.v + offset, lcu->rec.joint_v + offset, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1290,7 +1298,7 @@ static void get_mv_cand_from_candidates(const encoder_state_t * const state,
|
|||
int32_t width,
|
||||
int32_t height,
|
||||
const merge_candidates_t *merge_cand,
|
||||
const cu_info_t *cur_cu,
|
||||
const cu_info_t * const cur_cu,
|
||||
int8_t reflist,
|
||||
mv_t mv_cand[2][2])
|
||||
{
|
||||
|
@ -1396,7 +1404,7 @@ void uvg_inter_get_mv_cand(const encoder_state_t * const state,
|
|||
int32_t width,
|
||||
int32_t height,
|
||||
mv_t mv_cand[2][2],
|
||||
cu_info_t* cur_cu,
|
||||
const cu_info_t * const cur_cu,
|
||||
lcu_t *lcu,
|
||||
int8_t reflist)
|
||||
{
|
||||
|
|
|
@ -96,7 +96,7 @@ void uvg_inter_get_mv_cand(const encoder_state_t * const state,
|
|||
int32_t width,
|
||||
int32_t height,
|
||||
mv_t mv_cand[2][2],
|
||||
cu_info_t* cur_cu,
|
||||
const cu_info_t* cur_cu,
|
||||
lcu_t *lcu,
|
||||
int8_t reflist);
|
||||
|
||||
|
|
189
src/intra.c
189
src/intra.c
|
@ -82,6 +82,17 @@ static const uint8_t num_ref_pixels_left[16][16] = {
|
|||
{ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }
|
||||
};
|
||||
|
||||
|
||||
static void mip_predict(
|
||||
const encoder_state_t* const state,
|
||||
const uvg_intra_references* const refs,
|
||||
const uint16_t pred_block_width,
|
||||
const uint16_t pred_block_height,
|
||||
uvg_pixel* dst,
|
||||
const int mip_mode,
|
||||
const bool mip_transp);
|
||||
|
||||
|
||||
int8_t uvg_intra_get_dir_luma_predictor(
|
||||
const uint32_t x,
|
||||
const uint32_t y,
|
||||
|
@ -452,7 +463,7 @@ static void get_cclm_parameters(
|
|||
}
|
||||
}
|
||||
|
||||
static void linear_transform_cclm(cclm_parameters_t* cclm_params, uvg_pixel * src, uvg_pixel * dst, int stride, int height) {
|
||||
static void linear_transform_cclm(const cclm_parameters_t* cclm_params, uvg_pixel * src, uvg_pixel * dst, int stride, int height) {
|
||||
int scale = cclm_params->a;
|
||||
int shift = cclm_params->shift;
|
||||
int offset = cclm_params->b;
|
||||
|
@ -468,7 +479,7 @@ static void linear_transform_cclm(cclm_parameters_t* cclm_params, uvg_pixel * sr
|
|||
}
|
||||
|
||||
|
||||
void uvg_predict_cclm(
|
||||
static void predict_cclm(
|
||||
encoder_state_t const* const state,
|
||||
const color_t color,
|
||||
const int8_t width,
|
||||
|
@ -477,7 +488,7 @@ void uvg_predict_cclm(
|
|||
const int16_t y0,
|
||||
const int16_t stride,
|
||||
const int8_t mode,
|
||||
lcu_t* const lcu,
|
||||
const lcu_t* const lcu,
|
||||
uvg_intra_references* chroma_ref,
|
||||
uvg_pixel* dst,
|
||||
cclm_parameters_t* cclm_params
|
||||
|
@ -498,6 +509,7 @@ void uvg_predict_cclm(
|
|||
|
||||
|
||||
uvg_pixel *y_rec = lcu->rec.y + x_scu + y_scu * LCU_WIDTH;
|
||||
const int stride2 = (((state->tile->frame->width + 7) & ~7) + FRAME_PADDING_LUMA);
|
||||
|
||||
// Essentially what this does is that it uses 6-tap filtering to downsample
|
||||
// the luma intra references down to match the resolution of the chroma channel.
|
||||
|
@ -508,12 +520,12 @@ void uvg_predict_cclm(
|
|||
if (y0) {
|
||||
for (; available_above_right < width / 2; available_above_right++) {
|
||||
int x_extension = x_scu + width * 2 + 4 * available_above_right;
|
||||
cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, x_extension, y_scu - 4);
|
||||
const cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, x_extension, y_scu - 4);
|
||||
if (x_extension >= LCU_WIDTH || pu->type == CU_NOTSET) break;
|
||||
}
|
||||
if(y_scu == 0) {
|
||||
if(!state->encoder_control->cfg.wpp) available_above_right = MIN(width / 2, (state->tile->frame->width - x0 - width * 2) / 4);
|
||||
memcpy(sampled_luma_ref.top, &state->tile->frame->cclm_luma_rec_top_line[x0 / 2 + (y0 / 64 - 1) * (stride / 2)], sizeof(uvg_pixel) * (width + available_above_right * 2));
|
||||
memcpy(sampled_luma_ref.top, &state->tile->frame->cclm_luma_rec_top_line[x0 / 2 + (y0 / 64 - 1) * (stride2 / 2)], sizeof(uvg_pixel) * (width + available_above_right * 2));
|
||||
}
|
||||
else {
|
||||
for (int x = 0; x < width * (available_above_right ? 4 : 2); x += 2) {
|
||||
|
@ -533,16 +545,16 @@ void uvg_predict_cclm(
|
|||
if(x0) {
|
||||
for (; available_left_below < height / 2; available_left_below++) {
|
||||
int y_extension = y_scu + height * 2 + 4 * available_left_below;
|
||||
cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, x_scu - 4, y_extension);
|
||||
const cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, x_scu - 4, y_extension);
|
||||
if (y_extension >= LCU_WIDTH || pu->type == CU_NOTSET) break;
|
||||
if(x_scu == 32 && y_scu == 0 && pu->depth == 0) break;
|
||||
}
|
||||
for(int i = 0; i < height + available_left_below * 2; i++) {
|
||||
sampled_luma_ref.left[i] = state->tile->frame->cclm_luma_rec[(y0/2 + i) * (stride/2) + x0 / 2 - 1];
|
||||
sampled_luma_ref.left[i] = state->tile->frame->cclm_luma_rec[(y0/2 + i) * (stride2/2) + x0 / 2 - 1];
|
||||
}
|
||||
}
|
||||
|
||||
uvg_pixels_blit(&state->tile->frame->cclm_luma_rec[x0 / 2 + (y0 * stride) / 4], sampled_luma, width, height, stride / 2, width);
|
||||
uvg_pixels_blit(&state->tile->frame->cclm_luma_rec[x0 / 2 + (y0 * stride2) / 4], sampled_luma, width, height, stride2 / 2, width);
|
||||
|
||||
int16_t a, b, shift;
|
||||
get_cclm_parameters(state, width, height, mode,x0, y0, available_above_right, available_left_below, &sampled_luma_ref, chroma_ref, &a, &b, &shift);
|
||||
|
@ -727,12 +739,17 @@ void uvg_mip_pred_upsampling_1D(int* const dst, const int* const src, const int*
|
|||
}
|
||||
|
||||
|
||||
|
||||
/** \brief Matrix weighted intra prediction.
|
||||
*/
|
||||
void uvg_mip_predict(encoder_state_t const* const state, uvg_intra_references* const refs,
|
||||
const uint16_t pred_block_width, const uint16_t pred_block_height,
|
||||
uvg_pixel* dst,
|
||||
const int mip_mode, const bool mip_transp)
|
||||
static void mip_predict(
|
||||
const encoder_state_t* const state,
|
||||
const uvg_intra_references* const refs,
|
||||
const uint16_t pred_block_width,
|
||||
const uint16_t pred_block_height,
|
||||
uvg_pixel* dst,
|
||||
const int mip_mode,
|
||||
const bool mip_transp)
|
||||
{
|
||||
// MIP prediction uses int values instead of uvg_pixel as some temp values may be negative
|
||||
|
||||
|
@ -875,14 +892,13 @@ void uvg_mip_predict(encoder_state_t const* const state, uvg_intra_references* c
|
|||
}
|
||||
|
||||
|
||||
void uvg_intra_predict(
|
||||
encoder_state_t *const state,
|
||||
static void intra_predict_regular(
|
||||
const encoder_state_t* const state,
|
||||
uvg_intra_references *refs,
|
||||
int_fast8_t log2_width,
|
||||
int_fast8_t mode,
|
||||
color_t color,
|
||||
uvg_pixel *dst,
|
||||
bool filter_boundary,
|
||||
const uint8_t multi_ref_idx)
|
||||
{
|
||||
const int_fast8_t width = 1 << log2_width;
|
||||
|
@ -1350,18 +1366,66 @@ void uvg_intra_build_reference(
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
void uvg_intra_predict(
|
||||
const encoder_state_t* const state,
|
||||
uvg_intra_references* const refs,
|
||||
const cu_loc_t* const cu_loc,
|
||||
const color_t color,
|
||||
uvg_pixel* dst,
|
||||
const intra_search_data_t* data,
|
||||
const lcu_t* lcu
|
||||
)
|
||||
{
|
||||
const int stride = (((state->tile->frame->width + 7) & ~7) + FRAME_PADDING_LUMA);
|
||||
// TODO: what is this used for?
|
||||
// const bool filter_boundary = color == COLOR_Y && !(cfg->lossless && cfg->implicit_rdpcm);
|
||||
bool use_mip = false;
|
||||
const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
|
||||
const int height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
|
||||
const int x = cu_loc->x;
|
||||
const int y = cu_loc->y;
|
||||
int8_t intra_mode = color == COLOR_Y ? data->pred_cu.intra.mode : data->pred_cu.intra.mode_chroma;
|
||||
if (data->pred_cu.intra.mip_flag) {
|
||||
if (color == COLOR_Y) {
|
||||
use_mip = true;
|
||||
}
|
||||
else {
|
||||
use_mip = state->encoder_control->chroma_format == UVG_CSP_444;
|
||||
intra_mode = use_mip ? intra_mode : 0;
|
||||
}
|
||||
}
|
||||
if (intra_mode < 68) {
|
||||
if (use_mip) {
|
||||
assert(intra_mode >= 0 && intra_mode < 16 && "MIP mode must be between [0, 15]");
|
||||
mip_predict(state, refs, width, height, dst, intra_mode, data->pred_cu.intra.mip_is_transposed);
|
||||
}
|
||||
else {
|
||||
intra_predict_regular(state, refs, uvg_g_convert_to_bit[width] + 2, intra_mode, color, dst, data->pred_cu.intra.multi_ref_idx);
|
||||
}
|
||||
}
|
||||
else {
|
||||
uvg_pixels_blit(&state->tile->frame->cclm_luma_rec[x / 2 + (y * stride) / 4], dst, width, width, stride / 2, width);
|
||||
if (data->pred_cu.depth != data->pred_cu.tr_depth || data->cclm_parameters[color == COLOR_U ? 0 : 1].b <= 0) {
|
||||
predict_cclm(
|
||||
state, color, width, width, x, y, stride, intra_mode, lcu, refs, dst,
|
||||
(cclm_parameters_t*)&data->cclm_parameters[color == COLOR_U ? 0 : 1]);
|
||||
}
|
||||
else {
|
||||
linear_transform_cclm(&data->cclm_parameters[color == COLOR_U ? 0 : 1], dst, dst, width, width);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void intra_recon_tb_leaf(
|
||||
encoder_state_t *const state,
|
||||
encoder_state_t* const state,
|
||||
int x,
|
||||
int y,
|
||||
int depth,
|
||||
int8_t intra_mode,
|
||||
cclm_parameters_t *cclm_params,
|
||||
lcu_t *lcu,
|
||||
color_t color,
|
||||
uint8_t multi_ref_idx,
|
||||
bool mip_flag,
|
||||
bool mip_transp)
|
||||
const intra_search_data_t* search_data)
|
||||
{
|
||||
const uvg_config *cfg = &state->encoder_control->cfg;
|
||||
const int shift = color == COLOR_Y ? 0 : 1;
|
||||
|
@ -1383,7 +1447,7 @@ static void intra_recon_tb_leaf(
|
|||
int x_scu = SUB_SCU(x);
|
||||
int y_scu = SUB_SCU(y);
|
||||
const vector2d_t lcu_px = {x_scu >> shift, y_scu >> shift };
|
||||
uint8_t multi_ref_index = color == COLOR_Y ? multi_ref_idx : 0;
|
||||
uint8_t multi_ref_index = color == COLOR_Y ? search_data->pred_cu.intra.multi_ref_idx: 0;
|
||||
|
||||
uvg_intra_references refs;
|
||||
// Extra reference lines for use with MRL. Extra lines needed only for left edge.
|
||||
|
@ -1406,42 +1470,14 @@ static void intra_recon_tb_leaf(
|
|||
uvg_intra_build_reference(log2width, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp, extra_refs, multi_ref_index);
|
||||
|
||||
uvg_pixel pred[32 * 32];
|
||||
int stride = state->tile->frame->source->stride;
|
||||
const bool filter_boundary = color == COLOR_Y && !(cfg->lossless && cfg->implicit_rdpcm);
|
||||
bool use_mip = false;
|
||||
if (mip_flag) {
|
||||
if (color == COLOR_Y) {
|
||||
use_mip = true;
|
||||
} else {
|
||||
// MIP can be used for chroma if the chroma scheme is 444
|
||||
if (state->encoder_control->chroma_format == UVG_CSP_444) {
|
||||
use_mip = true;
|
||||
} else {
|
||||
// If MIP cannot be used for chroma, set mode to planar
|
||||
intra_mode = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(intra_mode < 68) {
|
||||
if (use_mip) {
|
||||
assert(intra_mode >= 0 && intra_mode < 16 && "MIP mode must be between [0, 15]");
|
||||
uvg_mip_predict(state, &refs, width, height, pred, intra_mode, mip_transp);
|
||||
}
|
||||
else {
|
||||
uvg_intra_predict(state, &refs, log2width, intra_mode, color, pred, filter_boundary, multi_ref_index);
|
||||
}
|
||||
} else {
|
||||
uvg_pixels_blit(&state->tile->frame->cclm_luma_rec[x / 2 + (y * stride) / 4], pred, width, width, stride / 2, width);
|
||||
if(cclm_params == NULL) {
|
||||
cclm_parameters_t temp_params;
|
||||
uvg_predict_cclm(
|
||||
state, color, width, width, x, y, stride, intra_mode, lcu, &refs, pred, &temp_params);
|
||||
}
|
||||
else {
|
||||
linear_transform_cclm(&cclm_params[color == COLOR_U ? 0 : 1], pred, pred, width, width);
|
||||
}
|
||||
}
|
||||
cu_loc_t loc = {
|
||||
x, y,
|
||||
width, height,
|
||||
width, height,
|
||||
};
|
||||
|
||||
uvg_intra_predict(state, &refs, &loc, color, pred, search_data, lcu);
|
||||
|
||||
const int index = lcu_px.x + lcu_px.y * lcu_width;
|
||||
uvg_pixel *block = NULL;
|
||||
|
@ -1483,17 +1519,12 @@ static void intra_recon_tb_leaf(
|
|||
* \param lcu containing LCU
|
||||
*/
|
||||
void uvg_intra_recon_cu(
|
||||
encoder_state_t *const state,
|
||||
encoder_state_t* const state,
|
||||
int x,
|
||||
int y,
|
||||
int depth,
|
||||
int8_t mode_luma,
|
||||
int8_t mode_chroma,
|
||||
intra_search_data_t* search_data,
|
||||
cu_info_t *cur_cu,
|
||||
cclm_parameters_t *cclm_params,
|
||||
uint8_t multi_ref_idx,
|
||||
bool mip_flag,
|
||||
bool mip_transp,
|
||||
lcu_t *lcu)
|
||||
{
|
||||
const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) };
|
||||
|
@ -1501,12 +1532,16 @@ void uvg_intra_recon_cu(
|
|||
if (cur_cu == NULL) {
|
||||
cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
|
||||
}
|
||||
uint8_t multi_ref_index = multi_ref_idx;
|
||||
bool use_mip = mip_flag;
|
||||
bool mip_transposed = mip_transp;
|
||||
const int8_t mode_luma = search_data->pred_cu.intra.mode;
|
||||
const int8_t mode_chroma= search_data->pred_cu.intra.mode_chroma;
|
||||
|
||||
if(mode_chroma != -1 && mode_luma == -1) {
|
||||
x &= ~7;
|
||||
y &= ~7;
|
||||
}
|
||||
|
||||
if (mode_luma != -1 && mode_chroma != -1) {
|
||||
if (use_mip) {
|
||||
if (search_data->pred_cu.intra.mip_flag) {
|
||||
assert(mode_luma == mode_chroma && "Chroma mode must be derived from luma mode if block uses MIP.");
|
||||
}
|
||||
}
|
||||
|
@ -1527,10 +1562,10 @@ void uvg_intra_recon_cu(
|
|||
const int32_t x2 = x + offset;
|
||||
const int32_t y2 = y + offset;
|
||||
|
||||
uvg_intra_recon_cu(state, x, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, use_mip, mip_transposed, lcu);
|
||||
uvg_intra_recon_cu(state, x2, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, use_mip, mip_transposed, lcu);
|
||||
uvg_intra_recon_cu(state, x, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, use_mip, mip_transposed, lcu);
|
||||
uvg_intra_recon_cu(state, x2, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, use_mip, mip_transposed, lcu);
|
||||
uvg_intra_recon_cu(state, x, y, depth + 1, search_data, NULL, lcu);
|
||||
uvg_intra_recon_cu(state, x2, y, depth + 1, search_data, NULL, lcu);
|
||||
uvg_intra_recon_cu(state, x, y2, depth + 1, search_data, NULL, lcu);
|
||||
uvg_intra_recon_cu(state, x2, y2, depth + 1, search_data, NULL, lcu);
|
||||
|
||||
// Propagate coded block flags from child CUs to parent CU.
|
||||
uint16_t child_cbfs[3] = {
|
||||
|
@ -1552,13 +1587,15 @@ void uvg_intra_recon_cu(
|
|||
|
||||
// Process a leaf TU.
|
||||
if (has_luma) {
|
||||
intra_recon_tb_leaf(state, x, y, depth, mode_luma, cclm_params, lcu, COLOR_Y, multi_ref_index, use_mip, mip_transposed);
|
||||
intra_recon_tb_leaf(state, x, y, depth, lcu, COLOR_Y, search_data);
|
||||
}
|
||||
if (has_chroma) {
|
||||
intra_recon_tb_leaf(state, x, y, depth, mode_chroma, cclm_params, lcu, COLOR_U, 0, use_mip, mip_transposed);
|
||||
intra_recon_tb_leaf(state, x, y, depth, mode_chroma, cclm_params, lcu, COLOR_V, 0, use_mip, mip_transposed);
|
||||
intra_recon_tb_leaf(state, x, y, depth, lcu, COLOR_U, search_data);
|
||||
intra_recon_tb_leaf(state, x, y, depth, lcu, COLOR_V, search_data);
|
||||
}
|
||||
|
||||
uvg_quantize_lcu_residual(state, has_luma, has_chroma, x, y, depth, cur_cu, lcu, false);
|
||||
uvg_quantize_lcu_residual(state, has_luma, has_chroma && !(search_data->pred_cu.joint_cb_cr & 3),
|
||||
search_data->pred_cu.joint_cb_cr != 4 && state->encoder_control->cfg.jccr && (x % 8 == 0 && y % 8 == 0),
|
||||
x, y, depth, cur_cu, lcu, false);
|
||||
}
|
||||
}
|
||||
|
|
63
src/intra.h
63
src/intra.h
|
@ -63,6 +63,18 @@ typedef struct
|
|||
int16_t b;
|
||||
} cclm_parameters_t;
|
||||
|
||||
typedef struct {
|
||||
cu_info_t pred_cu;
|
||||
cclm_parameters_t cclm_parameters[2];
|
||||
double cost;
|
||||
double bits;
|
||||
double coeff_bits;
|
||||
double distortion;
|
||||
} intra_search_data_t ;
|
||||
|
||||
|
||||
#define UVG_NUM_INTRA_MODES 67
|
||||
|
||||
/**
|
||||
* \brief Function for deriving intra luma predictions
|
||||
* \param x x-coordinate of the PU in pixels
|
||||
|
@ -114,53 +126,22 @@ void uvg_intra_build_reference(
|
|||
* \param filter_boundary Whether to filter the boundary on modes 10 and 26.
|
||||
*/
|
||||
void uvg_intra_predict(
|
||||
encoder_state_t *const state,
|
||||
uvg_intra_references *refs,
|
||||
int_fast8_t log2_width,
|
||||
int_fast8_t mode,
|
||||
color_t color,
|
||||
uvg_pixel *dst,
|
||||
bool filter_boundary,
|
||||
const uint8_t multi_ref_idx);
|
||||
const encoder_state_t* const state,
|
||||
uvg_intra_references* const refs,
|
||||
const cu_loc_t* const cu_loc,
|
||||
const color_t color,
|
||||
uvg_pixel* dst,
|
||||
const intra_search_data_t* data,
|
||||
const lcu_t* lcu
|
||||
);
|
||||
|
||||
void uvg_intra_recon_cu(
|
||||
encoder_state_t *const state,
|
||||
encoder_state_t* const state,
|
||||
int x,
|
||||
int y,
|
||||
int depth,
|
||||
int8_t mode_luma,
|
||||
int8_t mode_chroma,
|
||||
intra_search_data_t* search_data,
|
||||
cu_info_t *cur_cu,
|
||||
cclm_parameters_t* cclm_params,
|
||||
uint8_t multi_ref_idx,
|
||||
bool mip_flag,
|
||||
bool mip_transp,
|
||||
lcu_t *lcu);
|
||||
|
||||
|
||||
void uvg_predict_cclm(
|
||||
encoder_state_t const* const state,
|
||||
const color_t color,
|
||||
const int8_t width,
|
||||
const int8_t height,
|
||||
const int16_t x0,
|
||||
const int16_t y0,
|
||||
const int16_t stride,
|
||||
const int8_t mode,
|
||||
lcu_t* const lcu,
|
||||
uvg_intra_references* chroma_ref,
|
||||
uvg_pixel* dst,
|
||||
cclm_parameters_t* cclm_params
|
||||
);
|
||||
|
||||
int uvg_get_mip_flag_context(int x, int y, int width, int height, const lcu_t* lcu, cu_array_t* const cu_a);
|
||||
|
||||
void uvg_mip_predict(
|
||||
encoder_state_t const * const state,
|
||||
uvg_intra_references * refs,
|
||||
const uint16_t width,
|
||||
const uint16_t height,
|
||||
uvg_pixel* dst,
|
||||
const int mip_mode,
|
||||
const bool mip_transp
|
||||
);
|
|
@ -1088,17 +1088,20 @@ void uvg_set_lcu_lambda_and_qp(encoder_state_t * const state,
|
|||
const encoder_control_t * const ctrl = state->encoder_control;
|
||||
lcu_stats_t *lcu = uvg_get_lcu_stats(state, pos.x, pos.y);
|
||||
|
||||
if (ctrl->cfg.roi.dqps != NULL) {
|
||||
vector2d_t lcu = {
|
||||
if (state->tile->frame->source->roi.roi_array) {
|
||||
vector2d_t lcu_vec = {
|
||||
pos.x + state->tile->lcu_offset_x,
|
||||
pos.y + state->tile->lcu_offset_y
|
||||
};
|
||||
vector2d_t roi = {
|
||||
lcu.x * ctrl->cfg.roi.width / ctrl->in.width_in_lcu,
|
||||
lcu.y * ctrl->cfg.roi.height / ctrl->in.height_in_lcu
|
||||
lcu_vec.x * state->tile->frame->source->roi.width / ctrl->in.width_in_lcu,
|
||||
lcu_vec.y * state->tile->frame->source->roi.height / ctrl->in.height_in_lcu
|
||||
};
|
||||
int roi_index = roi.x + roi.y * ctrl->cfg.roi.width;
|
||||
int dqp = ctrl->cfg.roi.dqps[roi_index];
|
||||
int roi_index = roi.x + roi.y * state->tile->frame->source->roi.width;
|
||||
int dqp = state->tile->frame->source->roi.roi_array[roi_index];
|
||||
if(dqp != 0) {
|
||||
pos.x = 0;
|
||||
}
|
||||
state->qp = CLIP_TO_QP(state->frame->QP + dqp);
|
||||
state->lambda = qp_to_lambda(state, state->qp);
|
||||
state->lambda_sqrt = sqrt(state->lambda);
|
||||
|
|
76
src/rdo.c
76
src/rdo.c
|
@ -315,12 +315,12 @@ static INLINE uint32_t get_coeff_cabac_cost(
|
|||
// Take a copy of the CABAC so that we don't overwrite the contexts when
|
||||
// counting the bits.
|
||||
cabac_data_t cabac_copy;
|
||||
memcpy(&cabac_copy, &state->cabac, sizeof(cabac_copy));
|
||||
memcpy(&cabac_copy, &state->search_cabac, sizeof(cabac_copy));
|
||||
|
||||
// Clear bytes and bits and set mode to "count"
|
||||
cabac_copy.only_count = 1;
|
||||
cabac_copy.num_buffered_bytes = 0;
|
||||
cabac_copy.bits_left = 23;
|
||||
int num_buffered_bytes = cabac_copy.num_buffered_bytes;
|
||||
int bits_left = cabac_copy.bits_left;
|
||||
|
||||
// Execute the coding function.
|
||||
// It is safe to drop the const modifier since state won't be modified
|
||||
|
@ -343,8 +343,10 @@ static INLINE uint32_t get_coeff_cabac_cost(
|
|||
type,
|
||||
scan_mode);
|
||||
}
|
||||
|
||||
return (23 - cabac_copy.bits_left) + (cabac_copy.num_buffered_bytes << 3);
|
||||
if(cabac_copy.update) {
|
||||
memcpy((cabac_data_t *)&state->search_cabac, &cabac_copy, sizeof(cabac_copy));
|
||||
}
|
||||
return (bits_left - cabac_copy.bits_left) + ((cabac_copy.num_buffered_bytes - num_buffered_bytes) << 3);
|
||||
}
|
||||
|
||||
static INLINE void save_ccc(int qp, const coeff_t *coeff, int32_t size, uint32_t ccc)
|
||||
|
@ -1741,37 +1743,33 @@ void uvg_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
|
|||
/**
|
||||
* Calculate cost of actual motion vectors using CABAC coding
|
||||
*/
|
||||
uint32_t uvg_get_mvd_coding_cost_cabac(const encoder_state_t *state,
|
||||
const cabac_data_t* cabac,
|
||||
const int32_t mvd_hor,
|
||||
const int32_t mvd_ver)
|
||||
double uvg_get_mvd_coding_cost_cabac(const encoder_state_t* state,
|
||||
const cabac_data_t* cabac,
|
||||
const int32_t mvd_hor,
|
||||
const int32_t mvd_ver)
|
||||
{
|
||||
cabac_data_t cabac_copy = *cabac;
|
||||
cabac_copy.only_count = 1;
|
||||
|
||||
double bits = 0;
|
||||
// It is safe to drop const here because cabac->only_count is set.
|
||||
uvg_encode_mvd((encoder_state_t*) state, &cabac_copy, mvd_hor, mvd_ver);
|
||||
uvg_encode_mvd((encoder_state_t*) state, &cabac_copy, mvd_hor, mvd_ver, &bits);
|
||||
|
||||
uint32_t bitcost =
|
||||
((23 - cabac_copy.bits_left) + (cabac_copy.num_buffered_bytes << 3)) -
|
||||
((23 - cabac->bits_left) + (cabac->num_buffered_bytes << 3));
|
||||
|
||||
return bitcost;
|
||||
return bits;
|
||||
}
|
||||
|
||||
/** MVD cost calculation with CABAC
|
||||
* \returns double
|
||||
* Calculates Motion Vector cost and related costs using CABAC coding
|
||||
*/
|
||||
uint32_t uvg_calc_mvd_cost_cabac(const encoder_state_t * state,
|
||||
int x,
|
||||
int y,
|
||||
int mv_shift,
|
||||
mv_t mv_cand[2][2],
|
||||
inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
|
||||
int16_t num_cand,
|
||||
int32_t ref_idx,
|
||||
uint32_t *bitcost)
|
||||
double uvg_calc_mvd_cost_cabac(const encoder_state_t * state,
|
||||
int x,
|
||||
int y,
|
||||
int mv_shift,
|
||||
mv_t mv_cand[2][2],
|
||||
inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
|
||||
int16_t num_cand,
|
||||
int32_t ref_idx,
|
||||
double* bitcost)
|
||||
{
|
||||
cabac_data_t state_cabac_copy;
|
||||
cabac_data_t* cabac;
|
||||
|
@ -1798,14 +1796,13 @@ uint32_t uvg_calc_mvd_cost_cabac(const encoder_state_t * state,
|
|||
}
|
||||
|
||||
// Store cabac state and contexts
|
||||
memcpy(&state_cabac_copy, &state->cabac, sizeof(cabac_data_t));
|
||||
memcpy(&state_cabac_copy, &state->search_cabac, sizeof(cabac_data_t));
|
||||
|
||||
// Clear bytes and bits and set mode to "count"
|
||||
state_cabac_copy.only_count = 1;
|
||||
state_cabac_copy.num_buffered_bytes = 0;
|
||||
state_cabac_copy.bits_left = 23;
|
||||
|
||||
cabac = &state_cabac_copy;
|
||||
double bits = 0;
|
||||
|
||||
if (!merged) {
|
||||
vector2d_t mvd1 = {
|
||||
|
@ -1820,8 +1817,8 @@ uint32_t uvg_calc_mvd_cost_cabac(const encoder_state_t * state,
|
|||
uvg_change_precision_vector2d(INTERNAL_MV_PREC, 2, &mvd1);
|
||||
uvg_change_precision_vector2d(INTERNAL_MV_PREC, 2, &mvd2);
|
||||
|
||||
uint32_t cand1_cost = uvg_get_mvd_coding_cost_cabac(state, cabac, mvd1.x, mvd1.y);
|
||||
uint32_t cand2_cost = uvg_get_mvd_coding_cost_cabac(state, cabac, mvd2.x, mvd2.y);
|
||||
double cand1_cost = uvg_get_mvd_coding_cost_cabac(state, cabac, mvd1.x, mvd1.y);
|
||||
double cand2_cost = uvg_get_mvd_coding_cost_cabac(state, cabac, mvd2.x, mvd2.y);
|
||||
|
||||
// Select candidate 1 if it has lower cost
|
||||
if (cand2_cost < cand1_cost) {
|
||||
|
@ -1834,7 +1831,7 @@ uint32_t uvg_calc_mvd_cost_cabac(const encoder_state_t * state,
|
|||
|
||||
cabac->cur_ctx = &(cabac->ctx.cu_merge_flag_ext_model);
|
||||
|
||||
CABAC_BIN(cabac, merged, "MergeFlag");
|
||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_merge_flag_ext_model), merged, bits, "MergeFlag");
|
||||
num_cand = state->encoder_control->cfg.max_merge;
|
||||
if (merged) {
|
||||
if (num_cand > 1) {
|
||||
|
@ -1842,10 +1839,10 @@ uint32_t uvg_calc_mvd_cost_cabac(const encoder_state_t * state,
|
|||
for (ui = 0; ui < num_cand - 1; ui++) {
|
||||
int32_t symbol = (ui != merge_idx);
|
||||
if (ui == 0) {
|
||||
cabac->cur_ctx = &(cabac->ctx.cu_merge_idx_ext_model);
|
||||
CABAC_BIN(cabac, symbol, "MergeIndex");
|
||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_merge_idx_ext_model), symbol, bits, "MergeIndex");
|
||||
} else {
|
||||
CABAC_BIN_EP(cabac, symbol, "MergeIndex");
|
||||
bits += 1;
|
||||
}
|
||||
if (symbol == 0) break;
|
||||
}
|
||||
|
@ -1869,23 +1866,22 @@ uint32_t uvg_calc_mvd_cost_cabac(const encoder_state_t * state,
|
|||
// parseRefFrmIdx
|
||||
int32_t ref_frame = ref_idx;
|
||||
|
||||
cabac->cur_ctx = &(cabac->ctx.cu_ref_pic_model[0]);
|
||||
CABAC_BIN(cabac, (ref_frame != 0), "ref_idx_lX");
|
||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_ref_pic_model[0]), (ref_frame != 0), bits, "ref_idx_lX");
|
||||
|
||||
if (ref_frame > 0) {
|
||||
int32_t i;
|
||||
int32_t ref_num = ref_list[ref_list_idx] - 2;
|
||||
|
||||
cabac->cur_ctx = &(cabac->ctx.cu_ref_pic_model[1]);
|
||||
ref_frame--;
|
||||
|
||||
for (i = 0; i < ref_num; ++i) {
|
||||
const uint32_t symbol = (i == ref_frame) ? 0 : 1;
|
||||
|
||||
if (i == 0) {
|
||||
CABAC_BIN(cabac, symbol, "ref_idx_lX");
|
||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_ref_pic_model[1]), symbol, bits, "ref_idx_lX");
|
||||
} else {
|
||||
CABAC_BIN_EP(cabac, symbol, "ref_idx_lX");
|
||||
bits += 1;
|
||||
}
|
||||
if (symbol == 0) break;
|
||||
}
|
||||
|
@ -1895,7 +1891,7 @@ uint32_t uvg_calc_mvd_cost_cabac(const encoder_state_t * state,
|
|||
// ToDo: Bidir vector support
|
||||
if (!(state->frame->ref_list == REF_PIC_LIST_1 && /*cur_cu->inter.mv_dir == 3*/ 0)) {
|
||||
// It is safe to drop const here because cabac->only_count is set.
|
||||
uvg_encode_mvd((encoder_state_t*) state, cabac, mvd.x, mvd.y);
|
||||
uvg_encode_mvd((encoder_state_t*) state, cabac, mvd.x, mvd.y, &bits);
|
||||
}
|
||||
|
||||
// Signal which candidate MV to use
|
||||
|
@ -1905,10 +1901,10 @@ uint32_t uvg_calc_mvd_cost_cabac(const encoder_state_t * state,
|
|||
}
|
||||
}
|
||||
|
||||
*bitcost = (23 - state_cabac_copy.bits_left) + (state_cabac_copy.num_buffered_bytes << 3);
|
||||
*bitcost = bits;
|
||||
|
||||
// Store bitcost before restoring cabac
|
||||
return *bitcost * (uint32_t)(state->lambda_sqrt + 0.5);
|
||||
return *bitcost * state->lambda_sqrt;
|
||||
}
|
||||
|
||||
void uvg_close_rdcost_outfiles(void)
|
||||
|
|
11
src/rdo.h
11
src/rdo.h
|
@ -77,10 +77,10 @@ uint32_t uvg_get_coded_level(encoder_state_t * state, double* coded_cost, double
|
|||
|
||||
uvg_mvd_cost_func uvg_calc_mvd_cost_cabac;
|
||||
|
||||
uint32_t uvg_get_mvd_coding_cost_cabac(const encoder_state_t *state,
|
||||
const cabac_data_t* cabac,
|
||||
int32_t mvd_hor,
|
||||
int32_t mvd_ver);
|
||||
double uvg_get_mvd_coding_cost_cabac(const encoder_state_t* state,
|
||||
const cabac_data_t* cabac,
|
||||
int32_t mvd_hor,
|
||||
int32_t mvd_ver);
|
||||
|
||||
// Number of fixed point fractional bits used in the fractional bit table.
|
||||
#define CTX_FRAC_BITS 15
|
||||
|
@ -90,8 +90,5 @@ uint32_t uvg_get_mvd_coding_cost_cabac(const encoder_state_t *state,
|
|||
extern const uint32_t uvg_entropy_bits[512];
|
||||
#define CTX_ENTROPY_BITS(ctx, val) uvg_entropy_bits[(CTX_STATE(ctx)<<1) ^ (val)]
|
||||
|
||||
// Floating point fractional bits, derived from uvg_entropy_bits
|
||||
extern const float uvg_f_entropy_bits[512];
|
||||
#define CTX_ENTROPY_FBITS(ctx, val) uvg_f_entropy_bits[(CTX_STATE(ctx)<<1) ^ (val)]
|
||||
|
||||
#endif
|
||||
|
|
59
src/sao.c
59
src/sao.c
|
@ -49,63 +49,64 @@ static void init_sao_info(sao_info_t *sao) {
|
|||
}
|
||||
|
||||
|
||||
static float sao_mode_bits_none(const encoder_state_t * const state, sao_info_t *sao_top, sao_info_t *sao_left)
|
||||
static double sao_mode_bits_none(const encoder_state_t * const state, sao_info_t *sao_top, sao_info_t *sao_left)
|
||||
{
|
||||
float mode_bits = 0.0;
|
||||
const cabac_data_t * const cabac = &state->cabac;
|
||||
const cabac_ctx_t *ctx = NULL;
|
||||
double mode_bits = 0.0;
|
||||
cabac_data_t * cabac = (cabac_data_t*)&state->search_cabac;
|
||||
cabac_ctx_t *ctx = NULL;
|
||||
// FL coded merges.
|
||||
if (sao_left != NULL) {
|
||||
ctx = &(cabac->ctx.sao_merge_flag_model);
|
||||
mode_bits += CTX_ENTROPY_FBITS(ctx, 0);
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, 0, mode_bits, "sao_merge_flag");
|
||||
}
|
||||
if (sao_top != NULL) {
|
||||
ctx = &(cabac->ctx.sao_merge_flag_model);
|
||||
mode_bits += CTX_ENTROPY_FBITS(ctx, 0);
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, 0, mode_bits, "sao_merge_flag");
|
||||
}
|
||||
|
||||
// TR coded type_idx_, none = 0
|
||||
ctx = &(cabac->ctx.sao_type_idx_model);
|
||||
mode_bits += CTX_ENTROPY_FBITS(ctx, 0);
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, 0, mode_bits, "sao_type");
|
||||
|
||||
return mode_bits;
|
||||
}
|
||||
|
||||
static float sao_mode_bits_merge(const encoder_state_t * const state,
|
||||
static double sao_mode_bits_merge(const encoder_state_t * const state,
|
||||
int8_t merge_cand) {
|
||||
float mode_bits = 0.0;
|
||||
const cabac_data_t * const cabac = &state->cabac;
|
||||
const cabac_ctx_t *ctx = NULL;
|
||||
double mode_bits = 0.0;
|
||||
cabac_data_t * cabac = (cabac_data_t*)&state->search_cabac;
|
||||
cabac_ctx_t *ctx = NULL;
|
||||
// FL coded merges.
|
||||
ctx = &(cabac->ctx.sao_merge_flag_model);
|
||||
|
||||
mode_bits += CTX_ENTROPY_FBITS(ctx, merge_cand == 1);
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, merge_cand == 1, mode_bits, "sao_merge_flag");
|
||||
if (merge_cand == 1) return mode_bits;
|
||||
mode_bits += CTX_ENTROPY_FBITS(ctx, merge_cand == 2);
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, merge_cand == 2, mode_bits, "sao_merge_flag");
|
||||
return mode_bits;
|
||||
}
|
||||
|
||||
|
||||
static float sao_mode_bits_edge(const encoder_state_t * const state,
|
||||
static double sao_mode_bits_edge(const encoder_state_t * const state,
|
||||
int edge_class, int offsets[NUM_SAO_EDGE_CATEGORIES],
|
||||
sao_info_t *sao_top, sao_info_t *sao_left, unsigned buf_cnt)
|
||||
{
|
||||
float mode_bits = 0.0;
|
||||
const cabac_data_t * const cabac = &state->cabac;
|
||||
const cabac_ctx_t *ctx = NULL;
|
||||
double mode_bits = 0.0;
|
||||
cabac_data_t * cabac = (cabac_data_t*)&state->search_cabac;
|
||||
cabac_ctx_t *ctx = NULL;
|
||||
// FL coded merges.
|
||||
if (sao_left != NULL) {
|
||||
ctx = &(cabac->ctx.sao_merge_flag_model);
|
||||
mode_bits += CTX_ENTROPY_FBITS(ctx, 0);
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, 0, mode_bits, "sao_merge_flag");
|
||||
}
|
||||
if (sao_top != NULL) {
|
||||
ctx = &(cabac->ctx.sao_merge_flag_model);
|
||||
mode_bits += CTX_ENTROPY_FBITS(ctx, 0);
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, 0, mode_bits, "sao_merge_flag");
|
||||
}
|
||||
|
||||
// TR coded type_idx_, edge = 2 = cMax
|
||||
ctx = &(cabac->ctx.sao_type_idx_model);
|
||||
mode_bits += CTX_ENTROPY_FBITS(ctx, 1) + 1.0;
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, 1, mode_bits, "sao_type");
|
||||
mode_bits += 1.0;
|
||||
|
||||
// TR coded offsets.
|
||||
for (unsigned buf_index = 0; buf_index < buf_cnt; buf_index++) {
|
||||
|
@ -126,26 +127,27 @@ static float sao_mode_bits_edge(const encoder_state_t * const state,
|
|||
}
|
||||
|
||||
|
||||
static float sao_mode_bits_band(const encoder_state_t * const state,
|
||||
static double sao_mode_bits_band(const encoder_state_t * const state,
|
||||
int band_position[2], int offsets[10],
|
||||
sao_info_t *sao_top, sao_info_t *sao_left, unsigned buf_cnt)
|
||||
{
|
||||
float mode_bits = 0.0;
|
||||
const cabac_data_t * const cabac = &state->cabac;
|
||||
const cabac_ctx_t *ctx = NULL;
|
||||
double mode_bits = 0.0;
|
||||
cabac_data_t * cabac = (cabac_data_t*)&state->search_cabac;
|
||||
cabac_ctx_t *ctx = NULL;
|
||||
// FL coded merges.
|
||||
if (sao_left != NULL) {
|
||||
ctx = &(cabac->ctx.sao_merge_flag_model);
|
||||
mode_bits += CTX_ENTROPY_FBITS(ctx, 0);
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, 0, mode_bits, "sao_merge_flag");
|
||||
}
|
||||
if (sao_top != NULL) {
|
||||
ctx = &(cabac->ctx.sao_merge_flag_model);
|
||||
mode_bits += CTX_ENTROPY_FBITS(ctx, 0);
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, 0, mode_bits, "sao_merge_flag");
|
||||
}
|
||||
|
||||
// TR coded sao_type_idx_, band = 1
|
||||
ctx = &(cabac->ctx.sao_type_idx_model);
|
||||
mode_bits += CTX_ENTROPY_FBITS(ctx, 1) + 1.0;
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, 1, mode_bits, "sao_type");
|
||||
mode_bits += 1.0;
|
||||
|
||||
// TR coded offsets and possible FL coded offset signs.
|
||||
for (unsigned buf_index = 0; buf_index < buf_cnt; buf_index++)
|
||||
|
@ -552,7 +554,8 @@ static void sao_search_best_mode(const encoder_state_t * const state, const uvg_
|
|||
// Choose between SAO and doing nothing, taking into account the
|
||||
// rate-distortion cost of coding do nothing.
|
||||
{
|
||||
int cost_of_nothing = (int)(sao_mode_bits_none(state, sao_top, sao_left) * state->lambda + 0.5);
|
||||
float mode_bits_none = sao_mode_bits_none(state, sao_top, sao_left);
|
||||
int cost_of_nothing = (int)(mode_bits_none * state->lambda + 0.5);
|
||||
if (sao_out->ddistortion >= cost_of_nothing) {
|
||||
sao_out->type = SAO_TYPE_NONE;
|
||||
merge_cost[0] = cost_of_nothing;
|
||||
|
|
605
src/search.c
605
src/search.c
|
@ -37,6 +37,7 @@
|
|||
|
||||
#include "cabac.h"
|
||||
#include "encoder.h"
|
||||
#include "encode_coding_tree.h"
|
||||
#include "imagelist.h"
|
||||
#include "inter.h"
|
||||
#include "intra.h"
|
||||
|
@ -59,14 +60,6 @@
|
|||
// Cost threshold for doing intra search in inter frames with --rd=0.
|
||||
static const int INTRA_THRESHOLD = 8;
|
||||
|
||||
// Modify weight of luma SSD.
|
||||
#ifndef LUMA_MULT
|
||||
# define LUMA_MULT 0.8
|
||||
#endif
|
||||
// Modify weight of chroma SSD.
|
||||
#ifndef CHROMA_MULT
|
||||
# define CHROMA_MULT 1.5
|
||||
#endif
|
||||
|
||||
static INLINE void copy_cu_info(int x_local, int y_local, int width, lcu_t *from, lcu_t *to)
|
||||
{
|
||||
|
@ -225,16 +218,16 @@ static double cu_zero_coeff_cost(const encoder_state_t *state, lcu_t *work_tree,
|
|||
const int chroma_index = (y_local / 2) * LCU_WIDTH_C + (x_local / 2);
|
||||
|
||||
double ssd = 0.0;
|
||||
ssd += LUMA_MULT * uvg_pixels_calc_ssd(
|
||||
ssd += UVG_LUMA_MULT * uvg_pixels_calc_ssd(
|
||||
&lcu->ref.y[luma_index], &lcu->rec.y[luma_index],
|
||||
LCU_WIDTH, LCU_WIDTH, cu_width
|
||||
);
|
||||
if (x % 8 == 0 && y % 8 == 0 && state->encoder_control->chroma_format != UVG_CSP_400) {
|
||||
ssd += CHROMA_MULT * uvg_pixels_calc_ssd(
|
||||
ssd += UVG_CHROMA_MULT * uvg_pixels_calc_ssd(
|
||||
&lcu->ref.u[chroma_index], &lcu->rec.u[chroma_index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C, cu_width / 2
|
||||
);
|
||||
ssd += CHROMA_MULT * uvg_pixels_calc_ssd(
|
||||
ssd += UVG_CHROMA_MULT * uvg_pixels_calc_ssd(
|
||||
&lcu->ref.v[chroma_index], &lcu->rec.v[chroma_index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C, cu_width / 2
|
||||
);
|
||||
|
@ -251,7 +244,8 @@ static void downsample_cclm_rec(encoder_state_t *state, int x, int y, int width,
|
|||
int x_scu = SUB_SCU(x);
|
||||
int y_scu = SUB_SCU(y);
|
||||
y_rec += x_scu + y_scu * LCU_WIDTH;
|
||||
int stride = state->tile->frame->source->stride;
|
||||
const int stride = state->tile->frame->rec->stride;
|
||||
const int stride2 = (((state->tile->frame->width + 7) & ~7) + FRAME_PADDING_LUMA);
|
||||
|
||||
for (int y_ = 0; y_ < height && y_ * 2 + y < state->encoder_control->cfg.height; y_++) {
|
||||
for (int x_ = 0; x_ < width; x_++) {
|
||||
|
@ -265,13 +259,13 @@ static void downsample_cclm_rec(encoder_state_t *state, int x, int y, int width,
|
|||
s += y_rec[2 * x_ + LCU_WIDTH] * 2;
|
||||
s += y_rec[2 * x_ + 1 + LCU_WIDTH];
|
||||
s += !x_scu && !x_ && x ? state->tile->frame->rec->y[x - 1 + (y + y_ * 2 + 1) * stride] : y_rec[2 * x_ - ((x_ + x) > 0) + LCU_WIDTH];
|
||||
int index = x / 2 + x_ + (y / 2 + y_ )* stride / 2;
|
||||
int index = x / 2 + x_ + (y / 2 + y_ )* stride2 / 2;
|
||||
state->tile->frame->cclm_luma_rec[index] = s >> 3;
|
||||
}
|
||||
y_rec += LCU_WIDTH * 2;
|
||||
}
|
||||
if((y + height * 2) % 64 == 0) {
|
||||
int line = y / 64 * stride / 2;
|
||||
int line = y / 64 * stride2 / 2;
|
||||
y_rec -= LCU_WIDTH;
|
||||
for (int i = 0; i < width; ++i) {
|
||||
int s = 2;
|
||||
|
@ -294,11 +288,13 @@ static void downsample_cclm_rec(encoder_state_t *state, int x, int y, int width,
|
|||
* prediction unit data needs to be coded.
|
||||
*/
|
||||
double uvg_cu_rd_cost_luma(const encoder_state_t *const state,
|
||||
const int x_px, const int y_px, const int depth,
|
||||
const cu_info_t *const pred_cu,
|
||||
lcu_t *const lcu)
|
||||
const int x_px, const int y_px, const int depth,
|
||||
const cu_info_t *const pred_cu,
|
||||
lcu_t *const lcu)
|
||||
{
|
||||
const int width = LCU_WIDTH >> depth;
|
||||
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0);
|
||||
cabac_data_t* cabac = (cabac_data_t *)&state->search_cabac;
|
||||
|
||||
// tr_cu is used for TU parameters.
|
||||
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
|
||||
|
@ -324,14 +320,36 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state,
|
|||
return sum + tr_tree_bits * state->lambda;
|
||||
}
|
||||
|
||||
|
||||
if (cabac->update && tr_cu->tr_depth == tr_cu->depth && !skip_residual_coding) {
|
||||
// Because these need to be coded before the luma cbf they also need to be counted
|
||||
// before the cabac state changes. However, since this branch is only executed when
|
||||
// calculating the last RD cost it is not a problem to include the chroma cbf costs in
|
||||
// luma, because the chroma cost is calculated right after the luma cost.
|
||||
// However, if we have different tr_depth, the bits cannot be written in correct
|
||||
// order anyways so do not touch the chroma cbf here.
|
||||
if (state->encoder_control->chroma_format != UVG_CSP_400) {
|
||||
cabac_ctx_t* cr_ctx = &(cabac->ctx.qt_cbf_model_cb[0]);
|
||||
cabac->cur_ctx = cr_ctx;
|
||||
int u_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U);
|
||||
int v_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_V);
|
||||
CABAC_FBITS_UPDATE(cabac, cr_ctx, u_is_set, tr_tree_bits, "cbf_cb_search");
|
||||
cr_ctx = &(cabac->ctx.qt_cbf_model_cr[u_is_set]);
|
||||
CABAC_FBITS_UPDATE(cabac, cr_ctx, v_is_set, tr_tree_bits, "cbf_cb_search");
|
||||
}
|
||||
}
|
||||
|
||||
// Add transform_tree cbf_luma bit cost.
|
||||
const int is_tr_split = tr_cu->tr_depth - tr_cu->depth;
|
||||
if (pred_cu->type == CU_INTRA ||
|
||||
tr_depth > 0 ||
|
||||
is_tr_split ||
|
||||
cbf_is_set(tr_cu->cbf, depth, COLOR_U) ||
|
||||
cbf_is_set(tr_cu->cbf, depth, COLOR_V))
|
||||
{
|
||||
const cabac_ctx_t *ctx = &(state->cabac.ctx.qt_cbf_model_luma[0]);
|
||||
tr_tree_bits += CTX_ENTROPY_FBITS(ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_Y));
|
||||
cabac_ctx_t *ctx = &(cabac->ctx.qt_cbf_model_luma[0]);
|
||||
int is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_Y);
|
||||
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, is_set, tr_tree_bits, "cbf_y_search");
|
||||
}
|
||||
|
||||
// SSD between reconstruction and original
|
||||
|
@ -343,7 +361,8 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state,
|
|||
width);
|
||||
}
|
||||
|
||||
{
|
||||
|
||||
if (!skip_residual_coding) {
|
||||
int8_t luma_scan_mode = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth);
|
||||
const coeff_t *coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)];
|
||||
|
||||
|
@ -351,23 +370,22 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state,
|
|||
}
|
||||
|
||||
double bits = tr_tree_bits + coeff_bits;
|
||||
return (double)ssd * LUMA_MULT + bits * state->lambda;
|
||||
return (double)ssd * UVG_LUMA_MULT + bits * state->lambda;
|
||||
}
|
||||
|
||||
|
||||
double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
|
||||
const int x_px, const int y_px, const int depth,
|
||||
cu_info_t * pred_cu,
|
||||
lcu_t *const lcu)
|
||||
const int x_px, const int y_px, const int depth,
|
||||
cu_info_t *const pred_cu,
|
||||
lcu_t *const lcu)
|
||||
{
|
||||
const vector2d_t lcu_px = { (x_px & ~7) / 2, (y_px & ~7) / 2 };
|
||||
const int width = (depth < MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth;
|
||||
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
|
||||
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0);
|
||||
|
||||
double tr_tree_bits = 0;
|
||||
double joint_cbcr_tr_tree_bits = 0;
|
||||
double coeff_bits = 0;
|
||||
double joint_coeff_bits = 0;
|
||||
|
||||
assert(x_px >= 0 && x_px < LCU_WIDTH);
|
||||
assert(y_px >= 0 && y_px < LCU_WIDTH);
|
||||
|
@ -378,30 +396,28 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
|
|||
return 0;
|
||||
}
|
||||
|
||||
if (depth < MAX_PU_DEPTH) {
|
||||
// See uvg_cu_rd_cost_luma for the reason behind the second condition
|
||||
if (depth < MAX_PU_DEPTH && (!state->search_cabac.update || tr_cu->tr_depth != tr_cu->depth) && !skip_residual_coding) {
|
||||
const int tr_depth = depth - pred_cu->depth;
|
||||
const cabac_ctx_t *ctx = &(state->cabac.ctx.qt_cbf_model_cb[0]);
|
||||
cabac_data_t* cabac = (cabac_data_t*)&state->search_cabac;
|
||||
cabac_ctx_t *ctx = &(cabac->ctx.qt_cbf_model_cb[0]);
|
||||
cabac->cur_ctx = ctx;
|
||||
if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) {
|
||||
tr_tree_bits += CTX_ENTROPY_FBITS(ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_U));
|
||||
}
|
||||
if(state->encoder_control->cfg.jccr) {
|
||||
joint_cbcr_tr_tree_bits += CTX_ENTROPY_FBITS(ctx, pred_cu->joint_cb_cr & 1);
|
||||
int u_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U);
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, u_is_set, tr_tree_bits, "cbf_cb_search");
|
||||
}
|
||||
int is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U);
|
||||
ctx = &(state->cabac.ctx.qt_cbf_model_cr[is_set]);
|
||||
ctx = &(cabac->ctx.qt_cbf_model_cr[is_set]);
|
||||
if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_V)) {
|
||||
tr_tree_bits += CTX_ENTROPY_FBITS(ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_V));
|
||||
}
|
||||
if(state->encoder_control->cfg.jccr) {
|
||||
ctx = &(state->cabac.ctx.qt_cbf_model_cr[pred_cu->joint_cb_cr & 1]);
|
||||
joint_cbcr_tr_tree_bits += CTX_ENTROPY_FBITS(ctx, (pred_cu->joint_cb_cr & 2) >> 1);
|
||||
int v_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_V);
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, v_is_set, tr_tree_bits, "cbf_cb_search");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (tr_cu->tr_depth > depth) {
|
||||
int offset = LCU_WIDTH >> (depth + 1);
|
||||
int sum = 0;
|
||||
double sum = 0;
|
||||
|
||||
sum += uvg_cu_rd_cost_chroma(state, x_px, y_px, depth + 1, pred_cu, lcu);
|
||||
sum += uvg_cu_rd_cost_chroma(state, x_px + offset, y_px, depth + 1, pred_cu, lcu);
|
||||
|
@ -418,15 +434,10 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
|
|||
ctx = &(state->cabac.ctx.joint_cb_cr[cbf_mask]);
|
||||
tr_tree_bits += CTX_ENTROPY_FBITS(ctx, 0);
|
||||
}
|
||||
if(pred_cu->joint_cb_cr) {
|
||||
ctx = &(state->cabac.ctx.joint_cb_cr[(pred_cu->joint_cb_cr & 1) * 2 + ((pred_cu->joint_cb_cr & 2) >> 1) - 1]);
|
||||
joint_cbcr_tr_tree_bits += CTX_ENTROPY_FBITS(ctx, 1);
|
||||
}
|
||||
}
|
||||
|
||||
// Chroma SSD
|
||||
int ssd = 0;
|
||||
int joint_ssd = 0;
|
||||
if (!state->encoder_control->cfg.lossless) {
|
||||
int index = lcu_px.y * LCU_WIDTH_C + lcu_px.x;
|
||||
int ssd_u = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index],
|
||||
|
@ -436,12 +447,226 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
|
|||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
width);
|
||||
ssd = ssd_u + ssd_v;
|
||||
}
|
||||
|
||||
if(state->encoder_control->cfg.jccr) {
|
||||
if (!skip_residual_coding)
|
||||
{
|
||||
int8_t scan_order = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth);
|
||||
const int index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y);
|
||||
|
||||
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.u[index], width, 2, scan_order, 0);
|
||||
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.v[index], width, 2, scan_order, 0);
|
||||
}
|
||||
|
||||
|
||||
double bits = tr_tree_bits + coeff_bits;
|
||||
|
||||
return (double)ssd * UVG_CHROMA_MULT + bits * state->c_lambda;
|
||||
}
|
||||
|
||||
static double cu_rd_cost_tr_split_accurate(const encoder_state_t* const state,
|
||||
const int x_px, const int y_px, const int depth,
|
||||
const cu_info_t* const pred_cu,
|
||||
lcu_t* const lcu) {
|
||||
const int width = LCU_WIDTH >> depth;
|
||||
|
||||
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0);
|
||||
// tr_cu is used for TU parameters.
|
||||
cu_info_t* const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
|
||||
|
||||
double coeff_bits = 0;
|
||||
double tr_tree_bits = 0;
|
||||
|
||||
// Check that the coordinates are inside the LCU
|
||||
assert(x_px >= 0 && x_px < LCU_WIDTH);
|
||||
assert(y_px >= 0 && y_px < LCU_WIDTH);
|
||||
|
||||
const uint8_t tr_depth = tr_cu->tr_depth - depth;
|
||||
|
||||
const int cb_flag_u = cbf_is_set(tr_cu->cbf, depth, COLOR_U);
|
||||
const int cb_flag_v = cbf_is_set(tr_cu->cbf, depth, COLOR_V);
|
||||
|
||||
cabac_data_t* cabac = (cabac_data_t*)&state->search_cabac;
|
||||
|
||||
{
|
||||
int cbf = cbf_is_set_any(pred_cu->cbf, depth);
|
||||
// Only need to signal coded block flag if not skipped or merged
|
||||
// skip = no coded residual, merge = coded residual
|
||||
if (pred_cu->type == CU_INTER && (pred_cu->part_size != SIZE_2Nx2N || !pred_cu->merged)) {
|
||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_qt_root_cbf_model), cbf, tr_tree_bits, "rqt_root_cbf");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if(state->encoder_control->chroma_format != UVG_CSP_400 && !skip_residual_coding) {
|
||||
if(tr_cu->depth == depth || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) {
|
||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cb[0]), cb_flag_u, tr_tree_bits, "cbf_cb");
|
||||
}
|
||||
if(tr_cu->depth == depth || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_V)) {
|
||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cr[cb_flag_u]), cb_flag_v, tr_tree_bits, "cbf_cr");
|
||||
}
|
||||
}
|
||||
|
||||
if (tr_depth > 0) {
|
||||
int offset = LCU_WIDTH >> (depth + 1);
|
||||
double sum = 0;
|
||||
|
||||
sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px, depth + 1, pred_cu, lcu);
|
||||
sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px, depth + 1, pred_cu, lcu);
|
||||
sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px + offset, depth + 1, pred_cu, lcu);
|
||||
sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu);
|
||||
return sum + tr_tree_bits * state->lambda;
|
||||
}
|
||||
const int cb_flag_y = cbf_is_set(tr_cu->cbf, depth, COLOR_Y) ;
|
||||
|
||||
// Add transform_tree cbf_luma bit cost.
|
||||
const int is_tr_split = depth - tr_cu->depth;
|
||||
if ((pred_cu->type == CU_INTRA ||
|
||||
is_tr_split ||
|
||||
cb_flag_u ||
|
||||
cb_flag_v)
|
||||
&& !skip_residual_coding)
|
||||
{
|
||||
cabac_ctx_t* ctx = &(cabac->ctx.qt_cbf_model_luma[!is_tr_split]);
|
||||
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, cb_flag_y, tr_tree_bits, "cbf_y_search");
|
||||
}
|
||||
|
||||
if (cb_flag_y | cb_flag_u | cb_flag_v) {
|
||||
// TODO qp_delta_sign_flag
|
||||
|
||||
if ((cb_flag_u | cb_flag_v) && x_px % 8 == 0 && y_px % 8 == 0 && state->encoder_control->cfg.jccr) {
|
||||
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.joint_cb_cr[cb_flag_u * 2 + cb_flag_v - 1], tr_cu->joint_cb_cr != 0, tr_tree_bits, "tu_joint_cbcr_residual_flag");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// SSD between reconstruction and original
|
||||
unsigned luma_ssd = 0;
|
||||
if (!state->encoder_control->cfg.lossless) {
|
||||
int index = y_px * LCU_WIDTH + x_px;
|
||||
luma_ssd = uvg_pixels_calc_ssd(&lcu->ref.y[index], &lcu->rec.y[index],
|
||||
LCU_WIDTH, LCU_WIDTH,
|
||||
width);
|
||||
}
|
||||
|
||||
{
|
||||
int8_t luma_scan_mode = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth);
|
||||
const coeff_t* coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)];
|
||||
|
||||
coeff_bits += uvg_get_coeff_cost(state, coeffs, width, 0, luma_scan_mode, tr_cu->tr_skip);
|
||||
}
|
||||
|
||||
unsigned chroma_ssd = 0;
|
||||
if(state->encoder_control->chroma_format != UVG_CSP_400 && (depth != 4 || (x_px % 8 != 0 && y_px % 8 != 0))) {
|
||||
const vector2d_t lcu_px = { (x_px & ~7 ) / 2, (y_px & ~7) / 2 };
|
||||
const int chroma_width = MAX(4, LCU_WIDTH >> (depth + 1));
|
||||
int8_t scan_order = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth);
|
||||
const unsigned index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y);
|
||||
if(pred_cu->joint_cb_cr == 0) {
|
||||
if (!state->encoder_control->cfg.lossless) {
|
||||
int index = lcu_px.y * LCU_WIDTH_C + lcu_px.x;
|
||||
unsigned ssd_u = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
chroma_width);
|
||||
unsigned ssd_v = uvg_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.v[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
chroma_width);
|
||||
chroma_ssd = ssd_u + ssd_v;
|
||||
}
|
||||
|
||||
{
|
||||
|
||||
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.u[index], chroma_width, 2, scan_order, 0);
|
||||
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.v[index], chroma_width, 2, scan_order, 0);
|
||||
}
|
||||
} else {
|
||||
int ssd_u_joint = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.joint_u[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
width);
|
||||
int ssd_v_joint = uvg_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.joint_v[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
chroma_width);
|
||||
chroma_ssd = ssd_u_joint + ssd_v_joint;
|
||||
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.joint_uv[index], width, 2, scan_order, 0);
|
||||
}
|
||||
}
|
||||
|
||||
double bits = tr_tree_bits + coeff_bits;
|
||||
return luma_ssd * UVG_LUMA_MULT + chroma_ssd * UVG_CHROMA_MULT + bits * state->lambda;
|
||||
}
|
||||
|
||||
|
||||
void uvg_select_jccr_mode(
|
||||
const encoder_state_t* const state,
|
||||
const int x_px,
|
||||
const int y_px,
|
||||
const int depth,
|
||||
cu_info_t* pred_cu,
|
||||
lcu_t* const lcu,
|
||||
double* cost_out)
|
||||
{
|
||||
const vector2d_t lcu_px = { (SUB_SCU(x_px) & ~7) / 2, (SUB_SCU(y_px) & ~7) / 2 };
|
||||
const int width = (depth < MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth;
|
||||
if (pred_cu == NULL) pred_cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x_px), SUB_SCU(y_px));
|
||||
assert(pred_cu->depth == pred_cu->tr_depth && "jccr does not support transform splitting");
|
||||
if (cost_out == NULL && pred_cu->joint_cb_cr == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
double tr_tree_bits = 0;
|
||||
double joint_cbcr_tr_tree_bits = 0;
|
||||
double coeff_bits = 0;
|
||||
double joint_coeff_bits = 0;
|
||||
|
||||
assert(lcu_px.x >= 0 && lcu_px.x < LCU_WIDTH_C);
|
||||
assert(lcu_px.y >= 0 && lcu_px.y < LCU_WIDTH_C);
|
||||
|
||||
if (depth == 4 && (x_px % 8 == 0 || y_px % 8 == 0)) {
|
||||
// For MAX_PU_DEPTH calculate chroma for previous depth for the first
|
||||
// block and return 0 cost for all others.
|
||||
return;
|
||||
}
|
||||
|
||||
cabac_data_t* cabac = (cabac_data_t*)&state->search_cabac;
|
||||
cabac_ctx_t* ctx = &(cabac->ctx.qt_cbf_model_cb[0]);
|
||||
cabac->cur_ctx = ctx;
|
||||
int u_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U);
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, u_is_set, tr_tree_bits, "cbf_cb_search");
|
||||
ctx = &(cabac->ctx.qt_cbf_model_cr[u_is_set]);
|
||||
int v_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_V);
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, v_is_set, tr_tree_bits, "cbf_cr_search");
|
||||
|
||||
int cbf_mask = u_is_set * 2 + v_is_set - 1;
|
||||
if((cbf_mask != -1 && pred_cu->type == CU_INTRA) || cbf_mask == 2)
|
||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.joint_cb_cr[cbf_mask]), 0, tr_tree_bits, "jccr_flag");
|
||||
|
||||
if(pred_cu->joint_cb_cr) {
|
||||
const int u_jccr = (pred_cu->joint_cb_cr >> 1) & 1;
|
||||
ctx = &(cabac->ctx.qt_cbf_model_cb[0]);
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, u_jccr, joint_cbcr_tr_tree_bits, "cbf_cb_search");
|
||||
ctx = &(cabac->ctx.qt_cbf_model_cr[u_jccr]);
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, pred_cu->joint_cb_cr & 1, joint_cbcr_tr_tree_bits, "cbf_cr_search");
|
||||
cbf_mask = pred_cu->joint_cb_cr - 1;
|
||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.joint_cb_cr[cbf_mask]), 1, joint_cbcr_tr_tree_bits, "jccr_flag");
|
||||
}
|
||||
unsigned ssd = 0;
|
||||
unsigned joint_ssd = 0;
|
||||
if (!state->encoder_control->cfg.lossless) {
|
||||
const int index = lcu_px.y * LCU_WIDTH_C + lcu_px.x;
|
||||
const unsigned ssd_u = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
width);
|
||||
const unsigned ssd_v = uvg_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.v[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
width);
|
||||
ssd = ssd_u + ssd_v;
|
||||
|
||||
if (pred_cu->joint_cb_cr) {
|
||||
const unsigned ssd_u_joint = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.joint_u[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
width);
|
||||
const unsigned ssd_v_joint = uvg_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.joint_v[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
width);
|
||||
joint_ssd = ssd_u_joint + ssd_v_joint;
|
||||
|
@ -455,34 +680,33 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
|
|||
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.u[index], width, 2, scan_order, 0);
|
||||
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.v[index], width, 2, scan_order, 0);
|
||||
|
||||
if(state->encoder_control->cfg.jccr) {
|
||||
joint_coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.joint_uv[index], width, 2, scan_order, 0);
|
||||
}
|
||||
joint_coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.joint_uv[index], width, 2, scan_order, 0);
|
||||
}
|
||||
|
||||
|
||||
double bits = tr_tree_bits + coeff_bits;
|
||||
double joint_bits = joint_cbcr_tr_tree_bits + joint_coeff_bits;
|
||||
|
||||
double cost = (double)ssd + bits * state->c_lambda;
|
||||
double joint_cost = (double)joint_ssd + joint_bits * state->c_lambda;
|
||||
double cost = (double)ssd * UVG_CHROMA_MULT + bits * state->c_lambda;
|
||||
double joint_cost = (double)joint_ssd * UVG_CHROMA_MULT + joint_bits * state->c_lambda;
|
||||
if ((cost < joint_cost || !pred_cu->joint_cb_cr) || !state->encoder_control->cfg.jccr) {
|
||||
pred_cu->joint_cb_cr = 0;
|
||||
return cost;
|
||||
if (cost_out) *cost_out += cost;
|
||||
return;
|
||||
}
|
||||
cbf_clear(&pred_cu->cbf, depth, COLOR_U);
|
||||
cbf_clear(&pred_cu->cbf, depth, COLOR_V);
|
||||
if (pred_cu->joint_cb_cr & 1) {
|
||||
if (pred_cu->joint_cb_cr & 2) {
|
||||
cbf_set(&pred_cu->cbf, depth, COLOR_U);
|
||||
}
|
||||
if (pred_cu->joint_cb_cr & 2) {
|
||||
if (pred_cu->joint_cb_cr & 1) {
|
||||
cbf_set(&pred_cu->cbf, depth, COLOR_V);
|
||||
}
|
||||
int lcu_width = LCU_WIDTH_C;
|
||||
const int index = lcu_px.x + lcu_px.y * lcu_width;
|
||||
uvg_pixels_blit(&lcu->rec.joint_u[index], &lcu->rec.u[index], width, width, lcu_width, lcu_width);
|
||||
uvg_pixels_blit(&lcu->rec.joint_v[index], &lcu->rec.v[index], width, width, lcu_width, lcu_width);
|
||||
return joint_cost;
|
||||
if (cost_out) *cost_out += joint_cost;
|
||||
}
|
||||
|
||||
|
||||
|
@ -492,23 +716,9 @@ static double calc_mode_bits(const encoder_state_t *state,
|
|||
const cu_info_t * cur_cu,
|
||||
int x, int y, int depth)
|
||||
{
|
||||
int x_local = SUB_SCU(x);
|
||||
int y_local = SUB_SCU(y);
|
||||
|
||||
assert(cur_cu->type == CU_INTRA);
|
||||
|
||||
int8_t candidate_modes[INTRA_MPM_COUNT];
|
||||
{
|
||||
const cu_info_t *left_cu = ((x >= SCU_WIDTH) ? LCU_GET_CU_AT_PX(lcu, x_local - SCU_WIDTH, y_local) : NULL);
|
||||
const cu_info_t *above_cu = ((y >= SCU_WIDTH) ? LCU_GET_CU_AT_PX(lcu, x_local, y_local - SCU_WIDTH) : NULL);
|
||||
uvg_intra_get_dir_luma_predictor(x, y, candidate_modes, cur_cu, left_cu, above_cu);
|
||||
}
|
||||
|
||||
int width = LCU_WIDTH >> depth;
|
||||
int height = width; // TODO: height for non-square blocks
|
||||
int num_mip_modes_half = NUM_MIP_MODES_HALF(width, height);
|
||||
int mip_flag_ctx_id = uvg_get_mip_flag_context(x, y, width, height, lcu, NULL);
|
||||
double mode_bits = uvg_luma_mode_bits(state, cur_cu->intra.mode, candidate_modes, cur_cu->intra.multi_ref_idx, num_mip_modes_half, mip_flag_ctx_id);
|
||||
double mode_bits = uvg_luma_mode_bits(state, cur_cu, x, y, depth, lcu);
|
||||
|
||||
if (((depth == 4 && x % 8 && y % 8) || (depth != 4)) && state->encoder_control->chroma_format != UVG_CSP_400) {
|
||||
mode_bits += uvg_chroma_mode_bits(state, cur_cu->intra.mode_chroma, cur_cu->intra.mode);
|
||||
|
@ -518,6 +728,7 @@ static double calc_mode_bits(const encoder_state_t *state,
|
|||
}
|
||||
|
||||
|
||||
// TODO: replace usages of this by the uvg_sort_indices_by_cost function.
|
||||
/**
|
||||
* \brief Sort modes and costs to ascending order according to costs.
|
||||
*/
|
||||
|
@ -567,16 +778,25 @@ void uvg_sort_modes_intra_luma(int8_t *__restrict modes, int8_t *__restrict traf
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
static uint8_t get_ctx_cu_split_model(const lcu_t *lcu, int x, int y, int depth)
|
||||
/**
|
||||
* \brief Sort keys (indices) to ascending order according to costs.
|
||||
*/
|
||||
void uvg_sort_keys_by_cost(unit_stats_map_t *__restrict map)
|
||||
{
|
||||
vector2d_t lcu_cu = { SUB_SCU(x), SUB_SCU(y) };
|
||||
bool condA = x >= 8 && LCU_GET_CU_AT_PX(lcu, lcu_cu.x - 1, lcu_cu.y )->depth > depth;
|
||||
bool condL = y >= 8 && LCU_GET_CU_AT_PX(lcu, lcu_cu.x, lcu_cu.y - 1)->depth > depth;
|
||||
return condA + condL;
|
||||
// Size of sorted arrays is expected to be "small". No need for faster algorithm.
|
||||
for (uint8_t i = 1; i < map->size; ++i) {
|
||||
const int8_t cur_indx = map->keys[i];
|
||||
const double cur_cost = map->cost[cur_indx];
|
||||
uint8_t j = i;
|
||||
while (j > 0 && cur_cost < map->cost[map->keys[j - 1]]) {
|
||||
map->keys[j] = map->keys[j - 1];
|
||||
--j;
|
||||
}
|
||||
map->keys[j] = cur_indx;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Search every mode from 0 to MAX_PU_DEPTH and return cost of best mode.
|
||||
* - The recursion is started at depth 0 and goes in Z-order to MAX_PU_DEPTH.
|
||||
|
@ -592,10 +812,12 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
const encoder_control_t* ctrl = state->encoder_control;
|
||||
const videoframe_t * const frame = state->tile->frame;
|
||||
int cu_width = LCU_WIDTH >> depth;
|
||||
double cost = MAX_INT;
|
||||
double inter_zero_coeff_cost = MAX_INT;
|
||||
uint32_t inter_bitcost = MAX_INT;
|
||||
double cost = MAX_DOUBLE;
|
||||
double inter_zero_coeff_cost = MAX_DOUBLE;
|
||||
double inter_bitcost = MAX_INT;
|
||||
cu_info_t *cur_cu;
|
||||
cabac_data_t pre_search_cabac;
|
||||
memcpy(&pre_search_cabac, &state->search_cabac, sizeof(pre_search_cabac));
|
||||
|
||||
const uint32_t ctu_row = (y >> LOG2_LCU_WIDTH);
|
||||
const uint32_t ctu_row_mul_five = ctu_row * MAX_NUM_HMVP_CANDS;
|
||||
|
@ -626,7 +848,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
|
||||
// Assign correct depth limit
|
||||
constraint_t* constr = state->constraint;
|
||||
if(constr->ml_intra_depth_ctu) {
|
||||
if(constr->ml_intra_depth_ctu) {
|
||||
pu_depth_intra.min = constr->ml_intra_depth_ctu->_mat_upper_depth[(x_local >> 3) + (y_local >> 3) * 8];
|
||||
pu_depth_intra.max = constr->ml_intra_depth_ctu->_mat_lower_depth[(x_local >> 3) + (y_local >> 3) * 8];
|
||||
}
|
||||
|
@ -670,7 +892,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
|
||||
if (can_use_inter) {
|
||||
double mode_cost;
|
||||
uint32_t mode_bitcost;
|
||||
double mode_bitcost;
|
||||
uvg_search_cu_inter(state,
|
||||
x, y,
|
||||
depth,
|
||||
|
@ -693,33 +915,34 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
|
||||
int32_t cu_width_intra_min = LCU_WIDTH >> pu_depth_intra.max;
|
||||
bool can_use_intra =
|
||||
WITHIN(depth, pu_depth_intra.min, pu_depth_intra.max) ||
|
||||
(WITHIN(depth, pu_depth_intra.min, pu_depth_intra.max) ||
|
||||
// When the split was forced because the CTU is partially outside
|
||||
// the frame, we permit intra coding even if pu_depth_intra would
|
||||
// otherwise forbid it.
|
||||
(x & ~(cu_width_intra_min - 1)) + cu_width_intra_min > frame->width ||
|
||||
(y & ~(cu_width_intra_min - 1)) + cu_width_intra_min > frame->height;
|
||||
(y & ~(cu_width_intra_min - 1)) + cu_width_intra_min > frame->height) &&
|
||||
!(state->encoder_control->cfg.force_inter && state->frame->slicetype != UVG_SLICE_I);
|
||||
|
||||
intra_search_data_t intra_search;
|
||||
if (can_use_intra && !skip_intra) {
|
||||
int8_t intra_mode;
|
||||
int8_t intra_trafo;
|
||||
double intra_cost;
|
||||
uint8_t multi_ref_index = 0;
|
||||
bool mip_flag = false;
|
||||
bool mip_transposed = false;
|
||||
uvg_search_cu_intra(state, x, y, depth, lcu,
|
||||
&intra_mode, &intra_trafo, &intra_cost, &multi_ref_index, &mip_flag, &mip_transposed);
|
||||
if (intra_cost < cost) {
|
||||
cost = intra_cost;
|
||||
intra_search.pred_cu = *cur_cu;
|
||||
intra_search.pred_cu.joint_cb_cr = 4;
|
||||
uvg_search_cu_intra(state, x, y, depth, &intra_search,
|
||||
lcu);
|
||||
#ifdef COMPLETE_PRED_MODE_BITS
|
||||
// Technically counting these bits would be correct, however counting
|
||||
// them universally degrades quality so this block is disabled by default
|
||||
if(state->frame->slicetype != UVG_SLICE_I) {
|
||||
double pred_mode_type_bits = 0;
|
||||
CABAC_FBITS_UPDATE(&state->search_cabac, &state->search_cabac.ctx.cu_pred_mode_model, 1, pred_mode_type_bits, "pred_mode_flag");
|
||||
CABAC_FBITS_UPDATE(&state->search_cabac, &state->search_cabac.ctx.cu_skip_flag_model[uvg_get_skip_context(x, y, lcu, NULL)], 0, pred_mode_type_bits, "skip_flag");
|
||||
intra_cost += pred_mode_type_bits * state->lambda;
|
||||
}
|
||||
#endif
|
||||
if (intra_search.cost < cost) {
|
||||
cost = intra_search.cost;
|
||||
*cur_cu = intra_search.pred_cu;
|
||||
cur_cu->type = CU_INTRA;
|
||||
cur_cu->part_size = depth > MAX_DEPTH ? SIZE_NxN : SIZE_2Nx2N;
|
||||
cur_cu->intra.mode = intra_mode;
|
||||
cur_cu->intra.multi_ref_idx = multi_ref_index;
|
||||
cur_cu->intra.mip_flag = mip_flag;
|
||||
cur_cu->intra.mip_is_transposed = mip_transposed;
|
||||
|
||||
//If the CU is not split from 64x64 block, the MTS is disabled for that CU.
|
||||
cur_cu->tr_idx = (depth > 0) ? intra_trafo : 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -727,20 +950,19 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
// mode search of adjacent CUs.
|
||||
if (cur_cu->type == CU_INTRA) {
|
||||
assert(cur_cu->part_size == SIZE_2Nx2N || cur_cu->part_size == SIZE_NxN);
|
||||
cur_cu->intra.mode_chroma = cur_cu->intra.mode;
|
||||
|
||||
intra_search.pred_cu.intra.mode_chroma = -1; // don't reconstruct chroma before search is performed for it
|
||||
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
|
||||
uvg_intra_recon_cu(state,
|
||||
x, y,
|
||||
depth,
|
||||
cur_cu->intra.mode, -1, // skip chroma
|
||||
NULL, NULL, cur_cu->intra.multi_ref_idx,
|
||||
cur_cu->intra.mip_flag, cur_cu->intra.mip_is_transposed,
|
||||
depth, &intra_search,
|
||||
NULL,
|
||||
lcu);
|
||||
|
||||
downsample_cclm_rec(
|
||||
state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64]
|
||||
);
|
||||
cur_cu->joint_cb_cr = 0;
|
||||
|
||||
// TODO: This relies heavily on square CUs
|
||||
if ((depth != 4 || (x % 8 && y % 8)) && state->encoder_control->chroma_format != UVG_CSP_400) {
|
||||
|
@ -748,19 +970,47 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
// rd2. Possibly because the luma mode search already takes chroma
|
||||
// into account, so there is less of a chance of luma mode being
|
||||
// really bad for chroma.
|
||||
cclm_parameters_t cclm_params[2];
|
||||
intra_search.pred_cu.intra.mode_chroma = cur_cu->intra.mode_chroma; // skip luma
|
||||
if (ctrl->cfg.rdo >= 3 && !cur_cu->intra.mip_flag) {
|
||||
cur_cu->intra.mode_chroma = uvg_search_cu_intra_chroma(state, x, y, depth, lcu, cclm_params);
|
||||
cur_cu->intra.mode_chroma = uvg_search_cu_intra_chroma(state, x, y, depth, lcu, &intra_search);
|
||||
|
||||
if (intra_search.pred_cu.joint_cb_cr == 0) intra_search.pred_cu.joint_cb_cr = 4;
|
||||
else cur_cu->joint_cb_cr = intra_search.pred_cu.joint_cb_cr;
|
||||
|
||||
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
|
||||
}
|
||||
|
||||
intra_search.pred_cu.intra.mode = -1; // skip luma
|
||||
uvg_intra_recon_cu(state,
|
||||
x & ~7, y & ~7, // TODO: as does this
|
||||
depth,
|
||||
-1, cur_cu->intra.mode_chroma, // skip luma
|
||||
NULL, cclm_params, 0,
|
||||
cur_cu->intra.mip_flag, cur_cu->intra.mip_is_transposed,
|
||||
x, y, // TODO: as does this
|
||||
depth, &intra_search,
|
||||
NULL,
|
||||
lcu);
|
||||
if(depth != 0 && state->encoder_control->cfg.jccr && ctrl->cfg.rdo < 3) {
|
||||
uvg_select_jccr_mode(state,
|
||||
x, y,
|
||||
depth,
|
||||
NULL,
|
||||
lcu,
|
||||
NULL);
|
||||
}
|
||||
else if(depth != 0 && state->encoder_control->cfg.jccr && cur_cu->joint_cb_cr & 3) {
|
||||
assert(cur_cu->joint_cb_cr < 4);
|
||||
cbf_clear(&cur_cu->cbf, depth, COLOR_U);
|
||||
cbf_clear(&cur_cu->cbf, depth, COLOR_V);
|
||||
if (cur_cu->joint_cb_cr & 2) {
|
||||
cbf_set(&cur_cu->cbf, depth, COLOR_U);
|
||||
}
|
||||
if (cur_cu->joint_cb_cr & 1) {
|
||||
cbf_set(&cur_cu->cbf, depth, COLOR_V);
|
||||
}
|
||||
const vector2d_t lcu_px = { (x_local & ~7) / 2, (y_local & ~7) / 2 };
|
||||
int lcu_width = LCU_WIDTH_C;
|
||||
const int index = lcu_px.x + lcu_px.y * lcu_width;
|
||||
const int width = (depth < MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth;
|
||||
uvg_pixels_blit(&lcu->rec.joint_u[index], &lcu->rec.u[index], width, width, lcu_width, lcu_width);
|
||||
uvg_pixels_blit(&lcu->rec.joint_v[index], &lcu->rec.v[index], width, width, lcu_width, lcu_width);
|
||||
|
||||
}
|
||||
}
|
||||
} else if (cur_cu->type == CU_INTER) {
|
||||
|
||||
|
@ -788,11 +1038,20 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
}
|
||||
|
||||
uvg_quantize_lcu_residual(state,
|
||||
true, has_chroma,
|
||||
x, y, depth,
|
||||
NULL,
|
||||
lcu,
|
||||
false);
|
||||
true, has_chroma,
|
||||
state->encoder_control->cfg.jccr, x, y,
|
||||
depth,
|
||||
NULL,
|
||||
lcu,
|
||||
false);
|
||||
if (cur_cu->depth == cur_cu->tr_depth && state->encoder_control->cfg.jccr && cur_cu->joint_cb_cr) {
|
||||
uvg_select_jccr_mode(state,
|
||||
x, y,
|
||||
depth,
|
||||
NULL,
|
||||
lcu,
|
||||
NULL);
|
||||
}
|
||||
|
||||
int cbf = cbf_is_set_any(cur_cu->cbf, depth);
|
||||
|
||||
|
@ -800,9 +1059,10 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
cur_cu->merged = 0;
|
||||
cur_cu->skipped = 1;
|
||||
// Selecting skip reduces bits needed to code the CU
|
||||
if (inter_bitcost > 1) {
|
||||
inter_bitcost -= 1;
|
||||
}
|
||||
int skip_ctx = uvg_get_skip_context(x, y, lcu, NULL, NULL);
|
||||
inter_bitcost = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_skip_flag_model[skip_ctx], 1);
|
||||
inter_bitcost += CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.cu_merge_idx_ext_model), cur_cu->merge_idx != 0);
|
||||
inter_bitcost += cur_cu->merge_idx;
|
||||
}
|
||||
}
|
||||
lcu_fill_inter(lcu, x_local, y_local, cu_width);
|
||||
|
@ -811,19 +1071,25 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
}
|
||||
|
||||
if (cur_cu->type == CU_INTRA || cur_cu->type == CU_INTER) {
|
||||
cost = uvg_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, lcu);
|
||||
if (state->encoder_control->chroma_format != UVG_CSP_400) {
|
||||
cost += uvg_cu_rd_cost_chroma(state, x_local, y_local, depth, cur_cu, lcu);
|
||||
double bits = 0;
|
||||
cabac_data_t* cabac = &state->search_cabac;
|
||||
cabac->update = 1;
|
||||
|
||||
if(cur_cu->type != CU_INTRA || cur_cu->part_size == SIZE_2Nx2N) {
|
||||
bits += uvg_mock_encode_coding_unit(
|
||||
state,
|
||||
cabac,
|
||||
x, y, depth,
|
||||
lcu,
|
||||
cur_cu);
|
||||
}
|
||||
else {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
double mode_bits;
|
||||
if (cur_cu->type == CU_INTRA) {
|
||||
mode_bits = calc_mode_bits(state, lcu, cur_cu, x, y, depth);
|
||||
} else {
|
||||
mode_bits = inter_bitcost;
|
||||
}
|
||||
cost = bits * state->lambda;
|
||||
|
||||
cost += mode_bits * state->lambda;
|
||||
cost += cu_rd_cost_tr_split_accurate(state, x_local, y_local, depth, cur_cu, lcu);
|
||||
|
||||
if (ctrl->cfg.zero_coeff_rdo && inter_zero_coeff_cost <= cost) {
|
||||
cost = inter_zero_coeff_cost;
|
||||
|
@ -846,13 +1112,14 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
cur_cu->cbf = 0;
|
||||
lcu_fill_cbf(lcu, x_local, y_local, cu_width, cur_cu);
|
||||
}
|
||||
cabac->update = 0;
|
||||
}
|
||||
|
||||
bool can_split_cu =
|
||||
// If the CU is partially outside the frame, we need to split it even
|
||||
// if pu_depth_intra and pu_depth_inter would not permit it.
|
||||
cur_cu->type == CU_NOTSET ||
|
||||
depth < pu_depth_intra.max ||
|
||||
(depth < pu_depth_intra.max && !(state->encoder_control->cfg.force_inter&& state->frame->slicetype != UVG_SLICE_I)) ||
|
||||
(state->frame->slicetype != UVG_SLICE_I &&
|
||||
depth < pu_depth_inter.max);
|
||||
|
||||
|
@ -861,21 +1128,23 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
int half_cu = cu_width / 2;
|
||||
double split_cost = 0.0;
|
||||
int cbf = cbf_is_set_any(cur_cu->cbf, depth);
|
||||
cabac_data_t post_seach_cabac;
|
||||
memcpy(&post_seach_cabac, &state->search_cabac, sizeof(post_seach_cabac));
|
||||
memcpy(&state->search_cabac, &pre_search_cabac, sizeof(post_seach_cabac));
|
||||
state->search_cabac.update = 1;
|
||||
|
||||
double split_bits = 0;
|
||||
|
||||
if (depth < MAX_DEPTH) {
|
||||
// Add cost of cu_split_flag.
|
||||
uint8_t split_model = get_ctx_cu_split_model(lcu, x, y, depth);
|
||||
const cabac_ctx_t *ctx = &(state->cabac.ctx.split_flag_model[split_model]);
|
||||
cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda;
|
||||
split_cost += CTX_ENTROPY_FBITS(ctx, 1) * state->lambda;
|
||||
uvg_write_split_flag(state, &state->search_cabac,
|
||||
x > 0 ? LCU_GET_CU_AT_PX(lcu,SUB_SCU(x) -1, SUB_SCU(y)): NULL,
|
||||
y > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y) - 1) : NULL,
|
||||
1, depth, cu_width, x, y, &split_bits);
|
||||
}
|
||||
|
||||
if (cur_cu->type == CU_INTRA && depth == MAX_DEPTH) {
|
||||
// Add cost of intra part_size.
|
||||
const cabac_ctx_t *ctx = &(state->cabac.ctx.part_size_model[0]);
|
||||
cost += CTX_ENTROPY_FBITS(ctx, 1) * state->lambda; // 2Nx2N
|
||||
split_cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda; // NxN
|
||||
}
|
||||
state->search_cabac.update = 0;
|
||||
split_cost += split_bits * state->lambda;
|
||||
|
||||
// If skip mode was selected for the block, skip further search.
|
||||
// Skip mode means there's no coefficients in the block, so splitting
|
||||
|
@ -897,13 +1166,23 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
// searching.
|
||||
|
||||
if (cur_cu->type == CU_NOTSET && depth < MAX_PU_DEPTH
|
||||
&& x + cu_width <= frame->width && y + cu_width <= frame->height && 0)
|
||||
&& x + cu_width <= frame->width && y + cu_width <= frame->height
|
||||
&& state->encoder_control->cfg.combine_intra_cus)
|
||||
{
|
||||
|
||||
cu_info_t *cu_d1 = LCU_GET_CU_AT_PX(&work_tree[depth + 1], x_local, y_local);
|
||||
|
||||
// If the best CU in depth+1 is intra and the biggest it can be, try it.
|
||||
if (cu_d1->type == CU_INTRA && cu_d1->depth == depth + 1) {
|
||||
cabac_data_t temp_cabac;
|
||||
memcpy(&temp_cabac, &state->search_cabac, sizeof(temp_cabac));
|
||||
memcpy(&state->search_cabac, &pre_search_cabac, sizeof(pre_search_cabac));
|
||||
cost = 0;
|
||||
double bits = 0;
|
||||
uvg_write_split_flag(state, &state->search_cabac,
|
||||
x > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x) - 1, SUB_SCU(y)) : NULL,
|
||||
y > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y) - 1) : NULL,
|
||||
0, depth, cu_width, x, y, & split_bits);
|
||||
|
||||
cur_cu->intra = cu_d1->intra;
|
||||
cur_cu->type = CU_INTRA;
|
||||
|
@ -915,28 +1194,24 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
uvg_lcu_fill_trdepth(lcu, x, y, depth, cur_cu->tr_depth);
|
||||
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
|
||||
|
||||
const bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400;
|
||||
const int8_t mode_chroma = has_chroma ? cur_cu->intra.mode_chroma : -1;
|
||||
intra_search_data_t proxy;
|
||||
FILL(proxy, 0);
|
||||
proxy.pred_cu = *cur_cu;
|
||||
|
||||
uvg_intra_recon_cu(state,
|
||||
x, y,
|
||||
depth,
|
||||
cur_cu->intra.mode, mode_chroma,
|
||||
NULL,NULL, 0, cur_cu->intra.mip_flag, cur_cu->intra.mip_is_transposed,
|
||||
&proxy,
|
||||
NULL,
|
||||
lcu);
|
||||
|
||||
cost += uvg_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, lcu);
|
||||
if (has_chroma) {
|
||||
cost += uvg_cu_rd_cost_chroma(state, x_local, y_local, depth, cur_cu, lcu);
|
||||
}
|
||||
|
||||
// Add the cost of coding no-split.
|
||||
uint8_t split_model = get_ctx_cu_split_model(lcu, x, y, depth);
|
||||
const cabac_ctx_t *ctx = &(state->cabac.ctx.split_flag_model[split_model]);
|
||||
cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda;
|
||||
|
||||
// Add the cost of coding intra mode only once.
|
||||
double mode_bits = calc_mode_bits(state, lcu, cur_cu, x, y, depth);
|
||||
double mode_bits = calc_mode_bits(state, lcu, cur_cu, x, y, depth) + bits;
|
||||
cost += mode_bits * state->lambda;
|
||||
|
||||
cost += cu_rd_cost_tr_split_accurate(state, x_local, y_local, depth, cur_cu, lcu);
|
||||
|
||||
memcpy(&post_seach_cabac, &state->search_cabac, sizeof(post_seach_cabac));
|
||||
memcpy(&state->search_cabac, &temp_cabac, sizeof(temp_cabac));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -950,6 +1225,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
} else if (depth > 0) {
|
||||
// Copy this CU's mode all the way down for use in adjacent CUs mode
|
||||
// search.
|
||||
memcpy(&state->search_cabac, &post_seach_cabac, sizeof(post_seach_cabac));
|
||||
work_tree_copy_down(x_local, y_local, depth, work_tree);
|
||||
downsample_cclm_rec(
|
||||
state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64]
|
||||
|
@ -962,6 +1238,11 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
uvg_hmvp_add_mv(state, x, y, cu_width, cu_width, cur_cu);
|
||||
}
|
||||
}
|
||||
else {
|
||||
downsample_cclm_rec(
|
||||
state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64]
|
||||
);
|
||||
}
|
||||
} else if (depth >= 0 && depth < MAX_PU_DEPTH) {
|
||||
// Need to copy modes down since the lower level of the work tree is used
|
||||
// when searching SMP and AMP blocks.
|
||||
|
@ -1139,6 +1420,8 @@ static void copy_lcu_to_cu_data(const encoder_state_t * const state, int x_px, i
|
|||
*/
|
||||
void uvg_search_lcu(encoder_state_t * const state, const int x, const int y, const yuv_t * const hor_buf, const yuv_t * const ver_buf, lcu_coeff_t *coeff)
|
||||
{
|
||||
memcpy(&state->search_cabac, &state->cabac, sizeof(cabac_data_t));
|
||||
state->search_cabac.only_count = 1;
|
||||
assert(x % LCU_WIDTH == 0);
|
||||
assert(y % LCU_WIDTH == 0);
|
||||
|
||||
|
|
56
src/search.h
56
src/search.h
|
@ -44,22 +44,62 @@
|
|||
#include "image.h"
|
||||
#include "constraint.h"
|
||||
|
||||
#define NUM_MIP_MODES_FULL(width, height) ((width) == 4 && (height) == 4) ? 32 : ((width) == 4 || (height) == 4 || ((width) == 8 && (height) == 8) ? 16 : 12)
|
||||
#define NUM_MIP_MODES_HALF(width, height) NUM_MIP_MODES_FULL((width), (height)) >> 1
|
||||
#define MAX_UNIT_STATS_MAP_SIZE MAX(MAX_REF_PIC_COUNT, MRG_MAX_NUM_CANDS)
|
||||
|
||||
// Modify weight of luma SSD.
|
||||
#ifndef UVG_LUMA_MULT
|
||||
#define UVG_LUMA_MULT 0.8
|
||||
#endif
|
||||
// Modify weight of chroma SSD.
|
||||
#ifndef UVG_CHROMA_MULT
|
||||
#define UVG_CHROMA_MULT 1.5
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \brief Data collected during search processes.
|
||||
*
|
||||
* The intended use is to collect statistics of the
|
||||
* searched coding/prediction units. Data related to
|
||||
* a specific unit is found at index i. The arrays
|
||||
* should be indexed by elements of the "keys" array
|
||||
* that will be sorted by the RD costs of the units.
|
||||
*/
|
||||
typedef struct unit_stats_map_t {
|
||||
|
||||
cu_info_t unit[MAX_UNIT_STATS_MAP_SIZE]; //!< list of searched units; entries are not reordered by uvg_sort_keys_by_cost
|
||||
double cost[MAX_UNIT_STATS_MAP_SIZE]; //!< list of matching RD costs; the sort criterion used by uvg_sort_keys_by_cost
|
||||
double bits[MAX_UNIT_STATS_MAP_SIZE]; //!< list of matching bit costs
|
||||
int8_t keys[MAX_UNIT_STATS_MAP_SIZE]; //!< list of keys (indices) to elements in the other arrays; uvg_sort_keys_by_cost orders these by ascending cost
|
||||
int size; //!< number of active elements in the lists; uvg_sort_keys_by_cost sorts keys[0..size-1]
|
||||
} unit_stats_map_t;
|
||||
|
||||
#define NUM_MIP_MODES_FULL(width, height) (((width) == 4 && (height) == 4) ? 32 : ((width) == 4 || (height) == 4 || ((width) == 8 && (height) == 8) ? 16 : 12))
|
||||
#define NUM_MIP_MODES_HALF(width, height) (NUM_MIP_MODES_FULL((width), (height)) >> 1)
|
||||
|
||||
void uvg_sort_modes(int8_t *__restrict modes, double *__restrict costs, uint8_t length);
|
||||
void uvg_sort_modes_intra_luma(int8_t *__restrict modes, int8_t *__restrict trafo, double *__restrict costs, uint8_t length);
|
||||
|
||||
void uvg_sort_keys_by_cost(unit_stats_map_t *__restrict map);
|
||||
|
||||
void uvg_search_lcu(encoder_state_t *state, int x, int y, const yuv_t *hor_buf, const yuv_t *ver_buf, lcu_coeff_t *coeff);
|
||||
|
||||
double uvg_cu_rd_cost_luma(const encoder_state_t *const state,
|
||||
const int x_px, const int y_px, const int depth,
|
||||
const cu_info_t *const pred_cu,
|
||||
lcu_t *const lcu);
|
||||
const int x_px, const int y_px, const int depth,
|
||||
const cu_info_t *const pred_cu,
|
||||
lcu_t *const lcu);
|
||||
double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
|
||||
const int x_px, const int y_px, const int depth,
|
||||
cu_info_t * pred_cu,
|
||||
lcu_t *const lcu);
|
||||
const int x_px, const int y_px, const int depth,
|
||||
cu_info_t *const pred_cu,
|
||||
lcu_t *const lcu);
|
||||
void uvg_select_jccr_mode(
|
||||
const encoder_state_t* const state,
|
||||
const int x_px,
|
||||
const int y_px,
|
||||
const int depth,
|
||||
cu_info_t* const pred_cu,
|
||||
lcu_t* const lcu,
|
||||
double* cost_out);
|
||||
|
||||
void uvg_lcu_fill_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, int tr_depth);
|
||||
|
||||
void uvg_intra_recon_lcu_luma(encoder_state_t * const state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu);
|
||||
|
|
1352
src/search_inter.c
1352
src/search_inter.c
File diff suppressed because it is too large
Load diff
|
@ -64,20 +64,34 @@ enum hpel_position {
|
|||
HPEL_POS_DIA = 2
|
||||
};
|
||||
|
||||
typedef uint32_t uvg_mvd_cost_func(const encoder_state_t *state,
|
||||
typedef double uvg_mvd_cost_func(const encoder_state_t *state,
|
||||
int x, int y,
|
||||
int mv_shift,
|
||||
mv_t mv_cand[2][2],
|
||||
inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
|
||||
int16_t num_cand,
|
||||
int32_t ref_idx,
|
||||
uint32_t *bitcost);
|
||||
double *bitcost);
|
||||
|
||||
void uvg_search_cu_inter(encoder_state_t * const state,
|
||||
int x, int y, int depth,
|
||||
lcu_t *lcu,
|
||||
double *inter_cost,
|
||||
uint32_t *inter_bitcost);
|
||||
double* inter_bitcost);
|
||||
|
||||
|
||||
|
||||
unsigned uvg_inter_satd_cost(const encoder_state_t* state,
|
||||
const lcu_t *lcu,
|
||||
int x,
|
||||
int y);
|
||||
void uvg_cu_cost_inter_rd2(encoder_state_t* const state,
|
||||
int x, int y, int depth,
|
||||
cu_info_t* cur_cu,
|
||||
lcu_t* lcu,
|
||||
double* inter_cost,
|
||||
double* inter_bitcost);
|
||||
|
||||
int uvg_get_skip_context(int x, int y, lcu_t* const lcu, cu_array_t* const cu_a, int* predmode_ctx);
|
||||
|
||||
#endif // SEARCH_INTER_H_
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -43,24 +43,21 @@
|
|||
#include "global.h" // IWYU pragma: keep
|
||||
#include "intra.h"
|
||||
|
||||
double uvg_luma_mode_bits(const encoder_state_t *state,
|
||||
int8_t luma_mode, const int8_t *intra_preds, uint8_t multi_ref_idx, const uint8_t num_mip_modes, int mip_flag_ctx_id);
|
||||
double uvg_luma_mode_bits(const encoder_state_t *state, const cu_info_t* const cur_cu, int x, int y, int8_t depth, const lcu_t* lcu);
|
||||
|
||||
double uvg_chroma_mode_bits(const encoder_state_t *state,
|
||||
int8_t chroma_mode, int8_t luma_mode);
|
||||
|
||||
int8_t uvg_search_cu_intra_chroma(encoder_state_t * const state,
|
||||
const int x_px, const int y_px,
|
||||
const int depth, lcu_t *lcu, cclm_parameters_t* best_cclm);
|
||||
const int depth, lcu_t *lcu, intra_search_data_t* best_cclm);
|
||||
|
||||
void uvg_search_cu_intra(encoder_state_t * const state,
|
||||
const int x_px, const int y_px,
|
||||
const int depth, lcu_t *lcu,
|
||||
int8_t *mode_out,
|
||||
int8_t *trafo_out,
|
||||
double *cost_out,
|
||||
uint8_t *multi_ref_idx_out,
|
||||
bool *mip_flag,
|
||||
bool *mip_transp);
|
||||
void uvg_search_cu_intra(
|
||||
encoder_state_t * const state,
|
||||
const int x_px,
|
||||
const int y_px,
|
||||
const int depth,
|
||||
intra_search_data_t* search_data,
|
||||
lcu_t *lcu);
|
||||
|
||||
#endif // SEARCH_INTRA_H_
|
||||
|
|
|
@ -225,39 +225,40 @@ int uvg_quant_cbcr_residual_generic(
|
|||
int64_t best_cost = INT64_MAX;
|
||||
|
||||
// This changes the order of the cbf_masks so 2 and 3 are swapped compared with VTM
|
||||
for(int cbf_mask = cur_cu->type == CU_INTRA ? 1 : 3; cbf_mask < 4; cbf_mask++) {
|
||||
for(int i = cur_cu->type == CU_INTRA ? 1 : 3; i < 4; i++) {
|
||||
int64_t d1 = 0;
|
||||
const int cbf_mask = i * (state->frame->jccr_sign ? -1 : 1);
|
||||
for (int y = 0; y < width; y++)
|
||||
{
|
||||
for (int x = 0; x < width; x++)
|
||||
{
|
||||
int cbx = u_residual[x + y * width], crx = v_residual[x + y * width];
|
||||
if (cbf_mask == 1)
|
||||
if (cbf_mask == 2)
|
||||
{
|
||||
u1_residual[cbf_mask / 2][x + y * width] = ((4 * cbx + 2 * crx) / 5);
|
||||
d1 += square(cbx - u1_residual[cbf_mask / 2][x + y * width]) + square(crx - (u1_residual[cbf_mask / 2][x + y * width] >> 1));
|
||||
u1_residual[i - 2][x + y * width] = ((4 * cbx + 2 * crx) / 5);
|
||||
d1 += square(cbx - u1_residual[i - 2][x + y * width]) + square(crx - (u1_residual[i - 2][x + y * width] >> 1));
|
||||
}
|
||||
else if (cbf_mask == -1)
|
||||
else if (cbf_mask == -2)
|
||||
{
|
||||
u1_residual[cbf_mask / 2][x + y * width] = ((4 * cbx - 2 * crx) / 5);
|
||||
d1 += square(cbx - u1_residual[cbf_mask / 2][x + y * width]) + square(crx - (-u1_residual[cbf_mask / 2][x + y * width] >> 1));
|
||||
u1_residual[i - 2][x + y * width] = ((4 * cbx - 2 * crx) / 5);
|
||||
d1 += square(cbx - u1_residual[i - 2][x + y * width]) + square(crx - (-u1_residual[i - 2][x + y * width] >> 1));
|
||||
}
|
||||
else if (cbf_mask == 3)
|
||||
{
|
||||
u1_residual[cbf_mask / 2][x + y * width] = ((cbx + crx) / 2);
|
||||
d1 += square(cbx - u1_residual[cbf_mask / 2][x + y * width]) + square(crx - u1_residual[cbf_mask / 2][x + y * width]);
|
||||
u1_residual[i - 2][x + y * width] = ((cbx + crx) / 2);
|
||||
d1 += square(cbx - u1_residual[i - 2][x + y * width]) + square(crx - u1_residual[i - 2][x + y * width]);
|
||||
}
|
||||
else if (cbf_mask == -3)
|
||||
{
|
||||
u1_residual[cbf_mask / 2][x + y * width] = ((cbx - crx) / 2);
|
||||
d1 += square(cbx - u1_residual[cbf_mask / 2][x + y * width]) + square(crx + u1_residual[cbf_mask / 2][x + y * width]);
|
||||
u1_residual[i - 2][x + y * width] = ((cbx - crx) / 2);
|
||||
d1 += square(cbx - u1_residual[i - 2][x + y * width]) + square(crx + u1_residual[i - 2][x + y * width]);
|
||||
}
|
||||
else if (cbf_mask == 2)
|
||||
else if (cbf_mask == 1)
|
||||
{
|
||||
v1_residual[x + y * width] = ((4 * crx + 2 * cbx) / 5);
|
||||
d1 += square(cbx - (v1_residual[x + y * width] >> 1)) + square(crx - v1_residual[x + y * width]);
|
||||
}
|
||||
else if (cbf_mask == -2)
|
||||
else if (cbf_mask == -1)
|
||||
{
|
||||
v1_residual[x + y * width] = ((4 * crx - 2 * cbx) / 5);
|
||||
d1 += square(cbx - (-v1_residual[x + y * width] >> 1)) + square(crx - v1_residual[x + y * width]);
|
||||
|
@ -270,19 +271,19 @@ int uvg_quant_cbcr_residual_generic(
|
|||
}
|
||||
}
|
||||
if (d1 < best_cost) {
|
||||
best_cbf_mask = cbf_mask;
|
||||
best_cbf_mask = i;
|
||||
best_cost = d1;
|
||||
}
|
||||
}
|
||||
|
||||
uvg_transform2d(state->encoder_control, best_cbf_mask == 2 ? v1_residual : u1_residual[best_cbf_mask / 2], coeff, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U, cur_cu);
|
||||
uvg_transform2d(state->encoder_control, best_cbf_mask == 1 ? v1_residual : u1_residual[best_cbf_mask - 2], coeff, width, best_cbf_mask == 1 ? COLOR_V : COLOR_U, cur_cu);
|
||||
|
||||
if (state->encoder_control->cfg.rdoq_enable &&
|
||||
(width > 4 || !state->encoder_control->cfg.rdoq_skip))
|
||||
{
|
||||
int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth;
|
||||
tr_depth += (cur_cu->part_size == SIZE_NxN ? 1 : 0);
|
||||
uvg_rdoq(state, coeff, coeff_out, width, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U,
|
||||
uvg_rdoq(state, coeff, coeff_out, width, width, best_cbf_mask == 1 ? COLOR_V : COLOR_U,
|
||||
scan_order, cur_cu->type, tr_depth, cur_cu->cbf);
|
||||
}
|
||||
else if (state->encoder_control->cfg.rdoq_enable && false) {
|
||||
|
@ -290,7 +291,7 @@ int uvg_quant_cbcr_residual_generic(
|
|||
scan_order);
|
||||
}
|
||||
else {
|
||||
uvg_quant(state, coeff, coeff_out, width, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U,
|
||||
uvg_quant(state, coeff, coeff_out, width, width, best_cbf_mask == 1 ? COLOR_V : COLOR_U,
|
||||
scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false);
|
||||
}
|
||||
|
||||
|
@ -309,10 +310,10 @@ int uvg_quant_cbcr_residual_generic(
|
|||
int y, x;
|
||||
|
||||
// Get quantized residual. (coeff_out -> coeff -> residual)
|
||||
uvg_dequant(state, coeff_out, coeff, width, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U,
|
||||
uvg_dequant(state, coeff_out, coeff, width, width, best_cbf_mask == 1 ? COLOR_V : COLOR_U,
|
||||
cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false);
|
||||
|
||||
uvg_itransform2d(state->encoder_control, best_cbf_mask == 2 ? v1_residual : u1_residual[best_cbf_mask / 2], coeff, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U, cur_cu);
|
||||
uvg_itransform2d(state->encoder_control, best_cbf_mask == 1 ? v1_residual : u1_residual[best_cbf_mask - 2], coeff, width, best_cbf_mask == 1 ? COLOR_V : COLOR_U, cur_cu);
|
||||
|
||||
|
||||
//if (state->tile->frame->lmcs_aps->m_sliceReshapeInfo.enableChromaAdj && color != COLOR_Y) {
|
||||
|
@ -333,32 +334,32 @@ int uvg_quant_cbcr_residual_generic(
|
|||
// }
|
||||
// }
|
||||
//}
|
||||
|
||||
const int temp = best_cbf_mask * (state->frame->jccr_sign ? -1 : 1);
|
||||
// Get quantized reconstruction. (residual + pred_in -> rec_out)
|
||||
for (int y = 0; y < width; y++) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
if (best_cbf_mask == 1) {
|
||||
u_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width];
|
||||
v_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width] >> 1;
|
||||
if (temp == 2) {
|
||||
u_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width];
|
||||
v_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width] >> 1;
|
||||
}
|
||||
else if (best_cbf_mask == -1) {
|
||||
u_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width];
|
||||
v_residual[x + y * width] = -u1_residual[best_cbf_mask / 2][x + y * width] >> 1;
|
||||
else if (temp == -2) {
|
||||
u_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width];
|
||||
v_residual[x + y * width] = -u1_residual[best_cbf_mask - 2][x + y * width] >> 1;
|
||||
}
|
||||
else if (best_cbf_mask == 3) {
|
||||
u_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width];
|
||||
v_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width];
|
||||
else if (temp == 3) {
|
||||
u_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width];
|
||||
v_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width];
|
||||
}
|
||||
else if (best_cbf_mask == -3) {
|
||||
else if (temp == -3) {
|
||||
// non-normative clipping to prevent 16-bit overflow
|
||||
u_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width]; // == -32768 && sizeof(Pel) == 2) ? 32767 : -v1_residual[best_cbf_mask][x];
|
||||
v_residual[x + y * width] = -u1_residual[best_cbf_mask / 2][x + y * width];
|
||||
u_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width]; // == -32768 && sizeof(Pel) == 2) ? 32767 : -v1_residual[best_cbf_mask][x];
|
||||
v_residual[x + y * width] = -u1_residual[best_cbf_mask - 2][x + y * width];
|
||||
}
|
||||
else if (best_cbf_mask == 2) {
|
||||
else if (temp == 1) {
|
||||
u_residual[x + y * width] = v1_residual[x + y * width] >> 1;
|
||||
v_residual[x + y * width] = v1_residual[x + y * width];
|
||||
}
|
||||
else if (best_cbf_mask == -2) {
|
||||
else if (temp == -1) {
|
||||
u_residual[x + y * width] = v1_residual[x + y * width] >> 1;
|
||||
v_residual[x + y * width] = -v1_residual[x + y * width];
|
||||
}
|
||||
|
|
|
@ -260,12 +260,10 @@ int uvg_quantize_residual_trskip(
|
|||
struct {
|
||||
uvg_pixel rec[LCU_WIDTH * LCU_WIDTH];
|
||||
coeff_t coeff[LCU_WIDTH * LCU_WIDTH];
|
||||
uint32_t cost;
|
||||
double cost;
|
||||
int has_coeffs;
|
||||
} skip, *best;
|
||||
|
||||
const int bit_cost = (int)(state->lambda + 0.5);
|
||||
|
||||
//noskip.has_coeffs = uvg_quantize_residual(
|
||||
// state, cur_cu, width, color, scan_order,
|
||||
// 0, in_stride, 4,
|
||||
|
@ -278,7 +276,7 @@ int uvg_quantize_residual_trskip(
|
|||
1, in_stride, width,
|
||||
ref_in, pred_in, skip.rec, skip.coeff, false, lmcs_chroma_adj);
|
||||
skip.cost = uvg_pixels_calc_ssd(ref_in, skip.rec, in_stride, width, width);
|
||||
skip.cost += uvg_get_coeff_cost(state, skip.coeff, width, 0, scan_order, 1) * bit_cost;
|
||||
skip.cost += uvg_get_coeff_cost(state, skip.coeff, width, 0, scan_order, 1) * state->frame->lambda;
|
||||
|
||||
/* if (noskip.cost <= skip.cost) {
|
||||
*trskip_out = 0;
|
||||
|
@ -481,15 +479,17 @@ static void quantize_tr_residual(encoder_state_t * const state,
|
|||
* - lcu->cbf coded block flags for the area
|
||||
* - lcu->cu.intra.tr_skip tr skip flags for the area (in case of luma)
|
||||
*/
|
||||
void uvg_quantize_lcu_residual(encoder_state_t * const state,
|
||||
const bool luma,
|
||||
const bool chroma,
|
||||
const int32_t x,
|
||||
const int32_t y,
|
||||
const uint8_t depth,
|
||||
cu_info_t *cur_pu,
|
||||
lcu_t* lcu,
|
||||
bool early_skip)
|
||||
void uvg_quantize_lcu_residual(
|
||||
encoder_state_t * const state,
|
||||
const bool luma,
|
||||
const bool chroma,
|
||||
const bool jccr,
|
||||
const int32_t x,
|
||||
const int32_t y,
|
||||
const uint8_t depth,
|
||||
cu_info_t *cur_pu,
|
||||
lcu_t* lcu,
|
||||
bool early_skip)
|
||||
{
|
||||
const int32_t width = LCU_WIDTH >> depth;
|
||||
const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) };
|
||||
|
@ -511,7 +511,7 @@ void uvg_quantize_lcu_residual(encoder_state_t * const state,
|
|||
if (luma) {
|
||||
cbf_clear(&cur_pu->cbf, depth, COLOR_Y);
|
||||
}
|
||||
if (chroma) {
|
||||
if (chroma || jccr) {
|
||||
cbf_clear(&cur_pu->cbf, depth, COLOR_U);
|
||||
cbf_clear(&cur_pu->cbf, depth, COLOR_V);
|
||||
}
|
||||
|
@ -523,10 +523,11 @@ void uvg_quantize_lcu_residual(encoder_state_t * const state,
|
|||
const int32_t x2 = x + offset;
|
||||
const int32_t y2 = y + offset;
|
||||
|
||||
uvg_quantize_lcu_residual(state, luma, chroma, x, y, depth + 1, NULL, lcu, early_skip);
|
||||
uvg_quantize_lcu_residual(state, luma, chroma, x2, y, depth + 1, NULL, lcu, early_skip);
|
||||
uvg_quantize_lcu_residual(state, luma, chroma, x, y2, depth + 1, NULL, lcu, early_skip);
|
||||
uvg_quantize_lcu_residual(state, luma, chroma, x2, y2, depth + 1, NULL, lcu, early_skip);
|
||||
// jccr is currently not supported if transform is split
|
||||
uvg_quantize_lcu_residual(state, luma, chroma, 0, x, y, depth + 1, NULL, lcu, early_skip);
|
||||
uvg_quantize_lcu_residual(state, luma, chroma, 0, x2, y, depth + 1, NULL, lcu, early_skip);
|
||||
uvg_quantize_lcu_residual(state, luma, chroma, 0, x, y2, depth + 1, NULL, lcu, early_skip);
|
||||
uvg_quantize_lcu_residual(state, luma, chroma, 0, x2, y2, depth + 1, NULL, lcu, early_skip);
|
||||
|
||||
// Propagate coded block flags from child CUs to parent CU.
|
||||
uint16_t child_cbfs[3] = {
|
||||
|
@ -549,9 +550,9 @@ void uvg_quantize_lcu_residual(encoder_state_t * const state,
|
|||
if (chroma) {
|
||||
quantize_tr_residual(state, COLOR_U, x, y, depth, cur_pu, lcu, early_skip);
|
||||
quantize_tr_residual(state, COLOR_V, x, y, depth, cur_pu, lcu, early_skip);
|
||||
if(state->encoder_control->cfg.jccr && cur_pu->tr_depth == cur_pu->depth){
|
||||
quantize_tr_residual(state, COLOR_UV, x, y, depth, cur_pu, lcu, early_skip);
|
||||
}
|
||||
}
|
||||
if (jccr && cur_pu->tr_depth == cur_pu->depth) {
|
||||
quantize_tr_residual(state, COLOR_UV, x, y, depth, cur_pu, lcu, early_skip);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -67,14 +67,16 @@ void uvg_itransform2d(const encoder_control_t * const encoder,
|
|||
|
||||
int32_t uvg_get_scaled_qp(color_t color, int8_t qp, int8_t qp_offset, int8_t const* const chroma_scale);
|
||||
|
||||
void uvg_quantize_lcu_residual(encoder_state_t *state,
|
||||
bool luma,
|
||||
bool chroma,
|
||||
int32_t x,
|
||||
int32_t y,
|
||||
uint8_t depth,
|
||||
cu_info_t *cur_cu,
|
||||
lcu_t* lcu,
|
||||
bool early_skip);
|
||||
/**
 * \brief Quantize the residual of one block of an LCU (declaration).
 *
 * Notes below are taken from the parts of the definition that are visible
 * next to this declaration; anything not visible there is marked as such.
 *
 * \param luma        Luma is selected: the COLOR_Y coded block flag is cleared
 *                    at the start.
 * \param chroma      Chroma is selected: COLOR_U and COLOR_V are quantized.
 * \param jccr        Additionally quantize COLOR_UV. The definition only does
 *                    this when tr_depth == depth of the CU; the recursive
 *                    calls for the four sub-blocks pass 0 because jccr is not
 *                    supported when the transform is split. The U and V coded
 *                    block flags are cleared when either chroma or jccr is set.
 * \param x           Block x position (presumably luma pixels -- confirm).
 * \param y           Block y position (presumably luma pixels -- confirm).
 * \param depth       Block depth; the definition uses width = LCU_WIDTH >> depth.
 * \param cur_cu      CU whose cbf flags are updated; recursive calls pass NULL.
 * \param lcu         LCU working data.
 * \param early_skip  Forwarded to the per-colour quantization; exact meaning is
 *                    not visible here.
 */
void uvg_quantize_lcu_residual(
|
||||
encoder_state_t *state,
|
||||
bool luma,
|
||||
bool chroma,
|
||||
const bool jccr,
|
||||
int32_t x,
|
||||
int32_t y,
|
||||
uint8_t depth,
|
||||
cu_info_t *cur_cu,
|
||||
lcu_t* lcu,
|
||||
bool early_skip);
|
||||
|
||||
#endif
|
||||
|
|
30
src/uvg266.h
30
src/uvg266.h
|
@ -267,6 +267,12 @@ enum uvg_amvr_resolution
|
|||
UVG_IMV_HPEL = 3
|
||||
};
|
||||
|
||||
/**
 * \brief Format of the ROI (delta QP map) file given with --roi.
 *
 * Per the README, text and binary files are supported and the format is
 * detected from the file extension (.txt/.bin); an unknown extension is
 * treated as text.
 */
enum uvg_roi_format
|
||||
{
|
||||
/* Text file. */
UVG_ROI_TXT = 0,
|
||||
/* Binary file: 32-bit width and height, then signed 8-bit delta QPs. */
UVG_ROI_BIN = 1
|
||||
};
|
||||
|
||||
// Map from input format to chroma format.
|
||||
#define UVG_FORMAT2CSP(format) ((enum uvg_chroma_format)format)
|
||||
|
||||
|
@ -408,10 +414,9 @@ typedef struct uvg_config
|
|||
int32_t implicit_rdpcm; /*!< \brief Enable implicit residual DPCM. */
|
||||
|
||||
struct {
|
||||
int32_t width;
|
||||
int32_t height;
|
||||
int8_t *dqps;
|
||||
} roi; /*!< \since 3.14.0 \brief Map of delta QPs for region of interest coding. */
|
||||
char *file_path;
|
||||
enum uvg_roi_format format;
|
||||
} roi; /*!< \brief Specify delta QPs for region of interest coding. */
|
||||
|
||||
unsigned slices; /*!< \since 3.15.0 \brief How to map slices to frame. */
|
||||
|
||||
|
@ -524,6 +529,12 @@ typedef struct uvg_config
|
|||
int8_t cclm;
|
||||
|
||||
int8_t amvr; /* \brief Adaptive motion vector resolution parameter */
|
||||
|
||||
/** \brief whether to try combining intra cus at the lower depth when search
|
||||
* is not performed at said depth*/
|
||||
uint8_t combine_intra_cus;
|
||||
|
||||
uint8_t force_inter;
|
||||
} uvg_config;
|
||||
|
||||
/**
|
||||
|
@ -555,6 +566,14 @@ typedef struct uvg_picture {
|
|||
enum uvg_chroma_format chroma_format;
|
||||
|
||||
int32_t ref_pocs[16];
|
||||
|
||||
/* Region of interest (ROI) / delta QP map for this picture. Per the API
 * notes, this field is only honoured when a ROI file is not used. */
struct
|
||||
{
|
||||
/* Width of the delta QP map. */
int width;
|
||||
/* Height of the delta QP map. */
int height;
|
||||
/* Delta QP values; presumably width * height entries in raster order, as
 * in the ROI file format -- TODO confirm. Ownership is not visible here. */
int8_t *roi_array;
|
||||
} roi;
|
||||
|
||||
} uvg_picture;
|
||||
|
||||
/**
|
||||
|
@ -781,6 +800,9 @@ typedef struct uvg_api {
|
|||
* original frame and frame info in data_out, len_out, pic_out, src_out and
|
||||
* info_out, respectively. Otherwise, set the output parameters to NULL.
|
||||
*
|
||||
* Region of interest (ROI) / delta QP map can be specified in the input
|
||||
* picture's ROI field but only when a ROI file is not used.
|
||||
*
|
||||
* After passing all of the input frames, the caller should keep calling this
|
||||
* function with pic_in set to NULL, until no more data is returned in the
|
||||
* output parameters.
|
||||
|
|
|
@ -3,6 +3,6 @@
|
|||
set -eu
|
||||
. "${0%/*}/util.sh"
|
||||
|
||||
valgrind_test 512x256 10 yuv420p --threads=2 --owf=1 --preset=ultrafast --tiles=2x2
|
||||
valgrind_test 512x256 10 yuv420p --threads=2 --owf=1 --preset=ultrafast --gop 0 --tiles=2x2
|
||||
#valgrind_test 264x130 10 --threads=2 --owf=1 --preset=ultrafast --slices=wpp
|
||||
#if [ ! -z ${GITLAB_CI+x} ];then valgrind_test 264x130 20 --threads=2 --owf=1 --preset=fast --slices=wpp --no-open-gop; fi
|
||||
|
|
Loading…
Reference in a new issue