Merge branch 'improve-intra-search'

This commit is contained in:
Joose Sainio 2022-05-30 12:11:48 +03:00
commit 153afc6739
40 changed files with 2954 additions and 2057 deletions

View file

@ -145,11 +145,20 @@ Video structure:
- frametile: Constrain within the tile.
- frametilemargin: Constrain even more.
--roi <filename> : Use a delta QP map for region of interest.
Reads an array of delta QP values from a text
file. The file format is: width and height of
the QP delta map followed by width*height delta
QP values in raster order. The map can be of any
size and will be scaled to the video size.
Reads an array of delta QP values from a file.
Text and binary files are supported and detected
from the file extension (.txt/.bin). If a known
extension is not found, the file is treated as
a text file. The file can include one or many
ROI frames each in the following format:
width and height of the QP delta map followed
by width * height delta QP values in raster
order. In binary format, width and height are
32-bit integers whereas the delta QP values are
signed 8-bit values. The map can be of any size
and will be scaled to the video size. The file
reading will loop if end of the file is reached.
See roi.txt in the examples folder.
--set-qp-in-cu : Set QP at CU level keeping pic_init_qp_minus26.
in PPS and slice_qp_delta in slice header zero.
--(no-)erp-aqp : Use adaptive QP for 360 degree video with

View file

@ -164,11 +164,20 @@ Constrain movement vectors. [none]
.TP
\fB\-\-roi <filename>
Use a delta QP map for region of interest.
Reads an array of delta QP values from a text
file. The file format is: width and height of
the QP delta map followed by width*height delta
QP values in raster order. The map can be of any
size and will be scaled to the video size.
Reads an array of delta QP values from a file.
Text and binary files are supported and detected
from the file extension (.txt/.bin). If a known
extension is not found, the file is treated as
a text file. The file can include one or many
ROI frames each in the following format:
width and height of the QP delta map followed
by width * height delta QP values in raster
order. In binary format, width and height are
32\-bit integers whereas the delta QP values are
signed 8\-bit values. The map can be of any size
and will be scaled to the video size. The file
reading will loop if end of the file is reached.
See roi.txt in the examples folder.
.TP
\fB\-\-set\-qp\-in\-cu
Set QP at CU level keeping pic_init_qp_minus26.

View file

@ -1236,19 +1236,19 @@ static void code_alf_ctu_filter_index(encoder_state_t * const state,
assert(filter_set_idx < num_available_filt_sets); //"temporal non-latest set"
if (num_aps > 1)
{
uvg_cabac_encode_trunc_bin(cabac, filter_set_idx - ALF_NUM_FIXED_FILTER_SETS, num_available_filt_sets - ALF_NUM_FIXED_FILTER_SETS);
uvg_cabac_encode_trunc_bin(cabac, filter_set_idx - ALF_NUM_FIXED_FILTER_SETS, num_available_filt_sets - ALF_NUM_FIXED_FILTER_SETS, NULL);
}
}
else
{
assert(filter_set_idx < ALF_NUM_FIXED_FILTER_SETS); //"fixed set larger than temporal"
uvg_cabac_encode_trunc_bin(cabac, filter_set_idx, ALF_NUM_FIXED_FILTER_SETS);
uvg_cabac_encode_trunc_bin(cabac, filter_set_idx, ALF_NUM_FIXED_FILTER_SETS, NULL);
}
}
else
{
assert(filter_set_idx < ALF_NUM_FIXED_FILTER_SETS); //Fixed set numavail < num_fixed
uvg_cabac_encode_trunc_bin(cabac, filter_set_idx, ALF_NUM_FIXED_FILTER_SETS);
uvg_cabac_encode_trunc_bin(cabac, filter_set_idx, ALF_NUM_FIXED_FILTER_SETS, NULL);
}
}

View file

@ -33,6 +33,7 @@
#include "bitstream.h"
#include <math.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>

View file

@ -70,6 +70,7 @@ void uvg_cabac_start(cabac_data_t * const data)
data->num_buffered_bytes = 0;
data->buffered_byte = 0xff;
data->only_count = 0; // By default, write bits out
data->update = 0;
}
/**
@ -199,7 +200,7 @@ void uvg_cabac_encode_bin_trm(cabac_data_t * const data, const uint8_t bin_value
/**
* \brief encode truncated binary code
*/
void uvg_cabac_encode_trunc_bin(cabac_data_t * const data, const uint32_t bin_value, const uint32_t max_value) {
void uvg_cabac_encode_trunc_bin(cabac_data_t * const data, const uint32_t bin_value, const uint32_t max_value, double* bits_out) {
int thresh;
int symbol = bin_value;
if (max_value > 256) {
@ -219,9 +220,11 @@ void uvg_cabac_encode_trunc_bin(cabac_data_t * const data, const uint32_t bin_va
int b = max_value - val;
if (symbol < val - b) {
CABAC_BINS_EP(data, symbol, thresh, "TruncSymbols");
if (bits_out) *bits_out += thresh;
} else {
symbol += val - b;
CABAC_BINS_EP(data, symbol, thresh + 1, "TruncSymbols");
if (bits_out) *bits_out += thresh + 1;
}
}
@ -349,7 +352,12 @@ void uvg_cabac_write_coeff_remain(cabac_data_t * const cabac, const uint32_t rem
/**
* \brief
*/
void uvg_cabac_write_unary_max_symbol(cabac_data_t * const data, cabac_ctx_t * const ctx, uint32_t symbol, const int32_t offset, const uint32_t max_symbol)
void uvg_cabac_write_unary_max_symbol(cabac_data_t * const data,
cabac_ctx_t * const ctx,
uint32_t symbol,
const int32_t offset,
const uint32_t max_symbol,
double* bits_out)
{
int8_t code_last = max_symbol > symbol;
@ -357,18 +365,17 @@ void uvg_cabac_write_unary_max_symbol(cabac_data_t * const data, cabac_ctx_t * c
if (!max_symbol) return;
data->cur_ctx = ctx;
CABAC_BIN(data, symbol, "ums");
CABAC_FBITS_UPDATE(data, ctx, symbol, *bits_out, "ums");
if (!symbol) return;
data->cur_ctx = &ctx[offset];
while (--symbol) {
CABAC_BIN(data, 1, "ums");
CABAC_FBITS_UPDATE(data, &ctx[offset], 1, *bits_out, "ums");
}
if (code_last) {
CABAC_BIN(data, 0, "ums");
CABAC_FBITS_UPDATE(data, &ctx[offset], 0,*bits_out, "ums");
}
}
@ -405,7 +412,7 @@ void uvg_cabac_write_unary_max_symbol_ep(cabac_data_t * const data, unsigned int
/**
* \brief
*/
void uvg_cabac_write_ep_ex_golomb(encoder_state_t * const state,
uint32_t uvg_cabac_write_ep_ex_golomb(encoder_state_t * const state,
cabac_data_t * const data,
uint32_t symbol,
uint32_t count)
@ -426,4 +433,5 @@ void uvg_cabac_write_ep_ex_golomb(encoder_state_t * const state,
num_bins += count;
CABAC_BINS_EP(data, bins, num_bins, "ep_ex_golomb");
return num_bins;
}

View file

@ -59,7 +59,8 @@ typedef struct
uint32_t buffered_byte;
int32_t num_buffered_bytes;
int32_t bits_left;
int8_t only_count;
int8_t only_count : 4;
int8_t update : 4;
bitstream_t *stream;
// CONTEXTS
@ -133,18 +134,18 @@ extern const uint8_t uvg_g_auc_renorm_table[32];
void uvg_cabac_start(cabac_data_t *data);
void uvg_cabac_encode_bin(cabac_data_t *data, uint32_t bin_value);
void uvg_cabac_encode_bin_ep(cabac_data_t *data, uint32_t bin_value);
void uvg_cabac_encode_trunc_bin(cabac_data_t *data, uint32_t bin_value, uint32_t max_value);
void uvg_cabac_encode_trunc_bin(cabac_data_t *data, uint32_t bin_value, uint32_t max_value, double* bits_out);
void uvg_cabac_encode_bins_ep(cabac_data_t *data, uint32_t bin_values, int num_bins);
void uvg_cabac_encode_bin_trm(cabac_data_t *data, uint8_t bin_value);
void uvg_cabac_write(cabac_data_t *data);
void uvg_cabac_finish(cabac_data_t *data);
void uvg_cabac_write_coeff_remain(cabac_data_t *cabac, uint32_t symbol,
uint32_t r_param, const unsigned int cutoff);
void uvg_cabac_write_ep_ex_golomb(struct encoder_state_t * const state, cabac_data_t *data,
uint32_t uvg_cabac_write_ep_ex_golomb(struct encoder_state_t * const state, cabac_data_t *data,
uint32_t symbol, uint32_t count);
void uvg_cabac_write_unary_max_symbol(cabac_data_t *data, cabac_ctx_t *ctx,
uint32_t symbol, int32_t offset,
uint32_t max_symbol);
uint32_t symbol, int32_t offset,
uint32_t max_symbol, double* bits_out);
void uvg_cabac_write_unary_max_symbol_ep(cabac_data_t *data, unsigned int symbol, unsigned int max_symbol);
#define CTX_PROB_BITS 15
@ -153,6 +154,18 @@ void uvg_cabac_write_unary_max_symbol_ep(cabac_data_t *data, unsigned int symbol
#define CTX_MASK_0 (~(~0u << CTX_PROB_BITS_0) << (CTX_PROB_BITS - CTX_PROB_BITS_0))
#define CTX_MASK_1 (~(~0u << CTX_PROB_BITS_1) << (CTX_PROB_BITS - CTX_PROB_BITS_1))
// Floating point fractional bits, derived from kvz_entropy_bits
extern const float uvg_f_entropy_bits[512];

/**
 * \brief Look up the fractional bit cost of coding bin \p val with \p ctx.
 *
 * Indexes uvg_f_entropy_bits with (CTX_STATE(ctx) << 1) ^ val.
 */
#define CTX_ENTROPY_FBITS(ctx, val) uvg_f_entropy_bits[(CTX_STATE(ctx)<<1) ^ (val)]

/**
 * \brief Count and/or code a single context-coded bin.
 *
 * - If (cabac)->only_count is set, the table cost of (ctx, val) is added
 *   to \p bits. \p bits is only touched in this case.
 * - If (cabac)->update is set, \p ctx is stored in (cabac)->cur_ctx and the
 *   bin is passed to CABAC_BIN.
 *
 * The two conditions are independent, so both, either or neither may run.
 * Every argument may be evaluated more than once; do not pass expressions
 * with side effects.
 *
 * \param cabac  pointer to the cabac data
 * \param ctx    pointer to the context model used for the bin
 * \param val    bin value
 * \param bits   lvalue that accumulates the bit cost
 * \param name   name string forwarded to CABAC_BIN
 */
#define CABAC_FBITS_UPDATE(cabac, ctx, val, bits, name) do { \
  if ((cabac)->only_count) (bits) += CTX_ENTROPY_FBITS((ctx), (val)); \
  if ((cabac)->update) { \
    (cabac)->cur_ctx = (ctx); \
    CABAC_BIN((cabac), (val), (name)); \
  } \
} while(0)
// Macros
#define CTX_GET_STATE(ctx) ( (ctx)->state[0]+(ctx)->state[1] )
#define CTX_STATE(ctx) ( CTX_GET_STATE(ctx)>>8 )
@ -185,23 +198,23 @@ extern uint32_t uvg_cabac_bins_count;
extern bool uvg_cabac_bins_verbose;
#define CABAC_BIN(data, value, name) { \
uint32_t prev_state = CTX_STATE(data->cur_ctx); \
if(uvg_cabac_bins_verbose && !data->only_count) {printf("%d %d [%d:%d] %s = %u, range = %u LPS = %u state = %u -> ", \
uvg_cabac_bins_count++, (data)->range, (data)->range-CTX_LPS(data->cur_ctx,(data)->range), CTX_LPS(data->cur_ctx,(data)->range), (name), (uint32_t)(value), (data)->range, CTX_LPS(data->cur_ctx,(data)->range), prev_state); }\
if(uvg_cabac_bins_verbose && !(data)->only_count) {printf("%d %d [%d:%d] %s = %u, range = %u LPS = %u state = %u -> ", \
uvg_cabac_bins_count++, (data)->range, (data)->range-CTX_LPS((data)->cur_ctx,(data)->range), CTX_LPS((data)->cur_ctx,(data)->range), (name), (uint32_t)(value), (data)->range, CTX_LPS((data)->cur_ctx,(data)->range), prev_state); }\
uvg_cabac_encode_bin((data), (value)); \
if(uvg_cabac_bins_verbose && !data->only_count) printf("%u\n", CTX_STATE(data->cur_ctx)); }
if(uvg_cabac_bins_verbose && !(data)->only_count) printf("%u\n", CTX_STATE((data)->cur_ctx)); }
#define CABAC_BINS_EP(data, value, bins, name) { \
uint32_t prev_state = CTX_STATE(data->cur_ctx); \
uint32_t prev_state = (!(data)->only_count) ? CTX_STATE(data->cur_ctx) : 0; \
uvg_cabac_encode_bins_ep((data), (value), (bins)); \
if(uvg_cabac_bins_verbose && !data->only_count) { printf("%d %s = %u(%u bins), state = %u -> %u\n", \
uvg_cabac_bins_count, (name), (uint32_t)(value), (bins), prev_state, CTX_STATE(data->cur_ctx)); uvg_cabac_bins_count+=bins;}}
uvg_cabac_bins_count, (name), (uint32_t)(value), (bins), prev_state, CTX_STATE((data)->cur_ctx)); uvg_cabac_bins_count+=(bins);}}
#define CABAC_BIN_EP(data, value, name) { \
uint32_t prev_state = CTX_STATE(data->cur_ctx); \
uint32_t prev_state = (!(data)->only_count) ? CTX_STATE((data)->cur_ctx) : 0;; \
uvg_cabac_encode_bin_ep((data), (value)); \
if(uvg_cabac_bins_verbose && !data->only_count) {printf("%d %s = %u, state = %u -> %u\n", \
uvg_cabac_bins_count++, (name), (uint32_t)(value), prev_state, CTX_STATE(data->cur_ctx)); }}
if(uvg_cabac_bins_verbose && !(data)->only_count) {printf("%d %s = %u, state = %u -> %u\n", \
uvg_cabac_bins_count++, (name), (uint32_t)(value), prev_state, CTX_STATE((data)->cur_ctx)); }}
#else
#define CABAC_BIN(data, value, name) \
uvg_cabac_encode_bin((data), (value));

View file

@ -147,9 +147,9 @@ int uvg_config_init(uvg_config *cfg)
cfg->gop_lp_definition.t = 1;
cfg->open_gop = true;
cfg->roi.width = 0;
cfg->roi.height = 0;
cfg->roi.dqps = NULL;
cfg->roi.file_path = NULL;
cfg->roi.format = UVG_ROI_TXT;
cfg->set_qp_in_cu = false;
cfg->erp_aqp = false;
@ -212,6 +212,9 @@ int uvg_config_init(uvg_config *cfg)
cfg->cclm = 0;
cfg->combine_intra_cus = 1;
cfg->force_inter = 0;
return 1;
}
@ -219,11 +222,11 @@ int uvg_config_destroy(uvg_config *cfg)
{
if (cfg) {
FREE_POINTER(cfg->cqmfile);
FREE_POINTER(cfg->roi.file_path);
FREE_POINTER(cfg->fast_coeff_table_fn);
FREE_POINTER(cfg->tiles_width_split);
FREE_POINTER(cfg->tiles_height_split);
FREE_POINTER(cfg->slice_addresses_in_ts);
FREE_POINTER(cfg->roi.dqps);
FREE_POINTER(cfg->fastrd_learning_outdir_fn);
}
free(cfg);
@ -1269,60 +1272,29 @@ int uvg_config_parse(uvg_config *cfg, const char *name, const char *value)
}
else if OPT("implicit-rdpcm")
cfg->implicit_rdpcm = (bool)atobool(value);
else if OPT("roi") {
// The ROI description is as follows:
// First number is width, second number is height,
// then follows width * height number of dqp values.
FILE* f = fopen(value, "rb");
if (!f) {
fprintf(stderr, "Could not open ROI file.\n");
static enum uvg_roi_format const formats[] = { UVG_ROI_TXT, UVG_ROI_BIN };
static const char * const format_names[] = { "txt", "bin", NULL };
char *roi_file = strdup(value);
if (!roi_file) {
fprintf(stderr, "Failed to allocate memory for ROI file name.\n");
return 0;
}
FREE_POINTER(cfg->roi.file_path);
cfg->roi.file_path = roi_file;
int width = 0;
int height = 0;
if (!fscanf(f, "%d", &width) || !fscanf(f, "%d", &height)) {
fprintf(stderr, "Failed to read ROI size.\n");
fclose(f);
return 0;
// Get file extension or the substring after the last dot
char *maybe_extension = strrchr(cfg->roi.file_path, '.');
if (!maybe_extension) {
cfg->roi.format = UVG_ROI_TXT;
} else {
maybe_extension++;
int8_t format;
bool unknown_format = !parse_enum(maybe_extension, format_names, &format);
cfg->roi.format = unknown_format ? UVG_ROI_TXT : formats[format];
}
if (width <= 0 || height <= 0) {
fprintf(stderr, "Invalid ROI size: %dx%d.\n", width, height);
fclose(f);
return 0;
}
if (width > 10000 || height > 10000) {
fprintf(stderr, "ROI dimensions exceed arbitrary value of 10000.\n");
fclose(f);
return 0;
}
const unsigned size = width * height;
int8_t *dqp_array = calloc((size_t)size, sizeof(cfg->roi.dqps[0]));
if (!dqp_array) {
fprintf(stderr, "Failed to allocate memory for ROI table.\n");
fclose(f);
return 0;
}
FREE_POINTER(cfg->roi.dqps);
cfg->roi.dqps = dqp_array;
cfg->roi.width = width;
cfg->roi.height = height;
for (int i = 0; i < size; ++i) {
int number; // Need a pointer to int for fscanf
if (fscanf(f, "%d", &number) != 1) {
fprintf(stderr, "Reading ROI file failed.\n");
fclose(f);
return 0;
}
dqp_array[i] = CLIP(-51, 51, number);
}
fclose(f);
}
else if OPT("set-qp-in-cu") {
cfg->set_qp_in_cu = (bool)atobool(value);
@ -1476,6 +1448,12 @@ int uvg_config_parse(uvg_config *cfg, const char *name, const char *value)
else if OPT("cclm") {
cfg->cclm = (bool)atobool(value);
}
else if OPT("combine-intra-cus") {
cfg->combine_intra_cus = atobool(value);
}
else if OPT("force-inter") {
cfg->force_inter = atobool(value);
}
else {
return 0;
}

View file

@ -141,6 +141,7 @@ static const struct option long_options[] = {
{ "force-level", required_argument, NULL, 0 },
{ "high-tier", no_argument, NULL, 0 },
{ "me-steps", required_argument, NULL, 0 },
{ "roi-file", required_argument, NULL, 0 },
{ "fast-residual-cost", required_argument, NULL, 0 },
{ "set-qp-in-cu", no_argument, NULL, 0 },
{ "open-gop", no_argument, NULL, 0 },
@ -179,6 +180,10 @@ static const struct option long_options[] = {
{ "no-amvr", no_argument, NULL, 0 },
{ "cclm", no_argument, NULL, 0 },
{ "no-cclm", no_argument, NULL, 0 },
{ "combine-intra-cus", no_argument, NULL, 0 },
{ "no-combine-intra-cus", no_argument, NULL, 0 },
{ "force-inter", no_argument, NULL, 0 },
{ "no-force-inter", no_argument, NULL, 0 },
{0, 0, 0, 0}
};
@ -499,11 +504,20 @@ void print_help(void)
" - frametile: Constrain within the tile.\n"
" - frametilemargin: Constrain even more.\n"
" --roi <filename> : Use a delta QP map for region of interest.\n"
" Reads an array of delta QP values from a text\n"
" file. The file format is: width and height of\n"
" the QP delta map followed by width*height delta\n"
" QP values in raster order. The map can be of any\n"
" size and will be scaled to the video size.\n"
" Reads an array of delta QP values from a file.\n"
" Text and binary files are supported and detected\n"
" from the file extension (.txt/.bin). If a known\n"
" extension is not found, the file is treated as\n"
" a text file. The file can include one or many\n"
" ROI frames each in the following format:\n"
" width and height of the QP delta map followed\n"
" by width * height delta QP values in raster\n"
" order. In binary format, width and height are\n"
" 32-bit integers whereas the delta QP values are\n"
" signed 8-bit values. The map can be of any size\n"
" and will be scaled to the video size. The file\n"
" reading will loop if end of the file is reached.\n"
" See roi.txt in the examples folder.\n"
" --set-qp-in-cu : Set QP at CU level keeping pic_init_qp_minus26.\n"
" in PPS and slice_qp_delta in slize header zero.\n"
" --(no-)erp-aqp : Use adaptive QP for 360 degree video with\n"
@ -587,6 +601,16 @@ void print_help(void)
" --ml-pu-depth-intra : Predict the pu-depth-intra using machine\n"
" learning trees, overrides the\n"
" --pu-depth-intra parameter. [disabled]\n"
/* Adjacent string literals are concatenated as-is, so every help line needs its own "\n". */
" --(no-)combine-intra-cus: Whether the encoder tries to code a cu\n"
" on lower depth even when search is not\n"
" performed on said depth. Should only\n"
" be disabled if cus absolutely must not\n"
" be larger than limited by the search.\n"
" [enabled]\n"
" --force-inter : Force the encoder to use inter always.\n"
" This is mostly for debugging and is not\n"
" guaranteed to produce sensible bitstream or\n"
" work at all. [disabled]\n"
" --tr-depth-intra <int> : Transform split depth for intra blocks [0]\n"
" --(no-)bipred : Bi-prediction [disabled]\n"
" --cu-split-termination <string> : CU split search termination [zero]\n"

View file

@ -148,7 +148,7 @@ typedef struct
uint8_t merge_idx : 3; //!< \brief merge index
uint8_t tr_skip : 1; //!< \brief transform skip flag
uint8_t tr_idx : 3; //!< \brief transform index
uint8_t joint_cb_cr : 2; //!< \brief joint chroma residual coding
uint8_t joint_cb_cr : 3; //!< \brief joint chroma residual coding
uint16_t cbf;
@ -183,6 +183,16 @@ typedef struct
};
} cu_info_t;
/**
 * \brief Position and size of a coding unit, for luma and chroma.
 *
 * The coordinates are signed 16-bit and all four size fields are signed
 * 8-bit, so a stored size cannot exceed 127.
 *
 * NOTE(review): units are not visible from this definition. Presumably x/y
 * and width/height are in luma pixels and chroma_width/chroma_height are the
 * same block measured in chroma samples; confirm against the code that fills
 * this struct.
 */
typedef struct {
// Horizontal position.
int16_t x;
// Vertical position.
int16_t y;
// Block width.
int8_t width;
// Block height.
int8_t height;
// Block width for chroma.
int8_t chroma_width;
// Block height for chroma.
int8_t chroma_height;
} cu_loc_t;
#define CU_GET_MV_CAND(cu_info_ptr, reflist) \
(((reflist) == 0) ? (cu_info_ptr)->inter.mv_cand0 : (cu_info_ptr)->inter.mv_cand1)

View file

@ -441,6 +441,7 @@ int main(int argc, char *argv[])
FILE *input = NULL; //!< input file (YUV)
FILE *output = NULL; //!< output file (HEVC NAL stream)
FILE *recout = NULL; //!< reconstructed YUV output, --debug
FILE *roifile = NULL;
clock_t start_time = clock();
clock_t encoding_start_cpu_time;
UVG_CLOCK_T encoding_start_real_time;
@ -587,7 +588,7 @@ int main(int argc, char *argv[])
// Give arguments via struct to the input thread
input_handler_args in_args = {
.available_input_slots = available_input_slots,
.filled_input_slots = filled_input_slots,
.filled_input_slots = filled_input_slots,
.input = input,
.api = api,
@ -828,6 +829,7 @@ done:
if (input) fclose(input);
if (output) fclose(output);
if (recout) fclose(recout);
if (roifile) fclose(roifile);
DBG_YUVIEW_CLEANUP();
CHECKPOINTS_FINALIZE();

File diff suppressed because it is too large Load diff

View file

@ -56,7 +56,33 @@ void uvg_encode_ts_residual(encoder_state_t* const state,
void uvg_encode_mvd(encoder_state_t * const state,
cabac_data_t *cabac,
int32_t mvd_hor,
int32_t mvd_ver);
int32_t mvd_ver,
double* bits_out);
/**
 * \brief Declaration only; the definition is not part of this header.
 *
 * NOTE(review): judging by the name and the double return type this
 * presumably runs the coding-unit syntax in counting mode and returns the
 * estimated number of bits; confirm against the definition.
 */
double uvg_mock_encode_coding_unit(
encoder_state_t* const state,
cabac_data_t* cabac,
int x, int y, int depth,
lcu_t* lcu, cu_info_t* cur_cu);
/**
 * \brief Declaration only; the definition is not part of this header.
 *
 * NOTE(review): the meaning of the int return value and whether bits_out may
 * be NULL are not visible here. bits_out is presumably an accumulator for
 * the estimated bit cost; confirm against the definition.
 */
int uvg_encode_inter_prediction_unit(encoder_state_t* const state,
cabac_data_t* const cabac,
const cu_info_t* const cur_cu,
int x, int y, int width, int height,
int depth,
lcu_t* lcu,
double* bits_out);
/**
 * \brief Declaration only; the definition is not part of this header.
 *
 * state, cur_cu and lcu are taken as pointers to const; cabac is not.
 * NOTE(review): bits_out is presumably an accumulator for the estimated bit
 * cost; confirm against the definition.
 */
void uvg_encode_intra_luma_coding_unit(const encoder_state_t* const state,
cabac_data_t* const cabac,
const cu_info_t* const cur_cu,
int x, int y, int depth, const lcu_t* lcu, double* bits_out);
/**
 * \brief Declaration only; the definition is not part of this header.
 *
 * NOTE(review): the meaning of the bool return value is not visible here
 * (possibly the split decision that was actually coded); confirm against the
 * definition. bits_out is presumably an accumulator for the estimated bit
 * cost.
 */
bool uvg_write_split_flag(const encoder_state_t* const state, cabac_data_t* cabac,
const cu_info_t* left_cu, const cu_info_t* above_cu,
uint8_t split_flag,
int depth, int cu_width, int x, int y, double* bits_out);
void uvg_encode_last_significant_xy(cabac_data_t * const cabac,
uint8_t lastpos_x, uint8_t lastpos_y,

View file

@ -32,7 +32,6 @@
#include "encoder.h"
// This define is required for M_PI on Windows.
#define _USE_MATH_DEFINES
#include <math.h>
#include <stdio.h>
@ -45,14 +44,6 @@
#include "uvg_math.h"
#include "fast_coeff_cost.h"
/**
* \brief Strength of QP adjustments when using adaptive QP for 360 video.
*
* Determined empirically.
*/
static const double ERP_AQP_STRENGTH = 3.0;
static int encoder_control_init_gop_layer_weights(encoder_control_t * const);
static unsigned cfg_num_threads(void)
@ -136,22 +127,6 @@ static int get_max_parallelism(const encoder_control_t *const encoder)
}
/**
* \brief Return weight for 360 degree ERP video
*
* Returns the scaling factor of area from equirectangular projection to
* spherical surface.
*
* \param y y-coordinate of the pixel
* \param h height of the picture
*/
static double ws_weight(int y, int h)
{
return cos((y - 0.5 * h + 0.5) * (M_PI / h));
}
/**
* \brief Update ROI QPs for 360 video with equirectangular projection.
*
@ -162,55 +137,6 @@ static double ws_weight(int y, int h)
* \param orig_width width of orig_roi
* \param orig_height height of orig_roi
*/
static void init_erp_aqp_roi(encoder_control_t* encoder,
int8_t *orig_roi,
int32_t orig_width,
int32_t orig_height)
{
// Update ROI with WS-PSNR delta QPs.
int height = encoder->in.height_in_lcu;
int width = orig_roi ? orig_width : 1;
int frame_height = encoder->in.real_height;
encoder->cfg.roi.width = width;
encoder->cfg.roi.height = height;
encoder->cfg.roi.dqps = calloc(width * height, sizeof(orig_roi[0]));
double total_weight = 0.0;
for (int y = 0; y < frame_height; y++) {
total_weight += ws_weight(y, frame_height);
}
for (int y_lcu = 0; y_lcu < height; y_lcu++) {
int y_orig = LCU_WIDTH * y_lcu;
int lcu_height = MIN(LCU_WIDTH, frame_height - y_orig);
double lcu_weight = 0.0;
for (int y = y_orig; y < y_orig + lcu_height; y++) {
lcu_weight += ws_weight(y, frame_height);
}
// Normalize.
lcu_weight = (lcu_weight * frame_height) / (total_weight * lcu_height);
int8_t qp_delta = round(-ERP_AQP_STRENGTH * log2(lcu_weight));
if (orig_roi) {
// If a ROI array already exists, we copy the existing values to the
// new array while adding qp_delta to each.
int y_roi = y_lcu * orig_height / height;
for (int x = 0; x < width; x++) {
encoder->cfg.roi.dqps[x + y_lcu * width] =
CLIP(-51, 51, orig_roi[x + y_roi * width] + qp_delta);
}
} else {
// Otherwise, simply write qp_delta to the ROI array.
encoder->cfg.roi.dqps[y_lcu] = qp_delta;
}
}
}
static int8_t* derive_chroma_QP_mapping_table(const uvg_config* const cfg, int i)
{
@ -394,6 +320,16 @@ encoder_control_t* uvg_encoder_control_init(const uvg_config *const cfg)
encoder->scaling_list.use_default_list = 1;
}
// ROI / delta QP
if (cfg->roi.file_path) {
const char *mode[2] = { "r", "rb" };
encoder->roi_file = fopen(cfg->roi.file_path, mode[cfg->roi.format]);
if (!encoder->roi_file) {
fprintf(stderr, "Could not open ROI file.\n");
goto init_failed;
}
}
if (cfg->fast_coeff_table_fn) {
FILE *fast_coeff_table_f = fopen(cfg->fast_coeff_table_fn, "rb");
if (fast_coeff_table_f == NULL) {
@ -435,32 +371,10 @@ encoder_control_t* uvg_encoder_control_init(const uvg_config *const cfg)
goto init_failed;
}
if (cfg->erp_aqp) {
init_erp_aqp_roi(encoder,
cfg->roi.dqps,
cfg->roi.width,
cfg->roi.height);
} else if (cfg->roi.dqps) {
// Copy delta QP array for ROI coding.
const size_t roi_size = encoder->cfg.roi.width * encoder->cfg.roi.height;
encoder->cfg.roi.dqps = calloc(roi_size, sizeof(cfg->roi.dqps[0]));
memcpy(encoder->cfg.roi.dqps,
cfg->roi.dqps,
roi_size * sizeof(*cfg->roi.dqps));
}
// NOTE: When tr_depth_inter is equal to 0, the transform is still split
// for SMP and AMP partition units.
encoder->tr_depth_inter = 0;
if (encoder->cfg.target_bitrate > 0 || encoder->cfg.roi.dqps || encoder->cfg.set_qp_in_cu || encoder->cfg.vaq) {
encoder->max_qp_delta_depth = 0;
} else {
encoder->max_qp_delta_depth = -1;
}
//Tiles
encoder->tiles_enable = encoder->cfg.tiles_width_count > 1 ||
encoder->cfg.tiles_height_count > 1;
@ -761,7 +675,7 @@ void uvg_encoder_control_free(encoder_control_t *const encoder)
FREE_POINTER(encoder->tiles_tile_id);
FREE_POINTER(encoder->cfg.roi.dqps);
FREE_POINTER(encoder->cfg.roi.file_path);
uvg_scalinglist_destroy(&encoder->scaling_list);
@ -773,6 +687,10 @@ void uvg_encoder_control_free(encoder_control_t *const encoder)
uvg_close_rdcost_outfiles();
if (encoder->roi_file) {
fclose(encoder->roi_file);
}
free(encoder);
}

View file

@ -130,7 +130,7 @@ typedef struct encoder_control_t
//! Picture weights when GOP is used.
double gop_layer_weights[MAX_GOP_LAYERS];
int8_t max_qp_delta_depth;
FILE *roi_file;
int tr_depth_inter;

View file

@ -805,7 +805,7 @@ static void encoder_state_write_bitstream_pic_parameter_set(bitstream_t* stream,
WRITE_U(stream, 0, 1, "pps_ref_wraparound_enabled_flag");
WRITE_SE(stream, ((int8_t)encoder->cfg.qp) - 26, "pps_init_qp_minus26");
WRITE_U(stream, encoder->max_qp_delta_depth >= 0 ? 1:0, 1, "pps_cu_qp_delta_enabled_flag");
WRITE_U(stream, state->frame->max_qp_delta_depth >= 0 ? 1:0, 1, "pps_cu_qp_delta_enabled_flag");
WRITE_U(stream, 0,1, "pps_chroma_tool_offsets_present_flag");
/* // If chroma_tool_offsets_present
@ -1037,8 +1037,8 @@ static void uvg_encoder_state_write_bitstream_picture_header(
const int poc_lsb = state->frame->poc & ((1 << encoder->poc_lsb_bits) - 1);
WRITE_U(stream, poc_lsb, encoder->poc_lsb_bits, "ph_pic_order_cnt_lsb");
if (encoder->max_qp_delta_depth >= 0) {
WRITE_UE(stream, encoder->max_qp_delta_depth, "ph_cu_qp_delta_subdiv_intra_slice");
if (state->frame->max_qp_delta_depth >= 0) {
WRITE_UE(stream, state->frame->max_qp_delta_depth, "ph_cu_qp_delta_subdiv_intra_slice");
}
// alf enable flags and aps IDs
@ -1118,8 +1118,8 @@ static void uvg_encoder_state_write_bitstream_picture_header(
|| state->frame->pictype == UVG_NAL_IDR_N_LP) {
}
else {
if (encoder->max_qp_delta_depth >= 0) {
WRITE_UE(stream, encoder->max_qp_delta_depth, "ph_cu_qp_delta_subdiv_inter_slice");
if (state->frame->max_qp_delta_depth >= 0) {
WRITE_UE(stream, state->frame->max_qp_delta_depth, "ph_cu_qp_delta_subdiv_inter_slice");
}
if (state->encoder_control->cfg.tmvp_enable) {
WRITE_U(stream, state->encoder_control->cfg.tmvp_enable, 1, "ph_pic_temporal_mvp_enabled_flag");
@ -1128,7 +1128,7 @@ static void uvg_encoder_state_write_bitstream_picture_header(
}
if (encoder->cfg.jccr) {
WRITE_U(stream, 0, 1, "ph_joint_cbcr_sign_flag");
WRITE_U(stream, state->frame->jccr_sign, 1, "ph_joint_cbcr_sign_flag");
}
// END PICTURE HEADER

View file

@ -32,6 +32,9 @@
#include "encoderstate.h"
// This define is required for M_PI on Windows.
#define _USE_MATH_DEFINES
#include <ctype.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
@ -53,6 +56,12 @@
#include "strategies/strategies-picture.h"
/**
* \brief Strength of QP adjustments when using adaptive QP for 360 video.
*
* Determined empirically.
*/
static const double ERP_AQP_STRENGTH = 3.0;
int uvg_encoder_state_match_children_of_previous_frame(encoder_state_t * const state) {
int i;
@ -572,7 +581,7 @@ static void set_cu_qps(encoder_state_t *state, int x, int y, int depth, int *las
cu_info_t *cu = uvg_cu_array_at(state->tile->frame->cu_array, x, y);
const int cu_width = LCU_WIDTH >> depth;
if (depth <= state->encoder_control->max_qp_delta_depth) {
if (depth <= state->frame->max_qp_delta_depth) {
*prev_qp = -1;
}
@ -624,6 +633,38 @@ static void set_cu_qps(encoder_state_t *state, int x, int y, int depth, int *las
}
}
static void set_joint_cb_cr_modes(encoder_state_t* state, uvg_picture* pic)
{
bool sgnFlag = true;
if (state->encoder_control->chroma_format != UVG_CSP_400)
{
const int x1 = pic->width / 2 - 1;
const int y1 = pic->height / 2 - 1;
const int cbs = pic->stride / 2;
const int crs = pic->stride / 2;
const uvg_pixel* p_cb = pic->u + 1 * cbs;
const uvg_pixel* p_cr = pic->v + 1 * crs;
int64_t sum_cb_cr = 0;
// determine inter-chroma transform sign from correlation between high-pass filtered (i.e., zero-mean) Cb and Cr planes
for (int y = 1; y < y1; y++, p_cb += cbs, p_cr += crs)
{
for (int x = 1; x < x1; x++)
{
int cb = (12 * (int)p_cb[x] - 2 * ((int)p_cb[x - 1] + (int)p_cb[x + 1] + (int)p_cb[x - cbs] + (int)p_cb[x + cbs]) - ((int)p_cb[x - 1 - cbs] + (int)p_cb[x + 1 - cbs] + (int)p_cb[x - 1 + cbs] + (int)p_cb[x + 1 + cbs]));
int cr = (12 * (int)p_cr[x] - 2 * ((int)p_cr[x - 1] + (int)p_cr[x + 1] + (int)p_cr[x - crs] + (int)p_cr[x + crs]) - ((int)p_cr[x - 1 - crs] + (int)p_cr[x + 1 - crs] + (int)p_cr[x - 1 + crs] + (int)p_cr[x + 1 + crs]));
sum_cb_cr += cb * cr;
}
}
sgnFlag = (sum_cb_cr < 0);
}
state->frame->jccr_sign = sgnFlag;
}
static void encoder_state_worker_encode_lcu_bitstream(void* opaque);
static void encoder_state_worker_encode_lcu_search(void * opaque)
@ -665,7 +706,7 @@ static void encoder_state_worker_encode_lcu_search(void * opaque)
encoder_state_recdata_to_bufs(state, lcu, state->tile->hor_buf_search, state->tile->ver_buf_search);
if (encoder->max_qp_delta_depth >= 0) {
if (state->frame->max_qp_delta_depth >= 0) {
int last_qp = state->last_qp;
int prev_qp = -1;
set_cu_qps(state, lcu->position_px.x, lcu->position_px.y, 0, &last_qp, &prev_qp);
@ -716,6 +757,7 @@ static void encoder_state_worker_encode_lcu_bitstream(void * opaque)
const uint64_t existing_bits = uvg_bitstream_tell(&state->stream);
//Encode SAO
state->cabac.update = 1;
if (encoder->cfg.sao_type) {
encode_sao(state, lcu->position.x, lcu->position.y, &frame->sao_luma[lcu->position.y * frame->width_in_lcu + lcu->position.x], &frame->sao_chroma[lcu->position.y * frame->width_in_lcu + lcu->position.x]);
}
@ -771,6 +813,7 @@ static void encoder_state_worker_encode_lcu_bitstream(void * opaque)
uvg_cabac_start(&state->cabac);
}
}
state->cabac.update = 0;
pthread_mutex_lock(&state->frame->rc_lock);
@ -1421,6 +1464,154 @@ static bool edge_lcu(int id, int lcus_x, int lcus_y, bool xdiv64, bool ydiv64)
}
}
/**
* \brief Return weight for 360 degree ERP video
*
* Returns the scaling factor of area from equirectangular projection to
* spherical surface.
*
* \param y y-coordinate of the pixel
* \param h height of the picture
*/
static double ws_weight(int y, int h)
{
return cos((y - 0.5 * h + 0.5) * (M_PI / h));
}
/**
 * \brief Update ROI QPs for 360 video with equirectangular projection.
 *
 * Replaces frame->roi with a map that has one row per LCU row. Each row gets
 * a delta QP derived from the WS-PSNR weight of the picture rows it covers.
 * If the frame already has a ROI array, the new map keeps its width and each
 * new value is the corresponding original value plus the row's delta,
 * clipped to [-51, 51]; the original array is freed. Otherwise the new map
 * is one column wide and holds only the row deltas.
 *
 * If the new map cannot be allocated, an error is printed, assert(0) fires
 * and frame->roi is left untouched.
 *
 * \param encoder   encoder control
 * \param frame     frame that will have the ROI map
 */
static void init_erp_aqp_roi(const encoder_control_t *encoder, uvg_picture *frame)
{
  int8_t *orig_roi = frame->roi.roi_array;
  int32_t orig_width = frame->roi.width;
  int32_t orig_height = frame->roi.height;

  // Update ROI with WS-PSNR delta QPs.
  int new_height = encoder->in.height_in_lcu;
  int new_width = orig_roi ? orig_width : 1;
  const int new_size = new_width * new_height;
  int8_t *new_array = calloc(new_size, sizeof(orig_roi[0]));
  // calloc may legitimately return NULL for a zero-sized request, so only a
  // failed non-empty allocation is an error.
  if (!new_array && new_size > 0) {
    fprintf(stderr, "Failed to allocate memory for ROI table.\n");
    assert(0);
    return;
  }

  int frame_height = encoder->in.real_height;

  double total_weight = 0.0;
  for (int y = 0; y < frame_height; y++) {
    total_weight += ws_weight(y, frame_height);
  }

  for (int y_lcu = 0; y_lcu < new_height; y_lcu++) {
    int y_orig = LCU_WIDTH * y_lcu;
    // The last LCU row may be only partially inside the picture.
    int lcu_height = MIN(LCU_WIDTH, frame_height - y_orig);

    double lcu_weight = 0.0;
    for (int y = y_orig; y < y_orig + lcu_height; y++) {
      lcu_weight += ws_weight(y, frame_height);
    }
    // Normalize: average weight of this LCU row relative to the average
    // weight of the whole picture.
    lcu_weight = (lcu_weight * frame_height) / (total_weight * lcu_height);

    int8_t qp_delta = round(-ERP_AQP_STRENGTH * log2(lcu_weight));

    if (orig_roi) {
      // If a ROI array already exists, we copy the existing values to the
      // new array while adding qp_delta to each.
      int y_roi = y_lcu * orig_height / new_height;
      for (int x = 0; x < new_width; x++) {
        new_array[x + y_lcu * new_width] =
          CLIP(-51, 51, orig_roi[x + y_roi * new_width] + qp_delta);
      }
    } else {
      // Otherwise, simply write qp_delta to the ROI array.
      new_array[y_lcu] = qp_delta;
    }
  }

  // Update new values
  frame->roi.width = new_width;
  frame->roi.height = new_height;
  frame->roi.roi_array = new_array;
  FREE_POINTER(orig_roi);
}
/**
 * \brief Read the next ROI (delta QP) map from a file into a frame.
 *
 * One ROI frame consists of width, height and then width * height delta QP
 * values in raster order.
 * - UVG_ROI_TXT: all numbers are read with fscanf("%d"); the values are
 *   clipped to [-51, 51].
 * - UVG_ROI_BIN: width and height are read as two 32-bit integers in host
 *   byte order, followed by one signed byte per value, stored as read
 *   (not clipped).
 *
 * If the file is seekable and only end of file (after optional whitespace in
 * text format) remains, reading restarts from the beginning of the file.
 *
 * On success frame->roi.width, frame->roi.height and frame->roi.roi_array
 * are replaced and the previous array is freed. If the size cannot be read,
 * is out of range, or allocation fails, an error is printed, assert(0) fires
 * and frame->roi is left untouched. If reading the values fails, the same
 * error handling applies but frame->roi already holds the new dimensions and
 * a partially filled (otherwise zeroed) array.
 *
 * The stream is never closed here.
 * NOTE(review): it is assumed to be owned by the caller (the encoder control
 * closes its roi_file when it is freed); confirm at the call site.
 *
 * \param frame   picture whose ROI map is replaced
 * \param file    open ROI file
 * \param format  UVG_ROI_TXT or UVG_ROI_BIN
 */
static void next_roi_frame_from_file(uvg_picture *frame, FILE *file, enum uvg_roi_format format) {
  // The ROI description is as follows:
  // First number is width, second number is height,
  // then follows width * height number of dqp values.

  // Rewind the (seekable) ROI file when end of file is reached.
  // Allows a single ROI frame to be used for a whole sequence
  // and looping with --loop-input. Skips possible whitespace.
  if (ftell(file) != -1L) {
    int c = fgetc(file);
    while (format == UVG_ROI_TXT && isspace(c)) c = fgetc(file);
    ungetc(c, file);
    if (c == EOF) rewind(file);
  }

  // Read into locals so the frame is only modified once the size is valid.
  int width = frame->roi.width;
  int height = frame->roi.height;

  bool failed = false;
  if (format == UVG_ROI_TXT) {
    // fscanf returns EOF (non-zero) at end of input, so compare against the
    // number of expected conversions instead of testing for zero.
    failed = fscanf(file, "%d", &width) != 1 || fscanf(file, "%d", &height) != 1;
  }
  if (format == UVG_ROI_BIN) {
    int32_t dims[2] = { 0, 0 };
    failed = fread(dims, sizeof(dims[0]), 2, file) != 2;
    width = dims[0];
    height = dims[1];
  }

  if (failed) {
    fprintf(stderr, "Failed to read ROI size.\n");
    assert(0);
    return;
  }

  if (width <= 0 || height <= 0) {
    fprintf(stderr, "Invalid ROI size: %dx%d.\n", width, height);
    assert(0);
    return;
  }

  if (width > 10000 || height > 10000) {
    fprintf(stderr, "ROI dimensions exceed arbitrary value of 10000.\n");
    assert(0);
    return;
  }

  const unsigned size = (unsigned)width * (unsigned)height;
  int8_t *dqp_array = calloc((size_t)size, sizeof(frame->roi.roi_array[0]));
  if (!dqp_array) {
    fprintf(stderr, "Failed to allocate memory for ROI table.\n");
    assert(0);
    return;
  }

  FREE_POINTER(frame->roi.roi_array);
  frame->roi.roi_array = dqp_array;
  frame->roi.width = width;
  frame->roi.height = height;

  if (format == UVG_ROI_TXT) {
    for (unsigned i = 0; i < size; ++i) {
      int number; // Need a pointer to int for fscanf
      if (fscanf(file, "%d", &number) != 1) {
        fprintf(stderr, "Reading ROI file failed.\n");
        assert(0);
        return;
      }
      dqp_array[i] = CLIP(-51, 51, number);
    }
  } else if (format == UVG_ROI_BIN) {
    if (fread(dqp_array, 1, size, file) != size) {
      fprintf(stderr, "Reading ROI file failed.\n");
      assert(0);
      return;
    }
  }
}
static void encoder_state_init_new_frame(encoder_state_t * const state, uvg_picture* frame) {
assert(state->type == ENCODER_STATE_TYPE_MAIN);
@ -1437,6 +1628,21 @@ static void encoder_state_init_new_frame(encoder_state_t * const state, uvg_pict
memset(state->tile->frame->hmvp_size, 0, sizeof(uint8_t) * state->tile->frame->height_in_lcu);
}
// ROI / delta QP maps
if (frame->roi.roi_array && cfg->roi.file_path) {
assert(0 && "Conflict: Other ROI data was supplied when a ROI file was specified.");
}
// Read frame from the file. If no file is specified,
// ROI data should be already set by the application.
if (cfg->roi.file_path) {
next_roi_frame_from_file(frame, state->encoder_control->roi_file, cfg->roi.format);
}
if (cfg->erp_aqp) {
init_erp_aqp_roi(state->encoder_control, state->tile->frame->source);
}
// Variance adaptive quantization
if (cfg->vaq) {
const bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400;
@ -1523,6 +1729,12 @@ static void encoder_state_init_new_frame(encoder_state_t * const state, uvg_pict
}
// Variance adaptive quantization - END
if (cfg->target_bitrate > 0 || frame->roi.roi_array || cfg->set_qp_in_cu || cfg->vaq) {
state->frame->max_qp_delta_depth = 0;
} else {
state->frame->max_qp_delta_depth = -1;
}
// Use this flag to handle closed gop irap picture selection.
// If set to true, irap is already set and we avoid
// setting it based on the intra period
@ -1689,6 +1901,7 @@ void uvg_encode_one_frame(encoder_state_t * const state, uvg_picture* frame)
encoder_state_init_new_frame(state, frame);
if(state->encoder_control->cfg.jccr) set_joint_cb_cr_modes(state, frame);
// Create a separate job for ALF done after everything else, and only then do final bitstream writing (for ALF parameters)
if (state->encoder_control->cfg.alf_type && state->encoder_control->cfg.wpp) {
@ -1834,10 +2047,9 @@ lcu_stats_t* uvg_get_lcu_stats(encoder_state_t *state, int lcu_x, int lcu_y)
int uvg_get_cu_ref_qp(const encoder_state_t *state, int x, int y, int last_qp)
{
const encoder_control_t *ctrl = state->encoder_control;
const cu_array_t *cua = state->tile->frame->cu_array;
// Quantization group width
const int qg_width = LCU_WIDTH >> MIN(ctrl->max_qp_delta_depth, uvg_cu_array_at_const(cua, x, y)->depth);
const int qg_width = LCU_WIDTH >> MIN(state->frame->max_qp_delta_depth, uvg_cu_array_at_const(cua, x, y)->depth);
// Coordinates of the top-left corner of the quantization group
const int x_qg = x & ~(qg_width - 1);

View file

@ -179,6 +179,8 @@ typedef struct encoder_state_config_frame_t {
*/
double *aq_offsets;
int8_t max_qp_delta_depth;
/**
* \brief Whether next NAL is the first NAL in the access unit.
*/
@ -193,6 +195,7 @@ typedef struct encoder_state_config_frame_t {
cu_info_t* hmvp_lut; //!< \brief Look-up table for HMVP, one for each LCU row
uint8_t* hmvp_size; //!< \brief HMVP LUT size
bool jccr_sign;
} encoder_state_config_frame_t;
@ -320,6 +323,7 @@ typedef struct encoder_state_t {
bitstream_t stream;
cabac_data_t cabac;
cabac_data_t search_cabac;
uint32_t stats_bitstream_length; //Bitstream length written in bytes
@ -402,10 +406,10 @@ static INLINE bool encoder_state_must_write_vps(const encoder_state_t *state)
*/
static INLINE bool is_last_cu_in_qg(const encoder_state_t *state, int x, int y, int depth)
{
if (state->encoder_control->max_qp_delta_depth < 0) return false;
if (state->frame->max_qp_delta_depth < 0) return false;
const int cu_width = LCU_WIDTH >> depth;
const int qg_width = LCU_WIDTH >> state->encoder_control->max_qp_delta_depth;
const int qg_width = LCU_WIDTH >> state->frame->max_qp_delta_depth;
const int right = x + cu_width;
const int bottom = y + cu_width;
return (right % qg_width == 0 || right >= state->tile->frame->width) &&

View file

@ -40,7 +40,7 @@ static uint16_t to_q88(float f)
return (uint16_t)(f * 256.0f + 0.5f);
}
static uint64_t to_4xq88(const float f[4])
static uint64_t to_4xq88(const double f[4])
{
int i;
uint64_t result = 0;
@ -58,9 +58,9 @@ int uvg_fast_coeff_table_parse(fast_coeff_table_t *fast_coeff_table, FILE *fast_
uint64_t *wts_by_qp = fast_coeff_table->wts_by_qp;
for (i = 0; i < MAX_FAST_COEFF_COST_QP; i++) {
float curr_wts[4];
double curr_wts[4];
if (fscanf(fast_coeff_table_f, "%f %f %f %f\n", curr_wts + 0,
if (fscanf(fast_coeff_table_f, "%lf %lf %lf %lf\n", curr_wts + 0,
curr_wts + 1,
curr_wts + 2,
curr_wts + 3) != 4) {

View file

@ -45,7 +45,7 @@ typedef struct {
// Weights for 4 buckets (coeff 0, coeff 1, coeff 2, coeff >= 3), for QPs from
// 0 to MAX_FAST_COEFF_COST_QP
static const float default_fast_coeff_cost_wts[][4] = {
static const double default_fast_coeff_cost_wts[][4] = {
// Just extend it by stretching the first actual values..
{0.164240f, 4.161530f, 3.509033f, 6.928047f},
{0.164240f, 4.161530f, 3.509033f, 6.928047f},

View file

@ -339,7 +339,7 @@ static bool is_on_8x8_grid(int x, int y, edge_dir dir)
static int8_t get_qp_y_pred(const encoder_state_t* state, int x, int y, edge_dir dir)
{
if (state->encoder_control->max_qp_delta_depth < 0) {
if (state->frame->max_qp_delta_depth < 0) {
return state->qp;
}

View file

@ -106,6 +106,10 @@ uvg_picture * uvg_image_alloc(enum uvg_chroma_format chroma_format, const int32_
im->interlacing = UVG_INTERLACING_NONE;
im->roi.roi_array = NULL;
im->roi.width = 0;
im->roi.height = 0;
return im;
}
@ -132,6 +136,7 @@ void uvg_image_free(uvg_picture *const im)
uvg_image_free(im->base_image);
} else {
free(im->fulldata_buf);
if (im->roi.roi_array) FREE_POINTER(im->roi.roi_array);
}
// Make sure freed data won't be used.
@ -192,6 +197,8 @@ uvg_picture *uvg_image_make_subimage(uvg_picture *const orig_image,
im->pts = 0;
im->dts = 0;
im->roi = orig_image->roi;
return im;
}

View file

@ -624,7 +624,9 @@ void uvg_inter_pred_pu(const encoder_state_t * const state,
int i_pu)
{
cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y));
const int x_scu = SUB_SCU(x);
const int y_scu = SUB_SCU(y);
cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, x_scu, y_scu);
const int pu_x = PU_GET_X(cu->part_size, width, x, i_pu);
const int pu_y = PU_GET_Y(cu->part_size, width, y, i_pu);
const int pu_w = PU_GET_W(cu->part_size, width, i_pu);
@ -673,6 +675,12 @@ void uvg_inter_pred_pu(const encoder_state_t * const state,
NULL,
predict_luma, predict_chroma);
}
if (predict_chroma && state->encoder_control->cfg.jccr) {
const int offset = x_scu / 2 + y_scu / 2 * LCU_WIDTH_C;
uvg_pixels_blit(lcu->rec.u + offset, lcu->rec.joint_u + offset, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C);
uvg_pixels_blit(lcu->rec.v + offset, lcu->rec.joint_v + offset, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C);
}
}
/**
@ -1290,7 +1298,7 @@ static void get_mv_cand_from_candidates(const encoder_state_t * const state,
int32_t width,
int32_t height,
const merge_candidates_t *merge_cand,
const cu_info_t *cur_cu,
const cu_info_t * const cur_cu,
int8_t reflist,
mv_t mv_cand[2][2])
{
@ -1396,7 +1404,7 @@ void uvg_inter_get_mv_cand(const encoder_state_t * const state,
int32_t width,
int32_t height,
mv_t mv_cand[2][2],
cu_info_t* cur_cu,
const cu_info_t * const cur_cu,
lcu_t *lcu,
int8_t reflist)
{

View file

@ -96,7 +96,7 @@ void uvg_inter_get_mv_cand(const encoder_state_t * const state,
int32_t width,
int32_t height,
mv_t mv_cand[2][2],
cu_info_t* cur_cu,
const cu_info_t* cur_cu,
lcu_t *lcu,
int8_t reflist);

View file

@ -82,6 +82,17 @@ static const uint8_t num_ref_pixels_left[16][16] = {
{ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }
};
// Forward declaration of the matrix weighted intra prediction (MIP) routine.
// The definition appears later in this file; uvg_intra_predict calls it with
// the block width and height, the destination buffer, the MIP mode and the
// transpose flag.
static void mip_predict(
const encoder_state_t* const state,
const uvg_intra_references* const refs,
const uint16_t pred_block_width,
const uint16_t pred_block_height,
uvg_pixel* dst,
const int mip_mode,
const bool mip_transp);
int8_t uvg_intra_get_dir_luma_predictor(
const uint32_t x,
const uint32_t y,
@ -452,7 +463,7 @@ static void get_cclm_parameters(
}
}
static void linear_transform_cclm(cclm_parameters_t* cclm_params, uvg_pixel * src, uvg_pixel * dst, int stride, int height) {
static void linear_transform_cclm(const cclm_parameters_t* cclm_params, uvg_pixel * src, uvg_pixel * dst, int stride, int height) {
int scale = cclm_params->a;
int shift = cclm_params->shift;
int offset = cclm_params->b;
@ -468,7 +479,7 @@ static void linear_transform_cclm(cclm_parameters_t* cclm_params, uvg_pixel * sr
}
void uvg_predict_cclm(
static void predict_cclm(
encoder_state_t const* const state,
const color_t color,
const int8_t width,
@ -477,7 +488,7 @@ void uvg_predict_cclm(
const int16_t y0,
const int16_t stride,
const int8_t mode,
lcu_t* const lcu,
const lcu_t* const lcu,
uvg_intra_references* chroma_ref,
uvg_pixel* dst,
cclm_parameters_t* cclm_params
@ -498,6 +509,7 @@ void uvg_predict_cclm(
uvg_pixel *y_rec = lcu->rec.y + x_scu + y_scu * LCU_WIDTH;
const int stride2 = (((state->tile->frame->width + 7) & ~7) + FRAME_PADDING_LUMA);
// Essentially what this does is that it uses 6-tap filtering to downsample
// the luma intra references down to match the resolution of the chroma channel.
@ -508,12 +520,12 @@ void uvg_predict_cclm(
if (y0) {
for (; available_above_right < width / 2; available_above_right++) {
int x_extension = x_scu + width * 2 + 4 * available_above_right;
cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, x_extension, y_scu - 4);
const cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, x_extension, y_scu - 4);
if (x_extension >= LCU_WIDTH || pu->type == CU_NOTSET) break;
}
if(y_scu == 0) {
if(!state->encoder_control->cfg.wpp) available_above_right = MIN(width / 2, (state->tile->frame->width - x0 - width * 2) / 4);
memcpy(sampled_luma_ref.top, &state->tile->frame->cclm_luma_rec_top_line[x0 / 2 + (y0 / 64 - 1) * (stride / 2)], sizeof(uvg_pixel) * (width + available_above_right * 2));
memcpy(sampled_luma_ref.top, &state->tile->frame->cclm_luma_rec_top_line[x0 / 2 + (y0 / 64 - 1) * (stride2 / 2)], sizeof(uvg_pixel) * (width + available_above_right * 2));
}
else {
for (int x = 0; x < width * (available_above_right ? 4 : 2); x += 2) {
@ -533,16 +545,16 @@ void uvg_predict_cclm(
if(x0) {
for (; available_left_below < height / 2; available_left_below++) {
int y_extension = y_scu + height * 2 + 4 * available_left_below;
cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, x_scu - 4, y_extension);
const cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, x_scu - 4, y_extension);
if (y_extension >= LCU_WIDTH || pu->type == CU_NOTSET) break;
if(x_scu == 32 && y_scu == 0 && pu->depth == 0) break;
}
for(int i = 0; i < height + available_left_below * 2; i++) {
sampled_luma_ref.left[i] = state->tile->frame->cclm_luma_rec[(y0/2 + i) * (stride/2) + x0 / 2 - 1];
sampled_luma_ref.left[i] = state->tile->frame->cclm_luma_rec[(y0/2 + i) * (stride2/2) + x0 / 2 - 1];
}
}
uvg_pixels_blit(&state->tile->frame->cclm_luma_rec[x0 / 2 + (y0 * stride) / 4], sampled_luma, width, height, stride / 2, width);
uvg_pixels_blit(&state->tile->frame->cclm_luma_rec[x0 / 2 + (y0 * stride2) / 4], sampled_luma, width, height, stride2 / 2, width);
int16_t a, b, shift;
get_cclm_parameters(state, width, height, mode,x0, y0, available_above_right, available_left_below, &sampled_luma_ref, chroma_ref, &a, &b, &shift);
@ -727,12 +739,17 @@ void uvg_mip_pred_upsampling_1D(int* const dst, const int* const src, const int*
}
/** \brief Matrix weighted intra prediction.
*/
void uvg_mip_predict(encoder_state_t const* const state, uvg_intra_references* const refs,
const uint16_t pred_block_width, const uint16_t pred_block_height,
uvg_pixel* dst,
const int mip_mode, const bool mip_transp)
static void mip_predict(
const encoder_state_t* const state,
const uvg_intra_references* const refs,
const uint16_t pred_block_width,
const uint16_t pred_block_height,
uvg_pixel* dst,
const int mip_mode,
const bool mip_transp)
{
// MIP prediction uses int values instead of uvg_pixel as some temp values may be negative
@ -875,14 +892,13 @@ void uvg_mip_predict(encoder_state_t const* const state, uvg_intra_references* c
}
void uvg_intra_predict(
encoder_state_t *const state,
static void intra_predict_regular(
const encoder_state_t* const state,
uvg_intra_references *refs,
int_fast8_t log2_width,
int_fast8_t mode,
color_t color,
uvg_pixel *dst,
bool filter_boundary,
const uint8_t multi_ref_idx)
{
const int_fast8_t width = 1 << log2_width;
@ -1350,18 +1366,66 @@ void uvg_intra_build_reference(
}
}
/**
 * \brief Generate the intra prediction of one color component of a block.
 *
 * Dispatches to MIP, regular intra prediction or CCLM depending on the mode
 * stored in data->pred_cu and on the color component.
 *
 * \param state   encoder state
 * \param refs    reference pixels of the block
 * \param cu_loc  position and luma/chroma dimensions of the block
 * \param color   color component to predict
 * \param dst     destination buffer for the prediction
 * \param data    search data holding the predicted CU and CCLM parameters
 * \param lcu     containing LCU, passed on to the CCLM prediction
 */
void uvg_intra_predict(
  const encoder_state_t* const state,
  uvg_intra_references* const refs,
  const cu_loc_t* const cu_loc,
  const color_t color,
  uvg_pixel* dst,
  const intra_search_data_t* data,
  const lcu_t* lcu
)
{
  const bool is_luma = color == COLOR_Y;
  const int stride = (((state->tile->frame->width + 7) & ~7) + FRAME_PADDING_LUMA);
  const int width = is_luma ? cu_loc->width : cu_loc->chroma_width;
  const int height = is_luma ? cu_loc->height : cu_loc->chroma_height;
  const int x = cu_loc->x;
  const int y = cu_loc->y;

  int8_t intra_mode = is_luma ? data->pred_cu.intra.mode : data->pred_cu.intra.mode_chroma;

  // MIP always applies to luma. For chroma it is only used with 4:4:4;
  // otherwise the chroma mode falls back to mode 0.
  bool use_mip = false;
  if (data->pred_cu.intra.mip_flag) {
    use_mip = is_luma || state->encoder_control->chroma_format == UVG_CSP_444;
    if (!use_mip) {
      intra_mode = 0;
    }
  }

  if (intra_mode >= 68) {
    // Modes from 68 upwards take the CCLM path: start from the stored luma
    // samples, then either derive the parameters again or reuse the stored ones.
    const cclm_parameters_t* const params = &data->cclm_parameters[color == COLOR_U ? 0 : 1];
    uvg_pixels_blit(&state->tile->frame->cclm_luma_rec[x / 2 + (y * stride) / 4], dst, width, width, stride / 2, width);

    if (data->pred_cu.depth == data->pred_cu.tr_depth && params->b > 0) {
      linear_transform_cclm(params, dst, dst, width, width);
    }
    else {
      predict_cclm(
        state, color, width, width, x, y, stride, intra_mode, lcu, refs, dst,
        (cclm_parameters_t*)params);
    }
    return;
  }

  if (use_mip) {
    assert(intra_mode >= 0 && intra_mode < 16 && "MIP mode must be between [0, 15]");
    mip_predict(state, refs, width, height, dst, intra_mode, data->pred_cu.intra.mip_is_transposed);
  }
  else {
    intra_predict_regular(state, refs, uvg_g_convert_to_bit[width] + 2, intra_mode, color, dst, data->pred_cu.intra.multi_ref_idx);
  }
}
static void intra_recon_tb_leaf(
encoder_state_t *const state,
encoder_state_t* const state,
int x,
int y,
int depth,
int8_t intra_mode,
cclm_parameters_t *cclm_params,
lcu_t *lcu,
color_t color,
uint8_t multi_ref_idx,
bool mip_flag,
bool mip_transp)
const intra_search_data_t* search_data)
{
const uvg_config *cfg = &state->encoder_control->cfg;
const int shift = color == COLOR_Y ? 0 : 1;
@ -1383,7 +1447,7 @@ static void intra_recon_tb_leaf(
int x_scu = SUB_SCU(x);
int y_scu = SUB_SCU(y);
const vector2d_t lcu_px = {x_scu >> shift, y_scu >> shift };
uint8_t multi_ref_index = color == COLOR_Y ? multi_ref_idx : 0;
uint8_t multi_ref_index = color == COLOR_Y ? search_data->pred_cu.intra.multi_ref_idx: 0;
uvg_intra_references refs;
// Extra reference lines for use with MRL. Extra lines needed only for left edge.
@ -1406,42 +1470,14 @@ static void intra_recon_tb_leaf(
uvg_intra_build_reference(log2width, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp, extra_refs, multi_ref_index);
uvg_pixel pred[32 * 32];
int stride = state->tile->frame->source->stride;
const bool filter_boundary = color == COLOR_Y && !(cfg->lossless && cfg->implicit_rdpcm);
bool use_mip = false;
if (mip_flag) {
if (color == COLOR_Y) {
use_mip = true;
} else {
// MIP can be used for chroma if the chroma scheme is 444
if (state->encoder_control->chroma_format == UVG_CSP_444) {
use_mip = true;
} else {
// If MIP cannot be used for chroma, set mode to planar
intra_mode = 0;
}
}
}
if(intra_mode < 68) {
if (use_mip) {
assert(intra_mode >= 0 && intra_mode < 16 && "MIP mode must be between [0, 15]");
uvg_mip_predict(state, &refs, width, height, pred, intra_mode, mip_transp);
}
else {
uvg_intra_predict(state, &refs, log2width, intra_mode, color, pred, filter_boundary, multi_ref_index);
}
} else {
uvg_pixels_blit(&state->tile->frame->cclm_luma_rec[x / 2 + (y * stride) / 4], pred, width, width, stride / 2, width);
if(cclm_params == NULL) {
cclm_parameters_t temp_params;
uvg_predict_cclm(
state, color, width, width, x, y, stride, intra_mode, lcu, &refs, pred, &temp_params);
}
else {
linear_transform_cclm(&cclm_params[color == COLOR_U ? 0 : 1], pred, pred, width, width);
}
}
cu_loc_t loc = {
x, y,
width, height,
width, height,
};
uvg_intra_predict(state, &refs, &loc, color, pred, search_data, lcu);
const int index = lcu_px.x + lcu_px.y * lcu_width;
uvg_pixel *block = NULL;
@ -1483,17 +1519,12 @@ static void intra_recon_tb_leaf(
* \param lcu containing LCU
*/
void uvg_intra_recon_cu(
encoder_state_t *const state,
encoder_state_t* const state,
int x,
int y,
int depth,
int8_t mode_luma,
int8_t mode_chroma,
intra_search_data_t* search_data,
cu_info_t *cur_cu,
cclm_parameters_t *cclm_params,
uint8_t multi_ref_idx,
bool mip_flag,
bool mip_transp,
lcu_t *lcu)
{
const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) };
@ -1501,12 +1532,16 @@ void uvg_intra_recon_cu(
if (cur_cu == NULL) {
cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
}
uint8_t multi_ref_index = multi_ref_idx;
bool use_mip = mip_flag;
bool mip_transposed = mip_transp;
const int8_t mode_luma = search_data->pred_cu.intra.mode;
const int8_t mode_chroma= search_data->pred_cu.intra.mode_chroma;
if(mode_chroma != -1 && mode_luma == -1) {
x &= ~7;
y &= ~7;
}
if (mode_luma != -1 && mode_chroma != -1) {
if (use_mip) {
if (search_data->pred_cu.intra.mip_flag) {
assert(mode_luma == mode_chroma && "Chroma mode must be derived from luma mode if block uses MIP.");
}
}
@ -1527,10 +1562,10 @@ void uvg_intra_recon_cu(
const int32_t x2 = x + offset;
const int32_t y2 = y + offset;
uvg_intra_recon_cu(state, x, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, use_mip, mip_transposed, lcu);
uvg_intra_recon_cu(state, x2, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, use_mip, mip_transposed, lcu);
uvg_intra_recon_cu(state, x, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, use_mip, mip_transposed, lcu);
uvg_intra_recon_cu(state, x2, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, use_mip, mip_transposed, lcu);
uvg_intra_recon_cu(state, x, y, depth + 1, search_data, NULL, lcu);
uvg_intra_recon_cu(state, x2, y, depth + 1, search_data, NULL, lcu);
uvg_intra_recon_cu(state, x, y2, depth + 1, search_data, NULL, lcu);
uvg_intra_recon_cu(state, x2, y2, depth + 1, search_data, NULL, lcu);
// Propagate coded block flags from child CUs to parent CU.
uint16_t child_cbfs[3] = {
@ -1552,13 +1587,15 @@ void uvg_intra_recon_cu(
// Process a leaf TU.
if (has_luma) {
intra_recon_tb_leaf(state, x, y, depth, mode_luma, cclm_params, lcu, COLOR_Y, multi_ref_index, use_mip, mip_transposed);
intra_recon_tb_leaf(state, x, y, depth, lcu, COLOR_Y, search_data);
}
if (has_chroma) {
intra_recon_tb_leaf(state, x, y, depth, mode_chroma, cclm_params, lcu, COLOR_U, 0, use_mip, mip_transposed);
intra_recon_tb_leaf(state, x, y, depth, mode_chroma, cclm_params, lcu, COLOR_V, 0, use_mip, mip_transposed);
intra_recon_tb_leaf(state, x, y, depth, lcu, COLOR_U, search_data);
intra_recon_tb_leaf(state, x, y, depth, lcu, COLOR_V, search_data);
}
uvg_quantize_lcu_residual(state, has_luma, has_chroma, x, y, depth, cur_cu, lcu, false);
uvg_quantize_lcu_residual(state, has_luma, has_chroma && !(search_data->pred_cu.joint_cb_cr & 3),
search_data->pred_cu.joint_cb_cr != 4 && state->encoder_control->cfg.jccr && (x % 8 == 0 && y % 8 == 0),
x, y, depth, cur_cu, lcu, false);
}
}

View file

@ -63,6 +63,18 @@ typedef struct
int16_t b;
} cclm_parameters_t;
/**
 * \brief One intra prediction candidate together with its cost figures.
 *
 * Passed to uvg_intra_predict and uvg_intra_recon_cu in place of separate
 * mode, multi_ref_idx, MIP and CCLM arguments.
 */
typedef struct {
// CU describing the candidate; its intra.mode, intra.mode_chroma,
// intra.mip_flag, intra.mip_is_transposed and intra.multi_ref_idx fields
// select the prediction.
cu_info_t pred_cu;
// CCLM parameters; index 0 is used for COLOR_U and index 1 otherwise.
cclm_parameters_t cclm_parameters[2];
// NOTE(review): presumably the total cost of the candidate - confirm against the search code.
double cost;
// NOTE(review): presumably the estimated bit count of the candidate - confirm.
double bits;
// NOTE(review): presumably the bits spent on coefficients - confirm.
double coeff_bits;
// NOTE(review): presumably the distortion of the candidate - confirm.
double distortion;
} intra_search_data_t ;
#define UVG_NUM_INTRA_MODES 67
/**
* \brief Function for deriving intra luma predictions
* \param x x-coordinate of the PU in pixels
@ -114,53 +126,22 @@ void uvg_intra_build_reference(
* \param filter_boundary Whether to filter the boundary on modes 10 and 26.
*/
void uvg_intra_predict(
encoder_state_t *const state,
uvg_intra_references *refs,
int_fast8_t log2_width,
int_fast8_t mode,
color_t color,
uvg_pixel *dst,
bool filter_boundary,
const uint8_t multi_ref_idx);
const encoder_state_t* const state,
uvg_intra_references* const refs,
const cu_loc_t* const cu_loc,
const color_t color,
uvg_pixel* dst,
const intra_search_data_t* data,
const lcu_t* lcu
);
void uvg_intra_recon_cu(
encoder_state_t *const state,
encoder_state_t* const state,
int x,
int y,
int depth,
int8_t mode_luma,
int8_t mode_chroma,
intra_search_data_t* search_data,
cu_info_t *cur_cu,
cclm_parameters_t* cclm_params,
uint8_t multi_ref_idx,
bool mip_flag,
bool mip_transp,
lcu_t *lcu);
void uvg_predict_cclm(
encoder_state_t const* const state,
const color_t color,
const int8_t width,
const int8_t height,
const int16_t x0,
const int16_t y0,
const int16_t stride,
const int8_t mode,
lcu_t* const lcu,
uvg_intra_references* chroma_ref,
uvg_pixel* dst,
cclm_parameters_t* cclm_params
);
int uvg_get_mip_flag_context(int x, int y, int width, int height, const lcu_t* lcu, cu_array_t* const cu_a);
void uvg_mip_predict(
encoder_state_t const * const state,
uvg_intra_references * refs,
const uint16_t width,
const uint16_t height,
uvg_pixel* dst,
const int mip_mode,
const bool mip_transp
);

View file

@ -1088,17 +1088,20 @@ void uvg_set_lcu_lambda_and_qp(encoder_state_t * const state,
const encoder_control_t * const ctrl = state->encoder_control;
lcu_stats_t *lcu = uvg_get_lcu_stats(state, pos.x, pos.y);
if (ctrl->cfg.roi.dqps != NULL) {
vector2d_t lcu = {
if (state->tile->frame->source->roi.roi_array) {
vector2d_t lcu_vec = {
pos.x + state->tile->lcu_offset_x,
pos.y + state->tile->lcu_offset_y
};
vector2d_t roi = {
lcu.x * ctrl->cfg.roi.width / ctrl->in.width_in_lcu,
lcu.y * ctrl->cfg.roi.height / ctrl->in.height_in_lcu
lcu_vec.x * state->tile->frame->source->roi.width / ctrl->in.width_in_lcu,
lcu_vec.y * state->tile->frame->source->roi.height / ctrl->in.height_in_lcu
};
int roi_index = roi.x + roi.y * ctrl->cfg.roi.width;
int dqp = ctrl->cfg.roi.dqps[roi_index];
int roi_index = roi.x + roi.y * state->tile->frame->source->roi.width;
int dqp = state->tile->frame->source->roi.roi_array[roi_index];
if(dqp != 0) {
pos.x = 0;
}
state->qp = CLIP_TO_QP(state->frame->QP + dqp);
state->lambda = qp_to_lambda(state, state->qp);
state->lambda_sqrt = sqrt(state->lambda);

View file

@ -315,12 +315,12 @@ static INLINE uint32_t get_coeff_cabac_cost(
// Take a copy of the CABAC so that we don't overwrite the contexts when
// counting the bits.
cabac_data_t cabac_copy;
memcpy(&cabac_copy, &state->cabac, sizeof(cabac_copy));
memcpy(&cabac_copy, &state->search_cabac, sizeof(cabac_copy));
// Clear bytes and bits and set mode to "count"
cabac_copy.only_count = 1;
cabac_copy.num_buffered_bytes = 0;
cabac_copy.bits_left = 23;
int num_buffered_bytes = cabac_copy.num_buffered_bytes;
int bits_left = cabac_copy.bits_left;
// Execute the coding function.
// It is safe to drop the const modifier since state won't be modified
@ -343,8 +343,10 @@ static INLINE uint32_t get_coeff_cabac_cost(
type,
scan_mode);
}
return (23 - cabac_copy.bits_left) + (cabac_copy.num_buffered_bytes << 3);
if(cabac_copy.update) {
memcpy((cabac_data_t *)&state->search_cabac, &cabac_copy, sizeof(cabac_copy));
}
return (bits_left - cabac_copy.bits_left) + ((cabac_copy.num_buffered_bytes - num_buffered_bytes) << 3);
}
static INLINE void save_ccc(int qp, const coeff_t *coeff, int32_t size, uint32_t ccc)
@ -1741,37 +1743,33 @@ void uvg_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
/**
* Calculate cost of actual motion vectors using CABAC coding
*/
uint32_t uvg_get_mvd_coding_cost_cabac(const encoder_state_t *state,
const cabac_data_t* cabac,
const int32_t mvd_hor,
const int32_t mvd_ver)
double uvg_get_mvd_coding_cost_cabac(const encoder_state_t* state,
const cabac_data_t* cabac,
const int32_t mvd_hor,
const int32_t mvd_ver)
{
cabac_data_t cabac_copy = *cabac;
cabac_copy.only_count = 1;
double bits = 0;
// It is safe to drop const here because cabac->only_count is set.
uvg_encode_mvd((encoder_state_t*) state, &cabac_copy, mvd_hor, mvd_ver);
uvg_encode_mvd((encoder_state_t*) state, &cabac_copy, mvd_hor, mvd_ver, &bits);
uint32_t bitcost =
((23 - cabac_copy.bits_left) + (cabac_copy.num_buffered_bytes << 3)) -
((23 - cabac->bits_left) + (cabac->num_buffered_bytes << 3));
return bitcost;
return bits;
}
/** MVD cost calculation with CABAC
* \returns int
* Calculates Motion Vector cost and related costs using CABAC coding
*/
uint32_t uvg_calc_mvd_cost_cabac(const encoder_state_t * state,
int x,
int y,
int mv_shift,
mv_t mv_cand[2][2],
inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
int16_t num_cand,
int32_t ref_idx,
uint32_t *bitcost)
double uvg_calc_mvd_cost_cabac(const encoder_state_t * state,
int x,
int y,
int mv_shift,
mv_t mv_cand[2][2],
inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
int16_t num_cand,
int32_t ref_idx,
double* bitcost)
{
cabac_data_t state_cabac_copy;
cabac_data_t* cabac;
@ -1798,14 +1796,13 @@ uint32_t uvg_calc_mvd_cost_cabac(const encoder_state_t * state,
}
// Store cabac state and contexts
memcpy(&state_cabac_copy, &state->cabac, sizeof(cabac_data_t));
memcpy(&state_cabac_copy, &state->search_cabac, sizeof(cabac_data_t));
// Clear bytes and bits and set mode to "count"
state_cabac_copy.only_count = 1;
state_cabac_copy.num_buffered_bytes = 0;
state_cabac_copy.bits_left = 23;
cabac = &state_cabac_copy;
double bits = 0;
if (!merged) {
vector2d_t mvd1 = {
@ -1820,8 +1817,8 @@ uint32_t uvg_calc_mvd_cost_cabac(const encoder_state_t * state,
uvg_change_precision_vector2d(INTERNAL_MV_PREC, 2, &mvd1);
uvg_change_precision_vector2d(INTERNAL_MV_PREC, 2, &mvd2);
uint32_t cand1_cost = uvg_get_mvd_coding_cost_cabac(state, cabac, mvd1.x, mvd1.y);
uint32_t cand2_cost = uvg_get_mvd_coding_cost_cabac(state, cabac, mvd2.x, mvd2.y);
double cand1_cost = uvg_get_mvd_coding_cost_cabac(state, cabac, mvd1.x, mvd1.y);
double cand2_cost = uvg_get_mvd_coding_cost_cabac(state, cabac, mvd2.x, mvd2.y);
// Select candidate 1 if it has lower cost
if (cand2_cost < cand1_cost) {
@ -1834,7 +1831,7 @@ uint32_t uvg_calc_mvd_cost_cabac(const encoder_state_t * state,
cabac->cur_ctx = &(cabac->ctx.cu_merge_flag_ext_model);
CABAC_BIN(cabac, merged, "MergeFlag");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_merge_flag_ext_model), merged, bits, "MergeFlag");
num_cand = state->encoder_control->cfg.max_merge;
if (merged) {
if (num_cand > 1) {
@ -1842,10 +1839,10 @@ uint32_t uvg_calc_mvd_cost_cabac(const encoder_state_t * state,
for (ui = 0; ui < num_cand - 1; ui++) {
int32_t symbol = (ui != merge_idx);
if (ui == 0) {
cabac->cur_ctx = &(cabac->ctx.cu_merge_idx_ext_model);
CABAC_BIN(cabac, symbol, "MergeIndex");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_merge_idx_ext_model), symbol, bits, "MergeIndex");
} else {
CABAC_BIN_EP(cabac, symbol, "MergeIndex");
bits += 1;
}
if (symbol == 0) break;
}
@ -1869,23 +1866,22 @@ uint32_t uvg_calc_mvd_cost_cabac(const encoder_state_t * state,
// parseRefFrmIdx
int32_t ref_frame = ref_idx;
cabac->cur_ctx = &(cabac->ctx.cu_ref_pic_model[0]);
CABAC_BIN(cabac, (ref_frame != 0), "ref_idx_lX");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_ref_pic_model[0]), (ref_frame != 0), bits, "ref_idx_lX");
if (ref_frame > 0) {
int32_t i;
int32_t ref_num = ref_list[ref_list_idx] - 2;
cabac->cur_ctx = &(cabac->ctx.cu_ref_pic_model[1]);
ref_frame--;
for (i = 0; i < ref_num; ++i) {
const uint32_t symbol = (i == ref_frame) ? 0 : 1;
if (i == 0) {
CABAC_BIN(cabac, symbol, "ref_idx_lX");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_ref_pic_model[1]), symbol, bits, "ref_idx_lX");
} else {
CABAC_BIN_EP(cabac, symbol, "ref_idx_lX");
bits += 1;
}
if (symbol == 0) break;
}
@ -1895,7 +1891,7 @@ uint32_t uvg_calc_mvd_cost_cabac(const encoder_state_t * state,
// ToDo: Bidir vector support
if (!(state->frame->ref_list == REF_PIC_LIST_1 && /*cur_cu->inter.mv_dir == 3*/ 0)) {
// It is safe to drop const here because cabac->only_count is set.
uvg_encode_mvd((encoder_state_t*) state, cabac, mvd.x, mvd.y);
uvg_encode_mvd((encoder_state_t*) state, cabac, mvd.x, mvd.y, &bits);
}
// Signal which candidate MV to use
@ -1905,10 +1901,10 @@ uint32_t uvg_calc_mvd_cost_cabac(const encoder_state_t * state,
}
}
*bitcost = (23 - state_cabac_copy.bits_left) + (state_cabac_copy.num_buffered_bytes << 3);
*bitcost = bits;
// Store bitcost before restoring cabac
return *bitcost * (uint32_t)(state->lambda_sqrt + 0.5);
return *bitcost * state->lambda_sqrt;
}
void uvg_close_rdcost_outfiles(void)

View file

@ -77,10 +77,10 @@ uint32_t uvg_get_coded_level(encoder_state_t * state, double* coded_cost, double
uvg_mvd_cost_func uvg_calc_mvd_cost_cabac;
uint32_t uvg_get_mvd_coding_cost_cabac(const encoder_state_t *state,
const cabac_data_t* cabac,
int32_t mvd_hor,
int32_t mvd_ver);
double uvg_get_mvd_coding_cost_cabac(const encoder_state_t* state,
const cabac_data_t* cabac,
int32_t mvd_hor,
int32_t mvd_ver);
// Number of fixed point fractional bits used in the fractional bit table.
#define CTX_FRAC_BITS 15
@ -90,8 +90,5 @@ uint32_t uvg_get_mvd_coding_cost_cabac(const encoder_state_t *state,
extern const uint32_t uvg_entropy_bits[512];
#define CTX_ENTROPY_BITS(ctx, val) uvg_entropy_bits[(CTX_STATE(ctx)<<1) ^ (val)]
// Floating point fractional bits, derived from uvg_entropy_bits
extern const float uvg_f_entropy_bits[512];
#define CTX_ENTROPY_FBITS(ctx, val) uvg_f_entropy_bits[(CTX_STATE(ctx)<<1) ^ (val)]
#endif

View file

@ -49,63 +49,64 @@ static void init_sao_info(sao_info_t *sao) {
}
static float sao_mode_bits_none(const encoder_state_t * const state, sao_info_t *sao_top, sao_info_t *sao_left)
static double sao_mode_bits_none(const encoder_state_t * const state, sao_info_t *sao_top, sao_info_t *sao_left)
{
float mode_bits = 0.0;
const cabac_data_t * const cabac = &state->cabac;
const cabac_ctx_t *ctx = NULL;
double mode_bits = 0.0;
cabac_data_t * cabac = (cabac_data_t*)&state->search_cabac;
cabac_ctx_t *ctx = NULL;
// FL coded merges.
if (sao_left != NULL) {
ctx = &(cabac->ctx.sao_merge_flag_model);
mode_bits += CTX_ENTROPY_FBITS(ctx, 0);
CABAC_FBITS_UPDATE(cabac, ctx, 0, mode_bits, "sao_merge_flag");
}
if (sao_top != NULL) {
ctx = &(cabac->ctx.sao_merge_flag_model);
mode_bits += CTX_ENTROPY_FBITS(ctx, 0);
CABAC_FBITS_UPDATE(cabac, ctx, 0, mode_bits, "sao_merge_flag");
}
// TR coded type_idx_, none = 0
ctx = &(cabac->ctx.sao_type_idx_model);
mode_bits += CTX_ENTROPY_FBITS(ctx, 0);
CABAC_FBITS_UPDATE(cabac, ctx, 0, mode_bits, "sao_type");
return mode_bits;
}
static float sao_mode_bits_merge(const encoder_state_t * const state,
static double sao_mode_bits_merge(const encoder_state_t * const state,
int8_t merge_cand) {
float mode_bits = 0.0;
const cabac_data_t * const cabac = &state->cabac;
const cabac_ctx_t *ctx = NULL;
double mode_bits = 0.0;
cabac_data_t * cabac = (cabac_data_t*)&state->search_cabac;
cabac_ctx_t *ctx = NULL;
// FL coded merges.
ctx = &(cabac->ctx.sao_merge_flag_model);
mode_bits += CTX_ENTROPY_FBITS(ctx, merge_cand == 1);
CABAC_FBITS_UPDATE(cabac, ctx, merge_cand == 1, mode_bits, "sao_merge_flag");
if (merge_cand == 1) return mode_bits;
mode_bits += CTX_ENTROPY_FBITS(ctx, merge_cand == 2);
CABAC_FBITS_UPDATE(cabac, ctx, merge_cand == 2, mode_bits, "sao_merge_flag");
return mode_bits;
}
static float sao_mode_bits_edge(const encoder_state_t * const state,
static double sao_mode_bits_edge(const encoder_state_t * const state,
int edge_class, int offsets[NUM_SAO_EDGE_CATEGORIES],
sao_info_t *sao_top, sao_info_t *sao_left, unsigned buf_cnt)
{
float mode_bits = 0.0;
const cabac_data_t * const cabac = &state->cabac;
const cabac_ctx_t *ctx = NULL;
double mode_bits = 0.0;
cabac_data_t * cabac = (cabac_data_t*)&state->search_cabac;
cabac_ctx_t *ctx = NULL;
// FL coded merges.
if (sao_left != NULL) {
ctx = &(cabac->ctx.sao_merge_flag_model);
mode_bits += CTX_ENTROPY_FBITS(ctx, 0);
CABAC_FBITS_UPDATE(cabac, ctx, 0, mode_bits, "sao_merge_flag");
}
if (sao_top != NULL) {
ctx = &(cabac->ctx.sao_merge_flag_model);
mode_bits += CTX_ENTROPY_FBITS(ctx, 0);
CABAC_FBITS_UPDATE(cabac, ctx, 0, mode_bits, "sao_merge_flag");
}
// TR coded type_idx_, edge = 2 = cMax
ctx = &(cabac->ctx.sao_type_idx_model);
mode_bits += CTX_ENTROPY_FBITS(ctx, 1) + 1.0;
CABAC_FBITS_UPDATE(cabac, ctx, 1, mode_bits, "sao_type");
mode_bits += 1.0;
// TR coded offsets.
for (unsigned buf_index = 0; buf_index < buf_cnt; buf_index++) {
@ -126,26 +127,27 @@ static float sao_mode_bits_edge(const encoder_state_t * const state,
}
static float sao_mode_bits_band(const encoder_state_t * const state,
static double sao_mode_bits_band(const encoder_state_t * const state,
int band_position[2], int offsets[10],
sao_info_t *sao_top, sao_info_t *sao_left, unsigned buf_cnt)
{
float mode_bits = 0.0;
const cabac_data_t * const cabac = &state->cabac;
const cabac_ctx_t *ctx = NULL;
double mode_bits = 0.0;
cabac_data_t * cabac = (cabac_data_t*)&state->search_cabac;
cabac_ctx_t *ctx = NULL;
// FL coded merges.
if (sao_left != NULL) {
ctx = &(cabac->ctx.sao_merge_flag_model);
mode_bits += CTX_ENTROPY_FBITS(ctx, 0);
CABAC_FBITS_UPDATE(cabac, ctx, 0, mode_bits, "sao_merge_flag");
}
if (sao_top != NULL) {
ctx = &(cabac->ctx.sao_merge_flag_model);
mode_bits += CTX_ENTROPY_FBITS(ctx, 0);
CABAC_FBITS_UPDATE(cabac, ctx, 0, mode_bits, "sao_merge_flag");
}
// TR coded sao_type_idx_, band = 1
ctx = &(cabac->ctx.sao_type_idx_model);
mode_bits += CTX_ENTROPY_FBITS(ctx, 1) + 1.0;
CABAC_FBITS_UPDATE(cabac, ctx, 1, mode_bits, "sao_type");
mode_bits += 1.0;
// TR coded offsets and possible FL coded offset signs.
for (unsigned buf_index = 0; buf_index < buf_cnt; buf_index++)
@ -552,7 +554,8 @@ static void sao_search_best_mode(const encoder_state_t * const state, const uvg_
// Choose between SAO and doing nothing, taking into account the
// rate-distortion cost of coding do nothing.
{
int cost_of_nothing = (int)(sao_mode_bits_none(state, sao_top, sao_left) * state->lambda + 0.5);
float mode_bits_none = sao_mode_bits_none(state, sao_top, sao_left);
int cost_of_nothing = (int)(mode_bits_none * state->lambda + 0.5);
if (sao_out->ddistortion >= cost_of_nothing) {
sao_out->type = SAO_TYPE_NONE;
merge_cost[0] = cost_of_nothing;

View file

@ -37,6 +37,7 @@
#include "cabac.h"
#include "encoder.h"
#include "encode_coding_tree.h"
#include "imagelist.h"
#include "inter.h"
#include "intra.h"
@ -59,14 +60,6 @@
// Cost threshold for doing intra search in inter frames with --rd=0.
static const int INTRA_THRESHOLD = 8;
// Modify weight of luma SSD.
#ifndef LUMA_MULT
# define LUMA_MULT 0.8
#endif
// Modify weight of chroma SSD.
#ifndef CHROMA_MULT
# define CHROMA_MULT 1.5
#endif
static INLINE void copy_cu_info(int x_local, int y_local, int width, lcu_t *from, lcu_t *to)
{
@ -225,16 +218,16 @@ static double cu_zero_coeff_cost(const encoder_state_t *state, lcu_t *work_tree,
const int chroma_index = (y_local / 2) * LCU_WIDTH_C + (x_local / 2);
double ssd = 0.0;
ssd += LUMA_MULT * uvg_pixels_calc_ssd(
ssd += UVG_LUMA_MULT * uvg_pixels_calc_ssd(
&lcu->ref.y[luma_index], &lcu->rec.y[luma_index],
LCU_WIDTH, LCU_WIDTH, cu_width
);
if (x % 8 == 0 && y % 8 == 0 && state->encoder_control->chroma_format != UVG_CSP_400) {
ssd += CHROMA_MULT * uvg_pixels_calc_ssd(
ssd += UVG_CHROMA_MULT * uvg_pixels_calc_ssd(
&lcu->ref.u[chroma_index], &lcu->rec.u[chroma_index],
LCU_WIDTH_C, LCU_WIDTH_C, cu_width / 2
);
ssd += CHROMA_MULT * uvg_pixels_calc_ssd(
ssd += UVG_CHROMA_MULT * uvg_pixels_calc_ssd(
&lcu->ref.v[chroma_index], &lcu->rec.v[chroma_index],
LCU_WIDTH_C, LCU_WIDTH_C, cu_width / 2
);
@ -251,7 +244,8 @@ static void downsample_cclm_rec(encoder_state_t *state, int x, int y, int width,
int x_scu = SUB_SCU(x);
int y_scu = SUB_SCU(y);
y_rec += x_scu + y_scu * LCU_WIDTH;
int stride = state->tile->frame->source->stride;
const int stride = state->tile->frame->rec->stride;
const int stride2 = (((state->tile->frame->width + 7) & ~7) + FRAME_PADDING_LUMA);
for (int y_ = 0; y_ < height && y_ * 2 + y < state->encoder_control->cfg.height; y_++) {
for (int x_ = 0; x_ < width; x_++) {
@ -265,13 +259,13 @@ static void downsample_cclm_rec(encoder_state_t *state, int x, int y, int width,
s += y_rec[2 * x_ + LCU_WIDTH] * 2;
s += y_rec[2 * x_ + 1 + LCU_WIDTH];
s += !x_scu && !x_ && x ? state->tile->frame->rec->y[x - 1 + (y + y_ * 2 + 1) * stride] : y_rec[2 * x_ - ((x_ + x) > 0) + LCU_WIDTH];
int index = x / 2 + x_ + (y / 2 + y_ )* stride / 2;
int index = x / 2 + x_ + (y / 2 + y_ )* stride2 / 2;
state->tile->frame->cclm_luma_rec[index] = s >> 3;
}
y_rec += LCU_WIDTH * 2;
}
if((y + height * 2) % 64 == 0) {
int line = y / 64 * stride / 2;
int line = y / 64 * stride2 / 2;
y_rec -= LCU_WIDTH;
for (int i = 0; i < width; ++i) {
int s = 2;
@ -294,11 +288,13 @@ static void downsample_cclm_rec(encoder_state_t *state, int x, int y, int width,
* prediction unit data needs to be coded.
*/
double uvg_cu_rd_cost_luma(const encoder_state_t *const state,
const int x_px, const int y_px, const int depth,
const cu_info_t *const pred_cu,
lcu_t *const lcu)
const int x_px, const int y_px, const int depth,
const cu_info_t *const pred_cu,
lcu_t *const lcu)
{
const int width = LCU_WIDTH >> depth;
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0);
cabac_data_t* cabac = (cabac_data_t *)&state->search_cabac;
// cur_cu is used for TU parameters.
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
@ -324,14 +320,36 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state,
return sum + tr_tree_bits * state->lambda;
}
if (cabac->update && tr_cu->tr_depth == tr_cu->depth && !skip_residual_coding) {
// Because these need to be coded before the luma cbf they also need to be counted
// before the cabac state changes. However, since this branch is only executed when
// calculating the last RD cost it is not a problem to include the chroma cbf costs in
// luma, because the chroma cost is calculated right after the luma cost.
// However, if we have different tr_depth, the bits cannot be written in correct
// order anyways so do not touch the chroma cbf here.
if (state->encoder_control->chroma_format != UVG_CSP_400) {
cabac_ctx_t* cr_ctx = &(cabac->ctx.qt_cbf_model_cb[0]);
cabac->cur_ctx = cr_ctx;
int u_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U);
int v_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_V);
CABAC_FBITS_UPDATE(cabac, cr_ctx, u_is_set, tr_tree_bits, "cbf_cb_search");
cr_ctx = &(cabac->ctx.qt_cbf_model_cr[u_is_set]);
CABAC_FBITS_UPDATE(cabac, cr_ctx, v_is_set, tr_tree_bits, "cbf_cb_search");
}
}
// Add transform_tree cbf_luma bit cost.
const int is_tr_split = tr_cu->tr_depth - tr_cu->depth;
if (pred_cu->type == CU_INTRA ||
tr_depth > 0 ||
is_tr_split ||
cbf_is_set(tr_cu->cbf, depth, COLOR_U) ||
cbf_is_set(tr_cu->cbf, depth, COLOR_V))
{
const cabac_ctx_t *ctx = &(state->cabac.ctx.qt_cbf_model_luma[0]);
tr_tree_bits += CTX_ENTROPY_FBITS(ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_Y));
cabac_ctx_t *ctx = &(cabac->ctx.qt_cbf_model_luma[0]);
int is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_Y);
CABAC_FBITS_UPDATE(cabac, ctx, is_set, tr_tree_bits, "cbf_y_search");
}
// SSD between reconstruction and original
@ -343,7 +361,8 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state,
width);
}
{
if (!skip_residual_coding) {
int8_t luma_scan_mode = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth);
const coeff_t *coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)];
@ -351,23 +370,22 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state,
}
double bits = tr_tree_bits + coeff_bits;
return (double)ssd * LUMA_MULT + bits * state->lambda;
return (double)ssd * UVG_LUMA_MULT + bits * state->lambda;
}
double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
const int x_px, const int y_px, const int depth,
cu_info_t * pred_cu,
lcu_t *const lcu)
const int x_px, const int y_px, const int depth,
cu_info_t *const pred_cu,
lcu_t *const lcu)
{
const vector2d_t lcu_px = { (x_px & ~7) / 2, (y_px & ~7) / 2 };
const int width = (depth < MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth;
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0);
double tr_tree_bits = 0;
double joint_cbcr_tr_tree_bits = 0;
double coeff_bits = 0;
double joint_coeff_bits = 0;
assert(x_px >= 0 && x_px < LCU_WIDTH);
assert(y_px >= 0 && y_px < LCU_WIDTH);
@ -378,30 +396,28 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
return 0;
}
if (depth < MAX_PU_DEPTH) {
// See luma for why the second condition
if (depth < MAX_PU_DEPTH && (!state->search_cabac.update || tr_cu->tr_depth != tr_cu->depth) && !skip_residual_coding) {
const int tr_depth = depth - pred_cu->depth;
const cabac_ctx_t *ctx = &(state->cabac.ctx.qt_cbf_model_cb[0]);
cabac_data_t* cabac = (cabac_data_t*)&state->search_cabac;
cabac_ctx_t *ctx = &(cabac->ctx.qt_cbf_model_cb[0]);
cabac->cur_ctx = ctx;
if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) {
tr_tree_bits += CTX_ENTROPY_FBITS(ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_U));
}
if(state->encoder_control->cfg.jccr) {
joint_cbcr_tr_tree_bits += CTX_ENTROPY_FBITS(ctx, pred_cu->joint_cb_cr & 1);
int u_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U);
CABAC_FBITS_UPDATE(cabac, ctx, u_is_set, tr_tree_bits, "cbf_cb_search");
}
int is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U);
ctx = &(state->cabac.ctx.qt_cbf_model_cr[is_set]);
ctx = &(cabac->ctx.qt_cbf_model_cr[is_set]);
if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_V)) {
tr_tree_bits += CTX_ENTROPY_FBITS(ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_V));
}
if(state->encoder_control->cfg.jccr) {
ctx = &(state->cabac.ctx.qt_cbf_model_cr[pred_cu->joint_cb_cr & 1]);
joint_cbcr_tr_tree_bits += CTX_ENTROPY_FBITS(ctx, (pred_cu->joint_cb_cr & 2) >> 1);
int v_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_V);
CABAC_FBITS_UPDATE(cabac, ctx, v_is_set, tr_tree_bits, "cbf_cb_search");
}
}
if (tr_cu->tr_depth > depth) {
int offset = LCU_WIDTH >> (depth + 1);
int sum = 0;
double sum = 0;
sum += uvg_cu_rd_cost_chroma(state, x_px, y_px, depth + 1, pred_cu, lcu);
sum += uvg_cu_rd_cost_chroma(state, x_px + offset, y_px, depth + 1, pred_cu, lcu);
@ -418,15 +434,10 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
ctx = &(state->cabac.ctx.joint_cb_cr[cbf_mask]);
tr_tree_bits += CTX_ENTROPY_FBITS(ctx, 0);
}
if(pred_cu->joint_cb_cr) {
ctx = &(state->cabac.ctx.joint_cb_cr[(pred_cu->joint_cb_cr & 1) * 2 + ((pred_cu->joint_cb_cr & 2) >> 1) - 1]);
joint_cbcr_tr_tree_bits += CTX_ENTROPY_FBITS(ctx, 1);
}
}
// Chroma SSD
int ssd = 0;
int joint_ssd = 0;
if (!state->encoder_control->cfg.lossless) {
int index = lcu_px.y * LCU_WIDTH_C + lcu_px.x;
int ssd_u = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index],
@ -436,12 +447,226 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
LCU_WIDTH_C, LCU_WIDTH_C,
width);
ssd = ssd_u + ssd_v;
}
if(state->encoder_control->cfg.jccr) {
if (!skip_residual_coding)
{
int8_t scan_order = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth);
const int index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y);
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.u[index], width, 2, scan_order, 0);
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.v[index], width, 2, scan_order, 0);
}
double bits = tr_tree_bits + coeff_bits;
return (double)ssd * UVG_CHROMA_MULT + bits * state->c_lambda;
}
/**
 * \brief Calculate the RD cost of a CU, walking the transform tree and
 *        counting cbf flags, coefficients and distortion together.
 *
 * Flag bits are accumulated with CABAC_FBITS_UPDATE on state->search_cabac.
 * When the transform tree is split deeper than \p depth the function
 * recurses into the four quadrants and returns their sum plus the cost of
 * the flags counted at this level.
 *
 * \param state    encoder state; search_cabac, lambda and cfg are read
 * \param x_px     x-coordinate of the block, local to the LCU
 * \param y_px     y-coordinate of the block, local to the LCU
 * \param depth    depth of the block being evaluated
 * \param pred_cu  CU providing type, part_size, merged/skipped, cbf,
 *                 intra modes and joint_cb_cr
 * \param lcu      LCU holding the ref/rec pixels and the coefficients
 * \return luma_ssd * UVG_LUMA_MULT + chroma_ssd * UVG_CHROMA_MULT
 *         + bits * state->lambda
 */
static double cu_rd_cost_tr_split_accurate(const encoder_state_t* const state,
  const int x_px, const int y_px, const int depth,
  const cu_info_t* const pred_cu,
  lcu_t* const lcu) {
  const int width = LCU_WIDTH >> depth;

  const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0);
  // tr_cu is used for TU parameters.
  cu_info_t* const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);

  double coeff_bits = 0;
  double tr_tree_bits = 0;

  // Check that the coordinates are local to the LCU.
  assert(x_px >= 0 && x_px < LCU_WIDTH);
  assert(y_px >= 0 && y_px < LCU_WIDTH);

  const uint8_t tr_depth = tr_cu->tr_depth - depth;

  const int cb_flag_u = cbf_is_set(tr_cu->cbf, depth, COLOR_U);
  const int cb_flag_v = cbf_is_set(tr_cu->cbf, depth, COLOR_V);

  cabac_data_t* cabac = (cabac_data_t*)&state->search_cabac;

  {
    int cbf = cbf_is_set_any(pred_cu->cbf, depth);
    // Only need to signal coded block flag if not skipped or merged
    // skip = no coded residual, merge = coded residual
    if (pred_cu->type == CU_INTER && (pred_cu->part_size != SIZE_2Nx2N || !pred_cu->merged)) {
      CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_qt_root_cbf_model), cbf, tr_tree_bits, "rqt_root_cbf");
    }
  }

  // Chroma cbf flags are counted at the CU root, or when the parent level
  // had the corresponding cbf set.
  if(state->encoder_control->chroma_format != UVG_CSP_400 && !skip_residual_coding) {
    if(tr_cu->depth == depth || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) {
      CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cb[0]), cb_flag_u, tr_tree_bits, "cbf_cb");
    }
    if(tr_cu->depth == depth || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_V)) {
      CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cr[cb_flag_u]), cb_flag_v, tr_tree_bits, "cbf_cr");
    }
  }

  if (tr_depth > 0) {
    // Transform tree is split further: sum the four sub-blocks.
    int offset = LCU_WIDTH >> (depth + 1);
    double sum = 0;

    sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px, depth + 1, pred_cu, lcu);
    sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px, depth + 1, pred_cu, lcu);
    sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px + offset, depth + 1, pred_cu, lcu);
    sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu);
    return sum + tr_tree_bits * state->lambda;
  }
  const int cb_flag_y = cbf_is_set(tr_cu->cbf, depth, COLOR_Y) ;

  // Add transform_tree cbf_luma bit cost.
  const int is_tr_split = depth - tr_cu->depth;
  if ((pred_cu->type == CU_INTRA ||
    is_tr_split ||
    cb_flag_u ||
    cb_flag_v)
    && !skip_residual_coding)
  {
    cabac_ctx_t* ctx = &(cabac->ctx.qt_cbf_model_luma[!is_tr_split]);

    CABAC_FBITS_UPDATE(cabac, ctx, cb_flag_y, tr_tree_bits, "cbf_y_search");
  }

  if (cb_flag_y | cb_flag_u | cb_flag_v) {
    // TODO qp_delta_sign_flag

    if ((cb_flag_u | cb_flag_v) && x_px % 8 == 0 && y_px % 8 == 0 && state->encoder_control->cfg.jccr) {
      CABAC_FBITS_UPDATE(cabac, &cabac->ctx.joint_cb_cr[cb_flag_u * 2 + cb_flag_v - 1], tr_cu->joint_cb_cr != 0, tr_tree_bits, "tu_joint_cbcr_residual_flag");
    }
  }

  // SSD between reconstruction and original
  unsigned luma_ssd = 0;
  if (!state->encoder_control->cfg.lossless) {
    int index = y_px * LCU_WIDTH + x_px;
    luma_ssd = uvg_pixels_calc_ssd(&lcu->ref.y[index], &lcu->rec.y[index],
      LCU_WIDTH, LCU_WIDTH,
      width);
  }

  {
    int8_t luma_scan_mode = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth);
    const coeff_t* coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)];

    coeff_bits += uvg_get_coeff_cost(state, coeffs, width, 0, luma_scan_mode, tr_cu->tr_skip);
  }

  unsigned chroma_ssd = 0;
  if(state->encoder_control->chroma_format != UVG_CSP_400 && (depth != 4 || (x_px % 8 != 0 && y_px % 8 != 0))) {
    const vector2d_t lcu_px = { (x_px & ~7 ) / 2, (y_px & ~7) / 2 };
    const int chroma_width = MAX(4, LCU_WIDTH >> (depth + 1));
    int8_t scan_order = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth);
    // Coefficient buffers are addressed in z-order, pixel buffers in raster
    // order with stride LCU_WIDTH_C; keep the two indices separate.
    const unsigned coeff_index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y);
    const int pixel_index = lcu_px.y * LCU_WIDTH_C + lcu_px.x;
    if(pred_cu->joint_cb_cr == 0) {
      if (!state->encoder_control->cfg.lossless) {
        unsigned ssd_u = uvg_pixels_calc_ssd(&lcu->ref.u[pixel_index], &lcu->rec.u[pixel_index],
          LCU_WIDTH_C, LCU_WIDTH_C,
          chroma_width);
        unsigned ssd_v = uvg_pixels_calc_ssd(&lcu->ref.v[pixel_index], &lcu->rec.v[pixel_index],
          LCU_WIDTH_C, LCU_WIDTH_C,
          chroma_width);
        chroma_ssd = ssd_u + ssd_v;
      }

      {
        coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.u[coeff_index], chroma_width, 2, scan_order, 0);
        coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.v[coeff_index], chroma_width, 2, scan_order, 0);
      }
    } else {
      // Joint Cb-Cr: distortion is measured against the joint reconstruction.
      // Both planes use the raster pixel index and the chroma block width,
      // matching the non-joint branch above.
      unsigned ssd_u_joint = uvg_pixels_calc_ssd(&lcu->ref.u[pixel_index], &lcu->rec.joint_u[pixel_index],
        LCU_WIDTH_C, LCU_WIDTH_C,
        chroma_width);
      unsigned ssd_v_joint = uvg_pixels_calc_ssd(&lcu->ref.v[pixel_index], &lcu->rec.joint_v[pixel_index],
        LCU_WIDTH_C, LCU_WIDTH_C,
        chroma_width);
      chroma_ssd = ssd_u_joint + ssd_v_joint;
      coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.joint_uv[coeff_index], chroma_width, 2, scan_order, 0);
    }
  }

  double bits = tr_tree_bits + coeff_bits;
  return luma_ssd * UVG_LUMA_MULT + chroma_ssd * UVG_CHROMA_MULT + bits * state->lambda;
}
void uvg_select_jccr_mode(
const encoder_state_t* const state,
const int x_px,
const int y_px,
const int depth,
cu_info_t* pred_cu,
lcu_t* const lcu,
double* cost_out)
{
const vector2d_t lcu_px = { (SUB_SCU(x_px) & ~7) / 2, (SUB_SCU(y_px) & ~7) / 2 };
const int width = (depth < MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth;
if (pred_cu == NULL) pred_cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x_px), SUB_SCU(y_px));
assert(pred_cu->depth == pred_cu->tr_depth && "jccr does not support transform splitting");
if (cost_out == NULL && pred_cu->joint_cb_cr == 0) {
return;
}
double tr_tree_bits = 0;
double joint_cbcr_tr_tree_bits = 0;
double coeff_bits = 0;
double joint_coeff_bits = 0;
assert(lcu_px.x >= 0 && lcu_px.x < LCU_WIDTH_C);
assert(lcu_px.y >= 0 && lcu_px.y < LCU_WIDTH_C);
if (depth == 4 && (x_px % 8 == 0 || y_px % 8 == 0)) {
// For MAX_PU_DEPTH calculate chroma for previous depth for the first
// block and return 0 cost for all others.
return;
}
cabac_data_t* cabac = (cabac_data_t*)&state->search_cabac;
cabac_ctx_t* ctx = &(cabac->ctx.qt_cbf_model_cb[0]);
cabac->cur_ctx = ctx;
int u_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U);
CABAC_FBITS_UPDATE(cabac, ctx, u_is_set, tr_tree_bits, "cbf_cb_search");
ctx = &(cabac->ctx.qt_cbf_model_cr[u_is_set]);
int v_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_V);
CABAC_FBITS_UPDATE(cabac, ctx, v_is_set, tr_tree_bits, "cbf_cr_search");
int cbf_mask = u_is_set * 2 + v_is_set - 1;
if((cbf_mask != -1 && pred_cu->type == CU_INTRA) || cbf_mask == 2)
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.joint_cb_cr[cbf_mask]), 0, tr_tree_bits, "jccr_flag");
if(pred_cu->joint_cb_cr) {
const int u_jccr = (pred_cu->joint_cb_cr >> 1) & 1;
ctx = &(cabac->ctx.qt_cbf_model_cb[0]);
CABAC_FBITS_UPDATE(cabac, ctx, u_jccr, joint_cbcr_tr_tree_bits, "cbf_cb_search");
ctx = &(cabac->ctx.qt_cbf_model_cr[u_jccr]);
CABAC_FBITS_UPDATE(cabac, ctx, pred_cu->joint_cb_cr & 1, joint_cbcr_tr_tree_bits, "cbf_cr_search");
cbf_mask = pred_cu->joint_cb_cr - 1;
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.joint_cb_cr[cbf_mask]), 1, joint_cbcr_tr_tree_bits, "jccr_flag");
}
unsigned ssd = 0;
unsigned joint_ssd = 0;
if (!state->encoder_control->cfg.lossless) {
const int index = lcu_px.y * LCU_WIDTH_C + lcu_px.x;
const unsigned ssd_u = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index],
LCU_WIDTH_C, LCU_WIDTH_C,
width);
const unsigned ssd_v = uvg_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.v[index],
LCU_WIDTH_C, LCU_WIDTH_C,
width);
ssd = ssd_u + ssd_v;
if (pred_cu->joint_cb_cr) {
const unsigned ssd_u_joint = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.joint_u[index],
LCU_WIDTH_C, LCU_WIDTH_C,
width);
const unsigned ssd_v_joint = uvg_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.joint_v[index],
LCU_WIDTH_C, LCU_WIDTH_C,
width);
joint_ssd = ssd_u_joint + ssd_v_joint;
@ -455,34 +680,33 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.u[index], width, 2, scan_order, 0);
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.v[index], width, 2, scan_order, 0);
if(state->encoder_control->cfg.jccr) {
joint_coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.joint_uv[index], width, 2, scan_order, 0);
}
joint_coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.joint_uv[index], width, 2, scan_order, 0);
}
double bits = tr_tree_bits + coeff_bits;
double joint_bits = joint_cbcr_tr_tree_bits + joint_coeff_bits;
double cost = (double)ssd + bits * state->c_lambda;
double joint_cost = (double)joint_ssd + joint_bits * state->c_lambda;
double cost = (double)ssd * UVG_CHROMA_MULT + bits * state->c_lambda;
double joint_cost = (double)joint_ssd * UVG_CHROMA_MULT + joint_bits * state->c_lambda;
if ((cost < joint_cost || !pred_cu->joint_cb_cr) || !state->encoder_control->cfg.jccr) {
pred_cu->joint_cb_cr = 0;
return cost;
if (cost_out) *cost_out += cost;
return;
}
cbf_clear(&pred_cu->cbf, depth, COLOR_U);
cbf_clear(&pred_cu->cbf, depth, COLOR_V);
if (pred_cu->joint_cb_cr & 1) {
if (pred_cu->joint_cb_cr & 2) {
cbf_set(&pred_cu->cbf, depth, COLOR_U);
}
if (pred_cu->joint_cb_cr & 2) {
if (pred_cu->joint_cb_cr & 1) {
cbf_set(&pred_cu->cbf, depth, COLOR_V);
}
int lcu_width = LCU_WIDTH_C;
const int index = lcu_px.x + lcu_px.y * lcu_width;
uvg_pixels_blit(&lcu->rec.joint_u[index], &lcu->rec.u[index], width, width, lcu_width, lcu_width);
uvg_pixels_blit(&lcu->rec.joint_v[index], &lcu->rec.v[index], width, width, lcu_width, lcu_width);
return joint_cost;
if (cost_out) *cost_out += joint_cost;
}
@ -492,23 +716,9 @@ static double calc_mode_bits(const encoder_state_t *state,
const cu_info_t * cur_cu,
int x, int y, int depth)
{
int x_local = SUB_SCU(x);
int y_local = SUB_SCU(y);
assert(cur_cu->type == CU_INTRA);
int8_t candidate_modes[INTRA_MPM_COUNT];
{
const cu_info_t *left_cu = ((x >= SCU_WIDTH) ? LCU_GET_CU_AT_PX(lcu, x_local - SCU_WIDTH, y_local) : NULL);
const cu_info_t *above_cu = ((y >= SCU_WIDTH) ? LCU_GET_CU_AT_PX(lcu, x_local, y_local - SCU_WIDTH) : NULL);
uvg_intra_get_dir_luma_predictor(x, y, candidate_modes, cur_cu, left_cu, above_cu);
}
int width = LCU_WIDTH >> depth;
int height = width; // TODO: height for non-square blocks
int num_mip_modes_half = NUM_MIP_MODES_HALF(width, height);
int mip_flag_ctx_id = uvg_get_mip_flag_context(x, y, width, height, lcu, NULL);
double mode_bits = uvg_luma_mode_bits(state, cur_cu->intra.mode, candidate_modes, cur_cu->intra.multi_ref_idx, num_mip_modes_half, mip_flag_ctx_id);
double mode_bits = uvg_luma_mode_bits(state, cur_cu, x, y, depth, lcu);
if (((depth == 4 && x % 8 && y % 8) || (depth != 4)) && state->encoder_control->chroma_format != UVG_CSP_400) {
mode_bits += uvg_chroma_mode_bits(state, cur_cu->intra.mode_chroma, cur_cu->intra.mode);
@ -518,6 +728,7 @@ static double calc_mode_bits(const encoder_state_t *state,
}
// TODO: replace usages of this by the uvg_sort_indices_by_cost function.
/**
* \brief Sort modes and costs to ascending order according to costs.
*/
@ -567,16 +778,25 @@ void uvg_sort_modes_intra_luma(int8_t *__restrict modes, int8_t *__restrict traf
}
}
static uint8_t get_ctx_cu_split_model(const lcu_t *lcu, int x, int y, int depth)
/**
* \brief Sort keys (indices) to ascending order according to costs.
*/
void uvg_sort_keys_by_cost(unit_stats_map_t *__restrict map)
{
vector2d_t lcu_cu = { SUB_SCU(x), SUB_SCU(y) };
bool condA = x >= 8 && LCU_GET_CU_AT_PX(lcu, lcu_cu.x - 1, lcu_cu.y )->depth > depth;
bool condL = y >= 8 && LCU_GET_CU_AT_PX(lcu, lcu_cu.x, lcu_cu.y - 1)->depth > depth;
return condA + condL;
// Size of sorted arrays is expected to be "small". No need for faster algorithm.
for (uint8_t i = 1; i < map->size; ++i) {
const int8_t cur_indx = map->keys[i];
const double cur_cost = map->cost[cur_indx];
uint8_t j = i;
while (j > 0 && cur_cost < map->cost[map->keys[j - 1]]) {
map->keys[j] = map->keys[j - 1];
--j;
}
map->keys[j] = cur_indx;
}
}
/**
* Search every mode from 0 to MAX_PU_DEPTH and return cost of best mode.
* - The recursion is started at depth 0 and goes in Z-order to MAX_PU_DEPTH.
@ -592,10 +812,12 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
const encoder_control_t* ctrl = state->encoder_control;
const videoframe_t * const frame = state->tile->frame;
int cu_width = LCU_WIDTH >> depth;
double cost = MAX_INT;
double inter_zero_coeff_cost = MAX_INT;
uint32_t inter_bitcost = MAX_INT;
double cost = MAX_DOUBLE;
double inter_zero_coeff_cost = MAX_DOUBLE;
double inter_bitcost = MAX_INT;
cu_info_t *cur_cu;
cabac_data_t pre_search_cabac;
memcpy(&pre_search_cabac, &state->search_cabac, sizeof(pre_search_cabac));
const uint32_t ctu_row = (y >> LOG2_LCU_WIDTH);
const uint32_t ctu_row_mul_five = ctu_row * MAX_NUM_HMVP_CANDS;
@ -626,7 +848,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
// Assign correct depth limit
constraint_t* constr = state->constraint;
if(constr->ml_intra_depth_ctu) {
if(constr->ml_intra_depth_ctu) {
pu_depth_intra.min = constr->ml_intra_depth_ctu->_mat_upper_depth[(x_local >> 3) + (y_local >> 3) * 8];
pu_depth_intra.max = constr->ml_intra_depth_ctu->_mat_lower_depth[(x_local >> 3) + (y_local >> 3) * 8];
}
@ -670,7 +892,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
if (can_use_inter) {
double mode_cost;
uint32_t mode_bitcost;
double mode_bitcost;
uvg_search_cu_inter(state,
x, y,
depth,
@ -693,33 +915,34 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
int32_t cu_width_intra_min = LCU_WIDTH >> pu_depth_intra.max;
bool can_use_intra =
WITHIN(depth, pu_depth_intra.min, pu_depth_intra.max) ||
(WITHIN(depth, pu_depth_intra.min, pu_depth_intra.max) ||
// When the split was forced because the CTU is partially outside
// the frame, we permit intra coding even if pu_depth_intra would
// otherwise forbid it.
(x & ~(cu_width_intra_min - 1)) + cu_width_intra_min > frame->width ||
(y & ~(cu_width_intra_min - 1)) + cu_width_intra_min > frame->height;
(y & ~(cu_width_intra_min - 1)) + cu_width_intra_min > frame->height) &&
!(state->encoder_control->cfg.force_inter && state->frame->slicetype != UVG_SLICE_I);
intra_search_data_t intra_search;
if (can_use_intra && !skip_intra) {
int8_t intra_mode;
int8_t intra_trafo;
double intra_cost;
uint8_t multi_ref_index = 0;
bool mip_flag = false;
bool mip_transposed = false;
uvg_search_cu_intra(state, x, y, depth, lcu,
&intra_mode, &intra_trafo, &intra_cost, &multi_ref_index, &mip_flag, &mip_transposed);
if (intra_cost < cost) {
cost = intra_cost;
intra_search.pred_cu = *cur_cu;
intra_search.pred_cu.joint_cb_cr = 4;
uvg_search_cu_intra(state, x, y, depth, &intra_search,
lcu);
#ifdef COMPLETE_PRED_MODE_BITS
// Technically counting these bits would be correct, however counting
// them universally degrades quality so this block is disabled by default
if(state->frame->slicetype != UVG_SLICE_I) {
double pred_mode_type_bits = 0;
CABAC_FBITS_UPDATE(&state->search_cabac, &state->search_cabac.ctx.cu_pred_mode_model, 1, pred_mode_type_bits, "pred_mode_flag");
CABAC_FBITS_UPDATE(&state->search_cabac, &state->search_cabac.ctx.cu_skip_flag_model[uvg_get_skip_context(x, y, lcu, NULL)], 0, pred_mode_type_bits, "skip_flag");
intra_cost += pred_mode_type_bits * state->lambda;
}
#endif
if (intra_search.cost < cost) {
cost = intra_search.cost;
*cur_cu = intra_search.pred_cu;
cur_cu->type = CU_INTRA;
cur_cu->part_size = depth > MAX_DEPTH ? SIZE_NxN : SIZE_2Nx2N;
cur_cu->intra.mode = intra_mode;
cur_cu->intra.multi_ref_idx = multi_ref_index;
cur_cu->intra.mip_flag = mip_flag;
cur_cu->intra.mip_is_transposed = mip_transposed;
//If the CU is not split from 64x64 block, the MTS is disabled for that CU.
cur_cu->tr_idx = (depth > 0) ? intra_trafo : 0;
}
}
@ -727,20 +950,19 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
// mode search of adjacent CUs.
if (cur_cu->type == CU_INTRA) {
assert(cur_cu->part_size == SIZE_2Nx2N || cur_cu->part_size == SIZE_NxN);
cur_cu->intra.mode_chroma = cur_cu->intra.mode;
intra_search.pred_cu.intra.mode_chroma = -1; // don't reconstruct chroma before search is performed for it
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
uvg_intra_recon_cu(state,
x, y,
depth,
cur_cu->intra.mode, -1, // skip chroma
NULL, NULL, cur_cu->intra.multi_ref_idx,
cur_cu->intra.mip_flag, cur_cu->intra.mip_is_transposed,
depth, &intra_search,
NULL,
lcu);
downsample_cclm_rec(
state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64]
);
cur_cu->joint_cb_cr = 0;
// TODO: This heavily relies on square CUs
if ((depth != 4 || (x % 8 && y % 8)) && state->encoder_control->chroma_format != UVG_CSP_400) {
@ -748,19 +970,47 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
// rd2. Possibly because the luma mode search already takes chroma
// into account, so there is less of a chance of luma mode being
// really bad for chroma.
cclm_parameters_t cclm_params[2];
intra_search.pred_cu.intra.mode_chroma = cur_cu->intra.mode_chroma; // skip luma
if (ctrl->cfg.rdo >= 3 && !cur_cu->intra.mip_flag) {
cur_cu->intra.mode_chroma = uvg_search_cu_intra_chroma(state, x, y, depth, lcu, cclm_params);
cur_cu->intra.mode_chroma = uvg_search_cu_intra_chroma(state, x, y, depth, lcu, &intra_search);
if (intra_search.pred_cu.joint_cb_cr == 0) intra_search.pred_cu.joint_cb_cr = 4;
else cur_cu->joint_cb_cr = intra_search.pred_cu.joint_cb_cr;
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
}
intra_search.pred_cu.intra.mode = -1; // skip luma
uvg_intra_recon_cu(state,
x & ~7, y & ~7, // TODO: as does this
depth,
-1, cur_cu->intra.mode_chroma, // skip luma
NULL, cclm_params, 0,
cur_cu->intra.mip_flag, cur_cu->intra.mip_is_transposed,
x, y, // TODO: as does this
depth, &intra_search,
NULL,
lcu);
if(depth != 0 && state->encoder_control->cfg.jccr && ctrl->cfg.rdo < 3) {
uvg_select_jccr_mode(state,
x, y,
depth,
NULL,
lcu,
NULL);
}
else if(depth != 0 && state->encoder_control->cfg.jccr && cur_cu->joint_cb_cr & 3) {
assert(cur_cu->joint_cb_cr < 4);
cbf_clear(&cur_cu->cbf, depth, COLOR_U);
cbf_clear(&cur_cu->cbf, depth, COLOR_V);
if (cur_cu->joint_cb_cr & 2) {
cbf_set(&cur_cu->cbf, depth, COLOR_U);
}
if (cur_cu->joint_cb_cr & 1) {
cbf_set(&cur_cu->cbf, depth, COLOR_V);
}
const vector2d_t lcu_px = { (x_local & ~7) / 2, (y_local & ~7) / 2 };
int lcu_width = LCU_WIDTH_C;
const int index = lcu_px.x + lcu_px.y * lcu_width;
const int width = (depth < MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth;
uvg_pixels_blit(&lcu->rec.joint_u[index], &lcu->rec.u[index], width, width, lcu_width, lcu_width);
uvg_pixels_blit(&lcu->rec.joint_v[index], &lcu->rec.v[index], width, width, lcu_width, lcu_width);
}
}
} else if (cur_cu->type == CU_INTER) {
@ -788,11 +1038,20 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
}
uvg_quantize_lcu_residual(state,
true, has_chroma,
x, y, depth,
NULL,
lcu,
false);
true, has_chroma,
state->encoder_control->cfg.jccr, x, y,
depth,
NULL,
lcu,
false);
if (cur_cu->depth == cur_cu->tr_depth && state->encoder_control->cfg.jccr && cur_cu->joint_cb_cr) {
uvg_select_jccr_mode(state,
x, y,
depth,
NULL,
lcu,
NULL);
}
int cbf = cbf_is_set_any(cur_cu->cbf, depth);
@ -800,9 +1059,10 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
cur_cu->merged = 0;
cur_cu->skipped = 1;
// Selecting skip reduces bits needed to code the CU
if (inter_bitcost > 1) {
inter_bitcost -= 1;
}
int skip_ctx = uvg_get_skip_context(x, y, lcu, NULL, NULL);
inter_bitcost = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_skip_flag_model[skip_ctx], 1);
inter_bitcost += CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.cu_merge_idx_ext_model), cur_cu->merge_idx != 0);
inter_bitcost += cur_cu->merge_idx;
}
}
lcu_fill_inter(lcu, x_local, y_local, cu_width);
@ -811,19 +1071,25 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
}
if (cur_cu->type == CU_INTRA || cur_cu->type == CU_INTER) {
cost = uvg_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, lcu);
if (state->encoder_control->chroma_format != UVG_CSP_400) {
cost += uvg_cu_rd_cost_chroma(state, x_local, y_local, depth, cur_cu, lcu);
double bits = 0;
cabac_data_t* cabac = &state->search_cabac;
cabac->update = 1;
if(cur_cu->type != CU_INTRA || cur_cu->part_size == SIZE_2Nx2N) {
bits += uvg_mock_encode_coding_unit(
state,
cabac,
x, y, depth,
lcu,
cur_cu);
}
else {
assert(0);
}
double mode_bits;
if (cur_cu->type == CU_INTRA) {
mode_bits = calc_mode_bits(state, lcu, cur_cu, x, y, depth);
} else {
mode_bits = inter_bitcost;
}
cost = bits * state->lambda;
cost += mode_bits * state->lambda;
cost += cu_rd_cost_tr_split_accurate(state, x_local, y_local, depth, cur_cu, lcu);
if (ctrl->cfg.zero_coeff_rdo && inter_zero_coeff_cost <= cost) {
cost = inter_zero_coeff_cost;
@ -846,13 +1112,14 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
cur_cu->cbf = 0;
lcu_fill_cbf(lcu, x_local, y_local, cu_width, cur_cu);
}
cabac->update = 0;
}
bool can_split_cu =
// If the CU is partially outside the frame, we need to split it even
// if pu_depth_intra and pu_depth_inter would not permit it.
cur_cu->type == CU_NOTSET ||
depth < pu_depth_intra.max ||
(depth < pu_depth_intra.max && !(state->encoder_control->cfg.force_inter&& state->frame->slicetype != UVG_SLICE_I)) ||
(state->frame->slicetype != UVG_SLICE_I &&
depth < pu_depth_inter.max);
@ -861,21 +1128,23 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
int half_cu = cu_width / 2;
double split_cost = 0.0;
int cbf = cbf_is_set_any(cur_cu->cbf, depth);
cabac_data_t post_seach_cabac;
memcpy(&post_seach_cabac, &state->search_cabac, sizeof(post_seach_cabac));
memcpy(&state->search_cabac, &pre_search_cabac, sizeof(post_seach_cabac));
state->search_cabac.update = 1;
double split_bits = 0;
if (depth < MAX_DEPTH) {
// Add cost of cu_split_flag.
uint8_t split_model = get_ctx_cu_split_model(lcu, x, y, depth);
const cabac_ctx_t *ctx = &(state->cabac.ctx.split_flag_model[split_model]);
cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda;
split_cost += CTX_ENTROPY_FBITS(ctx, 1) * state->lambda;
uvg_write_split_flag(state, &state->search_cabac,
x > 0 ? LCU_GET_CU_AT_PX(lcu,SUB_SCU(x) -1, SUB_SCU(y)): NULL,
y > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y) - 1) : NULL,
1, depth, cu_width, x, y, &split_bits);
}
if (cur_cu->type == CU_INTRA && depth == MAX_DEPTH) {
// Add cost of intra part_size.
const cabac_ctx_t *ctx = &(state->cabac.ctx.part_size_model[0]);
cost += CTX_ENTROPY_FBITS(ctx, 1) * state->lambda; // 2Nx2N
split_cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda; // NxN
}
state->search_cabac.update = 0;
split_cost += split_bits * state->lambda;
// If skip mode was selected for the block, skip further search.
// Skip mode means there's no coefficients in the block, so splitting
@ -897,13 +1166,23 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
// searching.
if (cur_cu->type == CU_NOTSET && depth < MAX_PU_DEPTH
&& x + cu_width <= frame->width && y + cu_width <= frame->height && 0)
&& x + cu_width <= frame->width && y + cu_width <= frame->height
&& state->encoder_control->cfg.combine_intra_cus)
{
cu_info_t *cu_d1 = LCU_GET_CU_AT_PX(&work_tree[depth + 1], x_local, y_local);
// If the best CU in depth+1 is intra and the biggest it can be, try it.
if (cu_d1->type == CU_INTRA && cu_d1->depth == depth + 1) {
cabac_data_t temp_cabac;
memcpy(&temp_cabac, &state->search_cabac, sizeof(temp_cabac));
memcpy(&state->search_cabac, &pre_search_cabac, sizeof(pre_search_cabac));
cost = 0;
double bits = 0;
uvg_write_split_flag(state, &state->search_cabac,
x > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x) - 1, SUB_SCU(y)) : NULL,
y > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y) - 1) : NULL,
0, depth, cu_width, x, y, & split_bits);
cur_cu->intra = cu_d1->intra;
cur_cu->type = CU_INTRA;
@ -915,28 +1194,24 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
uvg_lcu_fill_trdepth(lcu, x, y, depth, cur_cu->tr_depth);
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
const bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400;
const int8_t mode_chroma = has_chroma ? cur_cu->intra.mode_chroma : -1;
intra_search_data_t proxy;
FILL(proxy, 0);
proxy.pred_cu = *cur_cu;
uvg_intra_recon_cu(state,
x, y,
depth,
cur_cu->intra.mode, mode_chroma,
NULL,NULL, 0, cur_cu->intra.mip_flag, cur_cu->intra.mip_is_transposed,
&proxy,
NULL,
lcu);
cost += uvg_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, lcu);
if (has_chroma) {
cost += uvg_cu_rd_cost_chroma(state, x_local, y_local, depth, cur_cu, lcu);
}
// Add the cost of coding no-split.
uint8_t split_model = get_ctx_cu_split_model(lcu, x, y, depth);
const cabac_ctx_t *ctx = &(state->cabac.ctx.split_flag_model[split_model]);
cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda;
// Add the cost of coding intra mode only once.
double mode_bits = calc_mode_bits(state, lcu, cur_cu, x, y, depth);
double mode_bits = calc_mode_bits(state, lcu, cur_cu, x, y, depth) + bits;
cost += mode_bits * state->lambda;
cost += cu_rd_cost_tr_split_accurate(state, x_local, y_local, depth, cur_cu, lcu);
memcpy(&post_seach_cabac, &state->search_cabac, sizeof(post_seach_cabac));
memcpy(&state->search_cabac, &temp_cabac, sizeof(temp_cabac));
}
}
@ -950,6 +1225,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
} else if (depth > 0) {
// Copy this CU's mode all the way down for use in adjacent CUs mode
// search.
memcpy(&state->search_cabac, &post_seach_cabac, sizeof(post_seach_cabac));
work_tree_copy_down(x_local, y_local, depth, work_tree);
downsample_cclm_rec(
state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64]
@ -962,6 +1238,11 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
uvg_hmvp_add_mv(state, x, y, cu_width, cu_width, cur_cu);
}
}
else {
downsample_cclm_rec(
state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64]
);
}
} else if (depth >= 0 && depth < MAX_PU_DEPTH) {
// Need to copy modes down since the lower level of the work tree is used
// when searching SMP and AMP blocks.
@ -1139,6 +1420,8 @@ static void copy_lcu_to_cu_data(const encoder_state_t * const state, int x_px, i
*/
void uvg_search_lcu(encoder_state_t * const state, const int x, const int y, const yuv_t * const hor_buf, const yuv_t * const ver_buf, lcu_coeff_t *coeff)
{
memcpy(&state->search_cabac, &state->cabac, sizeof(cabac_data_t));
state->search_cabac.only_count = 1;
assert(x % LCU_WIDTH == 0);
assert(y % LCU_WIDTH == 0);

View file

@ -44,22 +44,62 @@
#include "image.h"
#include "constraint.h"
#define NUM_MIP_MODES_FULL(width, height) ((width) == 4 && (height) == 4) ? 32 : ((width) == 4 || (height) == 4 || ((width) == 8 && (height) == 8) ? 16 : 12)
#define NUM_MIP_MODES_HALF(width, height) NUM_MIP_MODES_FULL((width), (height)) >> 1
#define MAX_UNIT_STATS_MAP_SIZE MAX(MAX_REF_PIC_COUNT, MRG_MAX_NUM_CANDS)
// Modify weight of luma SSD.
#ifndef UVG_LUMA_MULT
#define UVG_LUMA_MULT 0.8
#endif
// Modify weight of chroma SSD.
#ifndef UVG_CHROMA_MULT
#define UVG_CHROMA_MULT 1.5
#endif
/**
 * \brief Data collected during search processes.
 *
 * Collects statistics of the searched coding/prediction units in parallel
 * arrays: unit[i], cost[i] and bits[i] all describe the same searched unit.
 * Each array has room for MAX_UNIT_STATS_MAP_SIZE entries, of which "size"
 * are active.
 *
 * The data arrays should be accessed through the "keys" array: every key is
 * an index into unit/cost/bits, and the keys are the part that gets sorted
 * by the RD costs of the units (presumably by uvg_sort_keys_by_cost(), which
 * takes this struct; confirm against its definition).
 */
typedef struct unit_stats_map_t {
cu_info_t unit[MAX_UNIT_STATS_MAP_SIZE]; //!< list of searched units
double cost[MAX_UNIT_STATS_MAP_SIZE]; //!< list of matching RD costs, cost[i] belongs to unit[i]
double bits[MAX_UNIT_STATS_MAP_SIZE]; //!< list of matching bit costs, bits[i] belongs to unit[i]
int8_t keys[MAX_UNIT_STATS_MAP_SIZE]; //!< list of keys (indices) to elements in the other arrays
int size; //!< number of active elements in the lists
} unit_stats_map_t;
#define NUM_MIP_MODES_FULL(width, height) (((width) == 4 && (height) == 4) ? 32 : ((width) == 4 || (height) == 4 || ((width) == 8 && (height) == 8) ? 16 : 12))
#define NUM_MIP_MODES_HALF(width, height) (NUM_MIP_MODES_FULL((width), (height)) >> 1)
void uvg_sort_modes(int8_t *__restrict modes, double *__restrict costs, uint8_t length);
void uvg_sort_modes_intra_luma(int8_t *__restrict modes, int8_t *__restrict trafo, double *__restrict costs, uint8_t length);
void uvg_sort_keys_by_cost(unit_stats_map_t *__restrict map);
void uvg_search_lcu(encoder_state_t *state, int x, int y, const yuv_t *hor_buf, const yuv_t *ver_buf, lcu_coeff_t *coeff);
double uvg_cu_rd_cost_luma(const encoder_state_t *const state,
const int x_px, const int y_px, const int depth,
const cu_info_t *const pred_cu,
lcu_t *const lcu);
const int x_px, const int y_px, const int depth,
const cu_info_t *const pred_cu,
lcu_t *const lcu);
double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
const int x_px, const int y_px, const int depth,
cu_info_t * pred_cu,
lcu_t *const lcu);
const int x_px, const int y_px, const int depth,
cu_info_t *const pred_cu,
lcu_t *const lcu);
/**
 * \brief Presumably selects the joint Cb-Cr (JCCR) mode of a CU.
 *
 * NOTE(review): the definition is not visible from this header; the purpose
 * above is inferred from the name and must be confirmed against it. What the
 * call sites in search_cu show: the function is only called when cfg.jccr is
 * enabled, after the chroma reconstruction (intra) or after
 * uvg_quantize_lcu_residual (inter), and both pred_cu and cost_out are
 * passed as NULL there.
 *
 * \param state     encoder state
 * \param x_px      x-coordinate of the CU, search_cu passes its own x
 * \param y_px      y-coordinate of the CU, search_cu passes its own y
 * \param depth     depth of the CU, search_cu passes its own depth
 * \param pred_cu   NULL at the call sites in search_cu; presumably the CU is
 *                  then looked up from lcu (TODO confirm)
 * \param lcu       LCU that is being searched
 * \param cost_out  NULL at the call sites in search_cu; presumably an
 *                  optional output for the resulting cost (TODO confirm)
 */
void uvg_select_jccr_mode(
const encoder_state_t* const state,
const int x_px,
const int y_px,
const int depth,
cu_info_t* const pred_cu,
lcu_t* const lcu,
double* cost_out);
void uvg_lcu_fill_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, int tr_depth);
void uvg_intra_recon_lcu_luma(encoder_state_t * const state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu);

File diff suppressed because it is too large Load diff

View file

@ -64,20 +64,34 @@ enum hpel_position {
HPEL_POS_DIA = 2
};
typedef uint32_t uvg_mvd_cost_func(const encoder_state_t *state,
typedef double uvg_mvd_cost_func(const encoder_state_t *state,
int x, int y,
int mv_shift,
mv_t mv_cand[2][2],
inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
int16_t num_cand,
int32_t ref_idx,
uint32_t *bitcost);
double *bitcost);
void uvg_search_cu_inter(encoder_state_t * const state,
int x, int y, int depth,
lcu_t *lcu,
double *inter_cost,
uint32_t *inter_bitcost);
double* inter_bitcost);
unsigned uvg_inter_satd_cost(const encoder_state_t* state,
const lcu_t *lcu,
int x,
int y);
void uvg_cu_cost_inter_rd2(encoder_state_t* const state,
int x, int y, int depth,
cu_info_t* cur_cu,
lcu_t* lcu,
double* inter_cost,
double* inter_bitcost);
/**
 * \brief Get the context index for the CU skip flag at (x, y).
 *
 * search_cu uses the return value to index ctx.cu_skip_flag_model when it
 * estimates the bit cost of signalling a skipped CU. It calls this function
 * with an lcu and with NULL for both cu_a and predmode_ctx.
 *
 * NOTE(review): presumably the neighbouring CUs are read from either lcu or
 * cu_a, and predmode_ctx is an optional output; the definition is not
 * visible here, so confirm before relying on it.
 */
int uvg_get_skip_context(int x, int y, lcu_t* const lcu, cu_array_t* const cu_a, int* predmode_ctx);
#endif // SEARCH_INTER_H_

File diff suppressed because it is too large Load diff

View file

@ -43,24 +43,21 @@
#include "global.h" // IWYU pragma: keep
#include "intra.h"
double uvg_luma_mode_bits(const encoder_state_t *state,
int8_t luma_mode, const int8_t *intra_preds, uint8_t multi_ref_idx, const uint8_t num_mip_modes, int mip_flag_ctx_id);
double uvg_luma_mode_bits(const encoder_state_t *state, const cu_info_t* const cur_cu, int x, int y, int8_t depth, const lcu_t* lcu);
double uvg_chroma_mode_bits(const encoder_state_t *state,
int8_t chroma_mode, int8_t luma_mode);
int8_t uvg_search_cu_intra_chroma(encoder_state_t * const state,
const int x_px, const int y_px,
const int depth, lcu_t *lcu, cclm_parameters_t* best_cclm);
const int depth, lcu_t *lcu, intra_search_data_t* best_cclm);
void uvg_search_cu_intra(encoder_state_t * const state,
const int x_px, const int y_px,
const int depth, lcu_t *lcu,
int8_t *mode_out,
int8_t *trafo_out,
double *cost_out,
uint8_t *multi_ref_idx_out,
bool *mip_flag,
bool *mip_transp);
void uvg_search_cu_intra(
encoder_state_t * const state,
const int x_px,
const int y_px,
const int depth,
intra_search_data_t* search_data,
lcu_t *lcu);
#endif // SEARCH_INTRA_H_

View file

@ -225,39 +225,40 @@ int uvg_quant_cbcr_residual_generic(
int64_t best_cost = INT64_MAX;
// This changes the order of the cbf_masks so 2 and 3 are swapped compared with VTM
for(int cbf_mask = cur_cu->type == CU_INTRA ? 1 : 3; cbf_mask < 4; cbf_mask++) {
for(int i = cur_cu->type == CU_INTRA ? 1 : 3; i < 4; i++) {
int64_t d1 = 0;
const int cbf_mask = i * (state->frame->jccr_sign ? -1 : 1);
for (int y = 0; y < width; y++)
{
for (int x = 0; x < width; x++)
{
int cbx = u_residual[x + y * width], crx = v_residual[x + y * width];
if (cbf_mask == 1)
if (cbf_mask == 2)
{
u1_residual[cbf_mask / 2][x + y * width] = ((4 * cbx + 2 * crx) / 5);
d1 += square(cbx - u1_residual[cbf_mask / 2][x + y * width]) + square(crx - (u1_residual[cbf_mask / 2][x + y * width] >> 1));
u1_residual[i - 2][x + y * width] = ((4 * cbx + 2 * crx) / 5);
d1 += square(cbx - u1_residual[i - 2][x + y * width]) + square(crx - (u1_residual[i - 2][x + y * width] >> 1));
}
else if (cbf_mask == -1)
else if (cbf_mask == -2)
{
u1_residual[cbf_mask / 2][x + y * width] = ((4 * cbx - 2 * crx) / 5);
d1 += square(cbx - u1_residual[cbf_mask / 2][x + y * width]) + square(crx - (-u1_residual[cbf_mask / 2][x + y * width] >> 1));
u1_residual[i - 2][x + y * width] = ((4 * cbx - 2 * crx) / 5);
d1 += square(cbx - u1_residual[i - 2][x + y * width]) + square(crx - (-u1_residual[i - 2][x + y * width] >> 1));
}
else if (cbf_mask == 3)
{
u1_residual[cbf_mask / 2][x + y * width] = ((cbx + crx) / 2);
d1 += square(cbx - u1_residual[cbf_mask / 2][x + y * width]) + square(crx - u1_residual[cbf_mask / 2][x + y * width]);
u1_residual[i - 2][x + y * width] = ((cbx + crx) / 2);
d1 += square(cbx - u1_residual[i - 2][x + y * width]) + square(crx - u1_residual[i - 2][x + y * width]);
}
else if (cbf_mask == -3)
{
u1_residual[cbf_mask / 2][x + y * width] = ((cbx - crx) / 2);
d1 += square(cbx - u1_residual[cbf_mask / 2][x + y * width]) + square(crx + u1_residual[cbf_mask / 2][x + y * width]);
u1_residual[i - 2][x + y * width] = ((cbx - crx) / 2);
d1 += square(cbx - u1_residual[i - 2][x + y * width]) + square(crx + u1_residual[i - 2][x + y * width]);
}
else if (cbf_mask == 2)
else if (cbf_mask == 1)
{
v1_residual[x + y * width] = ((4 * crx + 2 * cbx) / 5);
d1 += square(cbx - (v1_residual[x + y * width] >> 1)) + square(crx - v1_residual[x + y * width]);
}
else if (cbf_mask == -2)
else if (cbf_mask == -1)
{
v1_residual[x + y * width] = ((4 * crx - 2 * cbx) / 5);
d1 += square(cbx - (-v1_residual[x + y * width] >> 1)) + square(crx - v1_residual[x + y * width]);
@ -270,19 +271,19 @@ int uvg_quant_cbcr_residual_generic(
}
}
if (d1 < best_cost) {
best_cbf_mask = cbf_mask;
best_cbf_mask = i;
best_cost = d1;
}
}
uvg_transform2d(state->encoder_control, best_cbf_mask == 2 ? v1_residual : u1_residual[best_cbf_mask / 2], coeff, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U, cur_cu);
uvg_transform2d(state->encoder_control, best_cbf_mask == 1 ? v1_residual : u1_residual[best_cbf_mask - 2], coeff, width, best_cbf_mask == 1 ? COLOR_V : COLOR_U, cur_cu);
if (state->encoder_control->cfg.rdoq_enable &&
(width > 4 || !state->encoder_control->cfg.rdoq_skip))
{
int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth;
tr_depth += (cur_cu->part_size == SIZE_NxN ? 1 : 0);
uvg_rdoq(state, coeff, coeff_out, width, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U,
uvg_rdoq(state, coeff, coeff_out, width, width, best_cbf_mask == 1 ? COLOR_V : COLOR_U,
scan_order, cur_cu->type, tr_depth, cur_cu->cbf);
}
else if (state->encoder_control->cfg.rdoq_enable && false) {
@ -290,7 +291,7 @@ int uvg_quant_cbcr_residual_generic(
scan_order);
}
else {
uvg_quant(state, coeff, coeff_out, width, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U,
uvg_quant(state, coeff, coeff_out, width, width, best_cbf_mask == 1 ? COLOR_V : COLOR_U,
scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false);
}
@ -309,10 +310,10 @@ int uvg_quant_cbcr_residual_generic(
int y, x;
// Get quantized residual. (coeff_out -> coeff -> residual)
uvg_dequant(state, coeff_out, coeff, width, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U,
uvg_dequant(state, coeff_out, coeff, width, width, best_cbf_mask == 1 ? COLOR_V : COLOR_U,
cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false);
uvg_itransform2d(state->encoder_control, best_cbf_mask == 2 ? v1_residual : u1_residual[best_cbf_mask / 2], coeff, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U, cur_cu);
uvg_itransform2d(state->encoder_control, best_cbf_mask == 1 ? v1_residual : u1_residual[best_cbf_mask - 2], coeff, width, best_cbf_mask == 1 ? COLOR_V : COLOR_U, cur_cu);
//if (state->tile->frame->lmcs_aps->m_sliceReshapeInfo.enableChromaAdj && color != COLOR_Y) {
@ -333,32 +334,32 @@ int uvg_quant_cbcr_residual_generic(
// }
// }
//}
const int temp = best_cbf_mask * (state->frame->jccr_sign ? -1 : 1);
// Get quantized reconstruction. (residual + pred_in -> rec_out)
for (int y = 0; y < width; y++) {
for (int x = 0; x < width; x++) {
if (best_cbf_mask == 1) {
u_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width];
v_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width] >> 1;
if (temp == 2) {
u_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width];
v_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width] >> 1;
}
else if (best_cbf_mask == -1) {
u_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width];
v_residual[x + y * width] = -u1_residual[best_cbf_mask / 2][x + y * width] >> 1;
else if (temp == -2) {
u_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width];
v_residual[x + y * width] = -u1_residual[best_cbf_mask - 2][x + y * width] >> 1;
}
else if (best_cbf_mask == 3) {
u_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width];
v_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width];
else if (temp == 3) {
u_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width];
v_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width];
}
else if (best_cbf_mask == -3) {
else if (temp == -3) {
// non-normative clipping to prevent 16-bit overflow
u_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width]; // == -32768 && sizeof(Pel) == 2) ? 32767 : -v1_residual[best_cbf_mask][x];
v_residual[x + y * width] = -u1_residual[best_cbf_mask / 2][x + y * width];
u_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width]; // == -32768 && sizeof(Pel) == 2) ? 32767 : -v1_residual[best_cbf_mask][x];
v_residual[x + y * width] = -u1_residual[best_cbf_mask - 2][x + y * width];
}
else if (best_cbf_mask == 2) {
else if (temp == 1) {
u_residual[x + y * width] = v1_residual[x + y * width] >> 1;
v_residual[x + y * width] = v1_residual[x + y * width];
}
else if (best_cbf_mask == -2) {
else if (temp == -1) {
u_residual[x + y * width] = v1_residual[x + y * width] >> 1;
v_residual[x + y * width] = -v1_residual[x + y * width];
}

View file

@ -260,12 +260,10 @@ int uvg_quantize_residual_trskip(
struct {
uvg_pixel rec[LCU_WIDTH * LCU_WIDTH];
coeff_t coeff[LCU_WIDTH * LCU_WIDTH];
uint32_t cost;
double cost;
int has_coeffs;
} skip, *best;
const int bit_cost = (int)(state->lambda + 0.5);
//noskip.has_coeffs = uvg_quantize_residual(
// state, cur_cu, width, color, scan_order,
// 0, in_stride, 4,
@ -278,7 +276,7 @@ int uvg_quantize_residual_trskip(
1, in_stride, width,
ref_in, pred_in, skip.rec, skip.coeff, false, lmcs_chroma_adj);
skip.cost = uvg_pixels_calc_ssd(ref_in, skip.rec, in_stride, width, width);
skip.cost += uvg_get_coeff_cost(state, skip.coeff, width, 0, scan_order, 1) * bit_cost;
skip.cost += uvg_get_coeff_cost(state, skip.coeff, width, 0, scan_order, 1) * state->frame->lambda;
/* if (noskip.cost <= skip.cost) {
*trskip_out = 0;
@ -481,15 +479,17 @@ static void quantize_tr_residual(encoder_state_t * const state,
* - lcu->cbf coded block flags for the area
* - lcu->cu.intra.tr_skip tr skip flags for the area (in case of luma)
*/
void uvg_quantize_lcu_residual(encoder_state_t * const state,
const bool luma,
const bool chroma,
const int32_t x,
const int32_t y,
const uint8_t depth,
cu_info_t *cur_pu,
lcu_t* lcu,
bool early_skip)
void uvg_quantize_lcu_residual(
encoder_state_t * const state,
const bool luma,
const bool chroma,
const bool jccr,
const int32_t x,
const int32_t y,
const uint8_t depth,
cu_info_t *cur_pu,
lcu_t* lcu,
bool early_skip)
{
const int32_t width = LCU_WIDTH >> depth;
const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) };
@ -511,7 +511,7 @@ void uvg_quantize_lcu_residual(encoder_state_t * const state,
if (luma) {
cbf_clear(&cur_pu->cbf, depth, COLOR_Y);
}
if (chroma) {
if (chroma || jccr) {
cbf_clear(&cur_pu->cbf, depth, COLOR_U);
cbf_clear(&cur_pu->cbf, depth, COLOR_V);
}
@ -523,10 +523,11 @@ void uvg_quantize_lcu_residual(encoder_state_t * const state,
const int32_t x2 = x + offset;
const int32_t y2 = y + offset;
uvg_quantize_lcu_residual(state, luma, chroma, x, y, depth + 1, NULL, lcu, early_skip);
uvg_quantize_lcu_residual(state, luma, chroma, x2, y, depth + 1, NULL, lcu, early_skip);
uvg_quantize_lcu_residual(state, luma, chroma, x, y2, depth + 1, NULL, lcu, early_skip);
uvg_quantize_lcu_residual(state, luma, chroma, x2, y2, depth + 1, NULL, lcu, early_skip);
// jccr is currently not supported if transform is split
uvg_quantize_lcu_residual(state, luma, chroma, 0, x, y, depth + 1, NULL, lcu, early_skip);
uvg_quantize_lcu_residual(state, luma, chroma, 0, x2, y, depth + 1, NULL, lcu, early_skip);
uvg_quantize_lcu_residual(state, luma, chroma, 0, x, y2, depth + 1, NULL, lcu, early_skip);
uvg_quantize_lcu_residual(state, luma, chroma, 0, x2, y2, depth + 1, NULL, lcu, early_skip);
// Propagate coded block flags from child CUs to parent CU.
uint16_t child_cbfs[3] = {
@ -549,9 +550,9 @@ void uvg_quantize_lcu_residual(encoder_state_t * const state,
if (chroma) {
quantize_tr_residual(state, COLOR_U, x, y, depth, cur_pu, lcu, early_skip);
quantize_tr_residual(state, COLOR_V, x, y, depth, cur_pu, lcu, early_skip);
if(state->encoder_control->cfg.jccr && cur_pu->tr_depth == cur_pu->depth){
quantize_tr_residual(state, COLOR_UV, x, y, depth, cur_pu, lcu, early_skip);
}
}
if (jccr && cur_pu->tr_depth == cur_pu->depth) {
quantize_tr_residual(state, COLOR_UV, x, y, depth, cur_pu, lcu, early_skip);
}
}
}

View file

@ -67,14 +67,16 @@ void uvg_itransform2d(const encoder_control_t * const encoder,
int32_t uvg_get_scaled_qp(color_t color, int8_t qp, int8_t qp_offset, int8_t const* const chroma_scale);
void uvg_quantize_lcu_residual(encoder_state_t *state,
bool luma,
bool chroma,
int32_t x,
int32_t y,
uint8_t depth,
cu_info_t *cur_cu,
lcu_t* lcu,
bool early_skip);
/**
 * \brief Quantize the residual of a block of the LCU, one transform block
 *        at a time.
 *
 * The block is LCU_WIDTH >> depth pixels wide. When the transform is split,
 * the function calls itself for the four quadrants at depth + 1 and
 * propagates the coded block flags of the children back to the parent.
 * The definition documents lcu->cbf and, for luma, the tr_skip flags of the
 * area as outputs.
 *
 * \param state       encoder state
 * \param luma        process the luma component
 * \param chroma      process the chroma components (U and V)
 * \param jccr        also quantize the joint Cb-Cr residual (COLOR_UV).
 *                    This is only done for a block whose tr_depth equals its
 *                    depth; the flag is passed as 0 to the sub-blocks because
 *                    JCCR is currently not supported if the transform is
 *                    split.
 * \param x           x-coordinate of the block, reduced to an LCU-local
 *                    position with SUB_SCU() in the definition
 * \param y           y-coordinate of the block, same convention as x
 * \param depth       depth of the block
 * \param cur_cu      CU to process. The call sites in search_cu and the
 *                    recursive calls pass NULL; presumably the CU is then
 *                    looked up from lcu (TODO confirm against the definition)
 * \param lcu         LCU containing the block
 * \param early_skip  forwarded unchanged to the per-component quantization
 */
void uvg_quantize_lcu_residual(
  encoder_state_t *state,
  bool luma,
  bool chroma,
  bool jccr,
  int32_t x,
  int32_t y,
  uint8_t depth,
  cu_info_t *cur_cu,
  lcu_t* lcu,
  bool early_skip);
#endif

View file

@ -267,6 +267,12 @@ enum uvg_amvr_resolution
UVG_IMV_HPEL = 3
};
/**
 * \brief Format of the ROI (delta QP) file set in uvg_config.roi.file_path.
 */
enum uvg_roi_format
{
UVG_ROI_TXT = 0, //!< text file
UVG_ROI_BIN = 1 //!< binary file
};
// Map from input format to chroma format.
#define UVG_FORMAT2CSP(format) ((enum uvg_chroma_format)format)
@ -408,10 +414,9 @@ typedef struct uvg_config
int32_t implicit_rdpcm; /*!< \brief Enable implicit residual DPCM. */
struct {
int32_t width;
int32_t height;
int8_t *dqps;
} roi; /*!< \since 3.14.0 \brief Map of delta QPs for region of interest coding. */
char *file_path;
enum uvg_roi_format format;
} roi; /*!< \brief Specify delta QPs for region of interest coding. */
unsigned slices; /*!< \since 3.15.0 \brief How to map slices to frame. */
@ -524,6 +529,12 @@ typedef struct uvg_config
int8_t cclm;
int8_t amvr; /* \brief Adaptive motion vector resolution parameter */
/** \brief Whether to try combining intra CUs at the lower depth when search
* is not performed at said depth. */
uint8_t combine_intra_cus;
uint8_t force_inter;
} uvg_config;
/**
@ -555,6 +566,14 @@ typedef struct uvg_picture {
enum uvg_chroma_format chroma_format;
int32_t ref_pocs[16];
struct
{
int width;
int height;
int8_t *roi_array;
} roi;
} uvg_picture;
/**
@ -781,6 +800,9 @@ typedef struct uvg_api {
* original frame and frame info in data_out, len_out, pic_out, src_out and
* info_out, respectively. Otherwise, set the output parameters to NULL.
*
* Region of interest (ROI) / delta QP map can be specified in the input
* picture's ROI field but only when a ROI file is not used.
*
* After passing all of the input frames, the caller should keep calling this
* function with pic_in set to NULL, until no more data is returned in the
* output parameters.

View file

@ -3,6 +3,6 @@
set -eu
. "${0%/*}/util.sh"
valgrind_test 512x256 10 yuv420p --threads=2 --owf=1 --preset=ultrafast --tiles=2x2
valgrind_test 512x256 10 yuv420p --threads=2 --owf=1 --preset=ultrafast --gop 0 --tiles=2x2
#valgrind_test 264x130 10 --threads=2 --owf=1 --preset=ultrafast --slices=wpp
#if [ ! -z ${GITLAB_CI+x} ];then valgrind_test 264x130 20 --threads=2 --owf=1 --preset=fast --slices=wpp --no-open-gop; fi