WIP: Merge remote-tracking branch 'remotes/kvz_github/master' into update-cabac-during-search

This commit is contained in:
Joose Sainio 2022-03-21 08:42:41 +02:00
commit 0be443d309
33 changed files with 1885 additions and 1019 deletions

View file

@ -150,11 +150,20 @@ Video structure:
- frametile: Constrain within the tile.
- frametilemargin: Constrain even more.
--roi <filename> : Use a delta QP map for region of interest.
Reads an array of delta QP values from a text
file. The file format is: width and height of
the QP delta map followed by width*height delta
QP values in raster order. The map can be of any
size and will be scaled to the video size.
Reads an array of delta QP values from a file.
Text and binary files are supported and detected
from the file extension (.txt/.bin). If a known
extension is not found, the file is treated as
a text file. The file can include one or many
ROI frames each in the following format:
width and height of the QP delta map followed
by width * height delta QP values in raster
order. In binary format, width and height are
32-bit integers whereas the delta QP values are
signed 8-bit values. The map can be of any size
and will be scaled to the video size. The file
reading will loop if end of the file is reached.
See roi.txt in the examples folder.
--set-qp-in-cu : Set QP at CU level keeping pic_init_qp_minus26
in PPS and slice_qp_delta in slice header zero.
--(no-)erp-aqp : Use adaptive QP for 360 degree video with

View file

@ -22,8 +22,8 @@ AC_CONFIG_SRCDIR([src/encmain.c])
# - Increment when making new releases and major or minor was not changed since last release.
#
# Here is a somewhat sane guide to lib versioning: http://apr.apache.org/versioning.html
ver_major=6
ver_minor=7
ver_major=7
ver_minor=2
ver_release=0
# Prevents configure from adding a lot of defines to the CFLAGS

View file

@ -164,11 +164,20 @@ Constrain movement vectors. [none]
.TP
\fB\-\-roi <filename>
Use a delta QP map for region of interest.
Reads an array of delta QP values from a text
file. The file format is: width and height of
the QP delta map followed by width*height delta
QP values in raster order. The map can be of any
size and will be scaled to the video size.
Reads an array of delta QP values from a file.
Text and binary files are supported and detected
from the file extension (.txt/.bin). If a known
extension is not found, the file is treated as
a text file. The file can include one or many
ROI frames each in the following format:
width and height of the QP delta map followed
by width * height delta QP values in raster
order. In binary format, width and height are
32\-bit integers whereas the delta QP values are
signed 8\-bit values. The map can be of any size
and will be scaled to the video size. The file
reading will loop if end of the file is reached.
See roi.txt in the examples folder.
.TP
\fB\-\-set\-qp\-in\-cu
Set QP at CU level keeping pic_init_qp_minus26.

View file

@ -33,6 +33,7 @@
#include "bitstream.h"
#include <math.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>

View file

@ -70,6 +70,7 @@ void kvz_cabac_start(cabac_data_t * const data)
data->num_buffered_bytes = 0;
data->buffered_byte = 0xff;
data->only_count = 0; // By default, write bits out
data->update = 0;
}
/**
@ -349,26 +350,28 @@ void kvz_cabac_write_coeff_remain(cabac_data_t * const cabac, const uint32_t rem
/**
 * \brief Code a unary symbol that is truncated at max_symbol.
 *
 * The first bin tells whether the symbol is non-zero and uses ctx[0].
 * It is followed by (symbol - 1) one-bins and, when symbol < max_symbol,
 * a terminating zero-bin; all of these use ctx[offset].
 * Every bin goes through CABAC_FBITS_UPDATE, so depending on the
 * only_count / update flags of the CABAC state the bin is costed into
 * bits_out and/or actually coded.
 *
 * \param data        CABAC state
 * \param ctx         context model array of the syntax element
 * \param symbol      value to code, must be <= max_symbol
 * \param offset      index of the context used for every bin after the first
 * \param max_symbol  largest value that can be coded; nothing is coded if 0
 * \param bits_out    accumulator for the estimated bit cost, may be NULL
 */
void kvz_cabac_write_unary_max_symbol(cabac_data_t * const data,
                                      cabac_ctx_t * const ctx,
                                      uint32_t symbol,
                                      const int32_t offset,
                                      const uint32_t max_symbol,
                                      double* bits_out)
{
  // Callers that only write the bitstream pass NULL for bits_out. Route the
  // cost into a local sink in that case so the macro never dereferences NULL.
  double discarded_bits = 0;
  double * const bits = bits_out ? bits_out : &discarded_bits;
  const int8_t code_last = max_symbol > symbol;

  assert(symbol <= max_symbol);

  if (!max_symbol) return;

  // The bin value is a flag. Passing the raw symbol would make the entropy
  // table lookup in CABAC_FBITS_UPDATE use a wrong index for symbol > 1.
  CABAC_FBITS_UPDATE(data, ctx, (symbol != 0), *bits, "ums");

  if (!symbol) return;

  while (--symbol) {
    CABAC_FBITS_UPDATE(data, &ctx[offset], 1, *bits, "ums");
  }
  if (code_last) {
    CABAC_FBITS_UPDATE(data, &ctx[offset], 0, *bits, "ums");
  }
}
@ -405,7 +408,7 @@ void kvz_cabac_write_unary_max_symbol_ep(cabac_data_t * const data, unsigned int
/**
* \brief
*/
void kvz_cabac_write_ep_ex_golomb(encoder_state_t * const state,
uint32_t kvz_cabac_write_ep_ex_golomb(encoder_state_t * const state,
cabac_data_t * const data,
uint32_t symbol,
uint32_t count)
@ -426,4 +429,5 @@ void kvz_cabac_write_ep_ex_golomb(encoder_state_t * const state,
num_bins += count;
CABAC_BINS_EP(data, bins, num_bins, "ep_ex_golomb");
return num_bins;
}

View file

@ -59,7 +59,8 @@ typedef struct
uint32_t buffered_byte;
int32_t num_buffered_bytes;
int32_t bits_left;
int8_t only_count;
int8_t only_count : 4;
int8_t update : 4;
bitstream_t *stream;
// CONTEXTS
@ -140,11 +141,11 @@ void kvz_cabac_write(cabac_data_t *data);
void kvz_cabac_finish(cabac_data_t *data);
void kvz_cabac_write_coeff_remain(cabac_data_t *cabac, uint32_t symbol,
uint32_t r_param, const unsigned int cutoff);
void kvz_cabac_write_ep_ex_golomb(struct encoder_state_t * const state, cabac_data_t *data,
uint32_t kvz_cabac_write_ep_ex_golomb(struct encoder_state_t * const state, cabac_data_t *data,
uint32_t symbol, uint32_t count);
void kvz_cabac_write_unary_max_symbol(cabac_data_t *data, cabac_ctx_t *ctx,
uint32_t symbol, int32_t offset,
uint32_t max_symbol);
uint32_t symbol, int32_t offset,
uint32_t max_symbol, double* bits_out);
void kvz_cabac_write_unary_max_symbol_ep(cabac_data_t *data, unsigned int symbol, unsigned int max_symbol);
#define CTX_PROB_BITS 15
@ -153,6 +154,18 @@ void kvz_cabac_write_unary_max_symbol_ep(cabac_data_t *data, unsigned int symbol
#define CTX_MASK_0 (~(~0u << CTX_PROB_BITS_0) << (CTX_PROB_BITS - CTX_PROB_BITS_0))
#define CTX_MASK_1 (~(~0u << CTX_PROB_BITS_1) << (CTX_PROB_BITS - CTX_PROB_BITS_1))
// Floating point fractional bits, derived from kvz_entropy_bits
extern const float kvz_f_entropy_bits[512];
#define CTX_ENTROPY_FBITS(ctx, val) kvz_f_entropy_bits[(CTX_STATE(ctx)<<1) ^ (val)]
// Cost and/or code one context coded bin.
//  - only_count set: the fractional bit cost of coding `val` with `ctx` is
//    added to `bits`. `bits` is only evaluated in this case, so it may be an
//    expression such as *ptr that is invalid when counting is off.
//  - update set: `ctx` becomes the current context and the bin is coded with
//    CABAC_BIN.
// With neither flag set the macro does nothing. The cost is looked up before
// CABAC_BIN runs. `ctx` and `val` are expanded more than once and must not
// have side effects; `val` must be 0 or 1 for the table lookup to be valid.
#define CABAC_FBITS_UPDATE(cabac, ctx, val, bits, name) do { \
  if ((cabac)->only_count) (bits) += CTX_ENTROPY_FBITS((ctx), (val)); \
  if ((cabac)->update) { \
    (cabac)->cur_ctx = (ctx); \
    CABAC_BIN((cabac), (val), (name)); \
  } \
} while (0)
// Macros
#define CTX_GET_STATE(ctx) ( (ctx)->state[0]+(ctx)->state[1] )
#define CTX_STATE(ctx) ( CTX_GET_STATE(ctx)>>8 )

View file

@ -149,9 +149,9 @@ int kvz_config_init(kvz_config *cfg)
cfg->gop_lp_definition.t = 1;
cfg->open_gop = true;
cfg->roi.width = 0;
cfg->roi.height = 0;
cfg->roi.dqps = NULL;
cfg->roi.file_path = NULL;
cfg->roi.format = KVZ_ROI_TXT;
cfg->set_qp_in_cu = false;
cfg->erp_aqp = false;
@ -214,6 +214,9 @@ int kvz_config_init(kvz_config *cfg)
cfg->cclm = 0;
cfg->combine_intra_cus = 1;
cfg->force_inter = 0;
return 1;
}
@ -221,11 +224,11 @@ int kvz_config_destroy(kvz_config *cfg)
{
if (cfg) {
FREE_POINTER(cfg->cqmfile);
FREE_POINTER(cfg->roi.file_path);
FREE_POINTER(cfg->fast_coeff_table_fn);
FREE_POINTER(cfg->tiles_width_split);
FREE_POINTER(cfg->tiles_height_split);
FREE_POINTER(cfg->slice_addresses_in_ts);
FREE_POINTER(cfg->roi.dqps);
FREE_POINTER(cfg->fastrd_learning_outdir_fn);
}
free(cfg);
@ -1295,60 +1298,29 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
}
else if OPT("implicit-rdpcm")
cfg->implicit_rdpcm = (bool)atobool(value);
else if OPT("roi") {
// The ROI description is as follows:
// First number is width, second number is height,
// then follows width * height number of dqp values.
FILE* f = fopen(value, "rb");
if (!f) {
fprintf(stderr, "Could not open ROI file.\n");
static enum kvz_roi_format const formats[] = { KVZ_ROI_TXT, KVZ_ROI_BIN };
static const char * const format_names[] = { "txt", "bin", NULL };
char *roi_file = strdup(value);
if (!roi_file) {
fprintf(stderr, "Failed to allocate memory for ROI file name.\n");
return 0;
}
FREE_POINTER(cfg->roi.file_path);
cfg->roi.file_path = roi_file;
int width = 0;
int height = 0;
if (!fscanf(f, "%d", &width) || !fscanf(f, "%d", &height)) {
fprintf(stderr, "Failed to read ROI size.\n");
fclose(f);
return 0;
// Get file extension or the substring after the last dot
char *maybe_extension = strrchr(cfg->roi.file_path, '.');
if (!maybe_extension) {
cfg->roi.format = KVZ_ROI_TXT;
} else {
maybe_extension++;
int8_t format;
bool unknown_format = !parse_enum(maybe_extension, format_names, &format);
cfg->roi.format = unknown_format ? KVZ_ROI_TXT : formats[format];
}
if (width <= 0 || height <= 0) {
fprintf(stderr, "Invalid ROI size: %dx%d.\n", width, height);
fclose(f);
return 0;
}
if (width > 10000 || height > 10000) {
fprintf(stderr, "ROI dimensions exceed arbitrary value of 10000.\n");
fclose(f);
return 0;
}
const unsigned size = width * height;
int8_t *dqp_array = calloc((size_t)size, sizeof(cfg->roi.dqps[0]));
if (!dqp_array) {
fprintf(stderr, "Failed to allocate memory for ROI table.\n");
fclose(f);
return 0;
}
FREE_POINTER(cfg->roi.dqps);
cfg->roi.dqps = dqp_array;
cfg->roi.width = width;
cfg->roi.height = height;
for (int i = 0; i < size; ++i) {
int number; // Need a pointer to int for fscanf
if (fscanf(f, "%d", &number) != 1) {
fprintf(stderr, "Reading ROI file failed.\n");
fclose(f);
return 0;
}
dqp_array[i] = CLIP(-51, 51, number);
}
fclose(f);
}
else if OPT("set-qp-in-cu") {
cfg->set_qp_in_cu = (bool)atobool(value);
@ -1502,6 +1474,12 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
else if OPT("cclm") {
cfg->cclm = (bool)atobool(value);
}
else if OPT("combine-intra-cus") {
cfg->combine_intra_cus = atobool(value);
}
else if OPT("force-inter") {
cfg->force_inter = atobool(value);
}
else {
return 0;
}

View file

@ -145,6 +145,7 @@ static const struct option long_options[] = {
{ "force-level", required_argument, NULL, 0 },
{ "high-tier", no_argument, NULL, 0 },
{ "me-steps", required_argument, NULL, 0 },
{ "roi-file", required_argument, NULL, 0 },
{ "fast-residual-cost", required_argument, NULL, 0 },
{ "set-qp-in-cu", no_argument, NULL, 0 },
{ "open-gop", no_argument, NULL, 0 },
@ -183,6 +184,10 @@ static const struct option long_options[] = {
{ "no-amvr", no_argument, NULL, 0 },
{ "cclm", no_argument, NULL, 0 },
{ "no-cclm", no_argument, NULL, 0 },
{ "combine-intra-cus", no_argument, NULL, 0 },
{ "no-combine-intra-cus", no_argument, NULL, 0 },
{ "force-inter", no_argument, NULL, 0 },
{ "no-force-inter", no_argument, NULL, 0 },
{0, 0, 0, 0}
};
@ -504,11 +509,20 @@ void print_help(void)
" - frametile: Constrain within the tile.\n"
" - frametilemargin: Constrain even more.\n"
" --roi <filename> : Use a delta QP map for region of interest.\n"
" Reads an array of delta QP values from a text\n"
" file. The file format is: width and height of\n"
" the QP delta map followed by width*height delta\n"
" QP values in raster order. The map can be of any\n"
" size and will be scaled to the video size.\n"
" Reads an array of delta QP values from a file.\n"
" Text and binary files are supported and detected\n"
" from the file extension (.txt/.bin). If a known\n"
" extension is not found, the file is treated as\n"
" a text file. The file can include one or many\n"
" ROI frames each in the following format:\n"
" width and height of the QP delta map followed\n"
" by width * height delta QP values in raster\n"
" order. In binary format, width and height are\n"
" 32-bit integers whereas the delta QP values are\n"
" signed 8-bit values. The map can be of any size\n"
" and will be scaled to the video size. The file\n"
" reading will loop if end of the file is reached.\n"
" See roi.txt in the examples folder.\n"
" --set-qp-in-cu : Set QP at CU level keeping pic_init_qp_minus26.\n"
" in PPS and slice_qp_delta in slize header zero.\n"
" --(no-)erp-aqp : Use adaptive QP for 360 degree video with\n"
@ -594,6 +608,16 @@ void print_help(void)
" --ml-pu-depth-intra : Predict the pu-depth-intra using machine\n"
" learning trees, overrides the\n"
" --pu-depth-intra parameter. [disabled]\n"
/* Every help line is its own string literal and must end in "\n";
 * otherwise the next option is printed on the same output line. */
" --(no-)combine-intra-cus: Whether the encoder tries to code a cu\n"
" on lower depth even when search is not\n"
" performed on said depth. Should only\n"
" be disabled if cus absolutely must not\n"
" be larger than limited by the search.\n"
" [enabled]\n"
" --force-inter : Force the encoder to use inter always.\n"
" This is mostly for debugging and is not\n"
" guaranteed to produce sensible bitstream or\n"
" work at all. [disabled]\n"
" --tr-depth-intra <int> : Transform split depth for intra blocks [0]\n"
" --(no-)bipred : Bi-prediction [disabled]\n"
" --cu-split-termination <string> : CU split search termination [zero]\n"

View file

@ -441,6 +441,7 @@ int main(int argc, char *argv[])
FILE *input = NULL; //!< input file (YUV)
FILE *output = NULL; //!< output file (HEVC NAL stream)
FILE *recout = NULL; //!< reconstructed YUV output, --debug
FILE *roifile = NULL;
clock_t start_time = clock();
clock_t encoding_start_cpu_time;
KVZ_CLOCK_T encoding_start_real_time;
@ -584,7 +585,7 @@ int main(int argc, char *argv[])
// Give arguments via struct to the input thread
input_handler_args in_args = {
.available_input_slots = available_input_slots,
.filled_input_slots = filled_input_slots,
.filled_input_slots = filled_input_slots,
.input = input,
.api = api,
@ -825,6 +826,7 @@ done:
if (input) fclose(input);
if (output) fclose(output);
if (recout) fclose(recout);
if (roifile) fclose(roifile);
DBG_YUVIEW_CLEANUP();
CHECKPOINTS_FINALIZE();

View file

@ -581,7 +581,7 @@ static void encode_transform_coeff(encoder_state_t * const state,
// cu_qp_delta_abs prefix
cabac->cur_ctx = &cabac->ctx.cu_qp_delta_abs[0];
kvz_cabac_write_unary_max_symbol(cabac, cabac->ctx.cu_qp_delta_abs, MIN(qp_delta_abs, 5), 1, 5);
kvz_cabac_write_unary_max_symbol(cabac, cabac->ctx.cu_qp_delta_abs, MIN(qp_delta_abs, 5), 1, 5, NULL);
if (qp_delta_abs >= 5) {
// cu_qp_delta_abs suffix
@ -610,17 +610,19 @@ static void encode_transform_coeff(encoder_state_t * const state,
* \param depth Depth from LCU.
* \return if non-zero mvd is coded
*/
static bool encode_inter_prediction_unit(encoder_state_t * const state,
cabac_data_t * const cabac,
const cu_info_t * const cur_cu,
int x, int y, int width, int height,
int depth)
int kvz_encode_inter_prediction_unit(encoder_state_t * const state,
cabac_data_t * const cabac,
const cu_info_t * const cur_cu,
int x, int y, int width, int height,
int depth, lcu_t* lcu, double* bits_out)
{
// Mergeflag
int16_t num_cand = 0;
bool non_zero_mvd = false;
cabac->cur_ctx = &(cabac->ctx.cu_merge_flag_ext_model);
CABAC_BIN(cabac, cur_cu->merged, "MergeFlag");
double bits = 0;
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_merge_flag_ext_model), cur_cu->merged, bits, "MergeFlag");
num_cand = state->encoder_control->cfg.max_merge;
if (cur_cu->merged) { //merge
if (num_cand > 1) {
@ -628,10 +630,10 @@ static bool encode_inter_prediction_unit(encoder_state_t * const state,
for (ui = 0; ui < num_cand - 1; ui++) {
int32_t symbol = (ui != cur_cu->merge_idx);
if (ui == 0) {
cabac->cur_ctx = &(cabac->ctx.cu_merge_idx_ext_model);
CABAC_BIN(cabac, symbol, "MergeIndex");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_merge_idx_ext_model), symbol, bits, "MergeIndex");
} else {
CABAC_BIN_EP(cabac,symbol,"MergeIndex");
if(cabac->only_count) bits += 1;
}
if (symbol == 0) break;
}
@ -650,12 +652,10 @@ static bool encode_inter_prediction_unit(encoder_state_t * const state,
if (cur_cu->part_size == SIZE_2Nx2N || (LCU_WIDTH >> depth) != 4) { // ToDo: limit on 4x8/8x4
uint32_t inter_dir_ctx = (7 - ((kvz_math_floor_log2(width) + kvz_math_floor_log2(height) + 1) >> 1));
cabac->cur_ctx = &(cabac->ctx.inter_dir[inter_dir_ctx]);
CABAC_BIN(cabac, (inter_dir == 3), "inter_pred_idc");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.inter_dir[inter_dir_ctx]), (inter_dir == 3), bits, "inter_pred_idc");
}
if (inter_dir < 3) {
cabac->cur_ctx = &(cabac->ctx.inter_dir[5]);
CABAC_BIN(cabac, (inter_dir == 2), "inter_pred_idc");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.inter_dir[5]), (inter_dir == 2), bits, "inter_pred_idc");
}
}
@ -674,20 +674,21 @@ static bool encode_inter_prediction_unit(encoder_state_t * const state,
if (ref_LX_size > 1) {
// parseRefFrmIdx
int32_t ref_frame = cur_cu->inter.mv_ref[ref_list_idx];
cabac->cur_ctx = &(cabac->ctx.cu_ref_pic_model[0]);
CABAC_BIN(cabac, (ref_frame > 0), "ref_idx_lX");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_ref_pic_model[0]), (ref_frame != 0), bits, "ref_idx_lX");
if (ref_frame > 0 && ref_LX_size > 2) {
cabac->cur_ctx = &(cabac->ctx.cu_ref_pic_model[1]);
CABAC_BIN(cabac, (ref_frame > 1), "ref_idx_lX");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_ref_pic_model[1]), (ref_frame > 1), bits, "ref_idx_lX");
if (ref_frame > 1 && ref_LX_size > 3) {
for (int idx = 3; idx < ref_LX_size; idx++)
{
uint8_t val = (ref_frame > idx - 1) ? 1 : 0;
CABAC_BIN_EP(cabac, val, "ref_idx_lX");
if (cabac->only_count) bits += 1;
if (!val) break;
}
}
}
@ -697,28 +698,37 @@ static bool encode_inter_prediction_unit(encoder_state_t * const state,
if (state->frame->ref_list != REF_PIC_LIST_1 || cur_cu->inter.mv_dir != 3) {
mv_t mv_cand[2][2];
kvz_inter_get_mv_cand_cua(
if (lcu) {
kvz_inter_get_mv_cand(
state,
x, y, width, height,
mv_cand, cur_cu,
lcu, ref_list_idx);
}
else {
kvz_inter_get_mv_cand_cua(
state,
x, y, width, height,
mv_cand, cur_cu, ref_list_idx);
mv_cand, cur_cu, ref_list_idx
);
}
uint8_t cu_mv_cand = CU_GET_MV_CAND(cur_cu, ref_list_idx);
mv_t mvd_hor = cur_cu->inter.mv[ref_list_idx][0] - mv_cand[cu_mv_cand][0];
mv_t mvd_ver = cur_cu->inter.mv[ref_list_idx][1] - mv_cand[cu_mv_cand][1];
kvz_change_precision(INTERNAL_MV_PREC, kvz_g_imv_to_prec[KVZ_IMV_OFF], &mvd_hor, &mvd_ver);
kvz_encode_mvd(state, cabac, mvd_hor, mvd_ver);
kvz_encode_mvd(state, cabac, mvd_hor, mvd_ver, bits_out);
non_zero_mvd |= (mvd_hor != 0) || (mvd_ver != 0);
}
// Signal which candidate MV to use
cabac->cur_ctx = &(cabac->ctx.mvp_idx_model);
CABAC_BIN(cabac, CU_GET_MV_CAND(cur_cu, ref_list_idx), "mvp_flag");
CABAC_FBITS_UPDATE(cabac,&(cabac->ctx.mvp_idx_model), CU_GET_MV_CAND(cur_cu, ref_list_idx), bits, "mvp_flag");
} // for ref_list
} // if !merge
if(bits_out) *bits_out += bits;
return non_zero_mvd;
}
@ -807,7 +817,7 @@ static void encode_chroma_intra_cu(cabac_data_t* const cabac, const cu_info_t* c
static void encode_intra_coding_unit(encoder_state_t * const state,
cabac_data_t * const cabac,
const cu_info_t * const cur_cu,
int x, int y, int depth, lcu_coeff_t* coeff)
int x, int y, int depth, lcu_t* lcu, lcu_coeff_t* coeff, double* bits_out)
{
const videoframe_t * const frame = state->tile->frame;
uint8_t intra_pred_mode_actual[4];
@ -1050,6 +1060,7 @@ static void encode_intra_coding_unit(encoder_state_t * const state,
kvz_cabac_encode_trunc_bin(cabac, tmp_pred, 67 - INTRA_MPM_COUNT);
}
if (cabac->only_count && bits_out) *bits_out += 5;
}
}
@ -1057,14 +1068,17 @@ static void encode_intra_coding_unit(encoder_state_t * const state,
if (state->encoder_control->chroma_format != KVZ_CSP_400 && depth != 4) {
encode_chroma_intra_cu(cabac, cur_cu, x, y, frame, cu_width, state->encoder_control->cfg.cclm);
}
// if we are counting bits, the cost for transform coeffs is done separately
// To get the distortion at the same time
if (!cabac->only_count) {
encode_transform_coeff(state, x, y, depth, 0, 0, 0, 0, coeff);
encode_transform_coeff(state, x, y, depth, 0, 0, 0, 0, coeff);
encode_mts_idx(state, cabac, cur_cu);
encode_mts_idx(state, cabac, cur_cu);
if (state->encoder_control->chroma_format != KVZ_CSP_400 && depth == 4 && x % 8 && y % 8) {
encode_chroma_intra_cu(cabac, cur_cu, x, y, frame, cu_width, state->encoder_control->cfg.cclm);
encode_transform_coeff(state, x, y, depth, 0, 0, 0, 1, coeff);
if (state->encoder_control->chroma_format != KVZ_CSP_400 && depth == 4 && x % 8 && y % 8) {
encode_chroma_intra_cu(cabac, cur_cu, x, y, frame, cu_width, state->encoder_control->cfg.cclm);
encode_transform_coeff(state, x, y, depth, 0, 0, 0, 1, coeff);
}
}
}
@ -1105,32 +1119,32 @@ static void encode_part_mode(encoder_state_t * const state,
// log2CbSize == MinCbLog2SizeY | 0 1 2 bypass
// log2CbSize > MinCbLog2SizeY | 0 1 3 bypass
// ------------------------------+------------------
double bits = 0;
if (cur_cu->type == CU_INTRA) {
if (depth == MAX_DEPTH) {
cabac->cur_ctx = &(cabac->ctx.part_size_model[0]);
if (cur_cu->part_size == SIZE_2Nx2N) {
CABAC_BIN(cabac, 1, "part_mode 2Nx2N");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[0]), 1, bits, "part_mode 2Nx2N");
} else {
CABAC_BIN(cabac, 0, "part_mode NxN");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[0]), 0, bits, "part_mode NxN");
}
}
} else {
cabac->cur_ctx = &(cabac->ctx.part_size_model[0]);
if (cur_cu->part_size == SIZE_2Nx2N) {
CABAC_BIN(cabac, 1, "part_mode 2Nx2N");
return;
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[0]), 1, bits, "part_mode 2Nx2N");
return bits;
}
CABAC_BIN(cabac, 0, "part_mode split");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[0]), 0, bits, "part_mode split");
cabac->cur_ctx = &(cabac->ctx.part_size_model[1]);
if (cur_cu->part_size == SIZE_2NxN ||
cur_cu->part_size == SIZE_2NxnU ||
cur_cu->part_size == SIZE_2NxnD) {
CABAC_BIN(cabac, 1, "part_mode vertical");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[1]), 1, bits, "part_mode vertical");
} else {
CABAC_BIN(cabac, 0, "part_mode horizontal");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[1]), 0, bits, "part_mode horizontal");
}
if (state->encoder_control->cfg.amp_enable && depth < MAX_DEPTH) {
@ -1138,19 +1152,22 @@ static void encode_part_mode(encoder_state_t * const state,
if (cur_cu->part_size == SIZE_2NxN ||
cur_cu->part_size == SIZE_Nx2N) {
CABAC_BIN(cabac, 1, "part_mode SMP");
return;
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[3]), 1, bits, "part_mode SMP");
return bits;
}
CABAC_BIN(cabac, 0, "part_mode AMP");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[3]), 0, bits, "part_mode AMP");
if (cur_cu->part_size == SIZE_2NxnU ||
cur_cu->part_size == SIZE_nLx2N) {
CABAC_BINS_EP(cabac, 0, 1, "part_mode AMP");
if(cabac->only_count) bits += 1;
} else {
CABAC_BINS_EP(cabac, 1, 1, "part_mode AMP");
if(cabac->only_count) bits += 1;
}
}
}
return bits;
}
**/
@ -1191,7 +1208,7 @@ void kvz_encode_coding_tree(encoder_state_t * const state,
bool border_split_y = ctrl->in.height >= abs_y + (LCU_WIDTH >> MAX_DEPTH) + half_cu;
bool border = border_x || border_y; /*!< are we in any border CU */
if (depth <= ctrl->max_qp_delta_depth) {
if (depth <= state->frame->max_qp_delta_depth) {
state->must_code_qp_delta = true;
}
@ -1456,7 +1473,7 @@ void kvz_encode_coding_tree(encoder_state_t * const state,
const int pu_h = PU_GET_H(cur_cu->part_size, cu_width, i);
const cu_info_t *cur_pu = kvz_cu_array_at_const(frame->cu_array, pu_x, pu_y);
non_zero_mvd |= encode_inter_prediction_unit(state, cabac, cur_pu, pu_x, pu_y, pu_w, pu_h, depth);
non_zero_mvd |= kvz_encode_inter_prediction_unit(state, cabac, cur_pu, pu_x, pu_y, pu_w, pu_h, depth, NULL, NULL);
DBG_PRINT_MV(state, pu_x, pu_y, pu_w, pu_h, cur_pu);
kvz_hmvp_add_mv(state, x, y, pu_w, pu_h, cur_pu);
}
@ -1494,7 +1511,7 @@ void kvz_encode_coding_tree(encoder_state_t * const state,
}
} else if (cur_cu->type == CU_INTRA) {
encode_intra_coding_unit(state, cabac, cur_cu, x, y, depth, coeff);
encode_intra_coding_unit(state, cabac, cur_cu, x, y, depth, NULL, coeff, NULL);
}
else {
@ -1511,11 +1528,128 @@ end:
}
/**
 * \brief Walk through the syntax of one unsplit CU and return its bit estimate.
 *
 * Covers the split flag (always coded as "no split"), the skip flag with the
 * merge index of a skipped CU, the prediction mode and finally the inter
 * prediction units or the intra mode signalling. Neighbouring CUs are read
 * from the LCU work buffer. Context coded bins go through
 * CABAC_FBITS_UPDATE; bypass bins add one bit when cabac->only_count is set.
 *
 * \param state   encoder state; must_code_qp_delta is set when
 *                depth <= state->frame->max_qp_delta_depth
 * \param cabac   CABAC state providing the contexts
 * \param x       x-coordinate of the CU, the tile offset is added for the
 *                picture border check
 * \param y       y-coordinate of the CU, handled like x
 * \param depth   depth of the CU; its width is LCU_WIDTH >> depth
 * \param lcu     LCU work buffer holding the CU and its neighbours
 * \param cur_cu  the CU whose signalling is costed
 * \return accumulated bit estimate
 */
double kvz_mock_encode_coding_unit(
  encoder_state_t* const state,
  cabac_data_t* cabac,
  int x, int y, int depth,
  lcu_t* lcu, cu_info_t* cur_cu)
{
  const encoder_control_t* const ctrl = state->encoder_control;
  const int cu_width = LCU_WIDTH >> depth;
  const int local_x = SUB_SCU(x);
  const int local_y = SUB_SCU(y);
  double bits = 0;

  // Neighbours exist only when the CU is not on the first column / row.
  const cu_info_t* left_cu = NULL;
  const cu_info_t* above_cu = NULL;
  if (x != 0) {
    left_cu = LCU_GET_CU_AT_PX(lcu, local_x - 1, local_y);
  }
  if (y != 0) {
    above_cu = LCU_GET_CU_AT_PX(lcu, local_x, local_y - 1);
  }

  // Absolute coordinates, used to see whether the CU fits inside the picture.
  const uint16_t abs_x = x + state->tile->offset_x;
  const uint16_t abs_y = y + state->tile->offset_y;
  const bool inside_picture =
    ctrl->in.width >= abs_x + cu_width &&
    ctrl->in.height >= abs_y + cu_width;

  if (depth <= state->frame->max_qp_delta_depth) {
    state->must_code_qp_delta = true;
  }

  // The split flag is explicit only below MAX_DEPTH and away from the border.
  if (depth != MAX_DEPTH && inside_picture) {
    // Each neighbour that is split at this depth bumps the context index.
    uint8_t split_model = 0;
    if (left_cu != NULL && GET_SPLITDATA(left_cu, depth) == 1) {
      split_model += 1;
    }
    if (above_cu != NULL && GET_SPLITDATA(above_cu, depth) == 1) {
      split_model += 1;
    }

    // This mocks encoding the current CU so it should be never split
    CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.split_flag_model[split_model]), 0, bits, "SplitFlag");
  }

  // Skip flag and, for a skipped CU, the merge index which ends the CU.
  if (state->frame->slicetype != KVZ_SLICE_I) {
    int8_t ctx_skip = 0;
    if (left_cu != NULL && left_cu->skipped) {
      ctx_skip += 1;
    }
    if (above_cu != NULL && above_cu->skipped) {
      ctx_skip += 1;
    }

    CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_skip_flag_model[ctx_skip]), cur_cu->skipped, bits, "SkipFlag");

    if (cur_cu->skipped) {
      const int16_t num_cand = state->encoder_control->cfg.max_merge;
      // Unary merge index: first bin context coded, the rest bypass coded.
      for (int cand = 0; cand + 1 < num_cand; cand++) {
        const int32_t symbol = (cand != cur_cu->merge_idx);
        if (cand != 0) {
          CABAC_BIN_EP(cabac, symbol, "MergeIndex");
          if (cabac->only_count) bits += 1;
        } else {
          CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_merge_idx_ext_model), symbol, bits, "MergeIndex");
        }
        if (!symbol) {
          break;
        }
      }
      return bits;
    }
  }

  // Prediction mode
  if (state->frame->slicetype != KVZ_SLICE_I && cu_width != 4) {
    const bool left_is_intra = left_cu != NULL && left_cu->type == CU_INTRA;
    const bool above_is_intra = above_cu != NULL && above_cu->type == CU_INTRA;
    const int8_t ctx_predmode = (left_is_intra || above_is_intra) ? 1 : 0;

    CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_pred_mode_model[ctx_predmode]), (cur_cu->type == CU_INTRA), bits, "PredMode");
  }

  if (cur_cu->type == CU_INTRA) {
    encode_intra_coding_unit(state, cabac, cur_cu, x, y, depth, lcu, NULL, &bits);
  }
  else if (cur_cu->type == CU_INTER) {
    // Cost every prediction unit of the CU, reading PU data from the LCU.
    const int pu_count = kvz_part_mode_num_parts[cur_cu->part_size];
    for (int part = 0; part < pu_count; ++part) {
      const int pu_x = PU_GET_X(cur_cu->part_size, cu_width, x, part);
      const int pu_y = PU_GET_Y(cur_cu->part_size, cu_width, y, part);
      const int pu_w = PU_GET_W(cur_cu->part_size, cu_width, part);
      const int pu_h = PU_GET_H(cur_cu->part_size, cu_width, part);
      const cu_info_t* cur_pu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(pu_x), SUB_SCU(pu_y));

      kvz_encode_inter_prediction_unit(state, cabac, cur_pu, pu_x, pu_y, pu_w, pu_h, depth, lcu, &bits);
    }
  }

  return bits;
}
void kvz_encode_mvd(encoder_state_t * const state,
cabac_data_t *cabac,
int32_t mvd_hor,
int32_t mvd_ver)
int32_t mvd_ver, double* bits_out)
{
const int8_t hor_abs_gr0 = mvd_hor != 0;
const int8_t ver_abs_gr0 = mvd_ver != 0;
@ -1523,29 +1657,33 @@ void kvz_encode_mvd(encoder_state_t * const state,
const uint32_t mvd_ver_abs = abs(mvd_ver);
cabac->cur_ctx = &cabac->ctx.cu_mvd_model[0];
CABAC_BIN(cabac, (mvd_hor != 0), "abs_mvd_greater0_flag_hor");
CABAC_BIN(cabac, (mvd_ver != 0), "abs_mvd_greater0_flag_ver");
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[0], (mvd_hor != 0), *bits_out, "abs_mvd_greater0_flag_hor");
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[0], (mvd_ver != 0), *bits_out, "abs_mvd_greater0_flag_ver");
cabac->cur_ctx = &cabac->ctx.cu_mvd_model[1];
if (hor_abs_gr0) {
CABAC_BIN(cabac, (mvd_hor_abs>1), "abs_mvd_greater1_flag_hor");
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[1], (mvd_hor_abs>1), *bits_out,"abs_mvd_greater1_flag_hor");
}
if (ver_abs_gr0) {
CABAC_BIN(cabac, (mvd_ver_abs>1), "abs_mvd_greater1_flag_ver");
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[1], (mvd_ver_abs>1), *bits_out, "abs_mvd_greater1_flag_ver");
}
if (hor_abs_gr0) {
if (mvd_hor_abs > 1) {
kvz_cabac_write_ep_ex_golomb(state, cabac, mvd_hor_abs - 2, 1);
uint32_t bits = kvz_cabac_write_ep_ex_golomb(state, cabac, mvd_hor_abs - 2, 1);
if(cabac->only_count) *bits_out += bits;
}
uint32_t mvd_hor_sign = (mvd_hor > 0) ? 0 : 1;
CABAC_BIN_EP(cabac, mvd_hor_sign, "mvd_sign_flag_hor");
if (cabac->only_count) *bits_out += 1;
}
if (ver_abs_gr0) {
if (mvd_ver_abs > 1) {
kvz_cabac_write_ep_ex_golomb(state, cabac, mvd_ver_abs - 2, 1);
uint32_t bits = kvz_cabac_write_ep_ex_golomb(state, cabac, mvd_ver_abs - 2, 1);
if (cabac->only_count) *bits_out += bits;
}
uint32_t mvd_ver_sign = mvd_ver > 0 ? 0 : 1;
CABAC_BIN_EP(cabac, mvd_ver_sign, "mvd_sign_flag_ver");
if (cabac->only_count) *bits_out += 1;
}
}

View file

@ -56,7 +56,22 @@ void kvz_encode_ts_residual(encoder_state_t* const state,
void kvz_encode_mvd(encoder_state_t * const state,
cabac_data_t *cabac,
int32_t mvd_hor,
int32_t mvd_ver);
int32_t mvd_ver,
double* bits_out);
/**
 * \brief Estimate the bits needed to signal one CU that is coded unsplit.
 *
 * \param state   encoder state
 * \param cabac   CABAC state whose contexts are used for the estimate
 * \param x       x-coordinate of the CU
 * \param y       y-coordinate of the CU
 * \param depth   depth of the CU
 * \param lcu     LCU work buffer the CU and its neighbours are read from
 * \param cur_cu  CU to estimate
 * \return accumulated bit estimate
 */
double kvz_mock_encode_coding_unit(
encoder_state_t* const state,
cabac_data_t* cabac,
int x, int y, int depth,
lcu_t* lcu, cu_info_t* cur_cu);
/**
 * \brief Code or cost the syntax of one inter prediction unit.
 *
 * \param state     encoder state
 * \param cabac     CABAC state
 * \param cur_cu    CU info of the prediction unit
 * \param x         x-coordinate of the prediction unit
 * \param y         y-coordinate of the prediction unit
 * \param width     width of the prediction unit
 * \param height    height of the prediction unit
 * \param depth     depth from LCU
 * \param lcu       when non-NULL the MV candidates are derived from this LCU
 *                  work buffer, otherwise from the frame level CU array
 * \param bits_out  estimated bits are added here; the bitstream writing
 *                  caller passes NULL
 *                  NOTE(review): the pointer is forwarded to kvz_encode_mvd,
 *                  confirm NULL is never combined with only_count there
 * \return non-zero if a non-zero mvd is coded
 */
int kvz_encode_inter_prediction_unit(encoder_state_t* const state,
cabac_data_t* const cabac,
const cu_info_t* const cur_cu,
int x, int y, int width, int height,
int depth,
lcu_t* lcu,
double* bits_out);
void kvz_encode_last_significant_xy(cabac_data_t * const cabac,
uint8_t lastpos_x, uint8_t lastpos_y,

View file

@ -32,7 +32,6 @@
#include "encoder.h"
// This define is required for M_PI on Windows.
#define _USE_MATH_DEFINES
#include <math.h>
#include <stdio.h>
@ -45,14 +44,6 @@
#include "kvz_math.h"
#include "fast_coeff_cost.h"
/**
* \brief Strength of QP adjustments when using adaptive QP for 360 video.
*
* Determined empirically.
*/
static const double ERP_AQP_STRENGTH = 3.0;
static int encoder_control_init_gop_layer_weights(encoder_control_t * const);
static unsigned cfg_num_threads(void)
@ -136,22 +127,6 @@ static int get_max_parallelism(const encoder_control_t *const encoder)
}
/**
* \brief Return weight for 360 degree ERP video
*
* Returns the scaling factor of area from equirectangular projection to
* spherical surface.
*
* \param y y-coordinate of the pixel
* \param h height of the picture
*/
static double ws_weight(int y, int h)
{
return cos((y - 0.5 * h + 0.5) * (M_PI / h));
}
/**
* \brief Update ROI QPs for 360 video with equirectangular projection.
*
@ -162,55 +137,6 @@ static double ws_weight(int y, int h)
* \param orig_width width of orig_roi
* \param orig_height height of orig_roi
*/
static void init_erp_aqp_roi(encoder_control_t* encoder,
int8_t *orig_roi,
int32_t orig_width,
int32_t orig_height)
{
// Update ROI with WS-PSNR delta QPs.
int height = encoder->in.height_in_lcu;
int width = orig_roi ? orig_width : 1;
int frame_height = encoder->in.real_height;
encoder->cfg.roi.width = width;
encoder->cfg.roi.height = height;
encoder->cfg.roi.dqps = calloc(width * height, sizeof(orig_roi[0]));
double total_weight = 0.0;
for (int y = 0; y < frame_height; y++) {
total_weight += ws_weight(y, frame_height);
}
for (int y_lcu = 0; y_lcu < height; y_lcu++) {
int y_orig = LCU_WIDTH * y_lcu;
int lcu_height = MIN(LCU_WIDTH, frame_height - y_orig);
double lcu_weight = 0.0;
for (int y = y_orig; y < y_orig + lcu_height; y++) {
lcu_weight += ws_weight(y, frame_height);
}
// Normalize.
lcu_weight = (lcu_weight * frame_height) / (total_weight * lcu_height);
int8_t qp_delta = round(-ERP_AQP_STRENGTH * log2(lcu_weight));
if (orig_roi) {
// If a ROI array already exists, we copy the existing values to the
// new array while adding qp_delta to each.
int y_roi = y_lcu * orig_height / height;
for (int x = 0; x < width; x++) {
encoder->cfg.roi.dqps[x + y_lcu * width] =
CLIP(-51, 51, orig_roi[x + y_roi * width] + qp_delta);
}
} else {
// Otherwise, simply write qp_delta to the ROI array.
encoder->cfg.roi.dqps[y_lcu] = qp_delta;
}
}
}
static int8_t* derive_chroma_QP_mapping_table(const kvz_config* const cfg, int i)
{
@ -394,6 +320,16 @@ encoder_control_t* kvz_encoder_control_init(const kvz_config *const cfg)
encoder->scaling_list.use_default_list = 1;
}
// ROI / delta QP
if (cfg->roi.file_path) {
const char *mode[2] = { "r", "rb" };
encoder->roi_file = fopen(cfg->roi.file_path, mode[cfg->roi.format]);
if (!encoder->roi_file) {
fprintf(stderr, "Could not open ROI file.\n");
goto init_failed;
}
}
if (cfg->fast_coeff_table_fn) {
FILE *fast_coeff_table_f = fopen(cfg->fast_coeff_table_fn, "rb");
if (fast_coeff_table_f == NULL) {
@ -435,32 +371,10 @@ encoder_control_t* kvz_encoder_control_init(const kvz_config *const cfg)
goto init_failed;
}
if (cfg->erp_aqp) {
init_erp_aqp_roi(encoder,
cfg->roi.dqps,
cfg->roi.width,
cfg->roi.height);
} else if (cfg->roi.dqps) {
// Copy delta QP array for ROI coding.
const size_t roi_size = encoder->cfg.roi.width * encoder->cfg.roi.height;
encoder->cfg.roi.dqps = calloc(roi_size, sizeof(cfg->roi.dqps[0]));
memcpy(encoder->cfg.roi.dqps,
cfg->roi.dqps,
roi_size * sizeof(*cfg->roi.dqps));
}
// NOTE: When tr_depth_inter is equal to 0, the transform is still split
// for SMP and AMP partition units.
encoder->tr_depth_inter = 0;
if (encoder->cfg.target_bitrate > 0 || encoder->cfg.roi.dqps || encoder->cfg.set_qp_in_cu || encoder->cfg.vaq) {
encoder->max_qp_delta_depth = 0;
} else {
encoder->max_qp_delta_depth = -1;
}
//Tiles
encoder->tiles_enable = encoder->cfg.tiles_width_count > 1 ||
encoder->cfg.tiles_height_count > 1;
@ -761,7 +675,7 @@ void kvz_encoder_control_free(encoder_control_t *const encoder)
FREE_POINTER(encoder->tiles_tile_id);
FREE_POINTER(encoder->cfg.roi.dqps);
FREE_POINTER(encoder->cfg.roi.file_path);
kvz_scalinglist_destroy(&encoder->scaling_list);
@ -773,6 +687,10 @@ void kvz_encoder_control_free(encoder_control_t *const encoder)
kvz_close_rdcost_outfiles();
if (encoder->roi_file) {
fclose(encoder->roi_file);
}
free(encoder);
}

View file

@ -130,7 +130,7 @@ typedef struct encoder_control_t
//! Picture weights when GOP is used.
double gop_layer_weights[MAX_GOP_LAYERS];
int8_t max_qp_delta_depth;
FILE *roi_file;
int tr_depth_inter;

View file

@ -805,10 +805,10 @@ static void encoder_state_write_bitstream_pic_parameter_set(bitstream_t* stream,
WRITE_U(stream, 0, 1, "pps_ref_wraparound_enabled_flag");
WRITE_SE(stream, ((int8_t)encoder->cfg.qp) - 26, "pps_init_qp_minus26");
WRITE_U(stream, encoder->max_qp_delta_depth >= 0 ? 1:0, 1, "pps_cu_qp_delta_enabled_flag");
if (encoder->max_qp_delta_depth >= 0) {
WRITE_U(stream, state->frame->max_qp_delta_depth >= 0 ? 1:0, 1, "pps_cu_qp_delta_enabled_flag");
if (state->frame->max_qp_delta_depth >= 0) {
// Use separate QP for each LCU when rate control is enabled.
WRITE_UE(stream, encoder->max_qp_delta_depth, "diff_cu_qp_delta_depth");
WRITE_UE(stream, state->frame->max_qp_delta_depth, "diff_cu_qp_delta_depth");
}
WRITE_U(stream, 0,1, "pps_chroma_tool_offsets_present_flag");

View file

@ -32,6 +32,9 @@
#include "encoderstate.h"
// This define is required for M_PI on Windows.
#define _USE_MATH_DEFINES
#include <ctype.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
@ -53,6 +56,13 @@
#include "strategies/strategies-picture.h"
/**
* \brief Strength of QP adjustments when using adaptive QP for 360 video.
*
* Determined empirically.
*/
static const double ERP_AQP_STRENGTH = 3.0;
int kvz_encoder_state_match_children_of_previous_frame(encoder_state_t * const state) {
int i;
@ -572,7 +582,7 @@ static void set_cu_qps(encoder_state_t *state, int x, int y, int depth, int *las
cu_info_t *cu = kvz_cu_array_at(state->tile->frame->cu_array, x, y);
const int cu_width = LCU_WIDTH >> depth;
if (depth <= state->encoder_control->max_qp_delta_depth) {
if (depth <= state->frame->max_qp_delta_depth) {
*prev_qp = -1;
}
@ -665,7 +675,7 @@ static void encoder_state_worker_encode_lcu_search(void * opaque)
encoder_state_recdata_to_bufs(state, lcu, state->tile->hor_buf_search, state->tile->ver_buf_search);
if (encoder->max_qp_delta_depth >= 0) {
if (state->frame->max_qp_delta_depth >= 0) {
int last_qp = state->last_qp;
int prev_qp = -1;
set_cu_qps(state, lcu->position_px.x, lcu->position_px.y, 0, &last_qp, &prev_qp);
@ -716,6 +726,7 @@ static void encoder_state_worker_encode_lcu_bitstream(void * opaque)
const uint64_t existing_bits = kvz_bitstream_tell(&state->stream);
//Encode SAO
state->cabac.update = 1;
if (encoder->cfg.sao_type) {
encode_sao(state, lcu->position.x, lcu->position.y, &frame->sao_luma[lcu->position.y * frame->width_in_lcu + lcu->position.x], &frame->sao_chroma[lcu->position.y * frame->width_in_lcu + lcu->position.x]);
}
@ -771,6 +782,7 @@ static void encoder_state_worker_encode_lcu_bitstream(void * opaque)
kvz_cabac_start(&state->cabac);
}
}
state->cabac.update = 0;
pthread_mutex_lock(&state->frame->rc_lock);
@ -1421,6 +1433,154 @@ static bool edge_lcu(int id, int lcus_x, int lcus_y, bool xdiv64, bool ydiv64)
}
}
/**
* \brief Return weight for 360 degree ERP video
*
* Returns the scaling factor of area from equirectangular projection to
* spherical surface.
*
* \param y y-coordinate of the pixel
* \param h height of the picture
*/
static double ws_weight(int y, int h)
{
return cos((y - 0.5 * h + 0.5) * (M_PI / h));
}
/**
 * \brief Update ROI QPs for 360 video with equirectangular projection.
 *
 * Replaces frame->roi with a delta QP map that has one row per LCU row.
 * Each row gets a delta QP derived from the WS-PSNR weight of that LCU row.
 * If the frame already has a ROI map, its values are resampled vertically,
 * the delta is added to each of them (clipped to [-51, 51]) and the old
 * array is freed. Otherwise the new map is a single column.
 *
 * If the new map cannot be allocated, an error is printed and frame->roi is
 * left unchanged.
 *
 * \param encoder   encoder control; supplies the picture height in pixels
 *                  and in LCUs
 * \param frame     frame that will have the ROI map
 */
static void init_erp_aqp_roi(const encoder_control_t *encoder, kvz_picture *frame)
{
  int8_t *orig_roi = frame->roi.roi_array;
  int32_t orig_width = frame->roi.width;
  int32_t orig_height = frame->roi.height;

  // Update ROI with WS-PSNR delta QPs.
  int new_height = encoder->in.height_in_lcu;
  int new_width = orig_roi ? orig_width : 1;
  // Multiply as size_t so that the element count cannot overflow int.
  int8_t *new_array = calloc((size_t)new_width * (size_t)new_height, sizeof(orig_roi[0]));
  if (!new_array) {
    // Without this check the loops below would write through NULL.
    fprintf(stderr, "Failed to allocate memory for ROI table.\n");
    return;
  }

  int frame_height = encoder->in.real_height;

  // Sum of the weights of every pixel row, used for normalization.
  double total_weight = 0.0;
  for (int y = 0; y < frame_height; y++) {
    total_weight += ws_weight(y, frame_height);
  }

  for (int y_lcu = 0; y_lcu < new_height; y_lcu++) {
    int y_orig = LCU_WIDTH * y_lcu;
    // The last LCU row may be only partially inside the picture.
    int lcu_height = MIN(LCU_WIDTH, frame_height - y_orig);

    double lcu_weight = 0.0;
    for (int y = y_orig; y < y_orig + lcu_height; y++) {
      lcu_weight += ws_weight(y, frame_height);
    }
    // Normalize: average weight of this LCU row relative to the average
    // weight of the whole picture.
    lcu_weight = (lcu_weight * frame_height) / (total_weight * lcu_height);

    int8_t qp_delta = round(-ERP_AQP_STRENGTH * log2(lcu_weight));

    if (orig_roi) {
      // If a ROI array already exists, we copy the existing values to the
      // new array while adding qp_delta to each.
      int y_roi = y_lcu * orig_height / new_height;
      for (int x = 0; x < new_width; x++) {
        new_array[x + y_lcu * new_width] =
          CLIP(-51, 51, orig_roi[x + y_roi * new_width] + qp_delta);
      }
    } else {
      // Otherwise, simply write qp_delta to the ROI array.
      new_array[y_lcu] = qp_delta;
    }
  }

  // Update new values
  frame->roi.width = new_width;
  frame->roi.height = new_height;
  frame->roi.roi_array = new_array;
  // The frame now points at new_array, so the old map can be released.
  FREE_POINTER(orig_roi);
}
/**
 * \brief Read the next ROI (delta QP) map from a ROI file into a frame.
 *
 * A ROI frame in the file is: width, height, then width * height delta QP
 * values in raster order. In text format all values are decimal integers
 * and the delta QPs are clipped to [-51, 51]. In binary format width and
 * height are read as two 32-bit integers and the delta QPs as one byte each.
 *
 * On success the previous frame->roi.roi_array is freed and replaced.
 * On failure an error is printed and assert(0) fires; if asserts are
 * compiled out, the function returns with frame->roi left unchanged.
 *
 * \param frame   picture whose roi field receives the map
 * \param file    open ROI file; it is not closed here because the encoder
 *                control that opened it also closes it
 * \param format  KVZ_ROI_TXT or KVZ_ROI_BIN
 */
static void next_roi_frame_from_file(kvz_picture *frame, FILE *file, enum kvz_roi_format format) {
  // The ROI description is as follows:
  // First number is width, second number is height,
  // then follows width * height number of dqp values.

  int8_t *dqp_array = NULL;
  int width = 0;
  int height = 0;

  // Rewind the (seekable) ROI file when end of file is reached.
  // Allows a single ROI frame to be used for a whole sequence
  // and looping with --loop-input. Skips possible whitespace.
  if (ftell(file) != -1L) {
    int c = fgetc(file);
    while (format == KVZ_ROI_TXT && isspace(c)) c = fgetc(file);
    ungetc(c, file);
    if (c == EOF) rewind(file);
  }

  // An unknown format counts as a failed read.
  bool failed = true;
  if (format == KVZ_ROI_TXT) {
    // fscanf returns EOF (non-zero) at end of input, so the result has to
    // be compared against the number of expected conversions.
    failed = fscanf(file, "%d", &width) != 1 || fscanf(file, "%d", &height) != 1;
  } else if (format == KVZ_ROI_BIN) {
    // Read into a fixed-width buffer instead of relying on the layout of
    // frame->roi and on the size of int.
    int32_t dims[2] = { 0, 0 };
    failed = fread(dims, sizeof(dims[0]), 2, file) != 2;
    width = dims[0];
    height = dims[1];
  }

  if (failed) {
    fprintf(stderr, "Failed to read ROI size.\n");
    goto read_failed;
  }

  if (width <= 0 || height <= 0) {
    fprintf(stderr, "Invalid ROI size: %dx%d.\n", width, height);
    goto read_failed;
  }

  if (width > 10000 || height > 10000) {
    fprintf(stderr, "ROI dimensions exceed arbitrary value of 10000.\n");
    goto read_failed;
  }

  // Both dimensions are in [1, 10000], so the product fits in size_t.
  const size_t size = (size_t)width * (size_t)height;
  dqp_array = calloc(size, sizeof(frame->roi.roi_array[0]));
  if (!dqp_array) {
    fprintf(stderr, "Failed to allocate memory for ROI table.\n");
    goto read_failed;
  }

  if (format == KVZ_ROI_TXT) {
    for (size_t i = 0; i < size; ++i) {
      int number; // Need a pointer to int for fscanf
      if (fscanf(file, "%d", &number) != 1) {
        fprintf(stderr, "Reading ROI file failed.\n");
        goto read_failed;
      }
      dqp_array[i] = CLIP(-51, 51, number);
    }
  } else {
    // format == KVZ_ROI_BIN: one signed byte per delta QP, stored as-is.
    if (fread(dqp_array, 1, size, file) != size) {
      fprintf(stderr, "Reading ROI file failed.\n");
      goto read_failed;
    }
  }

  // Everything was read; only now replace the frame's ROI map.
  FREE_POINTER(frame->roi.roi_array);
  frame->roi.width = width;
  frame->roi.height = height;
  frame->roi.roi_array = dqp_array;
  return;

read_failed:
  // The stream is deliberately left open: closing it here would leave its
  // owner with a dangling FILE pointer that it would later close again.
  free(dqp_array);
  assert(0);
}
static void encoder_state_init_new_frame(encoder_state_t * const state, kvz_picture* frame) {
assert(state->type == ENCODER_STATE_TYPE_MAIN);
@ -1437,6 +1597,21 @@ static void encoder_state_init_new_frame(encoder_state_t * const state, kvz_pict
memset(state->tile->frame->hmvp_size, 0, sizeof(uint8_t) * state->tile->frame->height_in_lcu);
}
// ROI / delta QP maps
if (frame->roi.roi_array && cfg->roi.file_path) {
assert(0 && "Conflict: Other ROI data was supplied when a ROI file was specified.");
}
// Read frame from the file. If no file is specified,
// ROI data should be already set by the application.
if (cfg->roi.file_path) {
next_roi_frame_from_file(frame, state->encoder_control->roi_file, cfg->roi.format);
}
if (cfg->erp_aqp) {
init_erp_aqp_roi(state->encoder_control, state->tile->frame->source);
}
// Variance adaptive quantization
if (cfg->vaq) {
const bool has_chroma = state->encoder_control->chroma_format != KVZ_CSP_400;
@ -1523,6 +1698,12 @@ static void encoder_state_init_new_frame(encoder_state_t * const state, kvz_pict
}
// Variance adaptive quantization - END
if (cfg->target_bitrate > 0 || frame->roi.roi_array || cfg->set_qp_in_cu || cfg->vaq) {
state->frame->max_qp_delta_depth = 0;
} else {
state->frame->max_qp_delta_depth = -1;
}
// Use this flag to handle closed gop irap picture selection.
// If set to true, irap is already set and we avoid
// setting it based on the intra period
@ -1834,10 +2015,9 @@ lcu_stats_t* kvz_get_lcu_stats(encoder_state_t *state, int lcu_x, int lcu_y)
int kvz_get_cu_ref_qp(const encoder_state_t *state, int x, int y, int last_qp)
{
const encoder_control_t *ctrl = state->encoder_control;
const cu_array_t *cua = state->tile->frame->cu_array;
// Quantization group width
const int qg_width = LCU_WIDTH >> MIN(ctrl->max_qp_delta_depth, kvz_cu_array_at_const(cua, x, y)->depth);
const int qg_width = LCU_WIDTH >> MIN(state->frame->max_qp_delta_depth, kvz_cu_array_at_const(cua, x, y)->depth);
// Coordinates of the top-left corner of the quantization group
const int x_qg = x & ~(qg_width - 1);

View file

@ -179,6 +179,8 @@ typedef struct encoder_state_config_frame_t {
*/
double *aq_offsets;
int8_t max_qp_delta_depth;
/**
* \brief Whether next NAL is the first NAL in the access unit.
*/
@ -320,6 +322,7 @@ typedef struct encoder_state_t {
bitstream_t stream;
cabac_data_t cabac;
cabac_data_t search_cabac;
uint32_t stats_bitstream_length; //Bitstream length written in bytes
@ -402,10 +405,10 @@ static INLINE bool encoder_state_must_write_vps(const encoder_state_t *state)
*/
static INLINE bool is_last_cu_in_qg(const encoder_state_t *state, int x, int y, int depth)
{
if (state->encoder_control->max_qp_delta_depth < 0) return false;
if (state->frame->max_qp_delta_depth < 0) return false;
const int cu_width = LCU_WIDTH >> depth;
const int qg_width = LCU_WIDTH >> state->encoder_control->max_qp_delta_depth;
const int qg_width = LCU_WIDTH >> state->frame->max_qp_delta_depth;
const int right = x + cu_width;
const int bottom = y + cu_width;
return (right % qg_width == 0 || right >= state->tile->frame->width) &&

View file

@ -40,7 +40,7 @@ static uint16_t to_q88(float f)
return (uint16_t)(f * 256.0f + 0.5f);
}
static uint64_t to_4xq88(const float f[4])
static uint64_t to_4xq88(const double f[4])
{
int i;
uint64_t result = 0;
@ -58,9 +58,9 @@ int kvz_fast_coeff_table_parse(fast_coeff_table_t *fast_coeff_table, FILE *fast_
uint64_t *wts_by_qp = fast_coeff_table->wts_by_qp;
for (i = 0; i < MAX_FAST_COEFF_COST_QP; i++) {
float curr_wts[4];
double curr_wts[4];
if (fscanf(fast_coeff_table_f, "%f %f %f %f\n", curr_wts + 0,
if (fscanf(fast_coeff_table_f, "%lf %lf %lf %lf\n", curr_wts + 0,
curr_wts + 1,
curr_wts + 2,
curr_wts + 3) != 4) {

View file

@ -45,7 +45,7 @@ typedef struct {
// Weights for 4 buckets (coeff 0, coeff 1, coeff 2, coeff >= 3), for QPs from
// 0 to MAX_FAST_COEFF_COST_QP
static const float default_fast_coeff_cost_wts[][4] = {
static const double default_fast_coeff_cost_wts[][4] = {
// Just extend it by stretching the first actual values..
{0.164240f, 4.161530f, 3.509033f, 6.928047f},
{0.164240f, 4.161530f, 3.509033f, 6.928047f},

View file

@ -339,7 +339,7 @@ static bool is_on_8x8_grid(int x, int y, edge_dir dir)
static int8_t get_qp_y_pred(const encoder_state_t* state, int x, int y, edge_dir dir)
{
if (state->encoder_control->max_qp_delta_depth < 0) {
if (state->frame->max_qp_delta_depth < 0) {
return state->qp;
}

View file

@ -106,6 +106,10 @@ kvz_picture * kvz_image_alloc(enum kvz_chroma_format chroma_format, const int32_
im->interlacing = KVZ_INTERLACING_NONE;
im->roi.roi_array = NULL;
im->roi.width = 0;
im->roi.height = 0;
return im;
}
@ -132,6 +136,7 @@ void kvz_image_free(kvz_picture *const im)
kvz_image_free(im->base_image);
} else {
free(im->fulldata_buf);
if (im->roi.roi_array) FREE_POINTER(im->roi.roi_array);
}
// Make sure freed data won't be used.
@ -192,6 +197,8 @@ kvz_picture *kvz_image_make_subimage(kvz_picture *const orig_image,
im->pts = 0;
im->dts = 0;
im->roi = orig_image->roi;
return im;
}

View file

@ -1290,7 +1290,7 @@ static void get_mv_cand_from_candidates(const encoder_state_t * const state,
int32_t width,
int32_t height,
const merge_candidates_t *merge_cand,
const cu_info_t *cur_cu,
const cu_info_t * const cur_cu,
int8_t reflist,
mv_t mv_cand[2][2])
{
@ -1396,7 +1396,7 @@ void kvz_inter_get_mv_cand(const encoder_state_t * const state,
int32_t width,
int32_t height,
mv_t mv_cand[2][2],
cu_info_t* cur_cu,
const cu_info_t * const cur_cu,
lcu_t *lcu,
int8_t reflist)
{

View file

@ -96,7 +96,7 @@ void kvz_inter_get_mv_cand(const encoder_state_t * const state,
int32_t width,
int32_t height,
mv_t mv_cand[2][2],
cu_info_t* cur_cu,
const cu_info_t* cur_cu,
lcu_t *lcu,
int8_t reflist);

View file

@ -267,6 +267,12 @@ enum kvz_amvr_resolution
KVZ_IMV_HPEL = 3
};
/**
 * \brief Format of the ROI (delta QP) file.
 *
 * The numeric values are used as an index when selecting the fopen mode
 * for the ROI file, so they must stay 0 and 1.
 */
enum kvz_roi_format
{
KVZ_ROI_TXT = 0, //!< Text file: width, height and delta QPs as decimal integers.
KVZ_ROI_BIN = 1 //!< Binary file: two 32-bit integers (width, height), then one byte per delta QP.
};
// Map from input format to chroma format.
#define KVZ_FORMAT2CSP(format) ((enum kvz_chroma_format)format)
@ -410,10 +416,9 @@ typedef struct kvz_config
int32_t implicit_rdpcm; /*!< \brief Enable implicit residual DPCM. */
struct {
int32_t width;
int32_t height;
int8_t *dqps;
} roi; /*!< \since 3.14.0 \brief Map of delta QPs for region of interest coding. */
char *file_path;
enum kvz_roi_format format;
} roi; /*!< \brief Specify delta QPs for region of interest coding. */
unsigned slices; /*!< \since 3.15.0 \brief How to map slices to frame. */
@ -526,6 +531,12 @@ typedef struct kvz_config
int8_t cclm;
int8_t amvr; /* \brief Adaptive motion vector resolution parameter */
/** \brief whether to try combining intra cus at the lower depth when search
* is not performed at said depth*/
uint8_t combine_intra_cus;
uint8_t force_inter;
} kvz_config;
/**
@ -557,6 +568,14 @@ typedef struct kvz_picture {
enum kvz_chroma_format chroma_format;
int32_t ref_pocs[16];
struct
{
int width;
int height;
int8_t *roi_array;
} roi;
} kvz_picture;
/**
@ -782,6 +801,9 @@ typedef struct kvz_api {
* the bitstream, length of the bitstream, the reconstructed frame, the
* original frame and frame info in data_out, len_out, pic_out, src_out and
* info_out, respectively. Otherwise, set the output parameters to NULL.
*
* Region of interest (ROI) / delta QP map can be specified in the input
* picture's ROI field but only when a ROI file is not used.
*
* After passing all of the input frames, the caller should keep calling this
* function with pic_in set to NULL, until no more data is returned in the

View file

@ -1088,17 +1088,20 @@ void kvz_set_lcu_lambda_and_qp(encoder_state_t * const state,
const encoder_control_t * const ctrl = state->encoder_control;
lcu_stats_t *lcu = kvz_get_lcu_stats(state, pos.x, pos.y);
if (ctrl->cfg.roi.dqps != NULL) {
vector2d_t lcu = {
if (state->tile->frame->source->roi.roi_array) {
vector2d_t lcu_vec = {
pos.x + state->tile->lcu_offset_x,
pos.y + state->tile->lcu_offset_y
};
vector2d_t roi = {
lcu.x * ctrl->cfg.roi.width / ctrl->in.width_in_lcu,
lcu.y * ctrl->cfg.roi.height / ctrl->in.height_in_lcu
lcu_vec.x * state->tile->frame->source->roi.width / ctrl->in.width_in_lcu,
lcu_vec.y * state->tile->frame->source->roi.height / ctrl->in.height_in_lcu
};
int roi_index = roi.x + roi.y * ctrl->cfg.roi.width;
int dqp = ctrl->cfg.roi.dqps[roi_index];
int roi_index = roi.x + roi.y * state->tile->frame->source->roi.width;
int dqp = state->tile->frame->source->roi.roi_array[roi_index];
if(dqp != 0) {
pos.x = 0;
}
state->qp = CLIP_TO_QP(state->frame->QP + dqp);
state->lambda = qp_to_lambda(state, state->qp);
state->lambda_sqrt = sqrt(state->lambda);

View file

@ -315,12 +315,12 @@ static INLINE uint32_t get_coeff_cabac_cost(
// Take a copy of the CABAC so that we don't overwrite the contexts when
// counting the bits.
cabac_data_t cabac_copy;
memcpy(&cabac_copy, &state->cabac, sizeof(cabac_copy));
memcpy(&cabac_copy, &state->search_cabac, sizeof(cabac_copy));
// Clear bytes and bits and set mode to "count"
cabac_copy.only_count = 1;
cabac_copy.num_buffered_bytes = 0;
cabac_copy.bits_left = 23;
int num_buffered_bytes = cabac_copy.num_buffered_bytes;
int bits_left = cabac_copy.bits_left;
// Execute the coding function.
// It is safe to drop the const modifier since state won't be modified
@ -343,8 +343,10 @@ static INLINE uint32_t get_coeff_cabac_cost(
type,
scan_mode);
}
return (23 - cabac_copy.bits_left) + (cabac_copy.num_buffered_bytes << 3);
if(cabac_copy.update) {
memcpy((cabac_data_t *)&state->search_cabac, &cabac_copy, sizeof(cabac_copy));
}
return (bits_left - cabac_copy.bits_left) + ((cabac_copy.num_buffered_bytes - num_buffered_bytes) << 3);
}
static INLINE void save_ccc(int qp, const coeff_t *coeff, int32_t size, uint32_t ccc)
@ -1741,37 +1743,33 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
/**
* Calculate cost of actual motion vectors using CABAC coding
*/
uint32_t kvz_get_mvd_coding_cost_cabac(const encoder_state_t *state,
const cabac_data_t* cabac,
const int32_t mvd_hor,
const int32_t mvd_ver)
double kvz_get_mvd_coding_cost_cabac(const encoder_state_t* state,
const cabac_data_t* cabac,
const int32_t mvd_hor,
const int32_t mvd_ver)
{
cabac_data_t cabac_copy = *cabac;
cabac_copy.only_count = 1;
double bits = 0;
// It is safe to drop const here because cabac->only_count is set.
kvz_encode_mvd((encoder_state_t*) state, &cabac_copy, mvd_hor, mvd_ver);
kvz_encode_mvd((encoder_state_t*) state, &cabac_copy, mvd_hor, mvd_ver, &bits);
uint32_t bitcost =
((23 - cabac_copy.bits_left) + (cabac_copy.num_buffered_bytes << 3)) -
((23 - cabac->bits_left) + (cabac->num_buffered_bytes << 3));
return bitcost;
return bits;
}
/** MVD cost calculation with CABAC
* \returns int
* Calculates Motion Vector cost and related costs using CABAC coding
*/
uint32_t kvz_calc_mvd_cost_cabac(const encoder_state_t * state,
int x,
int y,
int mv_shift,
mv_t mv_cand[2][2],
inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
int16_t num_cand,
int32_t ref_idx,
uint32_t *bitcost)
double kvz_calc_mvd_cost_cabac(const encoder_state_t * state,
int x,
int y,
int mv_shift,
mv_t mv_cand[2][2],
inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
int16_t num_cand,
int32_t ref_idx,
double* bitcost)
{
cabac_data_t state_cabac_copy;
cabac_data_t* cabac;
@ -1798,14 +1796,13 @@ uint32_t kvz_calc_mvd_cost_cabac(const encoder_state_t * state,
}
// Store cabac state and contexts
memcpy(&state_cabac_copy, &state->cabac, sizeof(cabac_data_t));
memcpy(&state_cabac_copy, &state->search_cabac, sizeof(cabac_data_t));
// Clear bytes and bits and set mode to "count"
state_cabac_copy.only_count = 1;
state_cabac_copy.num_buffered_bytes = 0;
state_cabac_copy.bits_left = 23;
cabac = &state_cabac_copy;
double bits = 0;
if (!merged) {
vector2d_t mvd1 = {
@ -1820,8 +1817,8 @@ uint32_t kvz_calc_mvd_cost_cabac(const encoder_state_t * state,
kvz_change_precision_vector2d(INTERNAL_MV_PREC, 2, &mvd1);
kvz_change_precision_vector2d(INTERNAL_MV_PREC, 2, &mvd2);
uint32_t cand1_cost = kvz_get_mvd_coding_cost_cabac(state, cabac, mvd1.x, mvd1.y);
uint32_t cand2_cost = kvz_get_mvd_coding_cost_cabac(state, cabac, mvd2.x, mvd2.y);
double cand1_cost = kvz_get_mvd_coding_cost_cabac(state, cabac, mvd1.x, mvd1.y);
double cand2_cost = kvz_get_mvd_coding_cost_cabac(state, cabac, mvd2.x, mvd2.y);
// Select candidate 1 if it has lower cost
if (cand2_cost < cand1_cost) {
@ -1834,7 +1831,7 @@ uint32_t kvz_calc_mvd_cost_cabac(const encoder_state_t * state,
cabac->cur_ctx = &(cabac->ctx.cu_merge_flag_ext_model);
CABAC_BIN(cabac, merged, "MergeFlag");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_merge_flag_ext_model), merged, bits, "MergeFlag");
num_cand = state->encoder_control->cfg.max_merge;
if (merged) {
if (num_cand > 1) {
@ -1842,10 +1839,10 @@ uint32_t kvz_calc_mvd_cost_cabac(const encoder_state_t * state,
for (ui = 0; ui < num_cand - 1; ui++) {
int32_t symbol = (ui != merge_idx);
if (ui == 0) {
cabac->cur_ctx = &(cabac->ctx.cu_merge_idx_ext_model);
CABAC_BIN(cabac, symbol, "MergeIndex");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_merge_idx_ext_model), symbol, bits, "MergeIndex");
} else {
CABAC_BIN_EP(cabac, symbol, "MergeIndex");
bits += 1;
}
if (symbol == 0) break;
}
@ -1868,24 +1865,23 @@ uint32_t kvz_calc_mvd_cost_cabac(const encoder_state_t * state,
if (ref_list[ref_list_idx] > 1) {
// parseRefFrmIdx
int32_t ref_frame = ref_idx;
cabac->cur_ctx = &(cabac->ctx.cu_ref_pic_model[0]);
CABAC_BIN(cabac, (ref_frame != 0), "ref_idx_lX");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_ref_pic_model[0]), (ref_frame != 0), bits, "ref_idx_lX");
if (ref_frame > 0) {
int32_t i;
int32_t ref_num = ref_list[ref_list_idx] - 2;
cabac->cur_ctx = &(cabac->ctx.cu_ref_pic_model[1]);
ref_frame--;
for (i = 0; i < ref_num; ++i) {
const uint32_t symbol = (i == ref_frame) ? 0 : 1;
if (i == 0) {
CABAC_BIN(cabac, symbol, "ref_idx_lX");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_ref_pic_model[1]), symbol, bits, "ref_idx_lX");
} else {
CABAC_BIN_EP(cabac, symbol, "ref_idx_lX");
bits += 1;
}
if (symbol == 0) break;
}
@ -1895,7 +1891,7 @@ uint32_t kvz_calc_mvd_cost_cabac(const encoder_state_t * state,
// ToDo: Bidir vector support
if (!(state->frame->ref_list == REF_PIC_LIST_1 && /*cur_cu->inter.mv_dir == 3*/ 0)) {
// It is safe to drop const here because cabac->only_count is set.
kvz_encode_mvd((encoder_state_t*) state, cabac, mvd.x, mvd.y);
kvz_encode_mvd((encoder_state_t*) state, cabac, mvd.x, mvd.y, &bits);
}
// Signal which candidate MV to use
@ -1905,10 +1901,10 @@ uint32_t kvz_calc_mvd_cost_cabac(const encoder_state_t * state,
}
}
*bitcost = (23 - state_cabac_copy.bits_left) + (state_cabac_copy.num_buffered_bytes << 3);
*bitcost = bits;
// Store bitcost before restoring cabac
return *bitcost * (uint32_t)(state->lambda_sqrt + 0.5);
return *bitcost * state->lambda_sqrt;
}
void kvz_close_rdcost_outfiles(void)

View file

@ -77,10 +77,10 @@ uint32_t kvz_get_coded_level(encoder_state_t * state, double* coded_cost, double
kvz_mvd_cost_func kvz_calc_mvd_cost_cabac;
uint32_t kvz_get_mvd_coding_cost_cabac(const encoder_state_t *state,
const cabac_data_t* cabac,
int32_t mvd_hor,
int32_t mvd_ver);
double kvz_get_mvd_coding_cost_cabac(const encoder_state_t* state,
const cabac_data_t* cabac,
int32_t mvd_hor,
int32_t mvd_ver);
// Number of fixed point fractional bits used in the fractional bit table.
#define CTX_FRAC_BITS 15
@ -90,8 +90,5 @@ uint32_t kvz_get_mvd_coding_cost_cabac(const encoder_state_t *state,
extern const uint32_t kvz_entropy_bits[512];
#define CTX_ENTROPY_BITS(ctx, val) kvz_entropy_bits[(CTX_STATE(ctx)<<1) ^ (val)]
// Floating point fractional bits, derived from kvz_entropy_bits
extern const float kvz_f_entropy_bits[512];
#define CTX_ENTROPY_FBITS(ctx, val) kvz_f_entropy_bits[(CTX_STATE(ctx)<<1) ^ (val)]
#endif

View file

@ -49,63 +49,64 @@ static void init_sao_info(sao_info_t *sao) {
}
static float sao_mode_bits_none(const encoder_state_t * const state, sao_info_t *sao_top, sao_info_t *sao_left)
static double sao_mode_bits_none(const encoder_state_t * const state, sao_info_t *sao_top, sao_info_t *sao_left)
{
float mode_bits = 0.0;
const cabac_data_t * const cabac = &state->cabac;
const cabac_ctx_t *ctx = NULL;
double mode_bits = 0.0;
cabac_data_t * cabac = (cabac_data_t*)&state->search_cabac;
cabac_ctx_t *ctx = NULL;
// FL coded merges.
if (sao_left != NULL) {
ctx = &(cabac->ctx.sao_merge_flag_model);
mode_bits += CTX_ENTROPY_FBITS(ctx, 0);
CABAC_FBITS_UPDATE(cabac, ctx, 0, mode_bits, "sao_merge_flag");
}
if (sao_top != NULL) {
ctx = &(cabac->ctx.sao_merge_flag_model);
mode_bits += CTX_ENTROPY_FBITS(ctx, 0);
CABAC_FBITS_UPDATE(cabac, ctx, 0, mode_bits, "sao_merge_flag");
}
// TR coded type_idx_, none = 0
ctx = &(cabac->ctx.sao_type_idx_model);
mode_bits += CTX_ENTROPY_FBITS(ctx, 0);
CABAC_FBITS_UPDATE(cabac, ctx, 0, mode_bits, "sao_type");
return mode_bits;
}
static float sao_mode_bits_merge(const encoder_state_t * const state,
static double sao_mode_bits_merge(const encoder_state_t * const state,
int8_t merge_cand) {
float mode_bits = 0.0;
const cabac_data_t * const cabac = &state->cabac;
const cabac_ctx_t *ctx = NULL;
double mode_bits = 0.0;
cabac_data_t * cabac = (cabac_data_t*)&state->search_cabac;
cabac_ctx_t *ctx = NULL;
// FL coded merges.
ctx = &(cabac->ctx.sao_merge_flag_model);
mode_bits += CTX_ENTROPY_FBITS(ctx, merge_cand == 1);
CABAC_FBITS_UPDATE(cabac, ctx, merge_cand == 1, mode_bits, "sao_merge_flag");
if (merge_cand == 1) return mode_bits;
mode_bits += CTX_ENTROPY_FBITS(ctx, merge_cand == 2);
CABAC_FBITS_UPDATE(cabac, ctx, merge_cand == 2, mode_bits, "sao_merge_flag");
return mode_bits;
}
static float sao_mode_bits_edge(const encoder_state_t * const state,
static double sao_mode_bits_edge(const encoder_state_t * const state,
int edge_class, int offsets[NUM_SAO_EDGE_CATEGORIES],
sao_info_t *sao_top, sao_info_t *sao_left, unsigned buf_cnt)
{
float mode_bits = 0.0;
const cabac_data_t * const cabac = &state->cabac;
const cabac_ctx_t *ctx = NULL;
double mode_bits = 0.0;
cabac_data_t * cabac = (cabac_data_t*)&state->search_cabac;
cabac_ctx_t *ctx = NULL;
// FL coded merges.
if (sao_left != NULL) {
ctx = &(cabac->ctx.sao_merge_flag_model);
mode_bits += CTX_ENTROPY_FBITS(ctx, 0);
ctx = &(cabac->ctx.sao_merge_flag_model);
CABAC_FBITS_UPDATE(cabac, ctx, 0, mode_bits, "sao_merge_flag");
}
if (sao_top != NULL) {
ctx = &(cabac->ctx.sao_merge_flag_model);
mode_bits += CTX_ENTROPY_FBITS(ctx, 0);
CABAC_FBITS_UPDATE(cabac, ctx, 0, mode_bits, "sao_merge_flag");
}
// TR coded type_idx_, edge = 2 = cMax
ctx = &(cabac->ctx.sao_type_idx_model);
mode_bits += CTX_ENTROPY_FBITS(ctx, 1) + 1.0;
CABAC_FBITS_UPDATE(cabac, ctx, 1, mode_bits, "sao_type");
mode_bits += 1.0;
// TR coded offsets.
for (unsigned buf_index = 0; buf_index < buf_cnt; buf_index++) {
@ -126,26 +127,27 @@ static float sao_mode_bits_edge(const encoder_state_t * const state,
}
static float sao_mode_bits_band(const encoder_state_t * const state,
static double sao_mode_bits_band(const encoder_state_t * const state,
int band_position[2], int offsets[10],
sao_info_t *sao_top, sao_info_t *sao_left, unsigned buf_cnt)
{
float mode_bits = 0.0;
const cabac_data_t * const cabac = &state->cabac;
const cabac_ctx_t *ctx = NULL;
double mode_bits = 0.0;
cabac_data_t * cabac = (cabac_data_t*)&state->search_cabac;
cabac_ctx_t *ctx = NULL;
// FL coded merges.
if (sao_left != NULL) {
ctx = &(cabac->ctx.sao_merge_flag_model);
mode_bits += CTX_ENTROPY_FBITS(ctx, 0);
CABAC_FBITS_UPDATE(cabac, ctx, 0, mode_bits, "sao_merge_flag");
}
if (sao_top != NULL) {
ctx = &(cabac->ctx.sao_merge_flag_model);
mode_bits += CTX_ENTROPY_FBITS(ctx, 0);
CABAC_FBITS_UPDATE(cabac, ctx, 0, mode_bits, "sao_merge_flag");
}
// TR coded sao_type_idx_, band = 1
ctx = &(cabac->ctx.sao_type_idx_model);
mode_bits += CTX_ENTROPY_FBITS(ctx, 1) + 1.0;
CABAC_FBITS_UPDATE(cabac, ctx, 1, mode_bits, "sao_type");
mode_bits += 1.0;
// TR coded offsets and possible FL coded offset signs.
for (unsigned buf_index = 0; buf_index < buf_cnt; buf_index++)
@ -552,7 +554,8 @@ static void sao_search_best_mode(const encoder_state_t * const state, const kvz_
// Choose between SAO and doing nothing, taking into account the
// rate-distortion cost of coding do nothing.
{
int cost_of_nothing = (int)(sao_mode_bits_none(state, sao_top, sao_left) * state->lambda + 0.5);
float mode_bits_none = sao_mode_bits_none(state, sao_top, sao_left);
int cost_of_nothing = (int)(mode_bits_none * state->lambda + 0.5);
if (sao_out->ddistortion >= cost_of_nothing) {
sao_out->type = SAO_TYPE_NONE;
merge_cost[0] = cost_of_nothing;

View file

@ -37,6 +37,7 @@
#include "cabac.h"
#include "encoder.h"
#include "encode_coding_tree.h"
#include "imagelist.h"
#include "inter.h"
#include "intra.h"
@ -59,14 +60,6 @@
// Cost threshold for doing intra search in inter frames with --rd=0.
static const int INTRA_THRESHOLD = 8;
// Modify weight of luma SSD.
#ifndef LUMA_MULT
# define LUMA_MULT 0.8
#endif
// Modify weight of chroma SSD.
#ifndef CHROMA_MULT
# define CHROMA_MULT 1.5
#endif
static INLINE void copy_cu_info(int x_local, int y_local, int width, lcu_t *from, lcu_t *to)
{
@ -225,16 +218,16 @@ static double cu_zero_coeff_cost(const encoder_state_t *state, lcu_t *work_tree,
const int chroma_index = (y_local / 2) * LCU_WIDTH_C + (x_local / 2);
double ssd = 0.0;
ssd += LUMA_MULT * kvz_pixels_calc_ssd(
ssd += KVZ_LUMA_MULT * kvz_pixels_calc_ssd(
&lcu->ref.y[luma_index], &lcu->rec.y[luma_index],
LCU_WIDTH, LCU_WIDTH, cu_width
);
if (x % 8 == 0 && y % 8 == 0 && state->encoder_control->chroma_format != KVZ_CSP_400) {
ssd += CHROMA_MULT * kvz_pixels_calc_ssd(
ssd += KVZ_CHROMA_MULT * kvz_pixels_calc_ssd(
&lcu->ref.u[chroma_index], &lcu->rec.u[chroma_index],
LCU_WIDTH_C, LCU_WIDTH_C, cu_width / 2
);
ssd += CHROMA_MULT * kvz_pixels_calc_ssd(
ssd += KVZ_CHROMA_MULT * kvz_pixels_calc_ssd(
&lcu->ref.v[chroma_index], &lcu->rec.v[chroma_index],
LCU_WIDTH_C, LCU_WIDTH_C, cu_width / 2
);
@ -294,11 +287,13 @@ static void downsample_cclm_rec(encoder_state_t *state, int x, int y, int width,
* prediction unit data needs to be coded.
*/
double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
const int x_px, const int y_px, const int depth,
const cu_info_t *const pred_cu,
lcu_t *const lcu)
const int x_px, const int y_px, const int depth,
const cu_info_t *const pred_cu,
lcu_t *const lcu)
{
const int width = LCU_WIDTH >> depth;
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0);
cabac_data_t* cabac = (cabac_data_t *)&state->search_cabac;
// cur_cu is used for TU parameters.
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
@ -324,14 +319,36 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
return sum + tr_tree_bits * state->lambda;
}
if (cabac->update && tr_cu->tr_depth == tr_cu->depth && !skip_residual_coding) {
// Because these need to be coded before the luma cbf they also need to be counted
// before the cabac state changes. However, since this branch is only executed when
// calculating the last RD cost it is not problem to include the chroma cbf costs in
// luma, because the chroma cost is calculated right after the luma cost.
// However, if we have different tr_depth, the bits cannot be written in correct
// order anyways so do not touch the chroma cbf here.
if (state->encoder_control->chroma_format != KVZ_CSP_400) {
cabac_ctx_t* cr_ctx = &(cabac->ctx.qt_cbf_model_cb[0]);
cabac->cur_ctx = cr_ctx;
int u_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U);
int v_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_V);
CABAC_FBITS_UPDATE(cabac, cr_ctx, u_is_set, tr_tree_bits, "cbf_cb_search");
cr_ctx = &(cabac->ctx.qt_cbf_model_cr[u_is_set]);
CABAC_FBITS_UPDATE(cabac, cr_ctx, v_is_set, tr_tree_bits, "cbf_cb_search");
}
}
// Add transform_tree cbf_luma bit cost.
const int is_tr_split = tr_cu->tr_depth - tr_cu->depth;
if (pred_cu->type == CU_INTRA ||
tr_depth > 0 ||
is_tr_split ||
cbf_is_set(tr_cu->cbf, depth, COLOR_U) ||
cbf_is_set(tr_cu->cbf, depth, COLOR_V))
{
const cabac_ctx_t *ctx = &(state->cabac.ctx.qt_cbf_model_luma[0]);
tr_tree_bits += CTX_ENTROPY_FBITS(ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_Y));
cabac_ctx_t *ctx = &(cabac->ctx.qt_cbf_model_luma[0]);
int is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_Y);
CABAC_FBITS_UPDATE(cabac, ctx, is_set, tr_tree_bits, "cbf_y_search");
}
// SSD between reconstruction and original
@ -343,7 +360,8 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
width);
}
{
if (!skip_residual_coding) {
int8_t luma_scan_mode = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth);
const coeff_t *coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)];
@ -351,18 +369,19 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
}
double bits = tr_tree_bits + coeff_bits;
return (double)ssd * LUMA_MULT + bits * state->lambda;
return (double)ssd * KVZ_LUMA_MULT + bits * state->lambda;
}
double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
const int x_px, const int y_px, const int depth,
cu_info_t * pred_cu,
lcu_t *const lcu)
const int x_px, const int y_px, const int depth,
cu_info_t *const pred_cu,
lcu_t *const lcu)
{
const vector2d_t lcu_px = { (x_px & ~7) / 2, (y_px & ~7) / 2 };
const int width = (depth < MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth;
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0);
double tr_tree_bits = 0;
double joint_cbcr_tr_tree_bits = 0;
@ -378,22 +397,27 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
return 0;
}
if (depth < MAX_PU_DEPTH) {
// See luma for why the second condition
if (depth < MAX_PU_DEPTH && (!state->search_cabac.update || tr_cu->tr_depth != tr_cu->depth) && !skip_residual_coding) {
const int tr_depth = depth - pred_cu->depth;
const cabac_ctx_t *ctx = &(state->cabac.ctx.qt_cbf_model_cb[0]);
cabac_data_t* cabac = (cabac_data_t*)&state->search_cabac;
cabac_ctx_t *ctx = &(cabac->ctx.qt_cbf_model_cb[0]);
cabac->cur_ctx = ctx;
if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) {
tr_tree_bits += CTX_ENTROPY_FBITS(ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_U));
int u_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U);
CABAC_FBITS_UPDATE(cabac, ctx, u_is_set, tr_tree_bits, "cbf_cb_search");
}
if(state->encoder_control->cfg.jccr) {
joint_cbcr_tr_tree_bits += CTX_ENTROPY_FBITS(ctx, pred_cu->joint_cb_cr & 1);
}
int is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U);
ctx = &(state->cabac.ctx.qt_cbf_model_cr[is_set]);
ctx = &(cabac->ctx.qt_cbf_model_cr[is_set]);
if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_V)) {
tr_tree_bits += CTX_ENTROPY_FBITS(ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_V));
int v_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_V);
CABAC_FBITS_UPDATE(cabac, ctx, v_is_set, tr_tree_bits, "cbf_cb_search");
}
if(state->encoder_control->cfg.jccr) {
ctx = &(state->cabac.ctx.qt_cbf_model_cr[pred_cu->joint_cb_cr & 1]);
ctx = &(cabac->ctx.qt_cbf_model_cr[pred_cu->joint_cb_cr & 1]);
joint_cbcr_tr_tree_bits += CTX_ENTROPY_FBITS(ctx, (pred_cu->joint_cb_cr & 2) >> 1);
}
}
@ -401,7 +425,7 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
if (tr_cu->tr_depth > depth) {
int offset = LCU_WIDTH >> (depth + 1);
int sum = 0;
double sum = 0;
sum += kvz_cu_rd_cost_chroma(state, x_px, y_px, depth + 1, pred_cu, lcu);
sum += kvz_cu_rd_cost_chroma(state, x_px + offset, y_px, depth + 1, pred_cu, lcu);
@ -448,6 +472,7 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
}
}
if (!skip_residual_coding)
{
int8_t scan_order = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth);
const int index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y);
@ -464,8 +489,8 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
double bits = tr_tree_bits + coeff_bits;
double joint_bits = joint_cbcr_tr_tree_bits + joint_coeff_bits;
double cost = (double)ssd + bits * state->c_lambda;
double joint_cost = (double)joint_ssd + joint_bits * state->c_lambda;
double cost = (double)ssd * KVZ_CHROMA_MULT + bits * state->c_lambda;
double joint_cost = (double)joint_ssd * KVZ_CHROMA_MULT + joint_bits * state->c_lambda;
if ((cost < joint_cost || !pred_cu->joint_cb_cr) || !state->encoder_control->cfg.jccr) {
pred_cu->joint_cb_cr = 0;
return cost;
@ -485,6 +510,117 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
return joint_cost;
}
/**
 * \brief Calculate the RD cost of a transform tree, luma and chroma together.
 *
 * Counts the bits of the root cbf, the chroma cbfs and the luma cbf through
 * CABAC_FBITS_UPDATE on state->search_cabac, adds the coefficient bit costs and
 * combines them with the weighted luma and chroma SSD. If the transform tree is
 * split further at this depth, the function recurses into the four quadrants.
 *
 * \param state   encoder state; search_cabac is modified through a cast even
 *                though state is const-qualified
 * \param x_px    x coordinate inside the LCU, must be in [0, LCU_WIDTH)
 * \param y_px    y coordinate inside the LCU, must be in [0, LCU_WIDTH)
 * \param depth   depth of the block being evaluated
 * \param pred_cu CU whose prediction info, cbf and skip/merge flags are used
 * \param lcu     LCU holding reference pixels, reconstruction and coefficients
 * \return luma_ssd * KVZ_LUMA_MULT + chroma_ssd * KVZ_CHROMA_MULT + bits * lambda,
 *         or for a split transform tree the sum of the four sub-block costs
 *         plus the flag bits counted at this level times lambda
 */
static double cu_rd_cost_tr_split_accurate(const encoder_state_t* const state,
const int x_px, const int y_px, const int depth,
const cu_info_t* const pred_cu,
lcu_t* const lcu) {
const int width = LCU_WIDTH >> depth;
// No residual is counted for skipped CUs or for inter CUs without any cbf set.
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0);
// tr_cu is used for TU parameters.
cu_info_t* const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
double coeff_bits = 0;
double tr_tree_bits = 0;
// Check that the coordinates are LCU-local, i.e. inside [0, LCU_WIDTH).
assert(x_px >= 0 && x_px < LCU_WIDTH);
assert(y_px >= 0 && y_px < LCU_WIDTH);
// Number of transform split levels remaining below the current depth.
const uint8_t tr_depth = tr_cu->tr_depth - depth;
const int cb_flag_u = cbf_is_set(tr_cu->cbf, depth, COLOR_U);
const int cb_flag_v = cbf_is_set(tr_cu->cbf, depth, COLOR_V);
// The cast drops the const qualifier of state so that the search CABAC can be updated.
cabac_data_t* cabac = (cabac_data_t*)&state->search_cabac;
{
int cbf = cbf_is_set_any(pred_cu->cbf, depth);
// Only need to signal coded block flag if not skipped or merged
// skip = no coded residual, merge = coded residual
if (pred_cu->type == CU_INTER && (pred_cu->part_size != SIZE_2Nx2N || !pred_cu->merged)) {
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_qt_root_cbf_model), cbf, tr_tree_bits, "rqt_root_cbf");
}
}
// Chroma cbf flags: counted at the CU's own depth, or deeper only when the
// parent level had the corresponding cbf set. The Cr context depends on the Cb flag.
if(state->encoder_control->chroma_format != KVZ_CSP_400 && !skip_residual_coding) {
if(tr_cu->depth == depth || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) {
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cb[0]), cb_flag_u, tr_tree_bits, "cbf_cb");
}
if(tr_cu->depth == depth || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_V)) {
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cr[cb_flag_u]), cb_flag_v, tr_tree_bits, "cbf_cr");
}
}
// The transform tree is split further: cost is the sum of the four quadrants
// plus the flag bits counted at this level.
if (tr_depth > 0) {
int offset = LCU_WIDTH >> (depth + 1);
double sum = 0;
sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px, depth + 1, pred_cu, lcu);
sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px, depth + 1, pred_cu, lcu);
sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px + offset, depth + 1, pred_cu, lcu);
sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu);
return sum + tr_tree_bits * state->lambda;
}
const int cb_flag_y = cbf_is_set(tr_cu->cbf, depth, COLOR_Y) ;
// Add transform_tree cbf_luma bit cost.
// is_tr_split is non-zero when this block lies below the CU's own depth.
const int is_tr_split = depth - tr_cu->depth;
if ((pred_cu->type == CU_INTRA ||
is_tr_split ||
cb_flag_u ||
cb_flag_v)
&& !skip_residual_coding)
{
cabac_ctx_t* ctx = &(cabac->ctx.qt_cbf_model_luma[!is_tr_split]);
CABAC_FBITS_UPDATE(cabac, ctx, cb_flag_y, tr_tree_bits, "cbf_y_search");
}
// SSD between reconstruction and original
// Left at zero in lossless mode.
unsigned luma_ssd = 0;
if (!state->encoder_control->cfg.lossless) {
int index = y_px * LCU_WIDTH + x_px;
luma_ssd = kvz_pixels_calc_ssd(&lcu->ref.y[index], &lcu->rec.y[index],
LCU_WIDTH, LCU_WIDTH,
width);
}
// Luma coefficient bits. NOTE(review): unlike the cbf flags above, this is
// not guarded by skip_residual_coding; confirm that is intended.
{
int8_t luma_scan_mode = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth);
const coeff_t* coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)];
coeff_bits += kvz_get_coeff_cost(state, coeffs, width, 0, luma_scan_mode, tr_cu->tr_skip);
}
// Chroma is only evaluated when chroma exists and both coordinates are
// multiples of 8 luma pixels.
unsigned chroma_ssd = 0;
if(state->encoder_control->chroma_format != KVZ_CSP_400 && x_px % 8 == 0 && y_px % 8 == 0) {
const vector2d_t lcu_px = { x_px / 2, y_px / 2 };
// NOTE(review): kvz_cu_rd_cost_chroma uses (depth < MAX_DEPTH) for the same
// width selection; confirm that <= is intended here.
const int chroma_width = (depth <= MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth;
if (!state->encoder_control->cfg.lossless) {
int index = lcu_px.y * LCU_WIDTH_C + lcu_px.x;
unsigned ssd_u = kvz_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index],
LCU_WIDTH_C, LCU_WIDTH_C,
chroma_width);
unsigned ssd_v = kvz_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.v[index],
LCU_WIDTH_C, LCU_WIDTH_C,
chroma_width);
chroma_ssd = ssd_u + ssd_v;
}
// Coefficient bits of both chroma planes, using the chroma intra mode's scan order.
{
int8_t scan_order = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth);
const unsigned index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y);
coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.u[index], chroma_width, 2, scan_order, 0);
coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.v[index], chroma_width, 2, scan_order, 0);
}
}
// Combine weighted distortion with the rate term; all bits use the luma lambda.
double bits = tr_tree_bits + coeff_bits;
return luma_ssd * KVZ_LUMA_MULT + chroma_ssd * KVZ_CHROMA_MULT + bits * state->lambda;
}
// Return estimate of bits used to code prediction mode of cur_cu.
static double calc_mode_bits(const encoder_state_t *state,
@ -518,6 +654,7 @@ static double calc_mode_bits(const encoder_state_t *state,
}
// TODO: replace usages of this by the kvz_sort_indices_by_cost function.
/**
* \brief Sort modes and costs to ascending order according to costs.
*/
@ -567,6 +704,23 @@ void kvz_sort_modes_intra_luma(int8_t *__restrict modes, int8_t *__restrict traf
}
}
/**
 * \brief Reorder the keys of a stats map so that the costs they refer to ascend.
 *
 * Only map->keys is rewritten; map->cost is read through the keys and the other
 * arrays are left untouched. Keys with equal costs keep their relative order.
 *
 * \param map  stats map whose first map->size keys are sorted in place
 */
void kvz_sort_keys_by_cost(unit_stats_map_t *__restrict map)
{
  // Insertion sort: the lists are expected to be short, so a simple
  // quadratic algorithm is sufficient.
  for (uint8_t pos = 1; pos < map->size; ++pos) {
    const int8_t moving_key = map->keys[pos];
    const double moving_cost = map->cost[moving_key];

    // Shift every more expensive key one slot to the right until the
    // insertion point for moving_key is found.
    uint8_t slot;
    for (slot = pos; slot > 0; --slot) {
      if (!(moving_cost < map->cost[map->keys[slot - 1]])) {
        break;
      }
      map->keys[slot] = map->keys[slot - 1];
    }
    map->keys[slot] = moving_key;
  }
}
static uint8_t get_ctx_cu_split_model(const lcu_t *lcu, int x, int y, int depth)
@ -592,10 +746,12 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
const encoder_control_t* ctrl = state->encoder_control;
const videoframe_t * const frame = state->tile->frame;
int cu_width = LCU_WIDTH >> depth;
double cost = MAX_INT;
double inter_zero_coeff_cost = MAX_INT;
uint32_t inter_bitcost = MAX_INT;
double cost = MAX_DOUBLE;
double inter_zero_coeff_cost = MAX_DOUBLE;
double inter_bitcost = MAX_INT;
cu_info_t *cur_cu;
cabac_data_t pre_search_cabac;
memcpy(&pre_search_cabac, &state->search_cabac, sizeof(pre_search_cabac));
const uint32_t ctu_row = (y >> LOG2_LCU_WIDTH);
const uint32_t ctu_row_mul_five = ctu_row * MAX_NUM_HMVP_CANDS;
@ -626,7 +782,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
// Assign correct depth limit
constraint_t* constr = state->constraint;
if(constr->ml_intra_depth_ctu) {
if(constr->ml_intra_depth_ctu) {
pu_depth_intra.min = constr->ml_intra_depth_ctu->_mat_upper_depth[(x_local >> 3) + (y_local >> 3) * 8];
pu_depth_intra.max = constr->ml_intra_depth_ctu->_mat_lower_depth[(x_local >> 3) + (y_local >> 3) * 8];
}
@ -670,7 +826,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
if (can_use_inter) {
double mode_cost;
uint32_t mode_bitcost;
double mode_bitcost;
kvz_search_cu_inter(state,
x, y,
depth,
@ -721,12 +877,13 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
int32_t cu_width_intra_min = LCU_WIDTH >> pu_depth_intra.max;
bool can_use_intra =
WITHIN(depth, pu_depth_intra.min, pu_depth_intra.max) ||
(WITHIN(depth, pu_depth_intra.min, pu_depth_intra.max) ||
// When the split was forced because the CTU is partially outside
// the frame, we permit intra coding even if pu_depth_intra would
// otherwise forbid it.
(x & ~(cu_width_intra_min - 1)) + cu_width_intra_min > frame->width ||
(y & ~(cu_width_intra_min - 1)) + cu_width_intra_min > frame->height;
(y & ~(cu_width_intra_min - 1)) + cu_width_intra_min > frame->height) &&
!(state->encoder_control->cfg.force_inter && state->frame->slicetype != KVZ_SLICE_I);
if (can_use_intra && !skip_intra) {
int8_t intra_mode;
@ -737,6 +894,16 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
bool mip_transposed = false;
kvz_search_cu_intra(state, x, y, depth, lcu,
&intra_mode, &intra_trafo, &intra_cost, &multi_ref_index, &mip_flag, &mip_transposed);
#ifdef COMPLETE_PRED_MODE_BITS
      // Technically counting these bits would be correct, however counting
      // them universally degrades quality so this block is disabled by default.
      if (state->frame->slicetype != KVZ_SLICE_I) {
        double pred_mode_type_bits = 0;
        // kvz_get_skip_context takes five arguments (x, y, lcu, cu_a, predmode_ctx);
        // the last two are not needed here, so NULL is passed for both.
        const int skip_ctx = kvz_get_skip_context(x, y, lcu, NULL, NULL);
        CABAC_FBITS_UPDATE(&state->search_cabac, &state->search_cabac.ctx.cu_pred_mode_model, 1, pred_mode_type_bits, "pred_mode_flag");
        CABAC_FBITS_UPDATE(&state->search_cabac, &state->search_cabac.ctx.cu_skip_flag_model[skip_ctx], 0, pred_mode_type_bits, "skip_flag");
        intra_cost += pred_mode_type_bits * state->lambda;
      }
#endif
if (intra_cost < cost) {
cost = intra_cost;
cur_cu->type = CU_INTRA;
@ -828,9 +995,10 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
cur_cu->merged = 0;
cur_cu->skipped = 1;
// Selecting skip reduces bits needed to code the CU
if (inter_bitcost > 1) {
inter_bitcost -= 1;
}
int skip_ctx = kvz_get_skip_context(x, y, lcu, NULL, NULL);
inter_bitcost = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_skip_flag_model[skip_ctx], 1);
inter_bitcost += CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.cu_merge_idx_ext_model), cur_cu->merge_idx != 0);
inter_bitcost += cur_cu->merge_idx;
}
}
lcu_fill_inter(lcu, x_local, y_local, cu_width);
@ -839,20 +1007,26 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
}
if (cur_cu->type == CU_INTRA || cur_cu->type == CU_INTER) {
cost = kvz_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, lcu);
if (state->encoder_control->chroma_format != KVZ_CSP_400) {
cost += kvz_cu_rd_cost_chroma(state, x_local, y_local, depth, cur_cu, lcu);
double bits = 0;
cabac_data_t* cabac = &state->search_cabac;
cabac->update = 1;
if(cur_cu->type != CU_INTRA || cur_cu->part_size == SIZE_2Nx2N) {
bits += kvz_mock_encode_coding_unit(
state,
cabac,
x, y, depth,
lcu,
cur_cu);
}
double mode_bits;
if (cur_cu->type == CU_INTRA) {
mode_bits = calc_mode_bits(state, lcu, cur_cu, x, y, depth);
} else {
mode_bits = inter_bitcost;
else {
assert(0);
}
cost = bits * state->lambda;
cost += mode_bits * state->lambda;
cost += cu_rd_cost_tr_split_accurate(state, x_local, y_local, depth, cur_cu, lcu);
if (ctrl->cfg.zero_coeff_rdo && inter_zero_coeff_cost <= cost) {
cost = inter_zero_coeff_cost;
@ -874,13 +1048,14 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
cur_cu->cbf = 0;
lcu_fill_cbf(lcu, x_local, y_local, cu_width, cur_cu);
}
}
cabac->update = 0;
}
bool can_split_cu =
// If the CU is partially outside the frame, we need to split it even
// if pu_depth_intra and pu_depth_inter would not permit it.
cur_cu->type == CU_NOTSET ||
depth < pu_depth_intra.max ||
(depth < pu_depth_intra.max && !(state->encoder_control->cfg.force_inter&& state->frame->slicetype != KVZ_SLICE_I)) ||
(state->frame->slicetype != KVZ_SLICE_I &&
depth < pu_depth_inter.max);
@ -889,21 +1064,27 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
int half_cu = cu_width / 2;
double split_cost = 0.0;
int cbf = cbf_is_set_any(cur_cu->cbf, depth);
cabac_data_t post_seach_cabac;
memcpy(&post_seach_cabac, &state->search_cabac, sizeof(post_seach_cabac));
memcpy(&state->search_cabac, &pre_search_cabac, sizeof(post_seach_cabac));
state->search_cabac.update = 1;
double split_bits = 0;
if (depth < MAX_DEPTH) {
// Add cost of cu_split_flag.
uint8_t split_model = get_ctx_cu_split_model(lcu, x, y, depth);
const cabac_ctx_t *ctx = &(state->cabac.ctx.split_flag_model[split_model]);
cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda;
split_cost += CTX_ENTROPY_FBITS(ctx, 1) * state->lambda;
cabac_ctx_t *ctx = &(state->search_cabac.ctx.split_flag_model[split_model]);
CABAC_FBITS_UPDATE(&state->search_cabac, ctx, 1, split_bits, "split_search");
}
if (cur_cu->type == CU_INTRA && depth == MAX_DEPTH) {
// Add cost of intra part_size.
const cabac_ctx_t *ctx = &(state->cabac.ctx.part_size_model[0]);
cost += CTX_ENTROPY_FBITS(ctx, 1) * state->lambda; // 2Nx2N
split_cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda; // NxN
cabac_ctx_t *ctx = &(state->search_cabac.ctx.part_size_model[0]);
CABAC_FBITS_UPDATE(&state->search_cabac, ctx, 0, split_bits, "split_search");
}
state->search_cabac.update = 0;
split_cost += split_bits * state->lambda;
// If skip mode was selected for the block, skip further search.
// Skip mode means there's no coefficients in the block, so splitting
@ -925,13 +1106,29 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
// searching.
if (cur_cu->type == CU_NOTSET && depth < MAX_PU_DEPTH
&& x + cu_width <= frame->width && y + cu_width <= frame->height && 0)
&& x + cu_width <= frame->width && y + cu_width <= frame->height
&& state->encoder_control->cfg.combine_intra_cus)
{
cu_info_t *cu_d1 = LCU_GET_CU_AT_PX(&work_tree[depth + 1], x_local, y_local);
// If the best CU in depth+1 is intra and the biggest it can be, try it.
if (cu_d1->type == CU_INTRA && cu_d1->depth == depth + 1) {
cabac_data_t temp_cabac;
memcpy(&temp_cabac, &state->search_cabac, sizeof(temp_cabac));
memcpy(&state->search_cabac, &pre_search_cabac, sizeof(pre_search_cabac));
cost = 0;
double bits = 0;
if (depth < MAX_DEPTH) {
uint8_t split_model = get_ctx_cu_split_model(lcu, x, y, depth);
cabac_ctx_t* ctx = &(state->search_cabac.ctx.split_flag_model[split_model]);
CABAC_FBITS_UPDATE(&state->search_cabac, ctx, 0, bits, "no_split_search");
}
else if (depth == MAX_DEPTH && cur_cu->type == CU_INTRA) {
// Add cost of intra part_size.
cabac_ctx_t* ctx = &(state->search_cabac.ctx.part_size_model[0]);
CABAC_FBITS_UPDATE(&state->search_cabac, ctx, 1, bits, "no_split_search");
}
cur_cu->intra = cu_d1->intra;
cur_cu->type = CU_INTRA;
@ -952,19 +1149,13 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
NULL,NULL, 0, cur_cu->intra.mip_flag, cur_cu->intra.mip_is_transposed,
lcu);
cost += kvz_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, lcu);
if (has_chroma) {
cost += kvz_cu_rd_cost_chroma(state, x_local, y_local, depth, cur_cu, lcu);
}
// Add the cost of coding no-split.
uint8_t split_model = get_ctx_cu_split_model(lcu, x, y, depth);
const cabac_ctx_t *ctx = &(state->cabac.ctx.split_flag_model[split_model]);
cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda;
// Add the cost of coding intra mode only once.
double mode_bits = calc_mode_bits(state, lcu, cur_cu, x, y, depth);
double mode_bits = calc_mode_bits(state, lcu, cur_cu, x, y, depth) + bits;
cost += mode_bits * state->lambda;
cost += cu_rd_cost_tr_split_accurate(state, x_local, y_local, depth, cur_cu, lcu);
memcpy(&post_seach_cabac, &state->search_cabac, sizeof(post_seach_cabac));
memcpy(&state->search_cabac, &temp_cabac, sizeof(temp_cabac));
}
}
@ -978,6 +1169,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
} else if (depth > 0) {
// Copy this CU's mode all the way down for use in adjacent CUs mode
// search.
memcpy(&state->search_cabac, &post_seach_cabac, sizeof(post_seach_cabac));
work_tree_copy_down(x_local, y_local, depth, work_tree);
downsample_cclm_rec(
state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64]
@ -1167,6 +1359,8 @@ static void copy_lcu_to_cu_data(const encoder_state_t * const state, int x_px, i
*/
void kvz_search_lcu(encoder_state_t * const state, const int x, const int y, const yuv_t * const hor_buf, const yuv_t * const ver_buf, lcu_coeff_t *coeff)
{
memcpy(&state->search_cabac, &state->cabac, sizeof(cabac_data_t));
state->search_cabac.only_count = 1;
assert(x % LCU_WIDTH == 0);
assert(y % LCU_WIDTH == 0);

View file

@ -44,22 +44,53 @@
#include "image.h"
#include "constraint.h"
// Capacity of the arrays in unit_stats_map_t: the larger of MAX_REF_PIC_COUNT
// and MRG_MAX_NUM_CANDS.
#define MAX_UNIT_STATS_MAP_SIZE MAX(MAX_REF_PIC_COUNT, MRG_MAX_NUM_CANDS)
// Modify weight of luma SSD.
// The RD cost functions multiply the luma SSD by this factor. The #ifndef
// guard lets a build override the default value.
#ifndef KVZ_LUMA_MULT
# define KVZ_LUMA_MULT 0.8
#endif
// Modify weight of chroma SSD.
// The RD cost functions multiply the chroma SSD by this factor. The #ifndef
// guard lets a build override the default value.
#ifndef KVZ_CHROMA_MULT
# define KVZ_CHROMA_MULT 1.5
#endif
/**
 * \brief Data collected during search processes.
 *
 * The intended use is to collect statistics of the
 * searched coding/prediction units. The unit, cost and
 * bits arrays are parallel: data related to a specific
 * unit is found at the same index i in each of them.
 * The arrays should be indexed by elements of the "keys"
 * array, which can be sorted by the RD costs of the units
 * (see kvz_sort_keys_by_cost, which reorders only "keys").
 * Each array holds at most MAX_UNIT_STATS_MAP_SIZE elements.
 */
typedef struct unit_stats_map_t {
cu_info_t unit[MAX_UNIT_STATS_MAP_SIZE]; //!< list of searched units
double cost[MAX_UNIT_STATS_MAP_SIZE]; //!< list of matching RD costs
double bits[MAX_UNIT_STATS_MAP_SIZE]; //!< list of matching bit costs
int8_t keys[MAX_UNIT_STATS_MAP_SIZE]; //!< list of keys (indices) to elements in the other arrays
int size; //!< number of active elements in the lists
} unit_stats_map_t;
/**
 * \brief Number of MIP modes for a block of the given size.
 *
 * 32 for 4x4 blocks, 16 when either dimension is 4 or the block is 8x8, and
 * 12 otherwise. The expansion is fully parenthesized so that it can be used
 * inside larger expressions without the conditional operator capturing the
 * surrounding operands.
 */
#define NUM_MIP_MODES_FULL(width, height) \
  (((width) == 4 && (height) == 4) ? 32 : (((width) == 4 || (height) == 4 || ((width) == 8 && (height) == 8)) ? 16 : 12))
/**
 * \brief Half of NUM_MIP_MODES_FULL.
 *
 * The shift is applied to the whole result. Without the outer parentheses it
 * bound only to the last branch of the conditional, so a 4x4 block yielded 32
 * instead of 16.
 */
#define NUM_MIP_MODES_HALF(width, height) (NUM_MIP_MODES_FULL((width), (height)) >> 1)
void kvz_sort_modes(int8_t *__restrict modes, double *__restrict costs, uint8_t length);
void kvz_sort_modes_intra_luma(int8_t *__restrict modes, int8_t *__restrict trafo, double *__restrict costs, uint8_t length);
void kvz_sort_keys_by_cost(unit_stats_map_t *__restrict map);
void kvz_search_lcu(encoder_state_t *state, int x, int y, const yuv_t *hor_buf, const yuv_t *ver_buf, lcu_coeff_t *coeff);
double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
const int x_px, const int y_px, const int depth,
const cu_info_t *const pred_cu,
lcu_t *const lcu);
const int x_px, const int y_px, const int depth,
const cu_info_t *const pred_cu,
lcu_t *const lcu);
double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
const int x_px, const int y_px, const int depth,
cu_info_t * pred_cu,
lcu_t *const lcu);
const int x_px, const int y_px, const int depth,
cu_info_t *const pred_cu,
lcu_t *const lcu);
void kvz_lcu_fill_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, int tr_depth);
void kvz_intra_recon_lcu_luma(encoder_state_t * const state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu);

File diff suppressed because it is too large Load diff

View file

@ -64,20 +64,20 @@ enum hpel_position {
HPEL_POS_DIA = 2
};
typedef uint32_t kvz_mvd_cost_func(const encoder_state_t *state,
typedef double kvz_mvd_cost_func(const encoder_state_t *state,
int x, int y,
int mv_shift,
mv_t mv_cand[2][2],
inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
int16_t num_cand,
int32_t ref_idx,
uint32_t *bitcost);
double *bitcost);
void kvz_search_cu_inter(encoder_state_t * const state,
int x, int y, int depth,
lcu_t *lcu,
double *inter_cost,
uint32_t *inter_bitcost);
double* inter_bitcost);
void kvz_search_cu_smp(encoder_state_t * const state,
int x, int y,
@ -85,12 +85,20 @@ void kvz_search_cu_smp(encoder_state_t * const state,
part_mode_t part_mode,
lcu_t *lcu,
double *inter_cost,
uint32_t *inter_bitcost);
double* inter_bitcost);
unsigned kvz_inter_satd_cost(const encoder_state_t* state,
const lcu_t *lcu,
int x,
int y);
void kvz_cu_cost_inter_rd2(encoder_state_t* const state,
int x, int y, int depth,
cu_info_t* cur_cu,
lcu_t* lcu,
double* inter_cost,
double* inter_bitcost);
int kvz_get_skip_context(int x, int y, lcu_t* const lcu, cu_array_t* const cu_a, int* predmode_ctx);
#endif // SEARCH_INTER_H_

View file

@ -97,13 +97,13 @@ static double get_cost(encoder_state_t * const state,
// Add the offset bit costs of signaling 'luma and chroma use trskip',
// versus signaling 'luma and chroma don't use trskip' to the SAD cost.
const cabac_ctx_t *ctx = &state->cabac.ctx.transform_skip_model_luma;
const cabac_ctx_t *ctx = &state->search_cabac.ctx.transform_skip_model_luma;
double trskip_bits = CTX_ENTROPY_FBITS(ctx, 1) - CTX_ENTROPY_FBITS(ctx, 0);
// ToDo: Check cost
if (state->encoder_control->chroma_format != KVZ_CSP_400) {
ctx = &state->cabac.ctx.transform_skip_model_chroma;
ctx = &state->search_cabac.ctx.transform_skip_model_chroma;
trskip_bits += 2.0 * (CTX_ENTROPY_FBITS(ctx, 1) - CTX_ENTROPY_FBITS(ctx, 0));
}
@ -394,7 +394,7 @@ static double search_intra_trdepth(encoder_state_t * const state,
// max_depth.
// - Min transform size hasn't been reached (MAX_PU_DEPTH).
if (depth < max_depth && depth < MAX_PU_DEPTH) {
split_cost = 3 * state->lambda;
split_cost = 0;
split_cost += search_intra_trdepth(state, x_px, y_px, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu, cclm_params, -1);
if (split_cost < nosplit_cost) {
@ -417,14 +417,15 @@ static double search_intra_trdepth(encoder_state_t * const state,
// so this will code cbf as 0 and not code the cbf at all for descendants.
if (state->encoder_control->chroma_format != KVZ_CSP_400) {
const uint8_t tr_depth = depth - pred_cu->depth;
cabac_data_t* cabac = (cabac_data_t *)&state->search_cabac;
const cabac_ctx_t* ctx = &(state->cabac.ctx.qt_cbf_model_cb[0]);
cabac_ctx_t* ctx = &(cabac->ctx.qt_cbf_model_cb[0]);
if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) {
cbf_bits += CTX_ENTROPY_FBITS(ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_U));
CABAC_FBITS_UPDATE(cabac, ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_U), cbf_bits, "cbf_cb");
}
ctx = &(state->cabac.ctx.qt_cbf_model_cr[cbf_is_set(pred_cu->cbf, depth, COLOR_U)]);
if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_V)) {
cbf_bits += CTX_ENTROPY_FBITS(ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_V));
CABAC_FBITS_UPDATE(cabac, ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_V), cbf_bits, "cbf_cr");
}
}
@ -677,9 +678,8 @@ static int8_t search_intra_rough(encoder_state_t * const state,
// Add prediction mode coding cost as the last thing. We don't want this
// affecting the halving search.
int lambda_cost = (int)(state->lambda_sqrt + 0.5);
for (int mode_i = 0; mode_i < modes_selected; ++mode_i) {
costs[mode_i] += lambda_cost * kvz_luma_mode_bits(state, modes[mode_i], intra_preds, 0, 0, 0);
costs[mode_i] += state->lambda_sqrt * kvz_luma_mode_bits(state, modes[mode_i], intra_preds, 0, 0, 0);
}
#undef PARALLEL_BLKS
@ -771,7 +771,7 @@ static int8_t search_intra_rdo(encoder_state_t * const state,
int rdo_bitcost = kvz_luma_mode_bits(state, mode, intra_preds, multi_ref_index, transp_off, ctx_id);
*mode_cost_p = rdo_bitcost * (int)(state->lambda + 0.5);
// Mip related stuff
// There can be 32 MIP modes, but only mode numbers [0, 15] are ever written to bitstream.
// Half of the modes [16, 31] are indicated with the separate transpose flag.
@ -818,6 +818,7 @@ static int8_t search_intra_rdo(encoder_state_t * const state,
}
// The best transform split hierarchy is not saved anywhere, so to get the
// transform split hierarchy the search has to be performed again with the
// best mode.
@ -854,7 +855,8 @@ static int8_t search_intra_rdo(encoder_state_t * const state,
double kvz_luma_mode_bits(const encoder_state_t *state, int8_t luma_mode, const int8_t *intra_preds, const uint8_t multi_ref_idx, const uint8_t num_mip_modes_half, int mip_flag_ctx_id)
{
double mode_bits = 0.0;
cabac_data_t* cabac = (cabac_data_t *)&state->search_cabac;
double mode_bits = 0;
bool enable_mip = state->encoder_control->cfg.mip;
bool mip_flag = enable_mip ? (num_mip_modes_half > 0 ? true : false) : false;
@ -899,11 +901,26 @@ double kvz_luma_mode_bits(const encoder_state_t *state, int8_t luma_mode, const
break;
}
}
cabac_ctx_t *ctx = &(cabac->ctx.luma_planar_model[1]);
CABAC_FBITS_UPDATE(cabac, ctx, mode_in_preds, mode_bits, "prev_intra_luma_pred_flag_search");
if (state->search_cabac.update) {
if(mode_in_preds) {
CABAC_BIN_EP(cabac, !(luma_mode == intra_preds[0]), "mpm_idx");
if(luma_mode != intra_preds[0]) {
CABAC_BIN_EP(cabac, !(luma_mode == intra_preds[1]), "mpm_idx");
}
}
else {
// This value should be transformed for actual coding,
// but here the value does not actually matter, just that we write 5 bits
CABAC_BINS_EP(cabac, luma_mode, 5, "rem_intra_luma_pred_mode");
}
}
bool enable_mrl = state->encoder_control->cfg.mrl;
uint8_t multi_ref_index = enable_mrl ? multi_ref_idx : 0;
const cabac_ctx_t* ctx = &(state->cabac.ctx.intra_luma_mpm_flag_model);
ctx = &(cabac->ctx.intra_luma_mpm_flag_model);
if (multi_ref_index == 0) {
mode_bits += CTX_ENTROPY_FBITS(ctx, mode_in_preds != -1);
@ -911,17 +928,17 @@ double kvz_luma_mode_bits(const encoder_state_t *state, int8_t luma_mode, const
// Add MRL bits.
if (enable_mrl && MAX_REF_LINE_IDX > 1) {
ctx = &(state->cabac.ctx.multi_ref_line[0]);
ctx = &(cabac->ctx.multi_ref_line[0]);
mode_bits += CTX_ENTROPY_FBITS(ctx, multi_ref_index != 0);
if (multi_ref_index != 0 && MAX_REF_LINE_IDX > 2) {
ctx = &(state->cabac.ctx.multi_ref_line[1]);
ctx = &(cabac->ctx.multi_ref_line[1]);
mode_bits += CTX_ENTROPY_FBITS(ctx, multi_ref_index != 1);
}
}
if (mode_in_preds != -1 || multi_ref_index != 0) {
ctx = &(state->cabac.ctx.luma_planar_model[0]);
ctx = &(cabac->ctx.luma_planar_model[0]);
if (multi_ref_index == 0) {
mode_bits += CTX_ENTROPY_FBITS(ctx, mode_in_preds > 0);
}
@ -938,7 +955,8 @@ double kvz_luma_mode_bits(const encoder_state_t *state, int8_t luma_mode, const
double kvz_chroma_mode_bits(const encoder_state_t *state, int8_t chroma_mode, int8_t luma_mode)
{
const cabac_ctx_t *ctx = &(state->cabac.ctx.chroma_pred_model);
cabac_data_t* cabac = (cabac_data_t*)&state->search_cabac;
const cabac_ctx_t *ctx = &(cabac->ctx.chroma_pred_model);
double mode_bits;
if (chroma_mode == luma_mode) {
mode_bits = CTX_ENTROPY_FBITS(ctx, 0);
@ -958,6 +976,13 @@ double kvz_chroma_mode_bits(const encoder_state_t *state, int8_t chroma_mode, in
mode_bits += CTX_ENTROPY_FBITS(ctx, chroma_mode > 67);
}
if(cabac->update) {
if(chroma_mode != luma_mode) {
// Again it does not matter what we actually write here
CABAC_BINS_EP(cabac, 0, 2, "intra_chroma_pred_mode");
}
}
return mode_bits;
}
@ -1045,9 +1070,11 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state,
-1, chroma.mode, // skip luma
NULL, cclm_params, 0, false, false, lcu);
}
double bits = 0;
chroma.cost = kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, tr_cu, lcu);
double mode_bits = kvz_chroma_mode_bits(state, chroma.mode, intra_mode);
bits += mode_bits;
chroma.cost += mode_bits * state->lambda;
if (chroma.cost < best_chroma.cost) {

View file

@ -260,11 +260,9 @@ int kvz_quantize_residual_trskip(
struct {
kvz_pixel rec[LCU_WIDTH * LCU_WIDTH];
coeff_t coeff[LCU_WIDTH * LCU_WIDTH];
uint32_t cost;
double cost;
int has_coeffs;
} skip, *best;
const int bit_cost = (int)(state->lambda + 0.5);
//noskip.has_coeffs = kvz_quantize_residual(
// state, cur_cu, width, color, scan_order,
@ -278,7 +276,7 @@ int kvz_quantize_residual_trskip(
1, in_stride, width,
ref_in, pred_in, skip.rec, skip.coeff, false, lmcs_chroma_adj);
skip.cost = kvz_pixels_calc_ssd(ref_in, skip.rec, in_stride, width, width);
skip.cost += kvz_get_coeff_cost(state, skip.coeff, width, 0, scan_order, 1) * bit_cost;
skip.cost += kvz_get_coeff_cost(state, skip.coeff, width, 0, scan_order, 1) * state->frame->lambda;
/* if (noskip.cost <= skip.cost) {
*trskip_out = 0;