mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 11:24:05 +00:00
Early skip
This commit is contained in:
parent
5bfe585e74
commit
4097331fd6
|
@ -207,6 +207,11 @@ Compression tools:
|
||||||
when QP is below the limit. [0]
|
when QP is below the limit. [0]
|
||||||
--(no-)intra-rdo-et : Check intra modes in rdo stage only until
|
--(no-)intra-rdo-et : Check intra modes in rdo stage only until
|
||||||
a zero coefficient CU is found. [disabled]
|
a zero coefficient CU is found. [disabled]
|
||||||
|
--(no-)early-skip : Try to find skip cu from merge candidates.
|
||||||
|
Perform no further search if skip is found.
|
||||||
|
For rd=0..1: Try the first candidate.
|
||||||
|
For rd=2.. : Try the best candidate based
|
||||||
|
on luma satd cost. [enabled]
|
||||||
--(no-)implicit-rdpcm : Implicit residual DPCM. Currently only supported
|
--(no-)implicit-rdpcm : Implicit residual DPCM. Currently only supported
|
||||||
with lossless coding. [disabled]
|
with lossless coding. [disabled]
|
||||||
--(no-)tmvp : Temporal motion vector prediction [enabled]
|
--(no-)tmvp : Temporal motion vector prediction [enabled]
|
||||||
|
|
|
@ -23,7 +23,7 @@ AC_CONFIG_SRCDIR([src/encmain.c])
|
||||||
#
|
#
|
||||||
# Here is a somewhat sane guide to lib versioning: http://apr.apache.org/versioning.html
|
# Here is a somewhat sane guide to lib versioning: http://apr.apache.org/versioning.html
|
||||||
ver_major=4
|
ver_major=4
|
||||||
ver_minor=1
|
ver_minor=2
|
||||||
ver_release=0
|
ver_release=0
|
||||||
|
|
||||||
# Prevents configure from adding a lot of defines to the CFLAGS
|
# Prevents configure from adding a lot of defines to the CFLAGS
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH KVAZAAR "1" "May 2019" "kvazaar v1.2.0" "User Commands"
|
.TH KVAZAAR "1" "July 2019" "kvazaar v1.2.0" "User Commands"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
kvazaar \- open source HEVC encoder
|
kvazaar \- open source HEVC encoder
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -278,6 +278,13 @@ Skip CABAC cost for residual coefficients
|
||||||
Check intra modes in rdo stage only until
|
Check intra modes in rdo stage only until
|
||||||
a zero coefficient CU is found. [disabled]
|
a zero coefficient CU is found. [disabled]
|
||||||
.TP
|
.TP
|
||||||
|
\fB\-\-(no\-)early\-skip
|
||||||
|
Try to find skip cu from merge candidates.
|
||||||
|
Perform no further search if skip is found.
|
||||||
|
For rd=0..1: Try the first candidate.
|
||||||
|
For rd=2.. : Try the best candidate based
|
||||||
|
on luma satd cost. [enabled]
|
||||||
|
.TP
|
||||||
\fB\-\-(no\-)implicit\-rdpcm
|
\fB\-\-(no\-)implicit\-rdpcm
|
||||||
Implicit residual DPCM. Currently only supported
|
Implicit residual DPCM. Currently only supported
|
||||||
with lossless coding. [disabled]
|
with lossless coding. [disabled]
|
||||||
|
|
16
src/cfg.c
16
src/cfg.c
|
@ -139,6 +139,7 @@ int kvz_config_init(kvz_config *cfg)
|
||||||
cfg->scaling_list = KVZ_SCALING_LIST_OFF;
|
cfg->scaling_list = KVZ_SCALING_LIST_OFF;
|
||||||
|
|
||||||
cfg->max_merge = 5;
|
cfg->max_merge = 5;
|
||||||
|
cfg->early_skip = true;
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -385,7 +386,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
|
||||||
|
|
||||||
static const char * const scaling_list_names[] = { "off", "custom", "default", NULL };
|
static const char * const scaling_list_names[] = { "off", "custom", "default", NULL };
|
||||||
|
|
||||||
static const char * const preset_values[11][23*2] = {
|
static const char * const preset_values[11][24*2] = {
|
||||||
{
|
{
|
||||||
"ultrafast",
|
"ultrafast",
|
||||||
"rd", "0",
|
"rd", "0",
|
||||||
|
@ -409,6 +410,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
|
||||||
"cu-split-termination", "zero",
|
"cu-split-termination", "zero",
|
||||||
"me-early-termination", "sensitive",
|
"me-early-termination", "sensitive",
|
||||||
"intra-rdo-et", "0",
|
"intra-rdo-et", "0",
|
||||||
|
"early-skip", "1",
|
||||||
"fast-residual-cost", "28",
|
"fast-residual-cost", "28",
|
||||||
NULL
|
NULL
|
||||||
},
|
},
|
||||||
|
@ -435,6 +437,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
|
||||||
"cu-split-termination", "zero",
|
"cu-split-termination", "zero",
|
||||||
"me-early-termination", "sensitive",
|
"me-early-termination", "sensitive",
|
||||||
"intra-rdo-et", "0",
|
"intra-rdo-et", "0",
|
||||||
|
"early-skip", "1",
|
||||||
"fast-residual-cost", "28",
|
"fast-residual-cost", "28",
|
||||||
NULL
|
NULL
|
||||||
},
|
},
|
||||||
|
@ -461,6 +464,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
|
||||||
"cu-split-termination", "zero",
|
"cu-split-termination", "zero",
|
||||||
"me-early-termination", "sensitive",
|
"me-early-termination", "sensitive",
|
||||||
"intra-rdo-et", "0",
|
"intra-rdo-et", "0",
|
||||||
|
"early-skip", "1",
|
||||||
"fast-residual-cost", "28",
|
"fast-residual-cost", "28",
|
||||||
NULL
|
NULL
|
||||||
},
|
},
|
||||||
|
@ -487,6 +491,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
|
||||||
"cu-split-termination", "zero",
|
"cu-split-termination", "zero",
|
||||||
"me-early-termination", "sensitive",
|
"me-early-termination", "sensitive",
|
||||||
"intra-rdo-et", "0",
|
"intra-rdo-et", "0",
|
||||||
|
"early-skip", "1",
|
||||||
"fast-residual-cost", "0",
|
"fast-residual-cost", "0",
|
||||||
NULL
|
NULL
|
||||||
},
|
},
|
||||||
|
@ -513,6 +518,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
|
||||||
"cu-split-termination", "zero",
|
"cu-split-termination", "zero",
|
||||||
"me-early-termination", "sensitive",
|
"me-early-termination", "sensitive",
|
||||||
"intra-rdo-et", "0",
|
"intra-rdo-et", "0",
|
||||||
|
"early-skip", "1",
|
||||||
"fast-residual-cost", "0",
|
"fast-residual-cost", "0",
|
||||||
NULL
|
NULL
|
||||||
},
|
},
|
||||||
|
@ -539,6 +545,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
|
||||||
"cu-split-termination", "zero",
|
"cu-split-termination", "zero",
|
||||||
"me-early-termination", "on",
|
"me-early-termination", "on",
|
||||||
"intra-rdo-et", "0",
|
"intra-rdo-et", "0",
|
||||||
|
"early-skip", "1",
|
||||||
"fast-residual-cost", "0",
|
"fast-residual-cost", "0",
|
||||||
NULL
|
NULL
|
||||||
},
|
},
|
||||||
|
@ -565,6 +572,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
|
||||||
"cu-split-termination", "zero",
|
"cu-split-termination", "zero",
|
||||||
"me-early-termination", "on",
|
"me-early-termination", "on",
|
||||||
"intra-rdo-et", "0",
|
"intra-rdo-et", "0",
|
||||||
|
"early-skip", "1",
|
||||||
"fast-residual-cost", "0",
|
"fast-residual-cost", "0",
|
||||||
NULL
|
NULL
|
||||||
},
|
},
|
||||||
|
@ -591,6 +599,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
|
||||||
"cu-split-termination", "zero",
|
"cu-split-termination", "zero",
|
||||||
"me-early-termination", "off",
|
"me-early-termination", "off",
|
||||||
"intra-rdo-et", "0",
|
"intra-rdo-et", "0",
|
||||||
|
"early-skip", "1",
|
||||||
"fast-residual-cost", "0",
|
"fast-residual-cost", "0",
|
||||||
NULL
|
NULL
|
||||||
},
|
},
|
||||||
|
@ -617,6 +626,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
|
||||||
"cu-split-termination", "zero",
|
"cu-split-termination", "zero",
|
||||||
"me-early-termination", "off",
|
"me-early-termination", "off",
|
||||||
"intra-rdo-et", "0",
|
"intra-rdo-et", "0",
|
||||||
|
"early-skip", "1",
|
||||||
"fast-residual-cost", "0",
|
"fast-residual-cost", "0",
|
||||||
NULL
|
NULL
|
||||||
},
|
},
|
||||||
|
@ -643,6 +653,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
|
||||||
"cu-split-termination", "off",
|
"cu-split-termination", "off",
|
||||||
"me-early-termination", "off",
|
"me-early-termination", "off",
|
||||||
"intra-rdo-et", "0",
|
"intra-rdo-et", "0",
|
||||||
|
"early-skip", "1",
|
||||||
"fast-residual-cost", "0",
|
"fast-residual-cost", "0",
|
||||||
NULL
|
NULL
|
||||||
},
|
},
|
||||||
|
@ -1236,6 +1247,9 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
|
||||||
}
|
}
|
||||||
cfg->max_merge = (uint8_t)max_merge;
|
cfg->max_merge = (uint8_t)max_merge;
|
||||||
}
|
}
|
||||||
|
else if OPT("early-skip") {
|
||||||
|
cfg->early_skip = (bool)atobool(value);
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -135,6 +135,8 @@ static const struct option long_options[] = {
|
||||||
{ "no-open-gop", no_argument, NULL, 0 },
|
{ "no-open-gop", no_argument, NULL, 0 },
|
||||||
{ "scaling-list", required_argument, NULL, 0 },
|
{ "scaling-list", required_argument, NULL, 0 },
|
||||||
{ "max-merge", required_argument, NULL, 0 },
|
{ "max-merge", required_argument, NULL, 0 },
|
||||||
|
{ "early-skip", no_argument, NULL, 0 },
|
||||||
|
{ "no-early-skip", no_argument, NULL, 0 },
|
||||||
{0, 0, 0, 0}
|
{0, 0, 0, 0}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -489,6 +491,11 @@ void print_help(void)
|
||||||
" when QP is below the limit. [0]\n"
|
" when QP is below the limit. [0]\n"
|
||||||
" --(no-)intra-rdo-et : Check intra modes in rdo stage only until\n"
|
" --(no-)intra-rdo-et : Check intra modes in rdo stage only until\n"
|
||||||
" a zero coefficient CU is found. [disabled]\n"
|
" a zero coefficient CU is found. [disabled]\n"
|
||||||
|
" --(no-)early-skip : Try to find skip cu from merge candidates.\n"
|
||||||
|
" Perform no further search if skip is found.\n"
|
||||||
|
" For rd=0..1: Try the first candidate.\n"
|
||||||
|
" For rd=2.. : Try the best candidate based\n"
|
||||||
|
" on luma satd cost. [enabled]\n"
|
||||||
" --(no-)implicit-rdpcm : Implicit residual DPCM. Currently only supported\n"
|
" --(no-)implicit-rdpcm : Implicit residual DPCM. Currently only supported\n"
|
||||||
" with lossless coding. [disabled]\n"
|
" with lossless coding. [disabled]\n"
|
||||||
" --(no-)tmvp : Temporal motion vector prediction [enabled]\n"
|
" --(no-)tmvp : Temporal motion vector prediction [enabled]\n"
|
||||||
|
|
|
@ -387,6 +387,9 @@ typedef struct kvz_config
|
||||||
/** \brief Maximum number of merge candidates */
|
/** \brief Maximum number of merge candidates */
|
||||||
uint8_t max_merge;
|
uint8_t max_merge;
|
||||||
|
|
||||||
|
/** \brief Enable Early Skip Mode Decision */
|
||||||
|
uint8_t early_skip;
|
||||||
|
|
||||||
} kvz_config;
|
} kvz_config;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
129
src/search.c
129
src/search.c
|
@ -403,6 +403,30 @@ static double calc_mode_bits(const encoder_state_t *state,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Sort modes and costs to ascending order according to costs.
|
||||||
|
*
|
||||||
|
* Insertion sort that reorders both arrays in place and in parallel, so
|
||||||
|
* modes[k] stays paired with costs[k] after the call. Entries with equal
|
||||||
|
* cost keep their original relative order. Both pointers are declared
|
||||||
|
* __restrict, so the two arrays must not overlap.
|
||||||
|
*
|
||||||
|
* \param modes   Mode identifiers; permuted together with costs.
|
||||||
|
* \param costs   Cost of each mode; ends up in ascending order.
|
||||||
|
* \param length  Number of entries to sort in both arrays.
|
||||||
|
*/
|
||||||
|
void kvz_sort_modes(int8_t *__restrict modes, double *__restrict costs, uint8_t length)
|
||||||
|
{
|
||||||
|
// Length for intra is always between 5 and 23, and is either 21, 17, 9 or 8 about
|
||||||
|
// 60% of the time, so there should be no need for anything more complex
|
||||||
|
// than insertion sort.
|
||||||
|
// Length for merge is 5 or less.
|
||||||
|
for (uint8_t i = 1; i < length; ++i) {
|
||||||
|
const double cur_cost = costs[i];
|
||||||
|
const int8_t cur_mode = modes[i];
|
||||||
|
uint8_t j = i;
|
||||||
|
while (j > 0 && cur_cost < costs[j - 1]) { // strict '<': equal costs are never moved past each other
|
||||||
|
costs[j] = costs[j - 1];
|
||||||
|
modes[j] = modes[j - 1];
|
||||||
|
--j;
|
||||||
|
}
|
||||||
|
costs[j] = cur_cost;
|
||||||
|
modes[j] = cur_mode;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static uint8_t get_ctx_cu_split_model(const lcu_t *lcu, int x, int y, int depth)
|
static uint8_t get_ctx_cu_split_model(const lcu_t *lcu, int x, int y, int depth)
|
||||||
{
|
{
|
||||||
vector2d_t lcu_cu = { SUB_SCU(x), SUB_SCU(y) };
|
vector2d_t lcu_cu = { SUB_SCU(x), SUB_SCU(y) };
|
||||||
|
@ -482,29 +506,31 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
||||||
cur_cu->type = CU_INTER;
|
cur_cu->type = CU_INTER;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try SMP and AMP partitioning.
|
if (!cur_cu->skipped) {
|
||||||
static const part_mode_t mp_modes[] = {
|
// Try SMP and AMP partitioning.
|
||||||
// SMP
|
static const part_mode_t mp_modes[] = {
|
||||||
SIZE_2NxN, SIZE_Nx2N,
|
// SMP
|
||||||
// AMP
|
SIZE_2NxN, SIZE_Nx2N,
|
||||||
SIZE_2NxnU, SIZE_2NxnD,
|
// AMP
|
||||||
SIZE_nLx2N, SIZE_nRx2N,
|
SIZE_2NxnU, SIZE_2NxnD,
|
||||||
};
|
SIZE_nLx2N, SIZE_nRx2N,
|
||||||
|
};
|
||||||
|
|
||||||
const int first_mode = ctrl->cfg.smp_enable ? 0 : 2;
|
const int first_mode = ctrl->cfg.smp_enable ? 0 : 2;
|
||||||
const int last_mode = (ctrl->cfg.amp_enable && cu_width >= 16) ? 5 : 1;
|
const int last_mode = (ctrl->cfg.amp_enable && cu_width >= 16) ? 5 : 1;
|
||||||
for (int i = first_mode; i <= last_mode; ++i) {
|
for (int i = first_mode; i <= last_mode; ++i) {
|
||||||
kvz_search_cu_smp(state,
|
kvz_search_cu_smp(state,
|
||||||
x, y,
|
x, y,
|
||||||
depth,
|
depth,
|
||||||
mp_modes[i],
|
mp_modes[i],
|
||||||
&work_tree[depth + 1],
|
&work_tree[depth + 1],
|
||||||
&mode_cost, &mode_bitcost);
|
&mode_cost, &mode_bitcost);
|
||||||
if (mode_cost < cost) {
|
if (mode_cost < cost) {
|
||||||
cost = mode_cost;
|
cost = mode_cost;
|
||||||
inter_bitcost = mode_bitcost;
|
inter_bitcost = mode_bitcost;
|
||||||
// Copy inter prediction info to current level.
|
// Copy inter prediction info to current level.
|
||||||
copy_cu_info(x_local, y_local, cu_width, &work_tree[depth + 1], lcu);
|
copy_cu_info(x_local, y_local, cu_width, &work_tree[depth + 1], lcu);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -512,9 +538,10 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
||||||
// Try to skip intra search in rd==0 mode.
|
// Try to skip intra search in rd==0 mode.
|
||||||
// This can be quite severe on bdrate. It might be better to do this
|
// This can be quite severe on bdrate. It might be better to do this
|
||||||
// decision after reconstructing the inter frame.
|
// decision after reconstructing the inter frame.
|
||||||
bool skip_intra = state->encoder_control->cfg.rdo == 0
|
bool skip_intra = (state->encoder_control->cfg.rdo == 0
|
||||||
&& cur_cu->type != CU_NOTSET
|
&& cur_cu->type != CU_NOTSET
|
||||||
&& cost / (cu_width * cu_width) < INTRA_THRESHOLD;
|
&& cost / (cu_width * cu_width) < INTRA_THRESHOLD)
|
||||||
|
|| cur_cu->skipped;
|
||||||
|
|
||||||
int32_t cu_width_intra_min = LCU_WIDTH >> ctrl->cfg.pu_depth_intra.max;
|
int32_t cu_width_intra_min = LCU_WIDTH >> ctrl->cfg.pu_depth_intra.max;
|
||||||
bool can_use_intra =
|
bool can_use_intra =
|
||||||
|
@ -567,43 +594,47 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
||||||
NULL, lcu);
|
NULL, lcu);
|
||||||
}
|
}
|
||||||
} else if (cur_cu->type == CU_INTER) {
|
} else if (cur_cu->type == CU_INTER) {
|
||||||
// Reset transform depth because intra messes with them.
|
|
||||||
// This will no longer be necessary if the transform depths are not shared.
|
|
||||||
int tr_depth = MAX(1, depth);
|
|
||||||
if (cur_cu->part_size != SIZE_2Nx2N) {
|
|
||||||
tr_depth = depth + 1;
|
|
||||||
}
|
|
||||||
kvz_lcu_set_trdepth(lcu, x, y, depth, tr_depth);
|
|
||||||
|
|
||||||
kvz_inter_recon_cu(state, lcu, x, y, cu_width);
|
if (!cur_cu->skipped) {
|
||||||
|
// Reset transform depth because intra messes with them.
|
||||||
|
// This will no longer be necessary if the transform depths are not shared.
|
||||||
|
int tr_depth = MAX(1, depth);
|
||||||
|
if (cur_cu->part_size != SIZE_2Nx2N) {
|
||||||
|
tr_depth = depth + 1;
|
||||||
|
}
|
||||||
|
kvz_lcu_set_trdepth(lcu, x, y, depth, tr_depth);
|
||||||
|
|
||||||
if (!ctrl->cfg.lossless && !ctrl->cfg.rdoq_enable) {
|
kvz_inter_recon_cu(state, lcu, x, y, cu_width);
|
||||||
//Calculate cost for zero coeffs
|
|
||||||
inter_zero_coeff_cost = cu_zero_coeff_cost(state, work_tree, x, y, depth) + inter_bitcost * state->lambda;
|
|
||||||
|
|
||||||
}
|
if (!ctrl->cfg.lossless && !ctrl->cfg.rdoq_enable) {
|
||||||
|
//Calculate cost for zero coeffs
|
||||||
|
inter_zero_coeff_cost = cu_zero_coeff_cost(state, work_tree, x, y, depth) + inter_bitcost * state->lambda;
|
||||||
|
|
||||||
const bool has_chroma = state->encoder_control->chroma_format != KVZ_CSP_400;
|
}
|
||||||
kvz_quantize_lcu_residual(state,
|
|
||||||
true, has_chroma,
|
|
||||||
x, y, depth,
|
|
||||||
NULL,
|
|
||||||
lcu);
|
|
||||||
|
|
||||||
int cbf = cbf_is_set_any(cur_cu->cbf, depth);
|
const bool has_chroma = state->encoder_control->chroma_format != KVZ_CSP_400;
|
||||||
|
kvz_quantize_lcu_residual(state,
|
||||||
|
true, has_chroma,
|
||||||
|
x, y, depth,
|
||||||
|
NULL,
|
||||||
|
lcu);
|
||||||
|
|
||||||
if (cur_cu->merged && !cbf && cur_cu->part_size == SIZE_2Nx2N) {
|
int cbf = cbf_is_set_any(cur_cu->cbf, depth);
|
||||||
cur_cu->merged = 0;
|
|
||||||
cur_cu->skipped = 1;
|
if (cur_cu->merged && !cbf && cur_cu->part_size == SIZE_2Nx2N) {
|
||||||
// Selecting skip reduces bits needed to code the CU
|
cur_cu->merged = 0;
|
||||||
if (inter_bitcost > 1) {
|
cur_cu->skipped = 1;
|
||||||
inter_bitcost -= 1;
|
// Selecting skip reduces bits needed to code the CU
|
||||||
|
if (inter_bitcost > 1) {
|
||||||
|
inter_bitcost -= 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
lcu_set_inter(lcu, x_local, y_local, cu_width);
|
lcu_set_inter(lcu, x_local, y_local, cu_width);
|
||||||
lcu_set_coeff(lcu, x_local, y_local, cu_width, cur_cu);
|
lcu_set_coeff(lcu, x_local, y_local, cu_width, cur_cu);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cur_cu->type == CU_INTRA || cur_cu->type == CU_INTER) {
|
if (cur_cu->type == CU_INTRA || cur_cu->type == CU_INTER) {
|
||||||
cost = kvz_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, lcu);
|
cost = kvz_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, lcu);
|
||||||
if (state->encoder_control->chroma_format != KVZ_CSP_400) {
|
if (state->encoder_control->chroma_format != KVZ_CSP_400) {
|
||||||
|
|
|
@ -31,6 +31,7 @@
|
||||||
#include "global.h" // IWYU pragma: keep
|
#include "global.h" // IWYU pragma: keep
|
||||||
#include "image.h"
|
#include "image.h"
|
||||||
|
|
||||||
|
void kvz_sort_modes(int8_t *__restrict modes, double *__restrict costs, uint8_t length);
|
||||||
|
|
||||||
void kvz_search_lcu(encoder_state_t *state, int x, int y, const yuv_t *hor_buf, const yuv_t *ver_buf);
|
void kvz_search_lcu(encoder_state_t *state, int x, int y, const yuv_t *hor_buf, const yuv_t *ver_buf);
|
||||||
|
|
||||||
|
|
|
@ -1510,6 +1510,90 @@ static void search_pu_inter(encoder_state_t * const state,
|
||||||
CU_SET_MV_CAND(cur_cu, 0, 0);
|
CU_SET_MV_CAND(cur_cu, 0, 0);
|
||||||
CU_SET_MV_CAND(cur_cu, 1, 0);
|
CU_SET_MV_CAND(cur_cu, 1, 0);
|
||||||
|
|
||||||
|
// Early Skip Mode Decision
|
||||||
|
if (cfg->early_skip && cur_cu->part_size == SIZE_2Nx2N) {
|
||||||
|
|
||||||
|
int num_rdo_cands = 0;
|
||||||
|
int8_t mrg_cands[MRG_MAX_NUM_CANDS] = { 0, 1, 2, 3, 4 };
|
||||||
|
double mrg_costs[MRG_MAX_NUM_CANDS] = { MAX_DOUBLE };
|
||||||
|
|
||||||
|
// Check motion vector constraints and perform rough search
|
||||||
|
for (int merge_idx = 0; merge_idx < info.num_merge_cand; ++merge_idx) {
|
||||||
|
|
||||||
|
cur_cu->inter.mv_dir = info.merge_cand[merge_idx].dir;
|
||||||
|
cur_cu->inter.mv_ref[0] = info.merge_cand[merge_idx].ref[0];
|
||||||
|
cur_cu->inter.mv_ref[1] = info.merge_cand[merge_idx].ref[1];
|
||||||
|
cur_cu->inter.mv[0][0] = info.merge_cand[merge_idx].mv[0][0];
|
||||||
|
cur_cu->inter.mv[0][1] = info.merge_cand[merge_idx].mv[0][1];
|
||||||
|
cur_cu->inter.mv[1][0] = info.merge_cand[merge_idx].mv[1][0];
|
||||||
|
cur_cu->inter.mv[1][1] = info.merge_cand[merge_idx].mv[1][1];
|
||||||
|
|
||||||
|
// Don't try merge candidates that don't satisfy mv constraints.
|
||||||
|
if (!fracmv_within_tile(&info, cur_cu->inter.mv[0][0], cur_cu->inter.mv[0][1]) ||
|
||||||
|
!fracmv_within_tile(&info, cur_cu->inter.mv[1][0], cur_cu->inter.mv[1][1]))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cfg->rdo >= 2) {
|
||||||
|
|
||||||
|
kvz_lcu_set_trdepth(lcu, x, y, depth, depth);
|
||||||
|
kvz_inter_recon_cu(state, lcu, x, y, width);
|
||||||
|
mrg_costs[merge_idx] = kvz_satd_any_size(width, height,
|
||||||
|
lcu->rec.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH,
|
||||||
|
lcu->ref.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH);
|
||||||
|
}
|
||||||
|
|
||||||
|
num_rdo_cands++;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (cfg->rdo >= 2) {
|
||||||
|
// Sort candidates by cost
|
||||||
|
kvz_sort_modes(mrg_cands, mrg_costs, num_rdo_cands);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Limit by availability
|
||||||
|
// TODO: Do not limit to just 1
|
||||||
|
num_rdo_cands = MIN(1, num_rdo_cands);
|
||||||
|
|
||||||
|
// RDO search
|
||||||
|
for (int merge_rdo_idx = 0; merge_rdo_idx < num_rdo_cands; ++merge_rdo_idx) {
|
||||||
|
|
||||||
|
// Reconstruct blocks with merge candidate.
|
||||||
|
// Check luma CBF. Then, check chroma CBFs if luma CBF is not set
|
||||||
|
// and chroma exists.
|
||||||
|
// Early terminate if merge candidate with zero CBF is found.
|
||||||
|
int merge_idx = mrg_cands[merge_rdo_idx];
|
||||||
|
cur_cu->inter.mv_dir = info.merge_cand[merge_idx].dir;
|
||||||
|
cur_cu->inter.mv_ref[0] = info.merge_cand[merge_idx].ref[0];
|
||||||
|
cur_cu->inter.mv_ref[1] = info.merge_cand[merge_idx].ref[1];
|
||||||
|
cur_cu->inter.mv[0][0] = info.merge_cand[merge_idx].mv[0][0];
|
||||||
|
cur_cu->inter.mv[0][1] = info.merge_cand[merge_idx].mv[0][1];
|
||||||
|
cur_cu->inter.mv[1][0] = info.merge_cand[merge_idx].mv[1][0];
|
||||||
|
cur_cu->inter.mv[1][1] = info.merge_cand[merge_idx].mv[1][1];
|
||||||
|
kvz_lcu_set_trdepth(lcu, x, y, depth, depth);
|
||||||
|
kvz_inter_recon_cu(state, lcu, x, y, width);
|
||||||
|
kvz_quantize_lcu_residual(state, true, false, x, y, depth, cur_cu, lcu);
|
||||||
|
|
||||||
|
if (cbf_is_set(cur_cu->cbf, depth, COLOR_Y)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else if(state->encoder_control->chroma_format != KVZ_CSP_400) {
|
||||||
|
|
||||||
|
kvz_quantize_lcu_residual(state, false, true, x, y, depth, cur_cu, lcu);
|
||||||
|
if (!cbf_is_set_any(cur_cu->cbf, depth)) {
|
||||||
|
cur_cu->type = CU_INTER;
|
||||||
|
cur_cu->merge_idx = merge_idx;
|
||||||
|
cur_cu->skipped = true;
|
||||||
|
*inter_cost = 0.0; // TODO: Check this
|
||||||
|
*inter_bitcost = 0; // TODO: Check this
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for (int ref_idx = 0; ref_idx < state->frame->ref->used_size; ref_idx++) {
|
for (int ref_idx = 0; ref_idx < state->frame->ref->used_size; ref_idx++) {
|
||||||
info.ref_idx = ref_idx;
|
info.ref_idx = ref_idx;
|
||||||
info.ref = state->frame->ref->images[ref_idx];
|
info.ref = state->frame->ref->images[ref_idx];
|
||||||
|
|
|
@ -41,29 +41,6 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Sort modes and costs to ascending order according to costs.
|
|
||||||
*/
|
|
||||||
static INLINE void sort_modes(int8_t *__restrict modes, double *__restrict costs, uint8_t length)
|
|
||||||
{
|
|
||||||
// Length is always between 5 and 23, and is either 21, 17, 9 or 8 about
|
|
||||||
// 60% of the time, so there should be no need for anything more complex
|
|
||||||
// than insertion sort.
|
|
||||||
for (uint8_t i = 1; i < length; ++i) {
|
|
||||||
const double cur_cost = costs[i];
|
|
||||||
const int8_t cur_mode = modes[i];
|
|
||||||
uint8_t j = i;
|
|
||||||
while (j > 0 && cur_cost < costs[j - 1]) {
|
|
||||||
costs[j] = costs[j - 1];
|
|
||||||
modes[j] = modes[j - 1];
|
|
||||||
--j;
|
|
||||||
}
|
|
||||||
costs[j] = cur_cost;
|
|
||||||
modes[j] = cur_mode;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Select mode with the smallest cost.
|
* \brief Select mode with the smallest cost.
|
||||||
*/
|
*/
|
||||||
|
@ -367,7 +344,7 @@ static void search_intra_chroma_rough(encoder_state_t * const state,
|
||||||
costs[i] += satd_func(pred, orig_block);
|
costs[i] += satd_func(pred, orig_block);
|
||||||
}
|
}
|
||||||
|
|
||||||
sort_modes(modes, costs, 5);
|
kvz_sort_modes(modes, costs, 5);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -630,7 +607,7 @@ static int8_t search_intra_rdo(encoder_state_t * const state,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update order according to new costs
|
// Update order according to new costs
|
||||||
sort_modes(modes, costs, modes_to_check);
|
kvz_sort_modes(modes, costs, modes_to_check);
|
||||||
|
|
||||||
// The best transform split hierarchy is not saved anywhere, so to get the
|
// The best transform split hierarchy is not saved anywhere, so to get the
|
||||||
// transform split hierarchy the search has to be performed again with the
|
// transform split hierarchy the search has to be performed again with the
|
||||||
|
@ -868,7 +845,7 @@ void kvz_search_cu_intra(encoder_state_t * const state,
|
||||||
}
|
}
|
||||||
int num_modes_to_check = MIN(number_of_modes, number_of_modes_to_search);
|
int num_modes_to_check = MIN(number_of_modes, number_of_modes_to_search);
|
||||||
|
|
||||||
sort_modes(modes, costs, number_of_modes);
|
kvz_sort_modes(modes, costs, number_of_modes);
|
||||||
number_of_modes = search_intra_rdo(state,
|
number_of_modes = search_intra_rdo(state,
|
||||||
x_px, y_px, depth,
|
x_px, y_px, depth,
|
||||||
ref_pixels, LCU_WIDTH,
|
ref_pixels, LCU_WIDTH,
|
||||||
|
|
Loading…
Reference in a new issue