mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 19:24:06 +00:00
Probably correct RD cost calculation for all inter modes
This commit is contained in:
parent
1a9e54601f
commit
6d73db5a2a
33
src/search.c
33
src/search.c
|
@ -60,14 +60,6 @@
|
|||
// Cost threshold for doing intra search in inter frames with --rd=0.
|
||||
static const int INTRA_THRESHOLD = 8;
|
||||
|
||||
// Modify weight of luma SSD.
|
||||
#ifndef LUMA_MULT
|
||||
# define LUMA_MULT 0.8
|
||||
#endif
|
||||
// Modify weight of chroma SSD.
|
||||
#ifndef CHROMA_MULT
|
||||
# define CHROMA_MULT 1.5
|
||||
#endif
|
||||
|
||||
static INLINE void copy_cu_info(int x_local, int y_local, int width, lcu_t *from, lcu_t *to)
|
||||
{
|
||||
|
@ -216,16 +208,16 @@ static double cu_zero_coeff_cost(const encoder_state_t *state, lcu_t *work_tree,
|
|||
const int chroma_index = (y_local / 2) * LCU_WIDTH_C + (x_local / 2);
|
||||
|
||||
double ssd = 0.0;
|
||||
ssd += LUMA_MULT * kvz_pixels_calc_ssd(
|
||||
ssd += KVZ_LUMA_MULT * kvz_pixels_calc_ssd(
|
||||
&lcu->ref.y[luma_index], &lcu->rec.y[luma_index],
|
||||
LCU_WIDTH, LCU_WIDTH, cu_width
|
||||
);
|
||||
if (x % 8 == 0 && y % 8 == 0 && state->encoder_control->chroma_format != KVZ_CSP_400) {
|
||||
ssd += CHROMA_MULT * kvz_pixels_calc_ssd(
|
||||
ssd += KVZ_CHROMA_MULT * kvz_pixels_calc_ssd(
|
||||
&lcu->ref.u[chroma_index], &lcu->rec.u[chroma_index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C, cu_width / 2
|
||||
);
|
||||
ssd += CHROMA_MULT * kvz_pixels_calc_ssd(
|
||||
ssd += KVZ_CHROMA_MULT * kvz_pixels_calc_ssd(
|
||||
&lcu->ref.v[chroma_index], &lcu->rec.v[chroma_index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C, cu_width / 2
|
||||
);
|
||||
|
@ -253,6 +245,7 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
|
|||
double *bit_cost)
|
||||
{
|
||||
const int width = LCU_WIDTH >> depth;
|
||||
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0);
|
||||
|
||||
// tr_cu is used for TU parameters.
|
||||
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
|
||||
|
@ -280,7 +273,8 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
|
|||
if (width <= TR_MAX_WIDTH
|
||||
&& width > TR_MIN_WIDTH
|
||||
&& !intra_split_flag
|
||||
&& MIN(tr_cu->tr_depth, depth) - tr_cu->depth < max_tr_depth)
|
||||
&& MIN(tr_cu->tr_depth, depth) - tr_cu->depth < max_tr_depth
|
||||
&& !skip_residual_coding)
|
||||
{
|
||||
cabac_ctx_t *ctx = &(cabac->ctx.trans_subdiv_model[5 - (6 - depth)]);
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, tr_depth > 0, tr_tree_bits, "tr_split_search");
|
||||
|
@ -300,7 +294,7 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
|
|||
}
|
||||
|
||||
|
||||
if (cabac->update && tr_cu->tr_depth == tr_cu->depth) {
|
||||
if (cabac->update && tr_cu->tr_depth == tr_cu->depth && !skip_residual_coding) {
|
||||
// Because these need to be coded before the luma cbf they also need to be counted
|
||||
// before the cabac state changes. However, since this branch is only executed when
|
||||
// calculating the last RD cost it is not a problem to include the chroma cbf costs in
|
||||
|
@ -340,7 +334,8 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
|
|||
width);
|
||||
}
|
||||
|
||||
{
|
||||
|
||||
if (!skip_residual_coding) {
|
||||
int8_t luma_scan_mode = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth);
|
||||
const coeff_t *coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)];
|
||||
|
||||
|
@ -349,7 +344,7 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
|
|||
}
|
||||
|
||||
double bits = tr_tree_bits + coeff_bits;
|
||||
return (double)ssd * LUMA_MULT + bits * state->lambda;
|
||||
return (double)ssd * KVZ_LUMA_MULT + bits * state->lambda;
|
||||
}
|
||||
|
||||
|
||||
|
@ -362,6 +357,7 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
|
|||
const vector2d_t lcu_px = { x_px / 2, y_px / 2 };
|
||||
const int width = (depth <= MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth;
|
||||
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
|
||||
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0);
|
||||
|
||||
double tr_tree_bits = 0;
|
||||
double coeff_bits = 0;
|
||||
|
@ -376,7 +372,7 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
|
|||
}
|
||||
|
||||
// See luma for why the second condition
|
||||
if (depth < MAX_PU_DEPTH && (!state->search_cabac.update || tr_cu->tr_depth != tr_cu->depth)) {
|
||||
if (depth < MAX_PU_DEPTH && (!state->search_cabac.update || tr_cu->tr_depth != tr_cu->depth) && !skip_residual_coding) {
|
||||
const int tr_depth = depth - pred_cu->depth;
|
||||
cabac_data_t* cabac = (cabac_data_t*)&state->search_cabac;
|
||||
cabac_ctx_t *ctx = &(cabac->ctx.qt_cbf_model_chroma[tr_depth]);
|
||||
|
@ -417,6 +413,7 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
|
|||
ssd = ssd_u + ssd_v;
|
||||
}
|
||||
|
||||
if (!skip_residual_coding)
|
||||
{
|
||||
int8_t scan_order = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth);
|
||||
const int index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y);
|
||||
|
@ -427,7 +424,7 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
|
|||
}
|
||||
|
||||
double bits = tr_tree_bits + coeff_bits;
|
||||
return (double)ssd * CHROMA_MULT + bits * state->lambda;
|
||||
return (double)ssd * KVZ_CHROMA_MULT + bits * state->lambda;
|
||||
}
|
||||
|
||||
static double cu_rd_cost_tr_split_accurate(const encoder_state_t* const state,
|
||||
|
@ -553,7 +550,7 @@ static double cu_rd_cost_tr_split_accurate(const encoder_state_t* const state,
|
|||
}
|
||||
*bit_cost += coeff_bits;
|
||||
double bits = tr_tree_bits + coeff_bits;
|
||||
return luma_ssd * LUMA_MULT + chroma_ssd * CHROMA_MULT + bits * state->lambda;
|
||||
return luma_ssd * KVZ_LUMA_MULT + chroma_ssd * KVZ_CHROMA_MULT + bits * state->lambda;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -46,6 +46,15 @@
|
|||
|
||||
#define MAX_UNIT_STATS_MAP_SIZE MAX(MAX_REF_PIC_COUNT, MRG_MAX_NUM_CANDS)
|
||||
|
||||
// Modify weight of luma SSD.
|
||||
#ifndef KVZ_LUMA_MULT
|
||||
# define KVZ_LUMA_MULT 0.8
|
||||
#endif
|
||||
// Modify weight of chroma SSD.
|
||||
#ifndef KVZ_CHROMA_MULT
|
||||
# define KVZ_CHROMA_MULT 1.5
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \brief Data collected during search processes.
|
||||
*
|
||||
|
|
|
@ -1160,6 +1160,30 @@ static void search_frac(inter_search_info_t *info,
|
|||
*best_bits = bitcost;
|
||||
}
|
||||
|
||||
/**
 * \brief Sum the `skipped` fields of the left and above neighbouring CUs.
 *
 * Exactly one of the two lookup sources is expected: the assert rejects the
 * case where both \p lcu and \p cu_a are given.
 *
 * \param x     Horizontal position of the current CU. With \p lcu it is
 *              converted through SUB_SCU() before the lookup.
 * \param y     Vertical position of the current CU, handled like \p x.
 * \param lcu   LCU to read the neighbours from, or NULL to use \p cu_a.
 * \param cu_a  CU array to read the neighbours from when \p lcu is NULL.
 * \return Sum of the `skipped` fields of the left neighbour (only when x is
 *         non-zero / positive) and the above neighbour (only when y is
 *         non-zero / positive).
 */
int kvz_get_skip_context(int x, int y, lcu_t* const lcu, cu_array_t* const cu_a) {
  assert(!(lcu && cu_a));

  if (!lcu) {
    // No LCU given: look the neighbours up directly in the CU array.
    int skipped_neighbours = 0;
    if (x > 0) {
      skipped_neighbours += kvz_cu_array_at_const(cu_a, x - 1, y)->skipped;
    }
    if (y > 0) {
      skipped_neighbours += kvz_cu_array_at_const(cu_a, x, y - 1)->skipped;
    }
    return skipped_neighbours;
  }

  // LCU given: neighbours are addressed with SUB_SCU()-converted coordinates,
  // while the availability checks still use the unconverted x and y.
  const int local_x = SUB_SCU(x);
  const int local_y = SUB_SCU(y);
  int skipped_neighbours = 0;
  if (x != 0) {
    skipped_neighbours += LCU_GET_CU_AT_PX(lcu, local_x - 1, local_y)->skipped;
  }
  if (y != 0) {
    skipped_neighbours += LCU_GET_CU_AT_PX(lcu, local_x, local_y - 1)->skipped;
  }
  return skipped_neighbours;
}
|
||||
|
||||
/**
|
||||
* \brief Calculate the scaled MV
|
||||
*/
|
||||
|
@ -1676,7 +1700,7 @@ static void search_pu_inter(encoder_state_t * const state,
|
|||
|
||||
double bits = merge_flag_cost + merge_idx + CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.cu_merge_idx_ext_model), merge_idx != 0);
|
||||
if(state->encoder_control->cfg.rdo >= 2) {
|
||||
kvz_cu_cost_inter_rd2(state, x, y, depth, lcu, &merge->cost[merge->size], &bits);
|
||||
kvz_cu_cost_inter_rd2(state, x, y, depth, &merge->unit[merge->size], lcu, &merge->cost[merge->size], &bits);
|
||||
}
|
||||
else {
|
||||
merge->cost[merge->size] = kvz_satd_any_size(width, height,
|
||||
|
@ -1773,10 +1797,6 @@ static void search_pu_inter(encoder_state_t * const state,
|
|||
amvp[0].size > 0 ? amvp[0].keys[0] : 0,
|
||||
amvp[1].size > 0 ? amvp[1].keys[0] : 0
|
||||
};
|
||||
if (state->encoder_control->cfg.rdo >= 2) {
|
||||
kvz_cu_cost_inter_rd2(state, x, y, depth, lcu, &amvp[0].cost[best_keys[0]], &amvp[0].bits[best_keys[0]]);
|
||||
kvz_cu_cost_inter_rd2(state, x, y, depth, lcu, &amvp[1].cost[best_keys[1]], &amvp[1].bits[best_keys[1]]);
|
||||
}
|
||||
|
||||
cu_info_t *best_unipred[2] = {
|
||||
&amvp[0].unit[best_keys[0]],
|
||||
|
@ -1808,6 +1828,11 @@ static void search_pu_inter(encoder_state_t * const state,
|
|||
}
|
||||
}
|
||||
|
||||
if (state->encoder_control->cfg.rdo >= 2) {
|
||||
kvz_cu_cost_inter_rd2(state, x, y, depth, &amvp[0].unit[best_keys[0]], lcu, &amvp[0].cost[best_keys[0]], &amvp[0].bits[best_keys[0]]);
|
||||
kvz_cu_cost_inter_rd2(state, x, y, depth, &amvp[1].unit[best_keys[1]], lcu, &amvp[1].cost[best_keys[1]], &amvp[1].bits[best_keys[1]]);
|
||||
}
|
||||
|
||||
// Fractional-pixel motion estimation.
|
||||
// Refine the best PUs so far from both lists, if available.
|
||||
for (int list = 0; list < 2; ++list) {
|
||||
|
@ -1859,7 +1884,7 @@ static void search_pu_inter(encoder_state_t * const state,
|
|||
CU_SET_MV_CAND(unipred_pu, list, cu_mv_cand);
|
||||
|
||||
if (state->encoder_control->cfg.rdo >= 2) {
|
||||
kvz_cu_cost_inter_rd2(state, x, y, depth, lcu, &frac_cost, &frac_bits);
|
||||
kvz_cu_cost_inter_rd2(state, x, y, depth, unipred_pu, lcu, &frac_cost, &frac_bits);
|
||||
}
|
||||
|
||||
amvp[list].cost[key] = frac_cost;
|
||||
|
@ -1985,7 +2010,7 @@ static void search_pu_inter(encoder_state_t * const state,
|
|||
assert(amvp[2].size <= MAX_UNIT_STATS_MAP_SIZE);
|
||||
kvz_sort_keys_by_cost(&amvp[2]);
|
||||
if (state->encoder_control->cfg.rdo >= 2) {
|
||||
kvz_cu_cost_inter_rd2(state, x, y, depth, lcu, &amvp[2].cost[amvp[2].keys[0]], &amvp[2].bits[amvp[2].keys[0]]);
|
||||
kvz_cu_cost_inter_rd2(state, x, y, depth, &amvp[2].unit[amvp[2].keys[0]], lcu, &amvp[2].cost[amvp[2].keys[0]], &amvp[2].bits[amvp[2].keys[0]]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2012,39 +2037,96 @@ static void search_pu_inter(encoder_state_t * const state,
|
|||
*/
|
||||
void kvz_cu_cost_inter_rd2(encoder_state_t * const state,
|
||||
int x, int y, int depth,
|
||||
cu_info_t* cur_cu,
|
||||
lcu_t *lcu,
|
||||
double *inter_cost,
|
||||
double* inter_bitcost){
|
||||
|
||||
cu_info_t *cur_cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y));
|
||||
|
||||
int tr_depth = MAX(1, depth);
|
||||
if (cur_cu->part_size != SIZE_2Nx2N) {
|
||||
tr_depth = depth + 1;
|
||||
}
|
||||
kvz_lcu_fill_trdepth(lcu, x, y, depth, tr_depth);
|
||||
|
||||
const int x_px = SUB_SCU(x);
|
||||
const int y_px = SUB_SCU(y);
|
||||
const int width = LCU_WIDTH >> depth;
|
||||
|
||||
const bool reconstruct_chroma = state->encoder_control->chroma_format != KVZ_CSP_400;
|
||||
kvz_inter_recon_cu(state, lcu, x, y, CU_WIDTH_FROM_DEPTH(depth), true, reconstruct_chroma);
|
||||
kvz_quantize_lcu_residual(state, true, reconstruct_chroma,
|
||||
x, y, depth,
|
||||
NULL,
|
||||
lcu,
|
||||
false);
|
||||
|
||||
int index = y_px * LCU_WIDTH + x_px;
|
||||
double ssd = kvz_pixels_calc_ssd(&lcu->ref.y[index], &lcu->rec.y[index],
|
||||
LCU_WIDTH, LCU_WIDTH,
|
||||
width) * KVZ_LUMA_MULT;
|
||||
if (reconstruct_chroma) {
|
||||
int index = y_px / 2 * LCU_WIDTH_C + x_px / 2;
|
||||
double ssd_u = kvz_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
width);
|
||||
double ssd_v = kvz_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.v[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
width);
|
||||
ssd += ssd_u + ssd_v;
|
||||
ssd *= KVZ_CHROMA_MULT;
|
||||
}
|
||||
double no_cbf_bits;
|
||||
double bits = 0;
|
||||
int cbf = cbf_is_set_any(cur_cu->cbf, depth);
|
||||
*inter_bitcost += CTX_ENTROPY_FBITS(&state->cabac.ctx.cu_qt_root_cbf_model, !!cbf);
|
||||
int skip_context = kvz_get_skip_context(x, y, lcu, NULL);
|
||||
if (cur_cu->merged) {
|
||||
no_cbf_bits = CTX_ENTROPY_FBITS(&state->cabac.ctx.cu_skip_flag_model[skip_context], 1);
|
||||
bits += CTX_ENTROPY_FBITS(&state->cabac.ctx.cu_skip_flag_model[skip_context], 0);
|
||||
}
|
||||
else {
|
||||
no_cbf_bits = CTX_ENTROPY_FBITS(&state->cabac.ctx.cu_qt_root_cbf_model, 0);
|
||||
bits += CTX_ENTROPY_FBITS(&state->cabac.ctx.cu_qt_root_cbf_model, 1);
|
||||
}
|
||||
double no_cbf_cost = ssd + (no_cbf_bits + *inter_bitcost) * state->lambda;
|
||||
|
||||
kvz_quantize_lcu_residual(state, true, reconstruct_chroma,
|
||||
x, y, depth,
|
||||
NULL,
|
||||
lcu,
|
||||
false);
|
||||
|
||||
int cbf = cbf_is_set_any(cur_cu->cbf, depth);
|
||||
|
||||
double temp_bits = 0;
|
||||
if(cbf) {
|
||||
*inter_cost = kvz_cu_rd_cost_luma(state, SUB_SCU(x), SUB_SCU(y), depth, cur_cu, lcu, &bits);
|
||||
*inter_cost = kvz_cu_rd_cost_luma(state, x_px, y_px, depth, cur_cu, lcu, &temp_bits);
|
||||
if (reconstruct_chroma) {
|
||||
*inter_cost += kvz_cu_rd_cost_chroma(state, SUB_SCU(x), SUB_SCU(y), depth, cur_cu, lcu, &bits);
|
||||
*inter_cost += kvz_cu_rd_cost_chroma(state, x_px, y_px, depth, cur_cu, lcu, &temp_bits);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// If we have no coeffs after quant we already have the cost calculated
|
||||
*inter_cost = no_cbf_cost;
|
||||
if(cur_cu->merged) {
|
||||
*inter_bitcost += no_cbf_bits;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
FILE_BITS(bits, x, y, depth, "inter rd 2 bits");
|
||||
|
||||
*inter_cost += *inter_bitcost * state->lambda;
|
||||
*inter_cost += (*inter_bitcost +bits )* state->lambda;
|
||||
|
||||
if(no_cbf_cost < *inter_cost && 0) {
|
||||
cur_cu->cbf = 0;
|
||||
if (cur_cu->merged) {
|
||||
cur_cu->skipped = 1;
|
||||
}
|
||||
kvz_inter_recon_cu(state, lcu, x, y, CU_WIDTH_FROM_DEPTH(depth), true, reconstruct_chroma);
|
||||
*inter_cost = no_cbf_cost;
|
||||
if (cur_cu->merged) {
|
||||
*inter_bitcost += no_cbf_bits;
|
||||
}
|
||||
}
|
||||
else if(cur_cu->merged) {
|
||||
if (cur_cu->merged) {
|
||||
*inter_bitcost += bits;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -2267,7 +2349,8 @@ void kvz_search_cu_smp(encoder_state_t * const state,
|
|||
// Calculate more accurate cost when needed
|
||||
if (state->encoder_control->cfg.rdo >= 2) {
|
||||
kvz_cu_cost_inter_rd2(state,
|
||||
x, y, depth,
|
||||
x, y, depth,
|
||||
LCU_GET_CU_AT_PX(lcu, x_local, y_local),
|
||||
lcu,
|
||||
inter_cost,
|
||||
inter_bitcost);
|
||||
|
|
|
@ -94,8 +94,11 @@ unsigned kvz_inter_satd_cost(const encoder_state_t* state,
|
|||
int y);
|
||||
void kvz_cu_cost_inter_rd2(encoder_state_t* const state,
|
||||
int x, int y, int depth,
|
||||
cu_info_t* cur_cu,
|
||||
lcu_t* lcu,
|
||||
double* inter_cost,
|
||||
double* inter_bitcost);
|
||||
|
||||
int kvz_get_skip_context(int x, int y, lcu_t* const lcu, cu_array_t* const cu_a);
|
||||
|
||||
#endif // SEARCH_INTER_H_
|
||||
|
|
Loading…
Reference in a new issue