mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-24 02:24:07 +00:00
Check zero coefficient cost for inter
Checks the cost of flushing all coefficients of an inter block to zero. This is much faster than doing full RDOQ but can still reduce bitrate significantly. Encoding speed is increased since fewer coefficient bits have to be coded with CABAC.
This commit is contained in:
parent
018b5ffa64
commit
8c53417006
51
src/search.c
51
src/search.c
|
@ -392,6 +392,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
||||||
const videoframe_t * const frame = state->tile->frame;
|
const videoframe_t * const frame = state->tile->frame;
|
||||||
int cu_width = LCU_WIDTH >> depth;
|
int cu_width = LCU_WIDTH >> depth;
|
||||||
double cost = MAX_INT;
|
double cost = MAX_INT;
|
||||||
|
double inter_zero_coeff_cost = MAX_INT;
|
||||||
uint32_t inter_bitcost = MAX_INT;
|
uint32_t inter_bitcost = MAX_INT;
|
||||||
cu_info_t *cur_cu;
|
cu_info_t *cur_cu;
|
||||||
|
|
||||||
|
@ -518,7 +519,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
||||||
// rd2. Possibly because the luma mode search already takes chroma
|
// rd2. Possibly because the luma mode search already takes chroma
|
||||||
// into account, so there is less of a chanse of luma mode being
|
// into account, so there is less of a chanse of luma mode being
|
||||||
// really bad for chroma.
|
// really bad for chroma.
|
||||||
if (state->encoder_control->cfg.rdo == 3) {
|
if (ctrl->cfg.rdo == 3) {
|
||||||
cur_cu->intra.mode_chroma = kvz_search_cu_intra_chroma(state, x, y, depth, lcu);
|
cur_cu->intra.mode_chroma = kvz_search_cu_intra_chroma(state, x, y, depth, lcu);
|
||||||
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
|
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
|
||||||
}
|
}
|
||||||
|
@ -540,6 +541,30 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
||||||
|
|
||||||
kvz_inter_recon_cu(state, lcu, x, y, cu_width);
|
kvz_inter_recon_cu(state, lcu, x, y, cu_width);
|
||||||
|
|
||||||
|
if (!ctrl->cfg.lossless && !ctrl->cfg.rdoq_enable) {
|
||||||
|
const int luma_index = y_local * LCU_WIDTH + x_local;
|
||||||
|
const int chroma_index = (y_local / 2) * LCU_WIDTH_C + (x_local / 2);
|
||||||
|
|
||||||
|
double ssd = 0.0;
|
||||||
|
ssd += LUMA_MULT * kvz_pixels_calc_ssd(
|
||||||
|
&lcu->ref.y[luma_index], &lcu->rec.y[luma_index],
|
||||||
|
LCU_WIDTH, LCU_WIDTH, cu_width
|
||||||
|
);
|
||||||
|
ssd += CHROMA_MULT * kvz_pixels_calc_ssd(
|
||||||
|
&lcu->ref.u[chroma_index], &lcu->rec.u[chroma_index],
|
||||||
|
LCU_WIDTH_C, LCU_WIDTH_C, cu_width / 2
|
||||||
|
);
|
||||||
|
ssd += CHROMA_MULT * kvz_pixels_calc_ssd(
|
||||||
|
&lcu->ref.v[chroma_index], &lcu->rec.v[chroma_index],
|
||||||
|
LCU_WIDTH_C, LCU_WIDTH_C, cu_width / 2
|
||||||
|
);
|
||||||
|
|
||||||
|
inter_zero_coeff_cost = ssd + inter_bitcost * state->lambda;
|
||||||
|
|
||||||
|
// Save the pixels at a lower level of the working tree.
|
||||||
|
copy_cu_pixels(x_local, y_local, cu_width, lcu, &work_tree[depth + 1]);
|
||||||
|
}
|
||||||
|
|
||||||
const bool has_chroma = state->encoder_control->chroma_format != KVZ_CSP_400;
|
const bool has_chroma = state->encoder_control->chroma_format != KVZ_CSP_400;
|
||||||
kvz_quantize_lcu_residual(state,
|
kvz_quantize_lcu_residual(state,
|
||||||
true, has_chroma,
|
true, has_chroma,
|
||||||
|
@ -549,7 +574,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
||||||
|
|
||||||
int cbf = cbf_is_set_any(cur_cu->cbf, depth);
|
int cbf = cbf_is_set_any(cur_cu->cbf, depth);
|
||||||
|
|
||||||
if(cur_cu->merged && !cbf && cur_cu->part_size == SIZE_2Nx2N) {
|
if (cur_cu->merged && !cbf && cur_cu->part_size == SIZE_2Nx2N) {
|
||||||
cur_cu->merged = 0;
|
cur_cu->merged = 0;
|
||||||
cur_cu->skipped = 1;
|
cur_cu->skipped = 1;
|
||||||
// Selecting skip reduces bits needed to code the CU
|
// Selecting skip reduces bits needed to code the CU
|
||||||
|
@ -575,6 +600,28 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
||||||
}
|
}
|
||||||
|
|
||||||
cost += mode_bits * state->lambda;
|
cost += mode_bits * state->lambda;
|
||||||
|
|
||||||
|
if (inter_zero_coeff_cost <= cost) {
|
||||||
|
cost = inter_zero_coeff_cost;
|
||||||
|
|
||||||
|
// Restore saved pixels from lower level of the working tree.
|
||||||
|
copy_cu_pixels(x_local, y_local, cu_width, &work_tree[depth + 1], lcu);
|
||||||
|
|
||||||
|
if (cur_cu->merged && cur_cu->part_size == SIZE_2Nx2N) {
|
||||||
|
cur_cu->merged = 0;
|
||||||
|
cur_cu->skipped = 1;
|
||||||
|
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cur_cu->tr_depth != depth) {
|
||||||
|
// Reset transform depth since there are no coefficients. This
|
||||||
|
// ensures that CBF is cleared for the whole area of the CU.
|
||||||
|
kvz_lcu_set_trdepth(lcu, x, y, depth, depth);
|
||||||
|
}
|
||||||
|
|
||||||
|
cur_cu->cbf = 0;
|
||||||
|
lcu_set_coeff(lcu, x_local, y_local, cu_width, cur_cu);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool can_split_cu =
|
bool can_split_cu =
|
||||||
|
|
Loading…
Reference in a new issue