Merge branch 'smp-merge-analysis'

This commit is contained in:
Ari Lemmetti 2019-09-03 16:47:07 +03:00
commit 4e94d60552
3 changed files with 144 additions and 107 deletions

View file

@ -519,48 +519,75 @@ void kvz_inter_recon_cu(const encoder_state_t * const state,
bool predict_chroma)
{
cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y));
const int num_pu = kvz_part_mode_num_parts[cu->part_size];
for (int i = 0; i < num_pu; ++i) {
const int pu_x = PU_GET_X(cu->part_size, width, x, i);
const int pu_y = PU_GET_Y(cu->part_size, width, y, i);
const int pu_w = PU_GET_W(cu->part_size, width, i);
const int pu_h = PU_GET_H(cu->part_size, width, i);
kvz_inter_pred_pu(state, lcu, x, y, width, predict_luma, predict_chroma, i);
}
}
cu_info_t *pu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(pu_x), SUB_SCU(pu_y));
/**
* Predict a single PU.
*
* The PU may use either uniprediction or biprediction.
*
* The PU position and size are derived with the PU_GET_X/Y/W/H macros from
* the part_size of the CU found at (x, y) in the LCU. Motion information is
* then read from the LCU entry at the PU's own top-left position.
*
* When that entry has inter.mv_dir == 3, one reference picture is looked up
* through each of ref_LX[0] and ref_LX[1] and kvz_inter_recon_bipred is
* called. Otherwise a single reference picture is looked up through
* ref_LX[mv_dir - 1] and inter_recon_unipred is called.
*
* \param state encoder state
* \param lcu containing LCU
* \param x x-coordinate of the CU in pixels
* \param y y-coordinate of the CU in pixels
* \param width CU width
* \param predict_luma Enable or disable luma prediction for this call.
* \param predict_chroma Enable or disable chroma prediction for this call.
* \param i_pu Index of the PU. Always zero for 2Nx2N. Used for SMP+AMP.
*/
void kvz_inter_pred_pu(const encoder_state_t * const state,
lcu_t *lcu,
int32_t x,
int32_t y,
int32_t width,
bool predict_luma,
bool predict_chroma,
int i_pu)
// NOTE(review): the statements between the signature and the opening brace
// repeat the branch logic found further down and sit outside any braces;
// they look like removed-side lines of the diff rendering — confirm.
if (pu->inter.mv_dir == 3) {
const kvz_picture *const refs[2] = {
state->frame->ref->images[
state->frame->ref_LX[0][
pu->inter.mv_ref[0]]],
state->frame->ref->images[
state->frame->ref_LX[1][
pu->inter.mv_ref[1]]],
};
kvz_inter_recon_bipred(state,
refs[0], refs[1],
pu_x, pu_y,
pu_w, pu_h,
pu->inter.mv,
lcu,
predict_luma, predict_chroma);
} else {
const int mv_idx = pu->inter.mv_dir - 1;
const kvz_picture *const ref =
state->frame->ref->images[
state->frame->ref_LX[mv_idx][
pu->inter.mv_ref[mv_idx]]];
{
// The CU at (x, y) supplies the partition mode used to locate PU i_pu.
cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y));
const int pu_x = PU_GET_X(cu->part_size, width, x, i_pu);
const int pu_y = PU_GET_Y(cu->part_size, width, y, i_pu);
const int pu_w = PU_GET_W(cu->part_size, width, i_pu);
const int pu_h = PU_GET_H(cu->part_size, width, i_pu);
// Motion info is taken from the LCU entry at the PU's top-left position.
cu_info_t *pu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(pu_x), SUB_SCU(pu_y));
// NOTE(review): the call below uses ref and mv_idx before the branch that
// declares them; it duplicates the unipred call in the else branch further
// down and looks like removed-side diff residue — confirm.
inter_recon_unipred(state,
ref,
pu_x, pu_y,
pu_w, pu_h,
pu->inter.mv[mv_idx],
lcu,
NULL,
predict_luma, predict_chroma);
}
// mv_dir == 3: fetch one reference picture through each of ref_LX[0] and
// ref_LX[1] and pass both, with the PU's full mv array, to the bipred routine.
if (pu->inter.mv_dir == 3) {
const kvz_picture *const refs[2] = {
state->frame->ref->images[
state->frame->ref_LX[0][
pu->inter.mv_ref[0]]],
state->frame->ref->images[
state->frame->ref_LX[1][
pu->inter.mv_ref[1]]],
};
kvz_inter_recon_bipred(state,
refs[0], refs[1],
pu_x, pu_y,
pu_w, pu_h,
pu->inter.mv,
lcu,
predict_luma, predict_chroma);
}
else {
// NOTE(review): assumes mv_dir is 1 or 2 here; a value of 0 would make
// mv_idx -1 and index ref_LX/mv_ref/mv out of bounds — confirm callers
// guarantee this.
const int mv_idx = pu->inter.mv_dir - 1;
const kvz_picture *const ref =
state->frame->ref->images[
state->frame->ref_LX[mv_idx][
pu->inter.mv_ref[mv_idx]]];
inter_recon_unipred(state,
ref,
pu_x, pu_y,
pu_w, pu_h,
pu->inter.mv[mv_idx],
lcu,
NULL,
predict_luma, predict_chroma);
}
}

View file

@ -48,6 +48,15 @@ void kvz_inter_recon_cu(const encoder_state_t * const state,
bool predict_luma,
bool predict_chroma);
/**
* Predict a single PU of the CU at (x, y).
*
* x, y and width describe the CU in pixels; i_pu is the index of the PU
* within that CU. predict_luma and predict_chroma enable or disable
* prediction of the respective components for this call.
*/
void kvz_inter_pred_pu(const encoder_state_t * const state,
lcu_t *lcu,
int32_t x,
int32_t y,
int32_t width,
bool predict_luma,
bool predict_chroma,
int i_pu);
void kvz_inter_recon_bipred(const encoder_state_t * const state,
const kvz_picture * ref1,
const kvz_picture * ref2,

View file

@ -1551,87 +1551,88 @@ static void search_pu_inter(encoder_state_t * const state,
mrg_costs[i] = MAX_DOUBLE;
}
if (cur_cu->part_size == SIZE_2Nx2N) {
int num_rdo_cands = 0;
int num_rdo_cands = 0;
// Check motion vector constraints and perform rough search
for (int merge_idx = 0; merge_idx < info.num_merge_cand; ++merge_idx) {
// Check motion vector constraints and perform rough search
for (int merge_idx = 0; merge_idx < info.num_merge_cand; ++merge_idx) {
inter_merge_cand_t *cur_cand = &info.merge_cand[merge_idx];
cur_cu->inter.mv_dir = cur_cand->dir;
cur_cu->inter.mv_ref[0] = cur_cand->ref[0];
cur_cu->inter.mv_ref[1] = cur_cand->ref[1];
cur_cu->inter.mv[0][0] = cur_cand->mv[0][0];
cur_cu->inter.mv[0][1] = cur_cand->mv[0][1];
cur_cu->inter.mv[1][0] = cur_cand->mv[1][0];
cur_cu->inter.mv[1][1] = cur_cand->mv[1][1];
inter_merge_cand_t *cur_cand = &info.merge_cand[merge_idx];
cur_cu->inter.mv_dir = cur_cand->dir;
cur_cu->inter.mv_ref[0] = cur_cand->ref[0];
cur_cu->inter.mv_ref[1] = cur_cand->ref[1];
cur_cu->inter.mv[0][0] = cur_cand->mv[0][0];
cur_cu->inter.mv[0][1] = cur_cand->mv[0][1];
cur_cu->inter.mv[1][0] = cur_cand->mv[1][0];
cur_cu->inter.mv[1][1] = cur_cand->mv[1][1];
// If bipred is not enabled, do not try candidates with mv_dir == 3.
// Bipred is also forbidden for 4x8 and 8x4 blocks by the standard.
if (cur_cu->inter.mv_dir == 3 && !state->encoder_control->cfg.bipred) continue;
if (cur_cu->inter.mv_dir == 3 && !(width + height > 12)) continue;
if (cur_cu->inter.mv_dir == 3 && !state->encoder_control->cfg.bipred) continue;
bool is_duplicate = merge_candidate_in_list(info.merge_cand, cur_cand,
mrg_cands,
num_rdo_cands);
bool is_duplicate = merge_candidate_in_list(info.merge_cand, cur_cand,
mrg_cands,
num_rdo_cands);
// Don't try merge candidates that don't satisfy mv constraints.
// Don't add duplicates to list
if (!fracmv_within_tile(&info, cur_cu->inter.mv[0][0], cur_cu->inter.mv[0][1]) ||
!fracmv_within_tile(&info, cur_cu->inter.mv[1][0], cur_cu->inter.mv[1][1]) ||
is_duplicate)
{
continue;
}
kvz_inter_recon_cu(state, lcu, x, y, width, true, false);
mrg_costs[num_rdo_cands] = kvz_satd_any_size(width, height,
lcu->rec.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH,
lcu->ref.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH);
mrg_cands[num_rdo_cands] = merge_idx;
num_rdo_cands++;
// Don't try merge candidates that don't satisfy mv constraints.
// Don't add duplicates to list
if (!fracmv_within_tile(&info, cur_cu->inter.mv[0][0], cur_cu->inter.mv[0][1]) ||
!fracmv_within_tile(&info, cur_cu->inter.mv[1][0], cur_cu->inter.mv[1][1]) ||
is_duplicate)
{
continue;
}
// Sort candidates by cost
kvz_sort_modes(mrg_cands, mrg_costs, num_rdo_cands);
kvz_inter_pred_pu(state, lcu, x_cu, y_cu, width_cu, true, false, i_pu);
mrg_costs[num_rdo_cands] = kvz_satd_any_size(width, height,
lcu->rec.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH,
lcu->ref.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH);
// Limit by availability
// TODO: Do not limit to just 1
num_rdo_cands = MIN(1, num_rdo_cands);
mrg_cands[num_rdo_cands] = merge_idx;
num_rdo_cands++;
}
// Sort candidates by cost
kvz_sort_modes(mrg_cands, mrg_costs, num_rdo_cands);
// Limit by availability
// TODO: Do not limit to just 1
num_rdo_cands = MIN(1, num_rdo_cands);
// Early Skip Mode Decision
bool has_chroma = state->encoder_control->chroma_format != KVZ_CSP_400;
if (cfg->early_skip) {
for (int merge_rdo_idx = 0; merge_rdo_idx < num_rdo_cands; ++merge_rdo_idx) {
// Early Skip Mode Decision
bool has_chroma = state->encoder_control->chroma_format != KVZ_CSP_400;
if (cfg->early_skip && cur_cu->part_size == SIZE_2Nx2N) {
for (int merge_rdo_idx = 0; merge_rdo_idx < num_rdo_cands; ++merge_rdo_idx) {
// Reconstruct blocks with merge candidate.
// Check luma CBF. Then, check chroma CBFs if luma CBF is not set
// and chroma exists.
// Early terminate if merge candidate with zero CBF is found.
int merge_idx = mrg_cands[merge_rdo_idx];
cur_cu->inter.mv_dir = info.merge_cand[merge_idx].dir;
cur_cu->inter.mv_ref[0] = info.merge_cand[merge_idx].ref[0];
cur_cu->inter.mv_ref[1] = info.merge_cand[merge_idx].ref[1];
cur_cu->inter.mv[0][0] = info.merge_cand[merge_idx].mv[0][0];
cur_cu->inter.mv[0][1] = info.merge_cand[merge_idx].mv[0][1];
cur_cu->inter.mv[1][0] = info.merge_cand[merge_idx].mv[1][0];
cur_cu->inter.mv[1][1] = info.merge_cand[merge_idx].mv[1][1];
kvz_lcu_fill_trdepth(lcu, x, y, depth, MAX(1, depth));
kvz_inter_recon_cu(state, lcu, x, y, width, true, false);
kvz_quantize_lcu_residual(state, true, false, x, y, depth, cur_cu, lcu);
// Reconstruct blocks with merge candidate.
// Check luma CBF. Then, check chroma CBFs if luma CBF is not set
// and chroma exists.
// Early terminate if merge candidate with zero CBF is found.
int merge_idx = mrg_cands[merge_rdo_idx];
cur_cu->inter.mv_dir = info.merge_cand[merge_idx].dir;
cur_cu->inter.mv_ref[0] = info.merge_cand[merge_idx].ref[0];
cur_cu->inter.mv_ref[1] = info.merge_cand[merge_idx].ref[1];
cur_cu->inter.mv[0][0] = info.merge_cand[merge_idx].mv[0][0];
cur_cu->inter.mv[0][1] = info.merge_cand[merge_idx].mv[0][1];
cur_cu->inter.mv[1][0] = info.merge_cand[merge_idx].mv[1][0];
cur_cu->inter.mv[1][1] = info.merge_cand[merge_idx].mv[1][1];
kvz_lcu_fill_trdepth(lcu, x, y, depth, MAX(1, depth));
kvz_inter_recon_cu(state, lcu, x, y, width, true, false);
kvz_quantize_lcu_residual(state, true, false, x, y, depth, cur_cu, lcu);
if (cbf_is_set(cur_cu->cbf, depth, COLOR_Y)) {
continue;
}
else if (has_chroma) {
kvz_inter_recon_cu(state, lcu, x, y, width, false, has_chroma);
kvz_quantize_lcu_residual(state, false, has_chroma, x, y, depth, cur_cu, lcu);
if (!cbf_is_set_any(cur_cu->cbf, depth)) {
cur_cu->type = CU_INTER;
cur_cu->merge_idx = merge_idx;
cur_cu->skipped = true;
*inter_cost = 0.0; // TODO: Check this
*inter_bitcost = 0; // TODO: Check this
return;
}
if (cbf_is_set(cur_cu->cbf, depth, COLOR_Y)) {
continue;
}
else if (has_chroma) {
kvz_inter_recon_cu(state, lcu, x, y, width, false, has_chroma);
kvz_quantize_lcu_residual(state, false, has_chroma, x, y, depth, cur_cu, lcu);
if (!cbf_is_set_any(cur_cu->cbf, depth)) {
cur_cu->type = CU_INTER;
cur_cu->merge_idx = merge_idx;
cur_cu->skipped = true;
*inter_cost = 0.0; // TODO: Check this
*inter_bitcost = 0; // TODO: Check this
return;
}
}
}