mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 19:24:06 +00:00
Relax inter-CTU dependencies when SAO is off
When using WPP and OWF, the first CTU of a row depends on the last CTU of the row below in the reference frame. This is necessary when SAO is enabled, since we currently do SAO for a whole CTU row at a time. When SAO is disabled, however, it is unnecessary to wait for the whole row. This commit changes CTUs to depend only on the CTU below in the reference frame, instead of the whole row, when WPP and OWF are enabled and SAO is disabled. This gives a significant speedup when running on a machine with many CPU cores.
This commit is contained in:
parent
1efa2708b2
commit
bb5354f7e2
|
@ -795,9 +795,10 @@ static void encoder_state_encode_leaf(encoder_state_t * const state)
|
|||
#endif
|
||||
kvz_threadqueue_free_job(&state->tile->wf_jobs[lcu->id]);
|
||||
state->tile->wf_jobs[lcu->id] = kvz_threadqueue_submit(state->encoder_control->threadqueue, encoder_state_worker_encode_lcu, (void*)lcu, 1, job_description);
|
||||
|
||||
threadqueue_job_t **job = &state->tile->wf_jobs[lcu->id];
|
||||
|
||||
// If job object was returned, add dependancies and allow it to run.
|
||||
if (state->tile->wf_jobs[lcu->id]) {
|
||||
if (job[0]) {
|
||||
// Add inter frame dependancies when ecoding more than one frame at
|
||||
// once. The added dependancy is for the first LCU of each wavefront
|
||||
// row to depend on the reconstruction status of the row below in the
|
||||
|
@ -806,26 +807,33 @@ static void encoder_state_encode_leaf(encoder_state_t * const state)
|
|||
state->previous_encoder_state->tqj_recon_done &&
|
||||
state->frame->slicetype != KVZ_SLICE_I)
|
||||
{
|
||||
if (!lcu->left) {
|
||||
const lcu_order_element_t * const ref_lcu = &ref_state->lcu_order[i];
|
||||
if (lcu->below) {
|
||||
kvz_threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], ref_lcu->below->encoder_state->tqj_recon_done);
|
||||
// We need to wait until the CTUs whose pixels we refer to are
|
||||
// done before we can start this CTU.
|
||||
if (lcu->below) {
|
||||
if (lcu->below->right) {
|
||||
kvz_threadqueue_job_dep_add(job[0], ref_state->tile->wf_jobs[lcu->below->right->id]);
|
||||
} else {
|
||||
kvz_threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], ref_lcu->encoder_state->tqj_recon_done);
|
||||
kvz_threadqueue_job_dep_add(job[0], ref_state->tile->wf_jobs[lcu->below->id]);
|
||||
}
|
||||
} else {
|
||||
if (lcu->right) {
|
||||
kvz_threadqueue_job_dep_add(job[0], ref_state->tile->wf_jobs[lcu->right->id]);
|
||||
} else {
|
||||
kvz_threadqueue_job_dep_add(job[0], ref_state->tile->wf_jobs[lcu->id]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add local WPP dependancy to the LCU on the left.
|
||||
if (lcu->left) {
|
||||
kvz_threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], state->tile->wf_jobs[lcu->id - 1]);
|
||||
kvz_threadqueue_job_dep_add(job[0], job[-1]);
|
||||
}
|
||||
// Add local WPP dependancy to the LCU on the top right.
|
||||
if (lcu->above) {
|
||||
if (lcu->above->right) {
|
||||
kvz_threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], state->tile->wf_jobs[lcu->id - state->tile->frame->width_in_lcu + 1]);
|
||||
kvz_threadqueue_job_dep_add(job[0], job[-state->tile->frame->width_in_lcu + 1]);
|
||||
} else {
|
||||
kvz_threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], state->tile->wf_jobs[lcu->id - state->tile->frame->width_in_lcu]);
|
||||
kvz_threadqueue_job_dep_add(job[0], job[-state->tile->frame->width_in_lcu]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1005,6 +1005,7 @@ static INLINE bool add_mvp_candidate(const encoder_state_t *state,
|
|||
{
|
||||
if (!cand) return false;
|
||||
|
||||
assert(cand->inter.mv_dir != 0);
|
||||
const int cand_list = cand->inter.mv_dir & (1 << reflist) ? reflist : !reflist;
|
||||
|
||||
if (scaling) {
|
||||
|
|
|
@ -40,9 +40,50 @@
|
|||
*/
|
||||
static INLINE bool fracmv_within_tile(const encoder_state_t *state, const vector2d_t* orig, int x, int y, int width, int height, int wpp_limit)
|
||||
{
|
||||
const encoder_control_t *ctrl = state->encoder_control;
|
||||
|
||||
if (ctrl->cfg.owf && ctrl->cfg.wpp) {
|
||||
// Check that the block does not reference pixels that are not final.
|
||||
|
||||
// Fractional motion estimation and odd chroma interpolation need
|
||||
// 4 pixels below the bottom edge of the block.
|
||||
int margin = 4;
|
||||
if (ctrl->cfg.sao_enable) {
|
||||
// Make sure we don't refer to pixels for which SAO reconstruction
|
||||
// has not been done.
|
||||
margin += SAO_DELAY_PX;
|
||||
} else if (ctrl->cfg.deblock_enable) {
|
||||
// Make sure we don't refer to pixels that have not been deblocked.
|
||||
margin += DEBLOCK_DELAY_PX;
|
||||
}
|
||||
|
||||
// Coordinates of the top-left corner of the containing LCU.
|
||||
const vector2d_t orig_lcu = {
|
||||
.x = orig->x / LCU_WIDTH,
|
||||
.y = orig->y / LCU_WIDTH,
|
||||
};
|
||||
// Difference between the coordinates of the LCU containing the
|
||||
// bottom-left corner of the referenced block and the LCU containing
|
||||
// this block.
|
||||
const vector2d_t mv_lcu = {
|
||||
.x = (((orig->x + width + margin) << 2) + x) / (LCU_WIDTH << 2) - orig_lcu.x,
|
||||
.y = (((orig->y + height + margin) << 2) + y) / (LCU_WIDTH << 2) - orig_lcu.y,
|
||||
};
|
||||
|
||||
// TODO: Remove hard coded constants.
|
||||
if (mv_lcu.y > 1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// TODO: Remove hard coded constants.
|
||||
if (mv_lcu.x + mv_lcu.y > 2) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (state->encoder_control->cfg.mv_constraint == KVZ_MV_CONSTRAIN_NONE) {
|
||||
return (wpp_limit == -1 || y + (height << 2) <= (wpp_limit << 2));
|
||||
};
|
||||
return true;
|
||||
}
|
||||
|
||||
int margin = 0;
|
||||
if (state->encoder_control->cfg.mv_constraint == KVZ_MV_CONSTRAIN_FRAME_AND_TILE_MARGIN) {
|
||||
|
@ -1711,6 +1752,13 @@ void kvz_search_cu_smp(encoder_state_t * const state,
|
|||
|
||||
search_pu_inter(state, x, y, depth, part_mode, i, lcu, &cost, &bitcost);
|
||||
|
||||
if (cost >= MAX_INT) {
|
||||
// Could not find any motion vector.
|
||||
*inter_cost = MAX_INT;
|
||||
*inter_bitcost = MAX_INT;
|
||||
return;
|
||||
}
|
||||
|
||||
*inter_cost += cost;
|
||||
*inter_bitcost += bitcost;
|
||||
|
||||
|
|
Loading…
Reference in a new issue