diff --git a/src/encoderstate.c b/src/encoderstate.c index 71a068b0..da37cd20 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -795,9 +795,10 @@ static void encoder_state_encode_leaf(encoder_state_t * const state) #endif kvz_threadqueue_free_job(&state->tile->wf_jobs[lcu->id]); state->tile->wf_jobs[lcu->id] = kvz_threadqueue_submit(state->encoder_control->threadqueue, encoder_state_worker_encode_lcu, (void*)lcu, 1, job_description); - + threadqueue_job_t **job = &state->tile->wf_jobs[lcu->id]; + // If job object was returned, add dependancies and allow it to run. - if (state->tile->wf_jobs[lcu->id]) { + if (job[0]) { // Add inter frame dependancies when ecoding more than one frame at // once. The added dependancy is for the first LCU of each wavefront // row to depend on the reconstruction status of the row below in the @@ -806,26 +807,33 @@ static void encoder_state_encode_leaf(encoder_state_t * const state) state->previous_encoder_state->tqj_recon_done && state->frame->slicetype != KVZ_SLICE_I) { - if (!lcu->left) { - const lcu_order_element_t * const ref_lcu = &ref_state->lcu_order[i]; - if (lcu->below) { - kvz_threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], ref_lcu->below->encoder_state->tqj_recon_done); + // We need to wait until the CTUs whose pixels we refer to are + // done before we can start this CTU. + if (lcu->below) { + if (lcu->below->right) { + kvz_threadqueue_job_dep_add(job[0], ref_state->tile->wf_jobs[lcu->below->right->id]); } else { - kvz_threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], ref_lcu->encoder_state->tqj_recon_done); + kvz_threadqueue_job_dep_add(job[0], ref_state->tile->wf_jobs[lcu->below->id]); + } + } else { + if (lcu->right) { + kvz_threadqueue_job_dep_add(job[0], ref_state->tile->wf_jobs[lcu->right->id]); + } else { + kvz_threadqueue_job_dep_add(job[0], ref_state->tile->wf_jobs[lcu->id]); } } } // Add local WPP dependancy to the LCU on the left. 
if (lcu->left) { - kvz_threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], state->tile->wf_jobs[lcu->id - 1]); + kvz_threadqueue_job_dep_add(job[0], job[-1]); } // Add local WPP dependancy to the LCU on the top right. if (lcu->above) { if (lcu->above->right) { - kvz_threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], state->tile->wf_jobs[lcu->id - state->tile->frame->width_in_lcu + 1]); + kvz_threadqueue_job_dep_add(job[0], job[-state->tile->frame->width_in_lcu + 1]); } else { - kvz_threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], state->tile->wf_jobs[lcu->id - state->tile->frame->width_in_lcu]); + kvz_threadqueue_job_dep_add(job[0], job[-state->tile->frame->width_in_lcu]); } } diff --git a/src/inter.c b/src/inter.c index d0c90a88..adc16507 100644 --- a/src/inter.c +++ b/src/inter.c @@ -1005,6 +1005,7 @@ static INLINE bool add_mvp_candidate(const encoder_state_t *state, { if (!cand) return false; + assert(cand->inter.mv_dir != 0); const int cand_list = cand->inter.mv_dir & (1 << reflist) ? reflist : !reflist; if (scaling) { diff --git a/src/search_inter.c b/src/search_inter.c index 78e39978..4ff55c2c 100644 --- a/src/search_inter.c +++ b/src/search_inter.c @@ -40,9 +40,50 @@ */ static INLINE bool fracmv_within_tile(const encoder_state_t *state, const vector2d_t* orig, int x, int y, int width, int height, int wpp_limit) { + const encoder_control_t *ctrl = state->encoder_control; + + if (ctrl->cfg.owf && ctrl->cfg.wpp) { + // Check that the block does not reference pixels that are not final. + + // Fractional motion estimation and odd chroma interpolation need + // 4 pixels below the bottom edge of the block. + int margin = 4; + if (ctrl->cfg.sao_enable) { + // Make sure we don't refer to pixels for which SAO reconstruction + // has not been done. + margin += SAO_DELAY_PX; + } else if (ctrl->cfg.deblock_enable) { + // Make sure we don't refer to pixels that have not been deblocked. 
+ margin += DEBLOCK_DELAY_PX; + } + + // Coordinates of the top-left corner of the containing LCU. + const vector2d_t orig_lcu = { + .x = orig->x / LCU_WIDTH, + .y = orig->y / LCU_WIDTH, + }; + // Difference between the coordinates of the LCU containing the + // bottom-right corner of the referenced block and the LCU containing + // this block. + const vector2d_t mv_lcu = { + .x = (((orig->x + width + margin) << 2) + x) / (LCU_WIDTH << 2) - orig_lcu.x, + .y = (((orig->y + height + margin) << 2) + y) / (LCU_WIDTH << 2) - orig_lcu.y, + }; + + // TODO: Remove hard coded constants. + if (mv_lcu.y > 1) { + return false; + } + + // TODO: Remove hard coded constants. + if (mv_lcu.x + mv_lcu.y > 2) { + return false; + } + } + if (state->encoder_control->cfg.mv_constraint == KVZ_MV_CONSTRAIN_NONE) { - return (wpp_limit == -1 || y + (height << 2) <= (wpp_limit << 2)); - }; + return true; + } int margin = 0; if (state->encoder_control->cfg.mv_constraint == KVZ_MV_CONSTRAIN_FRAME_AND_TILE_MARGIN) { @@ -1711,6 +1752,13 @@ void kvz_search_cu_smp(encoder_state_t * const state, search_pu_inter(state, x, y, depth, part_mode, i, lcu, &cost, &bitcost); + if (cost >= MAX_INT) { + // Could not find any motion vector. + *inter_cost = MAX_INT; + *inter_bitcost = MAX_INT; + return; + } + *inter_cost += cost; *inter_bitcost += bitcost;