mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 19:24:06 +00:00
Clean up and comment WPP threading code.
- Remove WPP row reconstruction dependency to the row above current one in the previous frame. It's obviously unnecessary. - Remove WPP row reconstruction dependency to the current row in the previous frame, unless the current row is the last row.
This commit is contained in:
parent
b9ec4b0a54
commit
d2bb71739f
|
@ -333,16 +333,18 @@ static void encoder_state_worker_encode_lcu(void * opaque) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static void encoder_state_encode_leaf(encoder_state_t * const state) {
|
static void encoder_state_encode_leaf(encoder_state_t * const state) {
|
||||||
const encoder_control_t * const encoder = state->encoder_control;
|
|
||||||
|
|
||||||
int i = 0;
|
|
||||||
|
|
||||||
assert(state->is_leaf);
|
assert(state->is_leaf);
|
||||||
assert(state->lcu_order_count > 0);
|
assert(state->lcu_order_count > 0);
|
||||||
|
|
||||||
//If we're not using wavefronts, or we have a WAVEFRONT_ROW which is the single child of its parent, than we should not use parallelism
|
// Select whether to encode the frame/tile in current thread or to define
|
||||||
if (state->type != ENCODER_STATE_TYPE_WAVEFRONT_ROW || (state->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW && !state->parent->children[1].encoder_control)) {
|
// wavefront jobs for other threads to handle.
|
||||||
for (i = 0; i < state->lcu_order_count; ++i) {
|
bool wavefront = state->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW;
|
||||||
|
bool use_parallel_encoding = (wavefront && state->parent->children[1].encoder_control);
|
||||||
|
if (!use_parallel_encoding) {
|
||||||
|
// Encode every LCU in order and perform SAO reconstruction after every
|
||||||
|
// frame is encoded. Deblocking and SAO search is done during LCU encoding.
|
||||||
|
|
||||||
|
for (int i = 0; i < state->lcu_order_count; ++i) {
|
||||||
PERFORMANCE_MEASURE_START(_DEBUG_PERF_ENCODE_LCU);
|
PERFORMANCE_MEASURE_START(_DEBUG_PERF_ENCODE_LCU);
|
||||||
|
|
||||||
encoder_state_worker_encode_lcu(&state->lcu_order[i]);
|
encoder_state_worker_encode_lcu(&state->lcu_order[i]);
|
||||||
|
@ -355,7 +357,7 @@ static void encoder_state_encode_leaf(encoder_state_t * const state) {
|
||||||
#endif //_DEBUG
|
#endif //_DEBUG
|
||||||
}
|
}
|
||||||
|
|
||||||
if (encoder->sao_enable) {
|
if (state->encoder_control->sao_enable) {
|
||||||
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SAO_RECONSTRUCT_FRAME);
|
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SAO_RECONSTRUCT_FRAME);
|
||||||
sao_reconstruct_frame(state);
|
sao_reconstruct_frame(state);
|
||||||
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SAO_RECONSTRUCT_FRAME, state->encoder_control->threadqueue, "type=sao_reconstruct_frame,frame=%d,tile=%d,slice=%d,row=%d-%d,px_x=%d-%d,px_y=%d-%d", state->global->frame, state->tile->id, state->slice->id, state->lcu_order[0].position.y + state->tile->lcu_offset_y, state->lcu_order[state->lcu_order_count-1].position.y + state->tile->lcu_offset_y,
|
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SAO_RECONSTRUCT_FRAME, state->encoder_control->threadqueue, "type=sao_reconstruct_frame,frame=%d,tile=%d,slice=%d,row=%d-%d,px_x=%d-%d,px_y=%d-%d", state->global->frame, state->tile->id, state->slice->id, state->lcu_order[0].position.y + state->tile->lcu_offset_y, state->lcu_order[state->lcu_order_count-1].position.y + state->tile->lcu_offset_y,
|
||||||
|
@ -364,7 +366,10 @@ static void encoder_state_encode_leaf(encoder_state_t * const state) {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (i = 0; i < state->lcu_order_count; ++i) {
|
// Add every LCU in the frame as a job to a queue, along with
|
||||||
|
// their dependancies, so they can be processed in parallel.
|
||||||
|
|
||||||
|
for (int i = 0; i < state->lcu_order_count; ++i) {
|
||||||
const lcu_order_element_t * const lcu = &state->lcu_order[i];
|
const lcu_order_element_t * const lcu = &state->lcu_order[i];
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
char job_description[256];
|
char job_description[256];
|
||||||
|
@ -373,39 +378,40 @@ static void encoder_state_encode_leaf(encoder_state_t * const state) {
|
||||||
char* job_description = NULL;
|
char* job_description = NULL;
|
||||||
#endif
|
#endif
|
||||||
state->tile->wf_jobs[lcu->id] = threadqueue_submit(state->encoder_control->threadqueue, encoder_state_worker_encode_lcu, (void*)lcu, 1, job_description);
|
state->tile->wf_jobs[lcu->id] = threadqueue_submit(state->encoder_control->threadqueue, encoder_state_worker_encode_lcu, (void*)lcu, 1, job_description);
|
||||||
if (state->previous_encoder_state != state && state->previous_encoder_state->tqj_recon_done && !state->global->is_radl_frame) {
|
assert(state->tile->wf_jobs[lcu->id] != NULL);
|
||||||
|
|
||||||
//Only for the first in the row (we reconstruct row-wise)
|
// Add dependancy for inter frames to the reconstruction of the row
|
||||||
|
// below current row in the previous frame. This ensures that we can
|
||||||
|
// search for motion vectors in the previous frame as long as we don't
|
||||||
|
// go more than one LCU below current row.
|
||||||
|
if (state->previous_encoder_state != state && state->previous_encoder_state->tqj_recon_done && !state->global->is_radl_frame) {
|
||||||
|
// Only add the dependancy to the first LCU in the row.
|
||||||
if (!lcu->left) {
|
if (!lcu->left) {
|
||||||
//If we have a row below, then we wait till it's completed
|
|
||||||
if (lcu->below) {
|
if (lcu->below) {
|
||||||
threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], lcu->below->encoder_state->previous_encoder_state->tqj_recon_done);
|
threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], lcu->below->encoder_state->previous_encoder_state->tqj_recon_done);
|
||||||
}
|
} else {
|
||||||
//Also add always a dep on current line
|
|
||||||
threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], lcu->encoder_state->previous_encoder_state->tqj_recon_done);
|
threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], lcu->encoder_state->previous_encoder_state->tqj_recon_done);
|
||||||
if (lcu->above) {
|
|
||||||
threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], lcu->above->encoder_state->previous_encoder_state->tqj_recon_done);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (state->tile->wf_jobs[lcu->id]) {
|
|
||||||
if (lcu->position.x > 0) {
|
// Add local WPP dependancy to the LCU on the left.
|
||||||
// Wait for the LCU on the left.
|
if (lcu->left) {
|
||||||
threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], state->tile->wf_jobs[lcu->id - 1]);
|
threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], state->tile->wf_jobs[lcu->id - 1]);
|
||||||
}
|
}
|
||||||
if (lcu->position.y > 0) {
|
// Add local WPP dependancy to the LCU on the top right.
|
||||||
if (lcu->position.x < state->tile->frame->width_in_lcu - 1) {
|
if (lcu->above) {
|
||||||
// Wait for the LCU to the top-right of this one.
|
if (lcu->above->right) {
|
||||||
threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], state->tile->wf_jobs[lcu->id - state->tile->frame->width_in_lcu + 1]);
|
threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], state->tile->wf_jobs[lcu->id - state->tile->frame->width_in_lcu + 1]);
|
||||||
} else {
|
} else {
|
||||||
// If there is no top-right LCU, wait for the one above.
|
|
||||||
threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], state->tile->wf_jobs[lcu->id - state->tile->frame->width_in_lcu]);
|
threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], state->tile->wf_jobs[lcu->id - state->tile->frame->width_in_lcu]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
threadqueue_job_unwait_job(state->encoder_control->threadqueue, state->tile->wf_jobs[lcu->id]);
|
threadqueue_job_unwait_job(state->encoder_control->threadqueue, state->tile->wf_jobs[lcu->id]);
|
||||||
}
|
|
||||||
if (lcu->position.x == state->tile->frame->width_in_lcu - 1) {
|
if (lcu->position.x == state->tile->frame->width_in_lcu - 1) {
|
||||||
if (!encoder->sao_enable) {
|
if (!state->encoder_control->sao_enable) {
|
||||||
// No SAO + last LCU: the row is reconstructed
|
// No SAO + last LCU: the row is reconstructed
|
||||||
assert(!state->tqj_recon_done);
|
assert(!state->tqj_recon_done);
|
||||||
state->tqj_recon_done = state->tile->wf_jobs[lcu->id];
|
state->tqj_recon_done = state->tile->wf_jobs[lcu->id];
|
||||||
|
|
Loading…
Reference in a new issue