mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-28 03:34:06 +00:00
Merge branch 'threading_fixes'
This commit is contained in:
commit
4e5482817f
|
@ -136,21 +136,24 @@ uint64_t kvz_bitstream_tell(const bitstream_t *const stream)
|
||||||
*/
|
*/
|
||||||
void kvz_bitstream_writebyte(bitstream_t *const stream, const uint8_t byte)
|
void kvz_bitstream_writebyte(bitstream_t *const stream, const uint8_t byte)
|
||||||
{
|
{
|
||||||
assert(stream->cur_bit == 0);
|
assert(stream->cur_bit == 0 || stream->simulation);
|
||||||
|
|
||||||
if (stream->last == NULL || stream->last->len == KVZ_DATA_CHUNK_SIZE) {
|
if (!stream->simulation) {
|
||||||
// Need to allocate a new chunk.
|
|
||||||
kvz_data_chunk *new_chunk = kvz_bitstream_alloc_chunk();
|
|
||||||
assert(new_chunk);
|
|
||||||
|
|
||||||
if (!stream->first) stream->first = new_chunk;
|
if (stream->last == NULL || stream->last->len == KVZ_DATA_CHUNK_SIZE) {
|
||||||
if (stream->last) stream->last->next = new_chunk;
|
// Need to allocate a new chunk.
|
||||||
stream->last = new_chunk;
|
kvz_data_chunk* new_chunk = kvz_bitstream_alloc_chunk();
|
||||||
|
assert(new_chunk);
|
||||||
|
|
||||||
|
if (!stream->first) stream->first = new_chunk;
|
||||||
|
if (stream->last) stream->last->next = new_chunk;
|
||||||
|
stream->last = new_chunk;
|
||||||
|
}
|
||||||
|
assert(stream->last->len < KVZ_DATA_CHUNK_SIZE);
|
||||||
|
|
||||||
|
stream->last->data[stream->last->len] = byte;
|
||||||
|
stream->last->len += 1;
|
||||||
}
|
}
|
||||||
assert(stream->last->len < KVZ_DATA_CHUNK_SIZE);
|
|
||||||
|
|
||||||
stream->last->data[stream->last->len] = byte;
|
|
||||||
stream->last->len += 1;
|
|
||||||
stream->len += 1;
|
stream->len += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -161,9 +164,9 @@ void kvz_bitstream_writebyte(bitstream_t *const stream, const uint8_t byte)
|
||||||
*/
|
*/
|
||||||
void kvz_bitstream_move(bitstream_t *const dst, bitstream_t *const src)
|
void kvz_bitstream_move(bitstream_t *const dst, bitstream_t *const src)
|
||||||
{
|
{
|
||||||
assert(dst->cur_bit == 0);
|
assert(dst->cur_bit == 0 || src->simulation);
|
||||||
|
|
||||||
if (src->len > 0) {
|
if (src->len > 0 && !src->simulation) {
|
||||||
if (dst->first == NULL) {
|
if (dst->first == NULL) {
|
||||||
dst->first = src->first;
|
dst->first = src->first;
|
||||||
dst->last = src->last;
|
dst->last = src->last;
|
||||||
|
@ -179,6 +182,7 @@ void kvz_bitstream_move(bitstream_t *const dst, bitstream_t *const src)
|
||||||
dst->data = src->data;
|
dst->data = src->data;
|
||||||
dst->cur_bit = src->cur_bit;
|
dst->cur_bit = src->cur_bit;
|
||||||
dst->zerocount = src->zerocount;
|
dst->zerocount = src->zerocount;
|
||||||
|
dst->simulation = src->simulation;
|
||||||
|
|
||||||
src->first = src->last = NULL;
|
src->first = src->last = NULL;
|
||||||
kvz_bitstream_clear(src);
|
kvz_bitstream_clear(src);
|
||||||
|
@ -200,7 +204,7 @@ void kvz_bitstream_clear(bitstream_t *const stream)
|
||||||
*/
|
*/
|
||||||
void kvz_bitstream_put_byte(bitstream_t *const stream, uint32_t data)
|
void kvz_bitstream_put_byte(bitstream_t *const stream, uint32_t data)
|
||||||
{
|
{
|
||||||
assert(stream->cur_bit == 0);
|
assert(stream->cur_bit == 0 || stream->simulation);
|
||||||
const uint8_t emulation_prevention_three_byte = 0x03;
|
const uint8_t emulation_prevention_three_byte = 0x03;
|
||||||
|
|
||||||
if ((stream->zerocount == 2) && (data < 4)) {
|
if ((stream->zerocount == 2) && (data < 4)) {
|
||||||
|
@ -220,6 +224,10 @@ void kvz_bitstream_put_byte(bitstream_t *const stream, uint32_t data)
|
||||||
*/
|
*/
|
||||||
void kvz_bitstream_put(bitstream_t *const stream, const uint32_t data, uint8_t bits)
|
void kvz_bitstream_put(bitstream_t *const stream, const uint32_t data, uint8_t bits)
|
||||||
{
|
{
|
||||||
|
if (stream->simulation) {
|
||||||
|
stream->cur_bit += bits;
|
||||||
|
return;
|
||||||
|
}
|
||||||
while (bits--) {
|
while (bits--) {
|
||||||
stream->data <<= 1;
|
stream->data <<= 1;
|
||||||
|
|
||||||
|
|
|
@ -52,6 +52,8 @@ typedef struct bitstream_t
|
||||||
uint8_t cur_bit;
|
uint8_t cur_bit;
|
||||||
|
|
||||||
uint8_t zerocount;
|
uint8_t zerocount;
|
||||||
|
|
||||||
|
bool simulation;
|
||||||
} bitstream_t;
|
} bitstream_t;
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
|
|
|
@ -612,6 +612,8 @@ static void set_cu_qps(encoder_state_t *state, int x, int y, int depth, int *las
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void encoder_state_worker_encode_lcu_bitstream(void* opaque);
|
||||||
|
|
||||||
static void encoder_state_worker_encode_lcu_search(void * opaque)
|
static void encoder_state_worker_encode_lcu_search(void * opaque)
|
||||||
{
|
{
|
||||||
lcu_order_element_t * const lcu = opaque;
|
lcu_order_element_t * const lcu = opaque;
|
||||||
|
@ -671,6 +673,12 @@ static void encoder_state_worker_encode_lcu_search(void * opaque)
|
||||||
kvz_sao_search_lcu(state, lcu->position.x, lcu->position.y);
|
kvz_sao_search_lcu(state, lcu->position.x, lcu->position.y);
|
||||||
encoder_sao_reconstruct(state, lcu);
|
encoder_sao_reconstruct(state, lcu);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Do simulated bitstream writing to update the cabac contexts
|
||||||
|
if (encoder->cfg.alf_type) {
|
||||||
|
state->stream.simulation = true;
|
||||||
|
encoder_state_worker_encode_lcu_bitstream(opaque);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void encoder_state_worker_encode_lcu_bitstream(void * opaque)
|
static void encoder_state_worker_encode_lcu_bitstream(void * opaque)
|
||||||
|
@ -695,9 +703,11 @@ static void encoder_state_worker_encode_lcu_bitstream(void * opaque)
|
||||||
//Encode coding tree
|
//Encode coding tree
|
||||||
kvz_encode_coding_tree(state, lcu->position.x * LCU_WIDTH, lcu->position.y * LCU_WIDTH, 0, lcu->coeff);
|
kvz_encode_coding_tree(state, lcu->position.x * LCU_WIDTH, lcu->position.y * LCU_WIDTH, 0, lcu->coeff);
|
||||||
|
|
||||||
// Coeffs are not needed anymore.
|
if (!state->stream.simulation) {
|
||||||
free(lcu->coeff);
|
// Coeffs are not needed anymore.
|
||||||
lcu->coeff = NULL;
|
free(lcu->coeff);
|
||||||
|
lcu->coeff = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
bool end_of_slice_segment_flag;
|
bool end_of_slice_segment_flag;
|
||||||
if (state->encoder_control->cfg.slices & KVZ_SLICES_WPP) {
|
if (state->encoder_control->cfg.slices & KVZ_SLICES_WPP) {
|
||||||
|
@ -772,9 +782,30 @@ static void encoder_state_worker_encode_lcu_bitstream(void * opaque)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void encoder_state_init_children_after_simulation(encoder_state_t* const state) {
|
||||||
|
kvz_bitstream_clear(&state->stream);
|
||||||
|
|
||||||
|
if (state->is_leaf) {
|
||||||
|
//Leaf states have cabac and context
|
||||||
|
kvz_cabac_start(&state->cabac);
|
||||||
|
kvz_init_contexts(state, state->encoder_control->cfg.set_qp_in_cu ? 26 : state->frame->QP, state->frame->slicetype);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; state->children[i].encoder_control; ++i) {
|
||||||
|
encoder_state_init_children_after_simulation(&state->children[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void kvz_alf_enc_process_job(void* opaque) {
|
void kvz_alf_enc_process_job(void* opaque) {
|
||||||
kvz_alf_enc_process((encoder_state_t* const)opaque);
|
encoder_state_t* const state = (encoder_state_t* const)opaque;
|
||||||
|
|
||||||
|
kvz_alf_enc_process(state);
|
||||||
|
|
||||||
|
encoder_state_t* parent = state;
|
||||||
|
while (parent->parent) parent = parent->parent;
|
||||||
|
|
||||||
|
// If ALF was used the bitstream coding was simulated in search, reset the cabac/stream
|
||||||
|
encoder_state_init_children_after_simulation(parent);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void encoder_state_encode_leaf(encoder_state_t * const state)
|
static void encoder_state_encode_leaf(encoder_state_t * const state)
|
||||||
|
@ -799,16 +830,24 @@ static void encoder_state_encode_leaf(encoder_state_t * const state)
|
||||||
// frame is encoded. Deblocking and SAO search is done during LCU encoding.
|
// frame is encoded. Deblocking and SAO search is done during LCU encoding.
|
||||||
for (int i = 0; i < state->lcu_order_count; ++i) {
|
for (int i = 0; i < state->lcu_order_count; ++i) {
|
||||||
encoder_state_worker_encode_lcu_search(&state->lcu_order[i]);
|
encoder_state_worker_encode_lcu_search(&state->lcu_order[i]);
|
||||||
|
// Without alf we can code the bitstream right after each LCU to update cabac contexts
|
||||||
|
if (encoder->cfg.alf_type == 0) {
|
||||||
|
encoder_state_worker_encode_lcu_bitstream(&state->lcu_order[i]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//Encode ALF
|
//Encode ALF
|
||||||
if (encoder->cfg.alf_type) {
|
if (encoder->cfg.alf_type) {
|
||||||
kvz_alf_enc_process(state);
|
kvz_alf_enc_process(state);
|
||||||
|
// If ALF was used the bitstream coding was simulated in search, reset the cabac/stream
|
||||||
|
// And write the actual bitstream
|
||||||
|
encoder_state_init_children_after_simulation(state);
|
||||||
|
for (int i = 0; i < state->lcu_order_count; ++i) {
|
||||||
|
encoder_state_worker_encode_lcu_bitstream(&state->lcu_order[i]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < state->lcu_order_count; ++i) {
|
|
||||||
encoder_state_worker_encode_lcu_bitstream(&state->lcu_order[i]);
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
// Add each LCU in the wavefront row as its own job to the queue.
|
// Add each LCU in the wavefront row as its own job to the queue.
|
||||||
|
|
||||||
|
@ -893,25 +932,38 @@ static void encoder_state_encode_leaf(encoder_state_t * const state)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add local WPP dependency to the LCU on the left.
|
|
||||||
if (lcu->left) {
|
|
||||||
kvz_threadqueue_job_dep_add(job[0], job[-1]);
|
|
||||||
kvz_threadqueue_job_dep_add(bitstream_job[0], bitstream_job[-1]);
|
|
||||||
}
|
|
||||||
// Add local WPP dependency to the LCU on the top.
|
|
||||||
if (lcu->above) {
|
|
||||||
kvz_threadqueue_job_dep_add(job[0], job[-state->tile->frame->width_in_lcu]);
|
|
||||||
kvz_threadqueue_job_dep_add(bitstream_job[0], bitstream_job[-state->tile->frame->width_in_lcu]);
|
|
||||||
}
|
|
||||||
|
|
||||||
kvz_threadqueue_submit(state->encoder_control->threadqueue, job[0]);
|
|
||||||
|
|
||||||
if (state->encoder_control->cfg.alf_type) {
|
if (state->encoder_control->cfg.alf_type) {
|
||||||
encoder_state_t* parent = state;
|
encoder_state_t* parent = state;
|
||||||
while (parent->parent) parent = parent->parent;
|
while (parent->parent) parent = parent->parent;
|
||||||
|
|
||||||
|
// Add local WPP dependency to the LCU on the left.
|
||||||
|
if (lcu->left) {
|
||||||
|
kvz_threadqueue_job_dep_add(job[0], job[-1]);
|
||||||
|
kvz_threadqueue_job_dep_add(bitstream_job[0], bitstream_job[-1]);
|
||||||
|
}
|
||||||
|
// Add local WPP dependency to the LCU on the top.
|
||||||
|
if (lcu->above) {
|
||||||
|
kvz_threadqueue_job_dep_add(job[0], job[-state->tile->frame->width_in_lcu]);
|
||||||
|
kvz_threadqueue_job_dep_add(bitstream_job[0], bitstream_job[-state->tile->frame->width_in_lcu]);
|
||||||
|
}
|
||||||
|
|
||||||
|
kvz_threadqueue_submit(state->encoder_control->threadqueue, job[0]);
|
||||||
|
|
||||||
kvz_threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], parent->tqj_alf_process);
|
kvz_threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], parent->tqj_alf_process);
|
||||||
kvz_threadqueue_job_dep_add(parent->tqj_alf_process, state->tile->wf_recon_jobs[lcu->id]);
|
kvz_threadqueue_job_dep_add(parent->tqj_alf_process, state->tile->wf_recon_jobs[lcu->id]);
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
|
// Add local WPP dependency to the LCU on the left.
|
||||||
|
if (lcu->left) {
|
||||||
|
kvz_threadqueue_job_dep_add(job[0], bitstream_job[-1]);
|
||||||
|
}
|
||||||
|
// Add local WPP dependency to the LCU on the top.
|
||||||
|
if (lcu->above) {
|
||||||
|
kvz_threadqueue_job_dep_add(job[0], bitstream_job[-state->tile->frame->width_in_lcu]);
|
||||||
|
}
|
||||||
|
|
||||||
|
kvz_threadqueue_submit(state->encoder_control->threadqueue, job[0]);
|
||||||
|
|
||||||
kvz_threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], state->tile->wf_recon_jobs[lcu->id]);
|
kvz_threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], state->tile->wf_recon_jobs[lcu->id]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -6,7 +6,7 @@ set -eu
|
||||||
|
|
||||||
. "${0%/*}/util.sh"
|
. "${0%/*}/util.sh"
|
||||||
|
|
||||||
common_args='256x128 10 yuv420p -p1 --preset=ultrafast --threads=0 --no-wpp --no-tmvp --no-deblock --sao=0 --alf=full --pu-depth-intra 0-4'
|
common_args='256x128 10 yuv420p -p1 --preset=ultrafast --threads=0 --no-wpp --no-tmvp --no-deblock --sao=0 --pu-depth-intra 0-4'
|
||||||
valgrind_test $common_args --rd=1
|
valgrind_test $common_args --rd=1
|
||||||
valgrind_test $common_args --rd=2 --no-transform-skip --qp 37
|
valgrind_test $common_args --rd=2 --no-transform-skip --qp 37
|
||||||
valgrind_test $common_args --rd=2 --no-transform-skip --qp 37 --signhide --rdoq
|
valgrind_test $common_args --rd=2 --no-transform-skip --qp 37 --signhide --rdoq
|
||||||
|
|
Loading…
Reference in a new issue