mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-24 02:24:07 +00:00
Merge branch 'threadqueue-rewrite'
This commit is contained in:
commit
4e2e0a76cc
|
@ -286,11 +286,8 @@ encoder_control_t* kvz_encoder_control_init(const kvz_config *const cfg)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
encoder->threadqueue = MALLOC(threadqueue_queue_t, 1);
|
encoder->threadqueue = kvz_threadqueue_init(encoder->cfg.threads);
|
||||||
if (!encoder->threadqueue ||
|
if (!encoder->threadqueue) {
|
||||||
!kvz_threadqueue_init(encoder->threadqueue,
|
|
||||||
encoder->cfg.threads,
|
|
||||||
encoder->cfg.owf > 0)) {
|
|
||||||
fprintf(stderr, "Could not initialize threadqueue.\n");
|
fprintf(stderr, "Could not initialize threadqueue.\n");
|
||||||
goto init_failed;
|
goto init_failed;
|
||||||
}
|
}
|
||||||
|
@ -653,10 +650,8 @@ void kvz_encoder_control_free(encoder_control_t *const encoder)
|
||||||
|
|
||||||
kvz_scalinglist_destroy(&encoder->scaling_list);
|
kvz_scalinglist_destroy(&encoder->scaling_list);
|
||||||
|
|
||||||
if (encoder->threadqueue) {
|
kvz_threadqueue_free(encoder->threadqueue);
|
||||||
kvz_threadqueue_finalize(encoder->threadqueue);
|
encoder->threadqueue = NULL;
|
||||||
}
|
|
||||||
FREE_POINTER(encoder->threadqueue);
|
|
||||||
|
|
||||||
free(encoder);
|
free(encoder);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1018,17 +1018,11 @@ static void encoder_state_write_bitstream_main(encoder_state_t * const state)
|
||||||
kvz_bitstream_add_rbsp_trailing_bits(stream);
|
kvz_bitstream_add_rbsp_trailing_bits(stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
|
||||||
PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME);
|
|
||||||
encoder_state_write_bitstream_children(state);
|
encoder_state_write_bitstream_children(state);
|
||||||
PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, encoder->threadqueue, "type=write_bitstream_append,frame=%d,encoder_type=%c", state->frame->num, state->type);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (state->encoder_control->cfg.hash != KVZ_HASH_NONE) {
|
if (state->encoder_control->cfg.hash != KVZ_HASH_NONE) {
|
||||||
PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME);
|
|
||||||
// Calculate checksum
|
// Calculate checksum
|
||||||
add_checksum(state);
|
add_checksum(state);
|
||||||
PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, encoder->threadqueue, "type=write_bitstream_checksum,frame=%d,encoder_type=%c", state->frame->num, state->type);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//Get bitstream length for stats
|
//Get bitstream length for stats
|
||||||
|
|
|
@ -725,16 +725,7 @@ static void encoder_state_encode_leaf(encoder_state_t * const state)
|
||||||
// frame is encoded. Deblocking and SAO search is done during LCU encoding.
|
// frame is encoded. Deblocking and SAO search is done during LCU encoding.
|
||||||
|
|
||||||
for (int i = 0; i < state->lcu_order_count; ++i) {
|
for (int i = 0; i < state->lcu_order_count; ++i) {
|
||||||
PERFORMANCE_MEASURE_START(KVZ_PERF_LCU);
|
|
||||||
|
|
||||||
encoder_state_worker_encode_lcu(&state->lcu_order[i]);
|
encoder_state_worker_encode_lcu(&state->lcu_order[i]);
|
||||||
|
|
||||||
#ifdef KVZ_DEBUG
|
|
||||||
{
|
|
||||||
const lcu_order_element_t * const lcu = &state->lcu_order[i];
|
|
||||||
PERFORMANCE_MEASURE_END(KVZ_PERF_LCU, ctrl->threadqueue, "type=encode_lcu,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d", state->frame->num, state->tile->id, state->slice->id, lcu->position_px.x + state->tile->lcu_offset_x * LCU_WIDTH, lcu->position_px.x + state->tile->lcu_offset_x * LCU_WIDTH + lcu->size.x - 1, lcu->position_px.y + state->tile->lcu_offset_y * LCU_WIDTH, lcu->position_px.y + state->tile->lcu_offset_y * LCU_WIDTH + lcu->size.y - 1);
|
|
||||||
}
|
|
||||||
#endif //KVZ_DEBUG
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Add each LCU in the wavefront row as it's own job to the queue.
|
// Add each LCU in the wavefront row as it's own job to the queue.
|
||||||
|
@ -769,14 +760,8 @@ static void encoder_state_encode_leaf(encoder_state_t * const state)
|
||||||
for (int i = 0; i < state->lcu_order_count; ++i) {
|
for (int i = 0; i < state->lcu_order_count; ++i) {
|
||||||
const lcu_order_element_t * const lcu = &state->lcu_order[i];
|
const lcu_order_element_t * const lcu = &state->lcu_order[i];
|
||||||
|
|
||||||
#ifdef KVZ_DEBUG
|
|
||||||
char job_description[256];
|
|
||||||
sprintf(job_description, "type=encode_lcu,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d", state->frame->num, state->tile->id, state->slice->id, lcu->position_px.x + state->tile->lcu_offset_x * LCU_WIDTH, lcu->position_px.x + state->tile->lcu_offset_x * LCU_WIDTH + lcu->size.x - 1, lcu->position_px.y + state->tile->lcu_offset_y * LCU_WIDTH, lcu->position_px.y + state->tile->lcu_offset_y * LCU_WIDTH + lcu->size.y - 1);
|
|
||||||
#else
|
|
||||||
char* job_description = NULL;
|
|
||||||
#endif
|
|
||||||
kvz_threadqueue_free_job(&state->tile->wf_jobs[lcu->id]);
|
kvz_threadqueue_free_job(&state->tile->wf_jobs[lcu->id]);
|
||||||
state->tile->wf_jobs[lcu->id] = kvz_threadqueue_submit(ctrl->threadqueue, encoder_state_worker_encode_lcu, (void*)lcu, 1, job_description);
|
state->tile->wf_jobs[lcu->id] = kvz_threadqueue_job_create(encoder_state_worker_encode_lcu, (void*)lcu);
|
||||||
threadqueue_job_t **job = &state->tile->wf_jobs[lcu->id];
|
threadqueue_job_t **job = &state->tile->wf_jobs[lcu->id];
|
||||||
|
|
||||||
// If job object was returned, add dependancies and allow it to run.
|
// If job object was returned, add dependancies and allow it to run.
|
||||||
|
@ -814,7 +799,7 @@ static void encoder_state_encode_leaf(encoder_state_t * const state)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
kvz_threadqueue_job_unwait_job(state->encoder_control->threadqueue, state->tile->wf_jobs[lcu->id]);
|
kvz_threadqueue_submit(state->encoder_control->threadqueue, state->tile->wf_jobs[lcu->id]);
|
||||||
|
|
||||||
// The wavefront row is done when the last LCU in the row is done.
|
// The wavefront row is done when the last LCU in the row is done.
|
||||||
if (i + 1 == state->lcu_order_count) {
|
if (i + 1 == state->lcu_order_count) {
|
||||||
|
@ -911,27 +896,13 @@ static void encoder_state_encode(encoder_state_t * const main_state) {
|
||||||
for (int i = 0; main_state->children[i].encoder_control; ++i) {
|
for (int i = 0; main_state->children[i].encoder_control; ++i) {
|
||||||
//If we don't have wavefronts, parallelize encoding of children.
|
//If we don't have wavefronts, parallelize encoding of children.
|
||||||
if (main_state->children[i].type != ENCODER_STATE_TYPE_WAVEFRONT_ROW) {
|
if (main_state->children[i].type != ENCODER_STATE_TYPE_WAVEFRONT_ROW) {
|
||||||
#ifdef KVZ_DEBUG
|
|
||||||
char job_description[256];
|
|
||||||
switch (main_state->children[i].type) {
|
|
||||||
case ENCODER_STATE_TYPE_TILE:
|
|
||||||
sprintf(job_description, "type=encode_child,frame=%d,tile=%d,row=%d-%d,px_x=%d-%d,px_y=%d-%d", main_state->children[i].frame->num, main_state->children[i].tile->id, main_state->children[i].lcu_order[0].position.y + main_state->children[i].tile->lcu_offset_y, main_state->children[i].lcu_order[0].position.y + main_state->children[i].tile->lcu_offset_y,
|
|
||||||
main_state->children[i].lcu_order[0].position_px.x + main_state->children[i].tile->lcu_offset_x * LCU_WIDTH, main_state->children[i].lcu_order[main_state->children[i].lcu_order_count-1].position_px.x + main_state->children[i].lcu_order[main_state->children[i].lcu_order_count-1].size.x + main_state->children[i].tile->lcu_offset_x * LCU_WIDTH - 1,
|
|
||||||
main_state->children[i].lcu_order[0].position_px.y + main_state->children[i].tile->lcu_offset_y * LCU_WIDTH, main_state->children[i].lcu_order[main_state->children[i].lcu_order_count-1].position_px.y + main_state->children[i].lcu_order[main_state->children[i].lcu_order_count-1].size.y + main_state->children[i].tile->lcu_offset_y * LCU_WIDTH - 1);
|
|
||||||
break;
|
|
||||||
case ENCODER_STATE_TYPE_SLICE:
|
|
||||||
sprintf(job_description, "type=encode_child,frame=%d,slice=%d,start_in_ts=%d", main_state->children[i].frame->num, main_state->children[i].slice->id, main_state->children[i].slice->start_in_ts);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
sprintf(job_description, "type=encode_child,frame=%d,invalid", main_state->children[i].frame->num);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
char* job_description = NULL;
|
|
||||||
#endif
|
|
||||||
kvz_threadqueue_free_job(&main_state->children[i].tqj_recon_done);
|
kvz_threadqueue_free_job(&main_state->children[i].tqj_recon_done);
|
||||||
main_state->children[i].tqj_recon_done = kvz_threadqueue_submit(main_state->encoder_control->threadqueue, encoder_state_worker_encode_children, &(main_state->children[i]), 1, job_description);
|
main_state->children[i].tqj_recon_done =
|
||||||
if (main_state->children[i].previous_encoder_state != &main_state->children[i] && main_state->children[i].previous_encoder_state->tqj_recon_done && !main_state->children[i].frame->is_idr_frame) {
|
kvz_threadqueue_job_create(encoder_state_worker_encode_children, &main_state->children[i]);
|
||||||
|
if (main_state->children[i].previous_encoder_state != &main_state->children[i] &&
|
||||||
|
main_state->children[i].previous_encoder_state->tqj_recon_done &&
|
||||||
|
!main_state->children[i].frame->is_idr_frame)
|
||||||
|
{
|
||||||
#if 0
|
#if 0
|
||||||
// Disabled due to non-determinism.
|
// Disabled due to non-determinism.
|
||||||
if (main_state->encoder_control->cfg->mv_constraint == KVZ_MV_CONSTRAIN_FRAME_AND_TILE_MARGIN)
|
if (main_state->encoder_control->cfg->mv_constraint == KVZ_MV_CONSTRAIN_FRAME_AND_TILE_MARGIN)
|
||||||
|
@ -947,7 +918,7 @@ static void encoder_state_encode(encoder_state_t * const main_state) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
kvz_threadqueue_job_unwait_job(main_state->encoder_control->threadqueue, main_state->children[i].tqj_recon_done);
|
kvz_threadqueue_submit(main_state->encoder_control->threadqueue, main_state->children[i].tqj_recon_done);
|
||||||
} else {
|
} else {
|
||||||
//Wavefront rows have parallelism at LCU level, so we should not launch multiple threads here!
|
//Wavefront rows have parallelism at LCU level, so we should not launch multiple threads here!
|
||||||
//FIXME: add an assert: we can only have wavefront children
|
//FIXME: add an assert: we can only have wavefront children
|
||||||
|
@ -1256,39 +1227,22 @@ static void _encode_one_frame_add_bitstream_deps(const encoder_state_t * const s
|
||||||
|
|
||||||
void kvz_encode_one_frame(encoder_state_t * const state, kvz_picture* frame)
|
void kvz_encode_one_frame(encoder_state_t * const state, kvz_picture* frame)
|
||||||
{
|
{
|
||||||
{
|
|
||||||
PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME);
|
|
||||||
encoder_state_init_new_frame(state, frame);
|
encoder_state_init_new_frame(state, frame);
|
||||||
PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, state->encoder_control->threadqueue, "type=init_new_frame,frame=%d,poc=%d", state->frame->num, state->frame->poc);
|
|
||||||
}
|
|
||||||
{
|
|
||||||
PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME);
|
|
||||||
encoder_state_encode(state);
|
encoder_state_encode(state);
|
||||||
PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, state->encoder_control->threadqueue, "type=encode,frame=%d", state->frame->num);
|
|
||||||
}
|
|
||||||
//kvz_threadqueue_flush(main_state->encoder_control->threadqueue);
|
|
||||||
{
|
|
||||||
threadqueue_job_t *job;
|
|
||||||
#ifdef KVZ_DEBUG
|
|
||||||
char job_description[256];
|
|
||||||
sprintf(job_description, "type=write_bitstream,frame=%d", state->frame->num);
|
|
||||||
#else
|
|
||||||
char* job_description = NULL;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
job = kvz_threadqueue_submit(state->encoder_control->threadqueue, kvz_encoder_state_worker_write_bitstream, (void*) state, 1, job_description);
|
threadqueue_job_t *job =
|
||||||
|
kvz_threadqueue_job_create(kvz_encoder_state_worker_write_bitstream, state);
|
||||||
|
|
||||||
_encode_one_frame_add_bitstream_deps(state, job);
|
_encode_one_frame_add_bitstream_deps(state, job);
|
||||||
if (state->previous_encoder_state != state && state->previous_encoder_state->tqj_bitstream_written) {
|
if (state->previous_encoder_state != state && state->previous_encoder_state->tqj_bitstream_written) {
|
||||||
//We need to depend on previous bitstream generation
|
//We need to depend on previous bitstream generation
|
||||||
kvz_threadqueue_job_dep_add(job, state->previous_encoder_state->tqj_bitstream_written);
|
kvz_threadqueue_job_dep_add(job, state->previous_encoder_state->tqj_bitstream_written);
|
||||||
}
|
}
|
||||||
kvz_threadqueue_job_unwait_job(state->encoder_control->threadqueue, job);
|
kvz_threadqueue_submit(state->encoder_control->threadqueue, job);
|
||||||
assert(!state->tqj_bitstream_written);
|
assert(!state->tqj_bitstream_written);
|
||||||
state->tqj_bitstream_written = job;
|
state->tqj_bitstream_written = job;
|
||||||
}
|
|
||||||
state->frame->done = 0;
|
state->frame->done = 0;
|
||||||
//kvz_threadqueue_flush(main_state->encoder_control->threadqueue);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
11
src/search.c
11
src/search.c
|
@ -399,10 +399,6 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
||||||
|
|
||||||
int x_local = SUB_SCU(x);
|
int x_local = SUB_SCU(x);
|
||||||
int y_local = SUB_SCU(y);
|
int y_local = SUB_SCU(y);
|
||||||
#ifdef KVZ_DEBUG
|
|
||||||
int debug_split = 0;
|
|
||||||
#endif
|
|
||||||
PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHCU);
|
|
||||||
|
|
||||||
// Stop recursion if the CU is completely outside the frame.
|
// Stop recursion if the CU is completely outside the frame.
|
||||||
if (x >= frame->width || y >= frame->height) {
|
if (x >= frame->width || y >= frame->height) {
|
||||||
|
@ -713,13 +709,6 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
||||||
work_tree_copy_down(x_local, y_local, depth, work_tree);
|
work_tree_copy_down(x_local, y_local, depth, work_tree);
|
||||||
}
|
}
|
||||||
|
|
||||||
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHCU, state->encoder_control->threadqueue, "type=search_cu,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d,depth=%d,split=%d,cur_cu_is_intra=%d", state->frame->num, state->tile->id, state->slice->id,
|
|
||||||
state->tile->offset_x + x,
|
|
||||||
state->tile->offset_x + x + cu_width,
|
|
||||||
state->tile->offset_y + y,
|
|
||||||
state->tile->offset_y + y + cu_width,
|
|
||||||
depth, debug_split, (cur_cu->type == CU_INTRA) ? 1 : 0);
|
|
||||||
|
|
||||||
assert(cur_cu->type != CU_NOTSET);
|
assert(cur_cu->type != CU_NOTSET);
|
||||||
|
|
||||||
return cost;
|
return cost;
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -30,151 +30,22 @@
|
||||||
|
|
||||||
#include "global.h" // IWYU pragma: keep
|
#include "global.h" // IWYU pragma: keep
|
||||||
|
|
||||||
typedef enum {
|
typedef struct threadqueue_job_t threadqueue_job_t;
|
||||||
THREADQUEUE_JOB_STATE_QUEUED = 0,
|
typedef struct threadqueue_queue_t threadqueue_queue_t;
|
||||||
THREADQUEUE_JOB_STATE_RUNNING = 1,
|
|
||||||
THREADQUEUE_JOB_STATE_DONE = 2
|
|
||||||
} threadqueue_job_state;
|
|
||||||
|
|
||||||
typedef struct threadqueue_job_t {
|
threadqueue_queue_t * kvz_threadqueue_init(int thread_count);
|
||||||
pthread_mutex_t lock;
|
|
||||||
|
|
||||||
threadqueue_job_state state;
|
threadqueue_job_t * kvz_threadqueue_job_create(void (*fptr)(void *arg), void *arg);
|
||||||
|
int kvz_threadqueue_submit(threadqueue_queue_t * threadqueue, threadqueue_job_t *job);
|
||||||
|
|
||||||
unsigned int ndepends; //Number of active dependencies that this job wait for
|
int kvz_threadqueue_job_dep_add(threadqueue_job_t *job, threadqueue_job_t *dependency);
|
||||||
|
|
||||||
struct threadqueue_job_t **rdepends; //array of pointer to jobs that depend on this one. They have to exist when the thread finishes, because they cannot be run before.
|
|
||||||
unsigned int rdepends_count; //number of rdepends
|
|
||||||
unsigned int rdepends_size; //allocated size of rdepends
|
|
||||||
|
|
||||||
// Reference count
|
|
||||||
int refcount;
|
|
||||||
|
|
||||||
//Job function and state to use
|
|
||||||
void (*fptr)(void *arg);
|
|
||||||
void *arg;
|
|
||||||
|
|
||||||
#ifdef KVZ_DEBUG
|
|
||||||
const char* debug_description;
|
|
||||||
|
|
||||||
int debug_worker_id;
|
|
||||||
|
|
||||||
KVZ_CLOCK_T debug_clock_enqueue;
|
|
||||||
KVZ_CLOCK_T debug_clock_start;
|
|
||||||
KVZ_CLOCK_T debug_clock_stop;
|
|
||||||
KVZ_CLOCK_T debug_clock_dequeue;
|
|
||||||
#endif
|
|
||||||
} threadqueue_job_t;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
pthread_mutex_t lock;
|
|
||||||
pthread_cond_t cond;
|
|
||||||
pthread_cond_t cb_cond;
|
|
||||||
|
|
||||||
pthread_t *threads;
|
|
||||||
int threads_count;
|
|
||||||
int threads_running;
|
|
||||||
|
|
||||||
bool stop; // if true, threads should stop asap
|
|
||||||
|
|
||||||
int fifo;
|
|
||||||
|
|
||||||
threadqueue_job_t **queue;
|
|
||||||
unsigned int queue_start;
|
|
||||||
unsigned int queue_count;
|
|
||||||
unsigned int queue_size;
|
|
||||||
unsigned int queue_waiting_execution; //Number of jobs without any dependency which could be run
|
|
||||||
unsigned int queue_waiting_dependency; //Number of jobs waiting for a dependency to complete
|
|
||||||
unsigned int queue_running; //Number of jobs running
|
|
||||||
|
|
||||||
#ifdef KVZ_DEBUG
|
|
||||||
//Format: pointer <tab> worker id <tab> time enqueued <tab> time started <tab> time stopped <tab> time dequeued <tab> job description
|
|
||||||
//For threads, pointer = "" and job description == "thread", time enqueued and time dequeued are equal to "-"
|
|
||||||
//For flush, pointer = "" and job description == "FLUSH", time enqueued, time dequeued and time started are equal to "-"
|
|
||||||
//Each time field, except the first one in the line be expressed in a relative way, by prepending the number of seconds by +.
|
|
||||||
//Dependencies: pointer -> pointer
|
|
||||||
|
|
||||||
FILE *debug_log;
|
|
||||||
|
|
||||||
KVZ_CLOCK_T *debug_clock_thread_start;
|
|
||||||
KVZ_CLOCK_T *debug_clock_thread_end;
|
|
||||||
#endif
|
|
||||||
} threadqueue_queue_t;
|
|
||||||
|
|
||||||
//Init a threadqueue (if fifo, then behave as a FIFO with dependencies, otherwise as a LIFO with dependencies)
|
|
||||||
int kvz_threadqueue_init(threadqueue_queue_t * threadqueue, int thread_count, int fifo);
|
|
||||||
|
|
||||||
//Add a job to the queue, and returs a threadqueue_job handle. If wait == 1, one has to run kvz_threadqueue_job_unwait_job in order to have it run
|
|
||||||
threadqueue_job_t * kvz_threadqueue_submit(threadqueue_queue_t * threadqueue, void (*fptr)(void *arg), void *arg, int wait, const char* debug_description);
|
|
||||||
|
|
||||||
void kvz_threadqueue_free_job(threadqueue_job_t **job_ptr);
|
|
||||||
|
|
||||||
threadqueue_job_t *kvz_threadqueue_copy_ref(threadqueue_job_t *job);
|
threadqueue_job_t *kvz_threadqueue_copy_ref(threadqueue_job_t *job);
|
||||||
|
|
||||||
int kvz_threadqueue_job_unwait_job(threadqueue_queue_t * threadqueue, threadqueue_job_t *job);
|
void kvz_threadqueue_free_job(threadqueue_job_t **job_ptr);
|
||||||
|
|
||||||
//Add a dependency between two jobs.
|
|
||||||
int kvz_threadqueue_job_dep_add(threadqueue_job_t *job, threadqueue_job_t *depends_on);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Stop all threads after they finish the current jobs.
|
|
||||||
*
|
|
||||||
* Blocks until all threads have stopped.
|
|
||||||
*/
|
|
||||||
int kvz_threadqueue_stop(threadqueue_queue_t * const threadqueue);
|
|
||||||
|
|
||||||
//Blocking call until job is executed. Job handles submitted before job should not be used any more as they are removed from the queue.
|
|
||||||
int kvz_threadqueue_waitfor(threadqueue_queue_t * threadqueue, threadqueue_job_t * job);
|
int kvz_threadqueue_waitfor(threadqueue_queue_t * threadqueue, threadqueue_job_t * job);
|
||||||
|
int kvz_threadqueue_stop(threadqueue_queue_t * threadqueue);
|
||||||
//Free ressources in a threadqueue
|
void kvz_threadqueue_free(threadqueue_queue_t * threadqueue);
|
||||||
int kvz_threadqueue_finalize(threadqueue_queue_t * threadqueue);
|
|
||||||
|
|
||||||
#ifdef KVZ_DEBUG
|
|
||||||
int threadqueue_log(threadqueue_queue_t * threadqueue, const KVZ_CLOCK_T *start, const KVZ_CLOCK_T *stop, const char* debug_description);
|
|
||||||
|
|
||||||
// Bitmasks for PERFORMANCE_MEASURE_START and PERFORMANCE_MEASURE_END.
|
|
||||||
#define KVZ_PERF_FRAME (1 << 0)
|
|
||||||
#define KVZ_PERF_JOB (1 << 1)
|
|
||||||
#define KVZ_PERF_LCU (1 << 2)
|
|
||||||
#define KVZ_PERF_SAOREC (1 << 3)
|
|
||||||
#define KVZ_PERF_BSLEAF (1 << 4)
|
|
||||||
#define KVZ_PERF_SEARCHCU (1 << 5)
|
|
||||||
|
|
||||||
#define IMPL_PERFORMANCE_MEASURE_START(mask) KVZ_CLOCK_T start, stop; if ((KVZ_DEBUG) & mask) { KVZ_GET_TIME(&start); }
|
|
||||||
#define IMPL_PERFORMANCE_MEASURE_END(mask, threadqueue, str, ...) { if ((KVZ_DEBUG) & mask) { KVZ_GET_TIME(&stop); {char job_description[256]; sprintf(job_description, (str), __VA_ARGS__); threadqueue_log((threadqueue), &start, &stop, job_description);}} } \
|
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
// Disable VS conditional expression warning from debug code.
|
|
||||||
# define WITHOUT_CONSTANT_EXP_WARNING(macro) \
|
|
||||||
__pragma(warning(push)) \
|
|
||||||
__pragma(warning(disable:4127)) \
|
|
||||||
macro \
|
|
||||||
__pragma(warning(pop))
|
|
||||||
# define PERFORMANCE_MEASURE_START(mask) \
|
|
||||||
WITHOUT_CONSTANT_EXP_WARNING(IMPL_PERFORMANCE_MEASURE_START(mask))
|
|
||||||
# define PERFORMANCE_MEASURE_END(mask, threadqueue, str, ...) \
|
|
||||||
WITHOUT_CONSTANT_EXP_WARNING(IMPL_PERFORMANCE_MEASURE_END(mask, threadqueue, str, ##__VA_ARGS__))
|
|
||||||
#else
|
|
||||||
# define PERFORMANCE_MEASURE_START(mask) \
|
|
||||||
IMPL_PERFORMANCE_MEASURE_START(mask)
|
|
||||||
# define PERFORMANCE_MEASURE_END(mask, threadqueue, str, ...) \
|
|
||||||
IMPL_PERFORMANCE_MEASURE_END(mask, threadqueue, str, ##__VA_ARGS__)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#else
|
|
||||||
#define PERFORMANCE_MEASURE_START(mask)
|
|
||||||
#define PERFORMANCE_MEASURE_END(mask, threadqueue, str, ...)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Constraints:
|
|
||||||
*
|
|
||||||
* - Always first lock threadqueue, than a job inside it
|
|
||||||
* - When job A depends on job B, always lock first job B and then job A
|
|
||||||
* - Jobs should be submitted in an order which is compatible with serial execution.
|
|
||||||
*
|
|
||||||
* */
|
|
||||||
|
|
||||||
#endif // THREADQUEUE_H_
|
#endif // THREADQUEUE_H_
|
||||||
|
|
|
@ -52,7 +52,7 @@ SUITE(coeff_sum_tests)
|
||||||
{
|
{
|
||||||
setup();
|
setup();
|
||||||
|
|
||||||
for (int i = 0; i < strategies.count; ++i) {
|
for (volatile int i = 0; i < strategies.count; ++i) {
|
||||||
if (strcmp(strategies.strategies[i].type, "coeff_abs_sum") != 0) {
|
if (strcmp(strategies.strategies[i].type, "coeff_abs_sum") != 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
|
@ -177,7 +177,7 @@ SUITE(intra_sad_tests)
|
||||||
|
|
||||||
// Loop through all strategies picking out the intra sad ones and run
|
// Loop through all strategies picking out the intra sad ones and run
|
||||||
// selectec strategies though all tests.
|
// selectec strategies though all tests.
|
||||||
for (unsigned i = 0; i < strategies.count; ++i) {
|
for (volatile unsigned i = 0; i < strategies.count; ++i) {
|
||||||
const char * type = strategies.strategies[i].type;
|
const char * type = strategies.strategies[i].type;
|
||||||
|
|
||||||
if (strcmp(type, "sad_4x4") == 0) {
|
if (strcmp(type, "sad_4x4") == 0) {
|
||||||
|
|
|
@ -378,7 +378,7 @@ SUITE(sad_tests)
|
||||||
sad_test_env.tested_func = strategies.strategies[i].fptr;
|
sad_test_env.tested_func = strategies.strategies[i].fptr;
|
||||||
sad_test_env.strategy = &strategies.strategies[i];
|
sad_test_env.strategy = &strategies.strategies[i];
|
||||||
int num_dim_tests = sizeof(tested_dims) / sizeof(tested_dims[0]);
|
int num_dim_tests = sizeof(tested_dims) / sizeof(tested_dims[0]);
|
||||||
for (int dim_test = 0; dim_test < num_dim_tests; ++dim_test) {
|
for (volatile int dim_test = 0; dim_test < num_dim_tests; ++dim_test) {
|
||||||
sad_test_env.width = tested_dims[dim_test].width;
|
sad_test_env.width = tested_dims[dim_test].width;
|
||||||
sad_test_env.height = tested_dims[dim_test].height;
|
sad_test_env.height = tested_dims[dim_test].height;
|
||||||
RUN_TEST(test_reg_sad);
|
RUN_TEST(test_reg_sad);
|
||||||
|
|
|
@ -167,7 +167,7 @@ SUITE(satd_tests)
|
||||||
|
|
||||||
// Loop through all strategies picking out the intra sad ones and run
|
// Loop through all strategies picking out the intra sad ones and run
|
||||||
// selectec strategies though all tests.
|
// selectec strategies though all tests.
|
||||||
for (unsigned i = 0; i < strategies.count; ++i) {
|
for (volatile unsigned i = 0; i < strategies.count; ++i) {
|
||||||
const char * type = strategies.strategies[i].type;
|
const char * type = strategies.strategies[i].type;
|
||||||
|
|
||||||
if (strcmp(type, "satd_4x4") == 0) {
|
if (strcmp(type, "satd_4x4") == 0) {
|
||||||
|
|
|
@ -405,7 +405,7 @@ SUITE(speed_tests)
|
||||||
int num_tested_dims = sizeof(tested_dims) / sizeof(*tested_dims);
|
int num_tested_dims = sizeof(tested_dims) / sizeof(*tested_dims);
|
||||||
|
|
||||||
// Call reg_sad with all the sizes it is actually called with.
|
// Call reg_sad with all the sizes it is actually called with.
|
||||||
for (int dim_i = 0; dim_i < num_tested_dims; ++dim_i) {
|
for (volatile int dim_i = 0; dim_i < num_tested_dims; ++dim_i) {
|
||||||
test_env.width = tested_dims[dim_i].x;
|
test_env.width = tested_dims[dim_i].x;
|
||||||
test_env.height = tested_dims[dim_i].y;
|
test_env.height = tested_dims[dim_i].y;
|
||||||
RUN_TEST(inter_sad);
|
RUN_TEST(inter_sad);
|
||||||
|
|
Loading…
Reference in a new issue