Rename _DEBUG_PERF macros to KVZ_PERF

Also move the definitions to threadqueue.h, next to the PERFORMANCE_MEASURE_START/END macros that use them as bitmasks.
This commit is contained in:
Ari Koivula 2015-09-14 12:34:41 +03:00
parent d70362978e
commit ec2d8d6ad7
7 changed files with 47 additions and 47 deletions

View file

@ -887,16 +887,16 @@ static void encoder_state_write_bitstream_main(encoder_state_t * const state)
} }
{ {
PERFORMANCE_MEASURE_START(_DEBUG_PERF_FRAME_LEVEL); PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME);
encoder_state_write_bitstream_children(state); encoder_state_write_bitstream_children(state);
PERFORMANCE_MEASURE_END(_DEBUG_PERF_FRAME_LEVEL, state->encoder_control->threadqueue, "type=write_bitstream_append,frame=%d,encoder_type=%c", state->global->frame, state->type); PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, state->encoder_control->threadqueue, "type=write_bitstream_append,frame=%d,encoder_type=%c", state->global->frame, state->type);
} }
{ {
PERFORMANCE_MEASURE_START(_DEBUG_PERF_FRAME_LEVEL); PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME);
// Calculate checksum // Calculate checksum
add_checksum(state); add_checksum(state);
PERFORMANCE_MEASURE_END(_DEBUG_PERF_FRAME_LEVEL, state->encoder_control->threadqueue, "type=write_bitstream_checksum,frame=%d,encoder_type=%c", state->global->frame, state->type); PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, state->encoder_control->threadqueue, "type=write_bitstream_checksum,frame=%d,encoder_type=%c", state->global->frame, state->type);
} }
//Get bitstream length for stats //Get bitstream length for stats

View file

@ -323,22 +323,22 @@ static void encoder_state_encode_leaf(encoder_state_t * const state) {
// frame is encoded. Deblocking and SAO search is done during LCU encoding. // frame is encoded. Deblocking and SAO search is done during LCU encoding.
for (int i = 0; i < state->lcu_order_count; ++i) { for (int i = 0; i < state->lcu_order_count; ++i) {
PERFORMANCE_MEASURE_START(_DEBUG_PERF_ENCODE_LCU); PERFORMANCE_MEASURE_START(KVZ_PERF_LCU);
encoder_state_worker_encode_lcu(&state->lcu_order[i]); encoder_state_worker_encode_lcu(&state->lcu_order[i]);
#ifdef _DEBUG #ifdef _DEBUG
{ {
const lcu_order_element_t * const lcu = &state->lcu_order[i]; const lcu_order_element_t * const lcu = &state->lcu_order[i];
PERFORMANCE_MEASURE_END(_DEBUG_PERF_ENCODE_LCU, state->encoder_control->threadqueue, "type=encode_lcu,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d", state->global->frame, state->tile->id, state->slice->id, lcu->position_px.x + state->tile->lcu_offset_x * LCU_WIDTH, lcu->position_px.x + state->tile->lcu_offset_x * LCU_WIDTH + lcu->size.x - 1, lcu->position_px.y + state->tile->lcu_offset_y * LCU_WIDTH, lcu->position_px.y + state->tile->lcu_offset_y * LCU_WIDTH + lcu->size.y - 1); PERFORMANCE_MEASURE_END(KVZ_PERF_LCU, state->encoder_control->threadqueue, "type=encode_lcu,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d", state->global->frame, state->tile->id, state->slice->id, lcu->position_px.x + state->tile->lcu_offset_x * LCU_WIDTH, lcu->position_px.x + state->tile->lcu_offset_x * LCU_WIDTH + lcu->size.x - 1, lcu->position_px.y + state->tile->lcu_offset_y * LCU_WIDTH, lcu->position_px.y + state->tile->lcu_offset_y * LCU_WIDTH + lcu->size.y - 1);
} }
#endif //_DEBUG #endif //_DEBUG
} }
if (state->encoder_control->sao_enable) { if (state->encoder_control->sao_enable) {
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SAO_RECONSTRUCT_FRAME); PERFORMANCE_MEASURE_START(KVZ_PERF_SAOREC);
kvz_sao_reconstruct_frame(state); kvz_sao_reconstruct_frame(state);
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SAO_RECONSTRUCT_FRAME, state->encoder_control->threadqueue, "type=kvz_sao_reconstruct_frame,frame=%d,tile=%d,slice=%d,row=%d-%d,px_x=%d-%d,px_y=%d-%d", state->global->frame, state->tile->id, state->slice->id, state->lcu_order[0].position.y + state->tile->lcu_offset_y, state->lcu_order[state->lcu_order_count-1].position.y + state->tile->lcu_offset_y, PERFORMANCE_MEASURE_END(KVZ_PERF_SAOREC, state->encoder_control->threadqueue, "type=kvz_sao_reconstruct_frame,frame=%d,tile=%d,slice=%d,row=%d-%d,px_x=%d-%d,px_y=%d-%d", state->global->frame, state->tile->id, state->slice->id, state->lcu_order[0].position.y + state->tile->lcu_offset_y, state->lcu_order[state->lcu_order_count - 1].position.y + state->tile->lcu_offset_y,
state->tile->lcu_offset_x * LCU_WIDTH, state->tile->frame->width + state->tile->lcu_offset_x * LCU_WIDTH - 1, state->tile->lcu_offset_x * LCU_WIDTH, state->tile->frame->width + state->tile->lcu_offset_x * LCU_WIDTH - 1,
state->tile->lcu_offset_y * LCU_WIDTH, state->tile->frame->height + state->tile->lcu_offset_y * LCU_WIDTH - 1 state->tile->lcu_offset_y * LCU_WIDTH, state->tile->frame->height + state->tile->lcu_offset_y * LCU_WIDTH - 1
); );
@ -405,9 +405,9 @@ static void encoder_state_worker_encode_children(void * opaque) {
encoder_state_encode(sub_state); encoder_state_encode(sub_state);
if (sub_state->is_leaf) { if (sub_state->is_leaf) {
if (sub_state->type != ENCODER_STATE_TYPE_WAVEFRONT_ROW) { if (sub_state->type != ENCODER_STATE_TYPE_WAVEFRONT_ROW) {
PERFORMANCE_MEASURE_START(_DEBUG_PERF_WRITE_BITSTREAM_LEAF); PERFORMANCE_MEASURE_START(KVZ_PERF_BSLEAF);
kvz_encoder_state_write_bitstream_leaf(sub_state); kvz_encoder_state_write_bitstream_leaf(sub_state);
PERFORMANCE_MEASURE_END(_DEBUG_PERF_WRITE_BITSTREAM_LEAF, sub_state->encoder_control->threadqueue, "type=encoder_state_write_bitstream_leaf,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d", sub_state->global->frame, sub_state->tile->id, sub_state->slice->id, sub_state->lcu_order[0].position_px.x + sub_state->tile->lcu_offset_x * LCU_WIDTH, sub_state->lcu_order[sub_state->lcu_order_count-1].position_px.x + sub_state->lcu_order[sub_state->lcu_order_count-1].size.x + sub_state->tile->lcu_offset_x * LCU_WIDTH - 1, sub_state->lcu_order[0].position_px.y + sub_state->tile->lcu_offset_y * LCU_WIDTH, sub_state->lcu_order[sub_state->lcu_order_count-1].position_px.y + sub_state->lcu_order[sub_state->lcu_order_count-1].size.y + sub_state->tile->lcu_offset_y * LCU_WIDTH - 1); PERFORMANCE_MEASURE_END(KVZ_PERF_BSLEAF, sub_state->encoder_control->threadqueue, "type=encoder_state_write_bitstream_leaf,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d", sub_state->global->frame, sub_state->tile->id, sub_state->slice->id, sub_state->lcu_order[0].position_px.x + sub_state->tile->lcu_offset_x * LCU_WIDTH, sub_state->lcu_order[sub_state->lcu_order_count - 1].position_px.x + sub_state->lcu_order[sub_state->lcu_order_count - 1].size.x + sub_state->tile->lcu_offset_x * LCU_WIDTH - 1, sub_state->lcu_order[0].position_px.y + sub_state->tile->lcu_offset_y * LCU_WIDTH, sub_state->lcu_order[sub_state->lcu_order_count - 1].position_px.y + sub_state->lcu_order[sub_state->lcu_order_count - 1].size.y + sub_state->tile->lcu_offset_y * LCU_WIDTH - 1);
} else { } else {
threadqueue_job_t *job; threadqueue_job_t *job;
#ifdef _DEBUG #ifdef _DEBUG
@ -822,14 +822,14 @@ static void _encode_one_frame_add_bitstream_deps(const encoder_state_t * const s
void kvz_encode_one_frame(encoder_state_t * const state) void kvz_encode_one_frame(encoder_state_t * const state)
{ {
{ {
PERFORMANCE_MEASURE_START(_DEBUG_PERF_FRAME_LEVEL); PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME);
encoder_state_new_frame(state); encoder_state_new_frame(state);
PERFORMANCE_MEASURE_END(_DEBUG_PERF_FRAME_LEVEL, state->encoder_control->threadqueue, "type=new_frame,frame=%d,poc=%d", state->global->frame, state->global->poc); PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, state->encoder_control->threadqueue, "type=new_frame,frame=%d,poc=%d", state->global->frame, state->global->poc);
} }
{ {
PERFORMANCE_MEASURE_START(_DEBUG_PERF_FRAME_LEVEL); PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME);
encoder_state_encode(state); encoder_state_encode(state);
PERFORMANCE_MEASURE_END(_DEBUG_PERF_FRAME_LEVEL, state->encoder_control->threadqueue, "type=encode,frame=%d", state->global->frame); PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, state->encoder_control->threadqueue, "type=encode,frame=%d", state->global->frame);
} }
//kvz_threadqueue_flush(main_state->encoder_control->threadqueue); //kvz_threadqueue_flush(main_state->encoder_control->threadqueue);
{ {

View file

@ -209,15 +209,6 @@ typedef int16_t coeff_t;
#define EXP_GOLOMB_TABLE_SIZE (4096*8) #define EXP_GOLOMB_TABLE_SIZE (4096*8)
//DEBUG BITMASK
#define _DEBUG_PERF_FRAME_LEVEL 0x0001
#define _DEBUG_PERF_JOB 0x0002
#define _DEBUG_PERF_ENCODE_LCU 0x0004
#define _DEBUG_PERF_SAO_RECONSTRUCT_FRAME 0x0008
#define _DEBUG_PERF_WRITE_BITSTREAM_LEAF 0x0010
#define _DEBUG_PERF_SEARCH_CU 0x0020
#define _DEBUG_PERF_SEARCH_PIXELS 0x0040
//Constants //Constants
typedef enum { COLOR_Y = 0, COLOR_U, COLOR_V, NUM_COLORS } color_t; typedef enum { COLOR_Y = 0, COLOR_U, COLOR_V, NUM_COLORS } color_t;
enum { SLICE_B = 0, SLICE_P = 1, SLICE_I = 2 }; enum { SLICE_B = 0, SLICE_P = 1, SLICE_I = 2 };

View file

@ -497,7 +497,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
#ifdef _DEBUG #ifdef _DEBUG
int debug_split = 0; int debug_split = 0;
#endif #endif
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_CU); PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHCU);
// Stop recursion if the CU is completely outside the frame. // Stop recursion if the CU is completely outside the frame.
if (x >= frame->width || y >= frame->height) { if (x >= frame->width || y >= frame->height) {
@ -688,7 +688,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
} }
} }
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_CU, state->encoder_control->threadqueue, "type=search_cu,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d,depth=%d,split=%d,cur_cu_is_intra=%d", state->global->frame, state->tile->id, state->slice->id, PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHCU, state->encoder_control->threadqueue, "type=search_cu,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d,depth=%d,split=%d,cur_cu_is_intra=%d", state->global->frame, state->tile->id, state->slice->id,
(state->tile->lcu_offset_x * LCU_WIDTH) + x, (state->tile->lcu_offset_x * LCU_WIDTH) + x,
(state->tile->lcu_offset_x * LCU_WIDTH) + x + (LCU_WIDTH >> depth), (state->tile->lcu_offset_x * LCU_WIDTH) + x + (LCU_WIDTH >> depth),
(state->tile->lcu_offset_y * LCU_WIDTH) + y, (state->tile->lcu_offset_y * LCU_WIDTH) + y,

View file

@ -246,14 +246,14 @@ unsigned kvz_tz_pattern_search(const encoder_state_t * const state, const kvz_pi
uint32_t bitcost; uint32_t bitcost;
{ {
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS); PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + current->y, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + current->y,
block_width, block_width, max_lcu_below); block_width, block_width, max_lcu_below);
cost += calc_mvd_cost(state, mv->x + current->x, mv->y + current->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); cost += calc_mvd_cost(state, mv->x + current->x, mv->y + current->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x + block_width,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + current->y, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + current->y,
@ -303,14 +303,14 @@ unsigned kvz_tz_raster_search(const encoder_state_t * const state, const kvz_pic
uint32_t bitcost; uint32_t bitcost;
{ {
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS); PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + i, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + i,
block_width, block_width, max_lcu_below); block_width, block_width, max_lcu_below);
cost += calc_mvd_cost(state, mv->x + k, mv->y + i, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); cost += calc_mvd_cost(state, mv->x + k, mv->y + i, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k + block_width,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + i, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + i,
@ -370,7 +370,7 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth,
// Check whatever input vector we got, unless its (0, 0) which will be checked later. // Check whatever input vector we got, unless its (0, 0) which will be checked later.
if (mv.x || mv.y) if (mv.x || mv.y)
{ {
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS); PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
best_cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, best_cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
@ -378,7 +378,7 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth,
block_width, block_width, max_lcu_below); block_width, block_width, max_lcu_below);
best_cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost); best_cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost);
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + block_width,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
@ -394,7 +394,7 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth,
mv.x = merge_cand[i].mv[merge_cand[i].dir - 1][0] >> 2; mv.x = merge_cand[i].mv[merge_cand[i].dir - 1][0] >> 2;
mv.y = merge_cand[i].mv[merge_cand[i].dir - 1][1] >> 2; mv.y = merge_cand[i].mv[merge_cand[i].dir - 1][1] >> 2;
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS); PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
uint32_t bitcost; uint32_t bitcost;
unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
@ -403,7 +403,7 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth,
block_width, block_width, max_lcu_below); block_width, block_width, max_lcu_below);
cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + block_width,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
@ -547,7 +547,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
} }
if (!mv_in_merge_cand) { if (!mv_in_merge_cand) {
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS); PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
best_cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, best_cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
@ -557,7 +557,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
best_bitcost = bitcost; best_bitcost = bitcost;
best_index = num_cand; best_index = num_cand;
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + block_width,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
@ -571,7 +571,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
mv.x = merge_cand[i].mv[merge_cand[i].dir - 1][0] >> 2; mv.x = merge_cand[i].mv[merge_cand[i].dir - 1][0] >> 2;
mv.y = merge_cand[i].mv[merge_cand[i].dir - 1][1] >> 2; mv.y = merge_cand[i].mv[merge_cand[i].dir - 1][1] >> 2;
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS); PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
@ -579,7 +579,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
block_width, block_width, max_lcu_below); block_width, block_width, max_lcu_below);
cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + block_width,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
@ -605,14 +605,14 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
const vector2d_t *pattern = &large_hexbs[i]; const vector2d_t *pattern = &large_hexbs[i];
unsigned cost; unsigned cost;
{ {
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS); PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y,
block_width, block_width, max_lcu_below); block_width, block_width, max_lcu_below);
cost += calc_mvd_cost(state, mv.x + pattern->x, mv.y + pattern->y, 2, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); cost += calc_mvd_cost(state, mv.x + pattern->x, mv.y + pattern->y, 2, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x + block_width,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y,
@ -648,13 +648,13 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
const vector2d_t *offset = &large_hexbs[start + i]; const vector2d_t *offset = &large_hexbs[start + i];
unsigned cost; unsigned cost;
{ {
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS); PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
block_width, block_width, max_lcu_below); block_width, block_width, max_lcu_below);
cost += calc_mvd_cost(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); cost += calc_mvd_cost(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=large_hexbs_iterative,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs_iterative,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
@ -680,13 +680,13 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
const vector2d_t *offset = &small_hexbs[i]; const vector2d_t *offset = &small_hexbs[i];
unsigned cost; unsigned cost;
{ {
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS); PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
block_width, block_width, max_lcu_below); block_width, block_width, max_lcu_below);
cost += calc_mvd_cost(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); cost += calc_mvd_cost(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=small_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=small_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,

View file

@ -307,7 +307,7 @@ int kvz_threadqueue_init(threadqueue_queue_t * const threadqueue, int thread_cou
static void threadqueue_free_job(threadqueue_queue_t * const threadqueue, int i) static void threadqueue_free_job(threadqueue_queue_t * const threadqueue, int i)
{ {
#ifdef _DEBUG #ifdef _DEBUG
#if _DEBUG & _DEBUG_PERF_JOB #if _DEBUG & KVZ_PERF_JOB
int j; int j;
GET_TIME(&threadqueue->queue[i]->debug_clock_dequeue); GET_TIME(&threadqueue->queue[i]->debug_clock_dequeue);
fprintf(threadqueue->debug_log, "%p\t%d\t%lf\t+%lf\t+%lf\t+%lf\t%s\n", threadqueue->queue[i], threadqueue->queue[i]->debug_worker_id, CLOCK_T_AS_DOUBLE(threadqueue->queue[i]->debug_clock_enqueue), CLOCK_T_DIFF(threadqueue->queue[i]->debug_clock_enqueue, threadqueue->queue[i]->debug_clock_start), CLOCK_T_DIFF(threadqueue->queue[i]->debug_clock_start, threadqueue->queue[i]->debug_clock_stop), CLOCK_T_DIFF(threadqueue->queue[i]->debug_clock_stop, threadqueue->queue[i]->debug_clock_dequeue), threadqueue->queue[i]->debug_description); fprintf(threadqueue->debug_log, "%p\t%d\t%lf\t+%lf\t+%lf\t+%lf\t%s\n", threadqueue->queue[i], threadqueue->queue[i]->debug_worker_id, CLOCK_T_AS_DOUBLE(threadqueue->queue[i]->debug_clock_enqueue), CLOCK_T_DIFF(threadqueue->queue[i]->debug_clock_enqueue, threadqueue->queue[i]->debug_clock_start), CLOCK_T_DIFF(threadqueue->queue[i]->debug_clock_start, threadqueue->queue[i]->debug_clock_stop), CLOCK_T_DIFF(threadqueue->queue[i]->debug_clock_stop, threadqueue->queue[i]->debug_clock_dequeue), threadqueue->queue[i]->debug_description);
@ -334,7 +334,7 @@ static void threadqueue_free_jobs(threadqueue_queue_t * const threadqueue) {
threadqueue->queue_count = 0; threadqueue->queue_count = 0;
threadqueue->queue_start = 0; threadqueue->queue_start = 0;
#ifdef _DEBUG #ifdef _DEBUG
#if _DEBUG & _DEBUG_PERF_JOB #if _DEBUG & KVZ_PERF_JOB
{ {
CLOCK_T time; CLOCK_T time;
GET_TIME(&time); GET_TIME(&time);
@ -512,9 +512,9 @@ threadqueue_job_t * kvz_threadqueue_submit(threadqueue_queue_t * const threadque
//No lock here... this should be constant //No lock here... this should be constant
if (threadqueue->threads_count == 0) { if (threadqueue->threads_count == 0) {
//FIXME: This should be improved in order to handle dependencies //FIXME: This should be improved in order to handle dependencies
PERFORMANCE_MEASURE_START(_DEBUG_PERF_JOB); PERFORMANCE_MEASURE_START(KVZ_PERF_JOB);
fptr(arg); fptr(arg);
PERFORMANCE_MEASURE_END(_DEBUG_PERF_JOB, threadqueue, "%s", debug_description); PERFORMANCE_MEASURE_END(KVZ_PERF_JOB, threadqueue, "%s", debug_description);
return NULL; return NULL;
} }

View file

@ -119,6 +119,15 @@ int kvz_threadqueue_finalize(threadqueue_queue_t * threadqueue);
#ifdef _DEBUG #ifdef _DEBUG
int threadqueue_log(threadqueue_queue_t * threadqueue, const CLOCK_T *start, const CLOCK_T *stop, const char* debug_description); int threadqueue_log(threadqueue_queue_t * threadqueue, const CLOCK_T *start, const CLOCK_T *stop, const char* debug_description);
// Bitmasks for PERFORMANCE_MEASURE_START and PERFORMANCE_MEASURE_END.
#define KVZ_PERF_FRAME (1 << 0)
#define KVZ_PERF_JOB (1 << 1)
#define KVZ_PERF_LCU (1 << 2)
#define KVZ_PERF_SAOREC (1 << 3)
#define KVZ_PERF_BSLEAF (1 << 4)
#define KVZ_PERF_SEARCHCU (1 << 5)
#define KVZ_PERF_SEARCHPX (1 << 6)
#define IMPL_PERFORMANCE_MEASURE_START(mask) CLOCK_T start, stop; if ((_DEBUG) & mask) { GET_TIME(&start); } #define IMPL_PERFORMANCE_MEASURE_START(mask) CLOCK_T start, stop; if ((_DEBUG) & mask) { GET_TIME(&start); }
#define IMPL_PERFORMANCE_MEASURE_END(mask, threadqueue, str, ...) { if ((_DEBUG) & mask) { GET_TIME(&stop); {char job_description[256]; sprintf(job_description, (str), __VA_ARGS__); threadqueue_log((threadqueue), &start, &stop, job_description);}} } \ #define IMPL_PERFORMANCE_MEASURE_END(mask, threadqueue, str, ...) { if ((_DEBUG) & mask) { GET_TIME(&stop); {char job_description[256]; sprintf(job_description, (str), __VA_ARGS__); threadqueue_log((threadqueue), &start, &stop, job_description);}} } \