From ec2d8d6ad7ab0c90e41cf2ebba0fad52d14dd5f2 Mon Sep 17 00:00:00 2001 From: Ari Koivula Date: Mon, 14 Sep 2015 12:34:41 +0300 Subject: [PATCH] Rename _DEBUG_PERF macros to KVZ_PERF And move them to threadqueue.h, where the things that use them are. --- src/encoder_state-bitstream.c | 8 ++++---- src/encoderstate.c | 20 +++++++++---------- src/global.h | 9 --------- src/search.c | 4 ++-- src/search_inter.c | 36 +++++++++++++++++------------------ src/threadqueue.c | 8 ++++---- src/threadqueue.h | 9 +++++++++ 7 files changed, 47 insertions(+), 47 deletions(-) diff --git a/src/encoder_state-bitstream.c b/src/encoder_state-bitstream.c index ca32a05b..e033fc0c 100644 --- a/src/encoder_state-bitstream.c +++ b/src/encoder_state-bitstream.c @@ -887,16 +887,16 @@ static void encoder_state_write_bitstream_main(encoder_state_t * const state) } { - PERFORMANCE_MEASURE_START(_DEBUG_PERF_FRAME_LEVEL); + PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME); encoder_state_write_bitstream_children(state); - PERFORMANCE_MEASURE_END(_DEBUG_PERF_FRAME_LEVEL, state->encoder_control->threadqueue, "type=write_bitstream_append,frame=%d,encoder_type=%c", state->global->frame, state->type); + PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, state->encoder_control->threadqueue, "type=write_bitstream_append,frame=%d,encoder_type=%c", state->global->frame, state->type); } { - PERFORMANCE_MEASURE_START(_DEBUG_PERF_FRAME_LEVEL); + PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME); // Calculate checksum add_checksum(state); - PERFORMANCE_MEASURE_END(_DEBUG_PERF_FRAME_LEVEL, state->encoder_control->threadqueue, "type=write_bitstream_checksum,frame=%d,encoder_type=%c", state->global->frame, state->type); + PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, state->encoder_control->threadqueue, "type=write_bitstream_checksum,frame=%d,encoder_type=%c", state->global->frame, state->type); } //Get bitstream length for stats diff --git a/src/encoderstate.c b/src/encoderstate.c index 1628a61f..ff08603e 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -323,22 +323,22 @@ static void encoder_state_encode_leaf(encoder_state_t * const state) { // frame is encoded. Deblocking and SAO search is done during LCU encoding. for (int i = 0; i < state->lcu_order_count; ++i) { - PERFORMANCE_MEASURE_START(_DEBUG_PERF_ENCODE_LCU); + PERFORMANCE_MEASURE_START(KVZ_PERF_LCU); encoder_state_worker_encode_lcu(&state->lcu_order[i]); #ifdef _DEBUG { const lcu_order_element_t * const lcu = &state->lcu_order[i]; - PERFORMANCE_MEASURE_END(_DEBUG_PERF_ENCODE_LCU, state->encoder_control->threadqueue, "type=encode_lcu,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d", state->global->frame, state->tile->id, state->slice->id, lcu->position_px.x + state->tile->lcu_offset_x * LCU_WIDTH, lcu->position_px.x + state->tile->lcu_offset_x * LCU_WIDTH + lcu->size.x - 1, lcu->position_px.y + state->tile->lcu_offset_y * LCU_WIDTH, lcu->position_px.y + state->tile->lcu_offset_y * LCU_WIDTH + lcu->size.y - 1); + PERFORMANCE_MEASURE_END(KVZ_PERF_LCU, state->encoder_control->threadqueue, "type=encode_lcu,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d", state->global->frame, state->tile->id, state->slice->id, lcu->position_px.x + state->tile->lcu_offset_x * LCU_WIDTH, lcu->position_px.x + state->tile->lcu_offset_x * LCU_WIDTH + lcu->size.x - 1, lcu->position_px.y + state->tile->lcu_offset_y * LCU_WIDTH, lcu->position_px.y + state->tile->lcu_offset_y * LCU_WIDTH + lcu->size.y - 1); } #endif //_DEBUG } if (state->encoder_control->sao_enable) { - PERFORMANCE_MEASURE_START(_DEBUG_PERF_SAO_RECONSTRUCT_FRAME); + PERFORMANCE_MEASURE_START(KVZ_PERF_SAOREC); kvz_sao_reconstruct_frame(state); - PERFORMANCE_MEASURE_END(_DEBUG_PERF_SAO_RECONSTRUCT_FRAME, state->encoder_control->threadqueue, "type=kvz_sao_reconstruct_frame,frame=%d,tile=%d,slice=%d,row=%d-%d,px_x=%d-%d,px_y=%d-%d", state->global->frame, state->tile->id, state->slice->id, state->lcu_order[0].position.y + state->tile->lcu_offset_y, state->lcu_order[state->lcu_order_count-1].position.y + state->tile->lcu_offset_y, + PERFORMANCE_MEASURE_END(KVZ_PERF_SAOREC, state->encoder_control->threadqueue, "type=kvz_sao_reconstruct_frame,frame=%d,tile=%d,slice=%d,row=%d-%d,px_x=%d-%d,px_y=%d-%d", state->global->frame, state->tile->id, state->slice->id, state->lcu_order[0].position.y + state->tile->lcu_offset_y, state->lcu_order[state->lcu_order_count - 1].position.y + state->tile->lcu_offset_y, state->tile->lcu_offset_x * LCU_WIDTH, state->tile->frame->width + state->tile->lcu_offset_x * LCU_WIDTH - 1, state->tile->lcu_offset_y * LCU_WIDTH, state->tile->frame->height + state->tile->lcu_offset_y * LCU_WIDTH - 1 ); @@ -405,9 +405,9 @@ static void encoder_state_worker_encode_children(void * opaque) { encoder_state_encode(sub_state); if (sub_state->is_leaf) { if (sub_state->type != ENCODER_STATE_TYPE_WAVEFRONT_ROW) { - PERFORMANCE_MEASURE_START(_DEBUG_PERF_WRITE_BITSTREAM_LEAF); + PERFORMANCE_MEASURE_START(KVZ_PERF_BSLEAF); kvz_encoder_state_write_bitstream_leaf(sub_state); - PERFORMANCE_MEASURE_END(_DEBUG_PERF_WRITE_BITSTREAM_LEAF, sub_state->encoder_control->threadqueue, "type=encoder_state_write_bitstream_leaf,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d", sub_state->global->frame, sub_state->tile->id, sub_state->slice->id, sub_state->lcu_order[0].position_px.x + sub_state->tile->lcu_offset_x * LCU_WIDTH, sub_state->lcu_order[sub_state->lcu_order_count-1].position_px.x + sub_state->lcu_order[sub_state->lcu_order_count-1].size.x + sub_state->tile->lcu_offset_x * LCU_WIDTH - 1, sub_state->lcu_order[0].position_px.y + sub_state->tile->lcu_offset_y * LCU_WIDTH, sub_state->lcu_order[sub_state->lcu_order_count-1].position_px.y + sub_state->lcu_order[sub_state->lcu_order_count-1].size.y + sub_state->tile->lcu_offset_y * LCU_WIDTH - 1); + PERFORMANCE_MEASURE_END(KVZ_PERF_BSLEAF, sub_state->encoder_control->threadqueue, "type=encoder_state_write_bitstream_leaf,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d", sub_state->global->frame, sub_state->tile->id, sub_state->slice->id, sub_state->lcu_order[0].position_px.x + sub_state->tile->lcu_offset_x * LCU_WIDTH, sub_state->lcu_order[sub_state->lcu_order_count - 1].position_px.x + sub_state->lcu_order[sub_state->lcu_order_count - 1].size.x + sub_state->tile->lcu_offset_x * LCU_WIDTH - 1, sub_state->lcu_order[0].position_px.y + sub_state->tile->lcu_offset_y * LCU_WIDTH, sub_state->lcu_order[sub_state->lcu_order_count - 1].position_px.y + sub_state->lcu_order[sub_state->lcu_order_count - 1].size.y + sub_state->tile->lcu_offset_y * LCU_WIDTH - 1); } else { threadqueue_job_t *job; #ifdef _DEBUG @@ -822,14 +822,14 @@ static void _encode_one_frame_add_bitstream_deps(const encoder_state_t * const s void kvz_encode_one_frame(encoder_state_t * const state) { { - PERFORMANCE_MEASURE_START(_DEBUG_PERF_FRAME_LEVEL); + PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME); encoder_state_new_frame(state); - PERFORMANCE_MEASURE_END(_DEBUG_PERF_FRAME_LEVEL, state->encoder_control->threadqueue, "type=new_frame,frame=%d,poc=%d", state->global->frame, state->global->poc); + PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, state->encoder_control->threadqueue, "type=new_frame,frame=%d,poc=%d", state->global->frame, state->global->poc); } { - PERFORMANCE_MEASURE_START(_DEBUG_PERF_FRAME_LEVEL); + PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME); encoder_state_encode(state); - PERFORMANCE_MEASURE_END(_DEBUG_PERF_FRAME_LEVEL, state->encoder_control->threadqueue, "type=encode,frame=%d", state->global->frame); + PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, state->encoder_control->threadqueue, "type=encode,frame=%d", state->global->frame); } //kvz_threadqueue_flush(main_state->encoder_control->threadqueue); { diff --git a/src/global.h b/src/global.h index 0fdb565c..a741d7da 100644 --- a/src/global.h +++ b/src/global.h @@ -209,15 +209,6 @@ typedef int16_t coeff_t; #define EXP_GOLOMB_TABLE_SIZE (4096*8) -//DEBUG BITMASK -#define _DEBUG_PERF_FRAME_LEVEL 0x0001 -#define _DEBUG_PERF_JOB 0x0002 -#define _DEBUG_PERF_ENCODE_LCU 0x0004 -#define _DEBUG_PERF_SAO_RECONSTRUCT_FRAME 0x0008 -#define _DEBUG_PERF_WRITE_BITSTREAM_LEAF 0x0010 -#define _DEBUG_PERF_SEARCH_CU 0x0020 -#define _DEBUG_PERF_SEARCH_PIXELS 0x0040 - //Constants typedef enum { COLOR_Y = 0, COLOR_U, COLOR_V, NUM_COLORS } color_t; enum { SLICE_B = 0, SLICE_P = 1, SLICE_I = 2 }; diff --git a/src/search.c b/src/search.c index db60fa6a..255807d3 100644 --- a/src/search.c +++ b/src/search.c @@ -497,7 +497,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, #ifdef _DEBUG int debug_split = 0; #endif - PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_CU); + PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHCU); // Stop recursion if the CU is completely outside the frame. if (x >= frame->width || y >= frame->height) { @@ -688,7 +688,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, } } - PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_CU, state->encoder_control->threadqueue, "type=search_cu,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d,depth=%d,split=%d,cur_cu_is_intra=%d", state->global->frame, state->tile->id, state->slice->id, + PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHCU, state->encoder_control->threadqueue, "type=search_cu,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d,depth=%d,split=%d,cur_cu_is_intra=%d", state->global->frame, state->tile->id, state->slice->id, (state->tile->lcu_offset_x * LCU_WIDTH) + x, (state->tile->lcu_offset_x * LCU_WIDTH) + x + (LCU_WIDTH >> depth), (state->tile->lcu_offset_y * LCU_WIDTH) + y, diff --git a/src/search_inter.c b/src/search_inter.c index 935e1914..bb89909d 100644 --- a/src/search_inter.c +++ b/src/search_inter.c @@ -246,14 +246,14 @@ unsigned kvz_tz_pattern_search(const encoder_state_t * const state, const kvz_pi uint32_t bitcost; { - PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS); + PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX); cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + current->y, block_width, block_width, max_lcu_below); cost += calc_mvd_cost(state, mv->x + current->x, mv->y + current->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); - PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, + PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x + block_width, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + current->y, @@ -303,14 +303,14 @@ unsigned kvz_tz_raster_search(const encoder_state_t * const state, const kvz_pic uint32_t bitcost; { - PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS); + PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX); cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + i, block_width, block_width, max_lcu_below); cost += calc_mvd_cost(state, mv->x + k, mv->y + i, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); - PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, + PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k + block_width, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + i, @@ -370,7 +370,7 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth, // Check whatever input vector we got, unless its (0, 0) which will be checked later. if (mv.x || mv.y) { - PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS); + PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX); best_cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, @@ -378,7 +378,7 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth, block_width, block_width, max_lcu_below); best_cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost); - PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, + PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + block_width, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y, @@ -394,7 +394,7 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth, mv.x = merge_cand[i].mv[merge_cand[i].dir - 1][0] >> 2; mv.y = merge_cand[i].mv[merge_cand[i].dir - 1][1] >> 2; - PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS); + PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX); uint32_t bitcost; unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, @@ -403,7 +403,7 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth, block_width, block_width, max_lcu_below); cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); - PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, + PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + block_width, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y, @@ -547,7 +547,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep } if (!mv_in_merge_cand) { - PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS); + PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX); best_cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, @@ -557,7 +557,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep best_bitcost = bitcost; best_index = num_cand; - PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, + PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + block_width, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y, @@ -571,7 +571,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep mv.x = merge_cand[i].mv[merge_cand[i].dir - 1][0] >> 2; mv.y = merge_cand[i].mv[merge_cand[i].dir - 1][1] >> 2; - PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS); + PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX); unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, @@ -579,7 +579,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep block_width, block_width, max_lcu_below); cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); - PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, + PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + block_width, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y, @@ -605,14 +605,14 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep const vector2d_t *pattern = &large_hexbs[i]; unsigned cost; { - PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS); + PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX); cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y, block_width, block_width, max_lcu_below); cost += calc_mvd_cost(state, mv.x + pattern->x, mv.y + pattern->y, 2, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); - PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, + PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x + block_width, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y, @@ -648,13 +648,13 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep const vector2d_t *offset = &large_hexbs[start + i]; unsigned cost; { - PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS); + PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX); cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y, block_width, block_width, max_lcu_below); cost += calc_mvd_cost(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); - PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=large_hexbs_iterative,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, + PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs_iterative,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y, @@ -680,13 +680,13 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep const vector2d_t *offset = &small_hexbs[i]; unsigned cost; { - PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS); + PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX); cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y, block_width, block_width, max_lcu_below); cost += calc_mvd_cost(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); - PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=small_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, + PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=small_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y, diff --git a/src/threadqueue.c b/src/threadqueue.c index ca03dabe..cd26845c 100644 --- a/src/threadqueue.c +++ b/src/threadqueue.c @@ -307,7 +307,7 @@ int kvz_threadqueue_init(threadqueue_queue_t * const threadqueue, int thread_cou static void threadqueue_free_job(threadqueue_queue_t * const threadqueue, int i) { #ifdef _DEBUG -#if _DEBUG & _DEBUG_PERF_JOB +#if _DEBUG & KVZ_PERF_JOB int j; GET_TIME(&threadqueue->queue[i]->debug_clock_dequeue); fprintf(threadqueue->debug_log, "%p\t%d\t%lf\t+%lf\t+%lf\t+%lf\t%s\n", threadqueue->queue[i], threadqueue->queue[i]->debug_worker_id, CLOCK_T_AS_DOUBLE(threadqueue->queue[i]->debug_clock_enqueue), CLOCK_T_DIFF(threadqueue->queue[i]->debug_clock_enqueue, threadqueue->queue[i]->debug_clock_start), CLOCK_T_DIFF(threadqueue->queue[i]->debug_clock_start, threadqueue->queue[i]->debug_clock_stop), CLOCK_T_DIFF(threadqueue->queue[i]->debug_clock_stop, threadqueue->queue[i]->debug_clock_dequeue), threadqueue->queue[i]->debug_description); @@ -334,7 +334,7 @@ static void threadqueue_free_jobs(threadqueue_queue_t * const threadqueue) { threadqueue->queue_count = 0; threadqueue->queue_start = 0; #ifdef _DEBUG -#if _DEBUG & _DEBUG_PERF_JOB +#if _DEBUG & KVZ_PERF_JOB { CLOCK_T time; GET_TIME(&time); @@ -512,9 +512,9 @@ threadqueue_job_t * kvz_threadqueue_submit(threadqueue_queue_t * const threadque //No lock here... this should be constant if (threadqueue->threads_count == 0) { //FIXME: This should be improved in order to handle dependencies - PERFORMANCE_MEASURE_START(_DEBUG_PERF_JOB); + PERFORMANCE_MEASURE_START(KVZ_PERF_JOB); fptr(arg); - PERFORMANCE_MEASURE_END(_DEBUG_PERF_JOB, threadqueue, "%s", debug_description); + PERFORMANCE_MEASURE_END(KVZ_PERF_JOB, threadqueue, "%s", debug_description); return NULL; } diff --git a/src/threadqueue.h b/src/threadqueue.h index d3bcb4b4..71f9540c 100644 --- a/src/threadqueue.h +++ b/src/threadqueue.h @@ -119,6 +119,15 @@ int kvz_threadqueue_finalize(threadqueue_queue_t * threadqueue); #ifdef _DEBUG int threadqueue_log(threadqueue_queue_t * threadqueue, const CLOCK_T *start, const CLOCK_T *stop, const char* debug_description); +// Bitmasks for PERFORMANCE_MEASURE_START and PERFORMANCE_MEASURE_END. +#define KVZ_PERF_FRAME (1 << 0) +#define KVZ_PERF_JOB (1 << 1) +#define KVZ_PERF_LCU (1 << 2) +#define KVZ_PERF_SAOREC (1 << 3) +#define KVZ_PERF_BSLEAF (1 << 4) +#define KVZ_PERF_SEARCHCU (1 << 5) +#define KVZ_PERF_SEARCHPX (1 << 6) + #define IMPL_PERFORMANCE_MEASURE_START(mask) CLOCK_T start, stop; if ((_DEBUG) & mask) { GET_TIME(&start); } #define IMPL_PERFORMANCE_MEASURE_END(mask, threadqueue, str, ...) { if ((_DEBUG) & mask) { GET_TIME(&stop); {char job_description[256]; sprintf(job_description, (str), __VA_ARGS__); threadqueue_log((threadqueue), &start, &stop, job_description);}} } \