mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 19:24:06 +00:00
Rename _DEBUG_PERF macros to KVZ_PERF
Also move them to threadqueue.h, next to the PERFORMANCE_MEASURE macros that use them.
This commit is contained in:
parent
d70362978e
commit
ec2d8d6ad7
|
@ -887,16 +887,16 @@ static void encoder_state_write_bitstream_main(encoder_state_t * const state)
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
PERFORMANCE_MEASURE_START(_DEBUG_PERF_FRAME_LEVEL);
|
PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME);
|
||||||
encoder_state_write_bitstream_children(state);
|
encoder_state_write_bitstream_children(state);
|
||||||
PERFORMANCE_MEASURE_END(_DEBUG_PERF_FRAME_LEVEL, state->encoder_control->threadqueue, "type=write_bitstream_append,frame=%d,encoder_type=%c", state->global->frame, state->type);
|
PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, state->encoder_control->threadqueue, "type=write_bitstream_append,frame=%d,encoder_type=%c", state->global->frame, state->type);
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
PERFORMANCE_MEASURE_START(_DEBUG_PERF_FRAME_LEVEL);
|
PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME);
|
||||||
// Calculate checksum
|
// Calculate checksum
|
||||||
add_checksum(state);
|
add_checksum(state);
|
||||||
PERFORMANCE_MEASURE_END(_DEBUG_PERF_FRAME_LEVEL, state->encoder_control->threadqueue, "type=write_bitstream_checksum,frame=%d,encoder_type=%c", state->global->frame, state->type);
|
PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, state->encoder_control->threadqueue, "type=write_bitstream_checksum,frame=%d,encoder_type=%c", state->global->frame, state->type);
|
||||||
}
|
}
|
||||||
|
|
||||||
//Get bitstream length for stats
|
//Get bitstream length for stats
|
||||||
|
|
|
@ -323,22 +323,22 @@ static void encoder_state_encode_leaf(encoder_state_t * const state) {
|
||||||
// frame is encoded. Deblocking and SAO search is done during LCU encoding.
|
// frame is encoded. Deblocking and SAO search is done during LCU encoding.
|
||||||
|
|
||||||
for (int i = 0; i < state->lcu_order_count; ++i) {
|
for (int i = 0; i < state->lcu_order_count; ++i) {
|
||||||
PERFORMANCE_MEASURE_START(_DEBUG_PERF_ENCODE_LCU);
|
PERFORMANCE_MEASURE_START(KVZ_PERF_LCU);
|
||||||
|
|
||||||
encoder_state_worker_encode_lcu(&state->lcu_order[i]);
|
encoder_state_worker_encode_lcu(&state->lcu_order[i]);
|
||||||
|
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
{
|
{
|
||||||
const lcu_order_element_t * const lcu = &state->lcu_order[i];
|
const lcu_order_element_t * const lcu = &state->lcu_order[i];
|
||||||
PERFORMANCE_MEASURE_END(_DEBUG_PERF_ENCODE_LCU, state->encoder_control->threadqueue, "type=encode_lcu,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d", state->global->frame, state->tile->id, state->slice->id, lcu->position_px.x + state->tile->lcu_offset_x * LCU_WIDTH, lcu->position_px.x + state->tile->lcu_offset_x * LCU_WIDTH + lcu->size.x - 1, lcu->position_px.y + state->tile->lcu_offset_y * LCU_WIDTH, lcu->position_px.y + state->tile->lcu_offset_y * LCU_WIDTH + lcu->size.y - 1);
|
PERFORMANCE_MEASURE_END(KVZ_PERF_LCU, state->encoder_control->threadqueue, "type=encode_lcu,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d", state->global->frame, state->tile->id, state->slice->id, lcu->position_px.x + state->tile->lcu_offset_x * LCU_WIDTH, lcu->position_px.x + state->tile->lcu_offset_x * LCU_WIDTH + lcu->size.x - 1, lcu->position_px.y + state->tile->lcu_offset_y * LCU_WIDTH, lcu->position_px.y + state->tile->lcu_offset_y * LCU_WIDTH + lcu->size.y - 1);
|
||||||
}
|
}
|
||||||
#endif //_DEBUG
|
#endif //_DEBUG
|
||||||
}
|
}
|
||||||
|
|
||||||
if (state->encoder_control->sao_enable) {
|
if (state->encoder_control->sao_enable) {
|
||||||
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SAO_RECONSTRUCT_FRAME);
|
PERFORMANCE_MEASURE_START(KVZ_PERF_SAOREC);
|
||||||
kvz_sao_reconstruct_frame(state);
|
kvz_sao_reconstruct_frame(state);
|
||||||
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SAO_RECONSTRUCT_FRAME, state->encoder_control->threadqueue, "type=kvz_sao_reconstruct_frame,frame=%d,tile=%d,slice=%d,row=%d-%d,px_x=%d-%d,px_y=%d-%d", state->global->frame, state->tile->id, state->slice->id, state->lcu_order[0].position.y + state->tile->lcu_offset_y, state->lcu_order[state->lcu_order_count-1].position.y + state->tile->lcu_offset_y,
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SAOREC, state->encoder_control->threadqueue, "type=kvz_sao_reconstruct_frame,frame=%d,tile=%d,slice=%d,row=%d-%d,px_x=%d-%d,px_y=%d-%d", state->global->frame, state->tile->id, state->slice->id, state->lcu_order[0].position.y + state->tile->lcu_offset_y, state->lcu_order[state->lcu_order_count - 1].position.y + state->tile->lcu_offset_y,
|
||||||
state->tile->lcu_offset_x * LCU_WIDTH, state->tile->frame->width + state->tile->lcu_offset_x * LCU_WIDTH - 1,
|
state->tile->lcu_offset_x * LCU_WIDTH, state->tile->frame->width + state->tile->lcu_offset_x * LCU_WIDTH - 1,
|
||||||
state->tile->lcu_offset_y * LCU_WIDTH, state->tile->frame->height + state->tile->lcu_offset_y * LCU_WIDTH - 1
|
state->tile->lcu_offset_y * LCU_WIDTH, state->tile->frame->height + state->tile->lcu_offset_y * LCU_WIDTH - 1
|
||||||
);
|
);
|
||||||
|
@ -405,9 +405,9 @@ static void encoder_state_worker_encode_children(void * opaque) {
|
||||||
encoder_state_encode(sub_state);
|
encoder_state_encode(sub_state);
|
||||||
if (sub_state->is_leaf) {
|
if (sub_state->is_leaf) {
|
||||||
if (sub_state->type != ENCODER_STATE_TYPE_WAVEFRONT_ROW) {
|
if (sub_state->type != ENCODER_STATE_TYPE_WAVEFRONT_ROW) {
|
||||||
PERFORMANCE_MEASURE_START(_DEBUG_PERF_WRITE_BITSTREAM_LEAF);
|
PERFORMANCE_MEASURE_START(KVZ_PERF_BSLEAF);
|
||||||
kvz_encoder_state_write_bitstream_leaf(sub_state);
|
kvz_encoder_state_write_bitstream_leaf(sub_state);
|
||||||
PERFORMANCE_MEASURE_END(_DEBUG_PERF_WRITE_BITSTREAM_LEAF, sub_state->encoder_control->threadqueue, "type=encoder_state_write_bitstream_leaf,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d", sub_state->global->frame, sub_state->tile->id, sub_state->slice->id, sub_state->lcu_order[0].position_px.x + sub_state->tile->lcu_offset_x * LCU_WIDTH, sub_state->lcu_order[sub_state->lcu_order_count-1].position_px.x + sub_state->lcu_order[sub_state->lcu_order_count-1].size.x + sub_state->tile->lcu_offset_x * LCU_WIDTH - 1, sub_state->lcu_order[0].position_px.y + sub_state->tile->lcu_offset_y * LCU_WIDTH, sub_state->lcu_order[sub_state->lcu_order_count-1].position_px.y + sub_state->lcu_order[sub_state->lcu_order_count-1].size.y + sub_state->tile->lcu_offset_y * LCU_WIDTH - 1);
|
PERFORMANCE_MEASURE_END(KVZ_PERF_BSLEAF, sub_state->encoder_control->threadqueue, "type=encoder_state_write_bitstream_leaf,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d", sub_state->global->frame, sub_state->tile->id, sub_state->slice->id, sub_state->lcu_order[0].position_px.x + sub_state->tile->lcu_offset_x * LCU_WIDTH, sub_state->lcu_order[sub_state->lcu_order_count - 1].position_px.x + sub_state->lcu_order[sub_state->lcu_order_count - 1].size.x + sub_state->tile->lcu_offset_x * LCU_WIDTH - 1, sub_state->lcu_order[0].position_px.y + sub_state->tile->lcu_offset_y * LCU_WIDTH, sub_state->lcu_order[sub_state->lcu_order_count - 1].position_px.y + sub_state->lcu_order[sub_state->lcu_order_count - 1].size.y + sub_state->tile->lcu_offset_y * LCU_WIDTH - 1);
|
||||||
} else {
|
} else {
|
||||||
threadqueue_job_t *job;
|
threadqueue_job_t *job;
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
|
@ -822,14 +822,14 @@ static void _encode_one_frame_add_bitstream_deps(const encoder_state_t * const s
|
||||||
void kvz_encode_one_frame(encoder_state_t * const state)
|
void kvz_encode_one_frame(encoder_state_t * const state)
|
||||||
{
|
{
|
||||||
{
|
{
|
||||||
PERFORMANCE_MEASURE_START(_DEBUG_PERF_FRAME_LEVEL);
|
PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME);
|
||||||
encoder_state_new_frame(state);
|
encoder_state_new_frame(state);
|
||||||
PERFORMANCE_MEASURE_END(_DEBUG_PERF_FRAME_LEVEL, state->encoder_control->threadqueue, "type=new_frame,frame=%d,poc=%d", state->global->frame, state->global->poc);
|
PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, state->encoder_control->threadqueue, "type=new_frame,frame=%d,poc=%d", state->global->frame, state->global->poc);
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
PERFORMANCE_MEASURE_START(_DEBUG_PERF_FRAME_LEVEL);
|
PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME);
|
||||||
encoder_state_encode(state);
|
encoder_state_encode(state);
|
||||||
PERFORMANCE_MEASURE_END(_DEBUG_PERF_FRAME_LEVEL, state->encoder_control->threadqueue, "type=encode,frame=%d", state->global->frame);
|
PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, state->encoder_control->threadqueue, "type=encode,frame=%d", state->global->frame);
|
||||||
}
|
}
|
||||||
//kvz_threadqueue_flush(main_state->encoder_control->threadqueue);
|
//kvz_threadqueue_flush(main_state->encoder_control->threadqueue);
|
||||||
{
|
{
|
||||||
|
|
|
@ -209,15 +209,6 @@ typedef int16_t coeff_t;
|
||||||
|
|
||||||
#define EXP_GOLOMB_TABLE_SIZE (4096*8)
|
#define EXP_GOLOMB_TABLE_SIZE (4096*8)
|
||||||
|
|
||||||
//DEBUG BITMASK
|
|
||||||
#define _DEBUG_PERF_FRAME_LEVEL 0x0001
|
|
||||||
#define _DEBUG_PERF_JOB 0x0002
|
|
||||||
#define _DEBUG_PERF_ENCODE_LCU 0x0004
|
|
||||||
#define _DEBUG_PERF_SAO_RECONSTRUCT_FRAME 0x0008
|
|
||||||
#define _DEBUG_PERF_WRITE_BITSTREAM_LEAF 0x0010
|
|
||||||
#define _DEBUG_PERF_SEARCH_CU 0x0020
|
|
||||||
#define _DEBUG_PERF_SEARCH_PIXELS 0x0040
|
|
||||||
|
|
||||||
//Constants
|
//Constants
|
||||||
typedef enum { COLOR_Y = 0, COLOR_U, COLOR_V, NUM_COLORS } color_t;
|
typedef enum { COLOR_Y = 0, COLOR_U, COLOR_V, NUM_COLORS } color_t;
|
||||||
enum { SLICE_B = 0, SLICE_P = 1, SLICE_I = 2 };
|
enum { SLICE_B = 0, SLICE_P = 1, SLICE_I = 2 };
|
||||||
|
|
|
@ -497,7 +497,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
int debug_split = 0;
|
int debug_split = 0;
|
||||||
#endif
|
#endif
|
||||||
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_CU);
|
PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHCU);
|
||||||
|
|
||||||
// Stop recursion if the CU is completely outside the frame.
|
// Stop recursion if the CU is completely outside the frame.
|
||||||
if (x >= frame->width || y >= frame->height) {
|
if (x >= frame->width || y >= frame->height) {
|
||||||
|
@ -688,7 +688,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_CU, state->encoder_control->threadqueue, "type=search_cu,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d,depth=%d,split=%d,cur_cu_is_intra=%d", state->global->frame, state->tile->id, state->slice->id,
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHCU, state->encoder_control->threadqueue, "type=search_cu,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d,depth=%d,split=%d,cur_cu_is_intra=%d", state->global->frame, state->tile->id, state->slice->id,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + x,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + x + (LCU_WIDTH >> depth),
|
(state->tile->lcu_offset_x * LCU_WIDTH) + x + (LCU_WIDTH >> depth),
|
||||||
(state->tile->lcu_offset_y * LCU_WIDTH) + y,
|
(state->tile->lcu_offset_y * LCU_WIDTH) + y,
|
||||||
|
|
|
@ -246,14 +246,14 @@ unsigned kvz_tz_pattern_search(const encoder_state_t * const state, const kvz_pi
|
||||||
uint32_t bitcost;
|
uint32_t bitcost;
|
||||||
|
|
||||||
{
|
{
|
||||||
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS);
|
PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
|
||||||
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x,
|
||||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + current->y,
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + current->y,
|
||||||
block_width, block_width, max_lcu_below);
|
block_width, block_width, max_lcu_below);
|
||||||
cost += calc_mvd_cost(state, mv->x + current->x, mv->y + current->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
cost += calc_mvd_cost(state, mv->x + current->x, mv->y + current->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||||
|
|
||||||
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x + block_width,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x + block_width,
|
||||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + current->y,
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + current->y,
|
||||||
|
@ -303,14 +303,14 @@ unsigned kvz_tz_raster_search(const encoder_state_t * const state, const kvz_pic
|
||||||
uint32_t bitcost;
|
uint32_t bitcost;
|
||||||
|
|
||||||
{
|
{
|
||||||
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS);
|
PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
|
||||||
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k,
|
||||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + i,
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + i,
|
||||||
block_width, block_width, max_lcu_below);
|
block_width, block_width, max_lcu_below);
|
||||||
cost += calc_mvd_cost(state, mv->x + k, mv->y + i, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
cost += calc_mvd_cost(state, mv->x + k, mv->y + i, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||||
|
|
||||||
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k + block_width,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k + block_width,
|
||||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + i,
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + i,
|
||||||
|
@ -370,7 +370,7 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth,
|
||||||
// Check whatever input vector we got, unless its (0, 0) which will be checked later.
|
// Check whatever input vector we got, unless its (0, 0) which will be checked later.
|
||||||
if (mv.x || mv.y)
|
if (mv.x || mv.y)
|
||||||
{
|
{
|
||||||
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS);
|
PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
|
||||||
|
|
||||||
best_cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
best_cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
||||||
|
@ -378,7 +378,7 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth,
|
||||||
block_width, block_width, max_lcu_below);
|
block_width, block_width, max_lcu_below);
|
||||||
best_cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost);
|
best_cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost);
|
||||||
|
|
||||||
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + block_width,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + block_width,
|
||||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
||||||
|
@ -394,7 +394,7 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth,
|
||||||
mv.x = merge_cand[i].mv[merge_cand[i].dir - 1][0] >> 2;
|
mv.x = merge_cand[i].mv[merge_cand[i].dir - 1][0] >> 2;
|
||||||
mv.y = merge_cand[i].mv[merge_cand[i].dir - 1][1] >> 2;
|
mv.y = merge_cand[i].mv[merge_cand[i].dir - 1][1] >> 2;
|
||||||
|
|
||||||
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS);
|
PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
|
||||||
|
|
||||||
uint32_t bitcost;
|
uint32_t bitcost;
|
||||||
unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
||||||
|
@ -403,7 +403,7 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth,
|
||||||
block_width, block_width, max_lcu_below);
|
block_width, block_width, max_lcu_below);
|
||||||
cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||||
|
|
||||||
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + block_width,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + block_width,
|
||||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
||||||
|
@ -547,7 +547,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!mv_in_merge_cand) {
|
if (!mv_in_merge_cand) {
|
||||||
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS);
|
PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
|
||||||
|
|
||||||
best_cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
best_cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
||||||
|
@ -557,7 +557,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
|
||||||
best_bitcost = bitcost;
|
best_bitcost = bitcost;
|
||||||
best_index = num_cand;
|
best_index = num_cand;
|
||||||
|
|
||||||
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + block_width,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + block_width,
|
||||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
||||||
|
@ -571,7 +571,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
|
||||||
mv.x = merge_cand[i].mv[merge_cand[i].dir - 1][0] >> 2;
|
mv.x = merge_cand[i].mv[merge_cand[i].dir - 1][0] >> 2;
|
||||||
mv.y = merge_cand[i].mv[merge_cand[i].dir - 1][1] >> 2;
|
mv.y = merge_cand[i].mv[merge_cand[i].dir - 1][1] >> 2;
|
||||||
|
|
||||||
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS);
|
PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
|
||||||
|
|
||||||
unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
||||||
|
@ -579,7 +579,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
|
||||||
block_width, block_width, max_lcu_below);
|
block_width, block_width, max_lcu_below);
|
||||||
cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||||
|
|
||||||
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + block_width,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + block_width,
|
||||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
||||||
|
@ -605,14 +605,14 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
|
||||||
const vector2d_t *pattern = &large_hexbs[i];
|
const vector2d_t *pattern = &large_hexbs[i];
|
||||||
unsigned cost;
|
unsigned cost;
|
||||||
{
|
{
|
||||||
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS);
|
PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
|
||||||
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x,
|
||||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y,
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y,
|
||||||
block_width, block_width, max_lcu_below);
|
block_width, block_width, max_lcu_below);
|
||||||
cost += calc_mvd_cost(state, mv.x + pattern->x, mv.y + pattern->y, 2, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
cost += calc_mvd_cost(state, mv.x + pattern->x, mv.y + pattern->y, 2, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
||||||
|
|
||||||
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x + block_width,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x + block_width,
|
||||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y,
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y,
|
||||||
|
@ -648,13 +648,13 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
|
||||||
const vector2d_t *offset = &large_hexbs[start + i];
|
const vector2d_t *offset = &large_hexbs[start + i];
|
||||||
unsigned cost;
|
unsigned cost;
|
||||||
{
|
{
|
||||||
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS);
|
PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
|
||||||
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
||||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
||||||
block_width, block_width, max_lcu_below);
|
block_width, block_width, max_lcu_below);
|
||||||
cost += calc_mvd_cost(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
cost += calc_mvd_cost(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
||||||
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=large_hexbs_iterative,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs_iterative,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width,
|
||||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
||||||
|
@ -680,13 +680,13 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
|
||||||
const vector2d_t *offset = &small_hexbs[i];
|
const vector2d_t *offset = &small_hexbs[i];
|
||||||
unsigned cost;
|
unsigned cost;
|
||||||
{
|
{
|
||||||
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS);
|
PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
|
||||||
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
||||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
||||||
block_width, block_width, max_lcu_below);
|
block_width, block_width, max_lcu_below);
|
||||||
cost += calc_mvd_cost(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
cost += calc_mvd_cost(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
||||||
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, state->encoder_control->threadqueue, "type=sad,step=small_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=small_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width,
|
||||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
||||||
|
|
|
@ -307,7 +307,7 @@ int kvz_threadqueue_init(threadqueue_queue_t * const threadqueue, int thread_cou
|
||||||
static void threadqueue_free_job(threadqueue_queue_t * const threadqueue, int i)
|
static void threadqueue_free_job(threadqueue_queue_t * const threadqueue, int i)
|
||||||
{
|
{
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
#if _DEBUG & _DEBUG_PERF_JOB
|
#if _DEBUG & KVZ_PERF_JOB
|
||||||
int j;
|
int j;
|
||||||
GET_TIME(&threadqueue->queue[i]->debug_clock_dequeue);
|
GET_TIME(&threadqueue->queue[i]->debug_clock_dequeue);
|
||||||
fprintf(threadqueue->debug_log, "%p\t%d\t%lf\t+%lf\t+%lf\t+%lf\t%s\n", threadqueue->queue[i], threadqueue->queue[i]->debug_worker_id, CLOCK_T_AS_DOUBLE(threadqueue->queue[i]->debug_clock_enqueue), CLOCK_T_DIFF(threadqueue->queue[i]->debug_clock_enqueue, threadqueue->queue[i]->debug_clock_start), CLOCK_T_DIFF(threadqueue->queue[i]->debug_clock_start, threadqueue->queue[i]->debug_clock_stop), CLOCK_T_DIFF(threadqueue->queue[i]->debug_clock_stop, threadqueue->queue[i]->debug_clock_dequeue), threadqueue->queue[i]->debug_description);
|
fprintf(threadqueue->debug_log, "%p\t%d\t%lf\t+%lf\t+%lf\t+%lf\t%s\n", threadqueue->queue[i], threadqueue->queue[i]->debug_worker_id, CLOCK_T_AS_DOUBLE(threadqueue->queue[i]->debug_clock_enqueue), CLOCK_T_DIFF(threadqueue->queue[i]->debug_clock_enqueue, threadqueue->queue[i]->debug_clock_start), CLOCK_T_DIFF(threadqueue->queue[i]->debug_clock_start, threadqueue->queue[i]->debug_clock_stop), CLOCK_T_DIFF(threadqueue->queue[i]->debug_clock_stop, threadqueue->queue[i]->debug_clock_dequeue), threadqueue->queue[i]->debug_description);
|
||||||
|
@ -334,7 +334,7 @@ static void threadqueue_free_jobs(threadqueue_queue_t * const threadqueue) {
|
||||||
threadqueue->queue_count = 0;
|
threadqueue->queue_count = 0;
|
||||||
threadqueue->queue_start = 0;
|
threadqueue->queue_start = 0;
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
#if _DEBUG & _DEBUG_PERF_JOB
|
#if _DEBUG & KVZ_PERF_JOB
|
||||||
{
|
{
|
||||||
CLOCK_T time;
|
CLOCK_T time;
|
||||||
GET_TIME(&time);
|
GET_TIME(&time);
|
||||||
|
@ -512,9 +512,9 @@ threadqueue_job_t * kvz_threadqueue_submit(threadqueue_queue_t * const threadque
|
||||||
//No lock here... this should be constant
|
//No lock here... this should be constant
|
||||||
if (threadqueue->threads_count == 0) {
|
if (threadqueue->threads_count == 0) {
|
||||||
//FIXME: This should be improved in order to handle dependencies
|
//FIXME: This should be improved in order to handle dependencies
|
||||||
PERFORMANCE_MEASURE_START(_DEBUG_PERF_JOB);
|
PERFORMANCE_MEASURE_START(KVZ_PERF_JOB);
|
||||||
fptr(arg);
|
fptr(arg);
|
||||||
PERFORMANCE_MEASURE_END(_DEBUG_PERF_JOB, threadqueue, "%s", debug_description);
|
PERFORMANCE_MEASURE_END(KVZ_PERF_JOB, threadqueue, "%s", debug_description);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -119,6 +119,15 @@ int kvz_threadqueue_finalize(threadqueue_queue_t * threadqueue);
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
int threadqueue_log(threadqueue_queue_t * threadqueue, const CLOCK_T *start, const CLOCK_T *stop, const char* debug_description);
|
int threadqueue_log(threadqueue_queue_t * threadqueue, const CLOCK_T *start, const CLOCK_T *stop, const char* debug_description);
|
||||||
|
|
||||||
|
// Bitmasks for PERFORMANCE_MEASURE_START and PERFORMANCE_MEASURE_END.
|
||||||
|
#define KVZ_PERF_FRAME (1 << 0)
|
||||||
|
#define KVZ_PERF_JOB (1 << 1)
|
||||||
|
#define KVZ_PERF_LCU (1 << 2)
|
||||||
|
#define KVZ_PERF_SAOREC (1 << 3)
|
||||||
|
#define KVZ_PERF_BSLEAF (1 << 4)
|
||||||
|
#define KVZ_PERF_SEARCHCU (1 << 5)
|
||||||
|
#define KVZ_PERF_SEARCHPX (1 << 6)
|
||||||
|
|
||||||
#define IMPL_PERFORMANCE_MEASURE_START(mask) CLOCK_T start, stop; if ((_DEBUG) & mask) { GET_TIME(&start); }
|
#define IMPL_PERFORMANCE_MEASURE_START(mask) CLOCK_T start, stop; if ((_DEBUG) & mask) { GET_TIME(&start); }
|
||||||
#define IMPL_PERFORMANCE_MEASURE_END(mask, threadqueue, str, ...) { if ((_DEBUG) & mask) { GET_TIME(&stop); {char job_description[256]; sprintf(job_description, (str), __VA_ARGS__); threadqueue_log((threadqueue), &start, &stop, job_description);}} } \
|
#define IMPL_PERFORMANCE_MEASURE_END(mask, threadqueue, str, ...) { if ((_DEBUG) & mask) { GET_TIME(&stop); {char job_description[256]; sprintf(job_description, (str), __VA_ARGS__); threadqueue_log((threadqueue), &start, &stop, job_description);}} } \
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue