mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-30 20:54:07 +00:00
Improve logging
This commit is contained in:
parent
f1b303a2d2
commit
8502f3d850
|
@ -729,21 +729,21 @@ static void encoder_state_write_bitstream_main(encoder_state * const main_state)
|
||||||
main_state->global->is_radl_frame ? NAL_IDR_W_RADL : NAL_TRAIL_R, 0, long_start_code);
|
main_state->global->is_radl_frame ? NAL_IDR_W_RADL : NAL_TRAIL_R, 0, long_start_code);
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
PERFORMANCE_MEASURE_START();
|
PERFORMANCE_MEASURE_START(_DEBUG_PERF_FRAME_LEVEL);
|
||||||
for (i = 0; main_state->children[i].encoder_control; ++i) {
|
for (i = 0; main_state->children[i].encoder_control; ++i) {
|
||||||
//Append bitstream to main stream
|
//Append bitstream to main stream
|
||||||
bitstream_append(&main_state->stream, &main_state->children[i].stream);
|
bitstream_append(&main_state->stream, &main_state->children[i].stream);
|
||||||
//FIXME: Move this...
|
//FIXME: Move this...
|
||||||
bitstream_clear(&main_state->children[i].stream);
|
bitstream_clear(&main_state->children[i].stream);
|
||||||
}
|
}
|
||||||
PERFORMANCE_MEASURE_END(main_state->encoder_control->threadqueue, "type=write_bitstream_append,frame=%d,encoder_type=%c", main_state->global->frame, main_state->type);
|
PERFORMANCE_MEASURE_END(_DEBUG_PERF_FRAME_LEVEL, main_state->encoder_control->threadqueue, "type=write_bitstream_append,frame=%d,encoder_type=%c", main_state->global->frame, main_state->type);
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
PERFORMANCE_MEASURE_START();
|
PERFORMANCE_MEASURE_START(_DEBUG_PERF_FRAME_LEVEL);
|
||||||
// Calculate checksum
|
// Calculate checksum
|
||||||
add_checksum(main_state);
|
add_checksum(main_state);
|
||||||
PERFORMANCE_MEASURE_END(main_state->encoder_control->threadqueue, "type=write_bitstream_checksum,frame=%d,encoder_type=%c", main_state->global->frame, main_state->type);
|
PERFORMANCE_MEASURE_END(_DEBUG_PERF_FRAME_LEVEL, main_state->encoder_control->threadqueue, "type=write_bitstream_checksum,frame=%d,encoder_type=%c", main_state->global->frame, main_state->type);
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(main_state->tile->frame->poc == main_state->global->poc);
|
assert(main_state->tile->frame->poc == main_state->global->poc);
|
||||||
|
|
|
@ -316,29 +316,32 @@ static void encoder_state_encode_leaf(encoder_state * const encoder_state) {
|
||||||
//If we're not using wavefronts, or we have a WAVEFRONT_ROW which is the single child of its parent, then we should not use parallelism
|
//If we're not using wavefronts, or we have a WAVEFRONT_ROW which is the single child of its parent, then we should not use parallelism
|
||||||
if (encoder_state->type != ENCODER_STATE_TYPE_WAVEFRONT_ROW || (encoder_state->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW && !encoder_state->parent->children[1].encoder_control)) {
|
if (encoder_state->type != ENCODER_STATE_TYPE_WAVEFRONT_ROW || (encoder_state->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW && !encoder_state->parent->children[1].encoder_control)) {
|
||||||
for (i = 0; i < encoder_state->lcu_order_count; ++i) {
|
for (i = 0; i < encoder_state->lcu_order_count; ++i) {
|
||||||
PERFORMANCE_MEASURE_START();
|
PERFORMANCE_MEASURE_START(_DEBUG_PERF_ENCODE_LCU);
|
||||||
|
|
||||||
encoder_state_worker_encode_lcu(&encoder_state->lcu_order[i]);
|
encoder_state_worker_encode_lcu(&encoder_state->lcu_order[i]);
|
||||||
|
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
{
|
{
|
||||||
const lcu_order_element * const lcu = &encoder_state->lcu_order[i];
|
const lcu_order_element * const lcu = &encoder_state->lcu_order[i];
|
||||||
PERFORMANCE_MEASURE_END(encoder_state->encoder_control->threadqueue, "type=search_lcu,frame=%d,tile=%d,slice=%d,position_x=%d,position_y=%d", encoder_state->global->frame, encoder_state->tile->id, encoder_state->slice->id, lcu->position.x + encoder_state->tile->lcu_offset_x, lcu->position.y + encoder_state->tile->lcu_offset_y);
|
PERFORMANCE_MEASURE_END(_DEBUG_PERF_ENCODE_LCU, encoder_state->encoder_control->threadqueue, "type=encode_lcu,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d", encoder_state->global->frame, encoder_state->tile->id, encoder_state->slice->id, lcu->position_px.x + encoder_state->tile->lcu_offset_x * LCU_WIDTH, lcu->position_px.x + encoder_state->tile->lcu_offset_x * LCU_WIDTH + lcu->size.x - 1, lcu->position_px.y + encoder_state->tile->lcu_offset_y * LCU_WIDTH, lcu->position_px.y + encoder_state->tile->lcu_offset_y * LCU_WIDTH + lcu->size.y - 1);
|
||||||
}
|
}
|
||||||
#endif //_DEBUG
|
#endif //_DEBUG
|
||||||
}
|
}
|
||||||
|
|
||||||
if (encoder->sao_enable) {
|
if (encoder->sao_enable) {
|
||||||
PERFORMANCE_MEASURE_START();
|
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SAO_RECONSTRUCT_FRAME);
|
||||||
sao_reconstruct_frame(encoder_state);
|
sao_reconstruct_frame(encoder_state);
|
||||||
PERFORMANCE_MEASURE_END(encoder_state->encoder_control->threadqueue, "type=sao_reconstruct_frame,frame=%d,tile=%d,slice=%d,row=%d-%d", encoder_state->global->frame, encoder_state->tile->id, encoder_state->slice->id, encoder_state->lcu_order[0].position.y + encoder_state->tile->lcu_offset_y, encoder_state->lcu_order[encoder_state->lcu_order_count-1].position.y + encoder_state->tile->lcu_offset_y);
|
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SAO_RECONSTRUCT_FRAME, encoder_state->encoder_control->threadqueue, "type=sao_reconstruct_frame,frame=%d,tile=%d,slice=%d,row=%d-%d,px_x=%d-%d,px_y=%d-%d", encoder_state->global->frame, encoder_state->tile->id, encoder_state->slice->id, encoder_state->lcu_order[0].position.y + encoder_state->tile->lcu_offset_y, encoder_state->lcu_order[encoder_state->lcu_order_count-1].position.y + encoder_state->tile->lcu_offset_y,
|
||||||
|
encoder_state->tile->lcu_offset_x * LCU_WIDTH, encoder_state->tile->frame->width + encoder_state->tile->lcu_offset_x * LCU_WIDTH - 1,
|
||||||
|
encoder_state->tile->lcu_offset_y * LCU_WIDTH, encoder_state->tile->frame->height + encoder_state->tile->lcu_offset_y * LCU_WIDTH - 1
|
||||||
|
);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (i = 0; i < encoder_state->lcu_order_count; ++i) {
|
for (i = 0; i < encoder_state->lcu_order_count; ++i) {
|
||||||
const lcu_order_element * const lcu = &encoder_state->lcu_order[i];
|
const lcu_order_element * const lcu = &encoder_state->lcu_order[i];
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
char job_description[256];
|
char job_description[256];
|
||||||
sprintf(job_description, "type=search_lcu,frame=%d,tile=%d,slice=%d,row=%d,position_x=%d,position_y=%d", encoder_state->global->frame, encoder_state->tile->id, encoder_state->slice->id, encoder_state->wfrow->lcu_offset_y, lcu->position.x + encoder_state->tile->lcu_offset_x, lcu->position.y + encoder_state->tile->lcu_offset_y);
|
sprintf(job_description, "type=encode_lcu,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d", encoder_state->global->frame, encoder_state->tile->id, encoder_state->slice->id, lcu->position_px.x + encoder_state->tile->lcu_offset_x * LCU_WIDTH, lcu->position_px.x + encoder_state->tile->lcu_offset_x * LCU_WIDTH + lcu->size.x - 1, lcu->position_px.y + encoder_state->tile->lcu_offset_y * LCU_WIDTH, lcu->position_px.y + encoder_state->tile->lcu_offset_y * LCU_WIDTH + lcu->size.y - 1);
|
||||||
#else
|
#else
|
||||||
char* job_description = NULL;
|
char* job_description = NULL;
|
||||||
#endif
|
#endif
|
||||||
|
@ -392,14 +395,14 @@ static void encoder_state_worker_encode_children(void * opaque) {
|
||||||
encoder_state_encode(sub_state);
|
encoder_state_encode(sub_state);
|
||||||
if (sub_state->is_leaf) {
|
if (sub_state->is_leaf) {
|
||||||
if (sub_state->type != ENCODER_STATE_TYPE_WAVEFRONT_ROW) {
|
if (sub_state->type != ENCODER_STATE_TYPE_WAVEFRONT_ROW) {
|
||||||
PERFORMANCE_MEASURE_START();
|
PERFORMANCE_MEASURE_START(_DEBUG_PERF_WRITE_BITSTREAM_LEAF);
|
||||||
encoder_state_write_bitstream_leaf(sub_state);
|
encoder_state_write_bitstream_leaf(sub_state);
|
||||||
PERFORMANCE_MEASURE_END(sub_state->encoder_control->threadqueue, "type=encoder_state_write_bitstream_leaf,frame=%d,tile=%d,slice=%d,row=%d-%d", sub_state->global->frame, sub_state->tile->id, sub_state->slice->id, sub_state->lcu_order[0].position.y + sub_state->tile->lcu_offset_y, sub_state->lcu_order[sub_state->lcu_order_count-1].position.y + sub_state->tile->lcu_offset_y);
|
PERFORMANCE_MEASURE_END(_DEBUG_PERF_WRITE_BITSTREAM_LEAF, sub_state->encoder_control->threadqueue, "type=encoder_state_write_bitstream_leaf,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d", sub_state->global->frame, sub_state->tile->id, sub_state->slice->id, sub_state->lcu_order[0].position_px.x + sub_state->tile->lcu_offset_x * LCU_WIDTH, sub_state->lcu_order[sub_state->lcu_order_count-1].position_px.x + sub_state->lcu_order[sub_state->lcu_order_count-1].size.x + sub_state->tile->lcu_offset_x * LCU_WIDTH - 1, sub_state->lcu_order[0].position_px.y + sub_state->tile->lcu_offset_y * LCU_WIDTH, sub_state->lcu_order[sub_state->lcu_order_count-1].position_px.y + sub_state->lcu_order[sub_state->lcu_order_count-1].size.y + sub_state->tile->lcu_offset_y * LCU_WIDTH - 1);
|
||||||
} else {
|
} else {
|
||||||
threadqueue_job *job;
|
threadqueue_job *job;
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
char job_description[256];
|
char job_description[256];
|
||||||
sprintf(job_description, "type=encoder_state_write_bitstream_leaf,frame=%d,tile=%d,slice=%d,row=%d", sub_state->global->frame, sub_state->tile->id, sub_state->slice->id, sub_state->wfrow->lcu_offset_y);
|
sprintf(job_description, "type=encoder_state_write_bitstream_leaf,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d", sub_state->global->frame, sub_state->tile->id, sub_state->slice->id, sub_state->lcu_order[0].position_px.x + sub_state->tile->lcu_offset_x * LCU_WIDTH, sub_state->lcu_order[sub_state->lcu_order_count-1].position_px.x + sub_state->lcu_order[sub_state->lcu_order_count-1].size.x + sub_state->tile->lcu_offset_x * LCU_WIDTH - 1, sub_state->lcu_order[0].position_px.y + sub_state->tile->lcu_offset_y * LCU_WIDTH, sub_state->lcu_order[sub_state->lcu_order_count-1].position_px.y + sub_state->lcu_order[sub_state->lcu_order_count-1].size.y + sub_state->tile->lcu_offset_y * LCU_WIDTH - 1);
|
||||||
#else
|
#else
|
||||||
char* job_description = NULL;
|
char* job_description = NULL;
|
||||||
#endif
|
#endif
|
||||||
|
@ -516,13 +519,15 @@ static void encoder_state_encode(encoder_state * const main_state) {
|
||||||
char job_description[256];
|
char job_description[256];
|
||||||
switch (main_state->children[i].type) {
|
switch (main_state->children[i].type) {
|
||||||
case ENCODER_STATE_TYPE_TILE:
|
case ENCODER_STATE_TYPE_TILE:
|
||||||
sprintf(job_description, "frame=%d,tile=%d,row=%d-%d,position_x=%d,position_y=%d", main_state->children[i].global->frame, main_state->children[i].tile->id, main_state->children[i].lcu_order[0].position.y + main_state->children[i].tile->lcu_offset_y, main_state->children[i].lcu_order[main_state->children[i].lcu_order_count-1].position.y + main_state->children[i].tile->lcu_offset_y, main_state->children[i].tile->lcu_offset_x, main_state->children[i].tile->lcu_offset_y);
|
sprintf(job_description, "type=encode_child,frame=%d,tile=%d,row=%d-%d,px_x=%d-%d,px_y=%d-%d", main_state->children[i].global->frame, main_state->children[i].tile->id, main_state->children[i].lcu_order[0].position.y + main_state->children[i].tile->lcu_offset_y, main_state->children[i].lcu_order[0].position.y + main_state->children[i].tile->lcu_offset_y,
|
||||||
|
main_state->children[i].lcu_order[0].position_px.x + main_state->children[i].tile->lcu_offset_x * LCU_WIDTH, main_state->children[i].lcu_order[main_state->children[i].lcu_order_count-1].position_px.x + main_state->children[i].lcu_order[main_state->children[i].lcu_order_count-1].size.x + main_state->children[i].tile->lcu_offset_x * LCU_WIDTH - 1,
|
||||||
|
main_state->children[i].lcu_order[0].position_px.y + main_state->children[i].tile->lcu_offset_y * LCU_WIDTH, main_state->children[i].lcu_order[main_state->children[i].lcu_order_count-1].position_px.y + main_state->children[i].lcu_order[main_state->children[i].lcu_order_count-1].size.y + main_state->children[i].tile->lcu_offset_y * LCU_WIDTH - 1);
|
||||||
break;
|
break;
|
||||||
case ENCODER_STATE_TYPE_SLICE:
|
case ENCODER_STATE_TYPE_SLICE:
|
||||||
sprintf(job_description, "frame=%d,slice=%d,start_in_ts=%d", main_state->children[i].global->frame, main_state->children[i].slice->id, main_state->children[i].slice->start_in_ts);
|
sprintf(job_description, "type=encode_child,frame=%d,slice=%d,start_in_ts=%d", main_state->children[i].global->frame, main_state->children[i].slice->id, main_state->children[i].slice->start_in_ts);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
sprintf(job_description, "frame=%d,invalid", main_state->children[i].global->frame);
|
sprintf(job_description, "type=encode_child,frame=%d,invalid", main_state->children[i].global->frame);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
@ -554,7 +559,7 @@ static void encoder_state_encode(encoder_state * const main_state) {
|
||||||
threadqueue_job *job;
|
threadqueue_job *job;
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
char job_description[256];
|
char job_description[256];
|
||||||
sprintf(job_description, "type=sao,frame=%d,tile=%d,position_y=%d", main_state->global->frame, main_state->tile->id, y + main_state->tile->lcu_offset_y);
|
sprintf(job_description, "type=sao,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d", main_state->global->frame, main_state->tile->id, main_state->tile->lcu_offset_x * LCU_WIDTH, main_state->tile->lcu_offset_x * LCU_WIDTH + main_state->tile->frame->width - 1, (main_state->tile->lcu_offset_y + y) * LCU_WIDTH, MIN(main_state->tile->lcu_offset_y * LCU_WIDTH + main_state->tile->frame->height, (main_state->tile->lcu_offset_y + y + 1) * LCU_WIDTH)-1);
|
||||||
#else
|
#else
|
||||||
char* job_description = NULL;
|
char* job_description = NULL;
|
||||||
#endif
|
#endif
|
||||||
|
@ -683,14 +688,14 @@ static void _encode_one_frame_add_bitstream_deps(const encoder_state * const enc
|
||||||
void encode_one_frame(encoder_state * const main_state)
|
void encode_one_frame(encoder_state * const main_state)
|
||||||
{
|
{
|
||||||
{
|
{
|
||||||
PERFORMANCE_MEASURE_START();
|
PERFORMANCE_MEASURE_START(_DEBUG_PERF_FRAME_LEVEL);
|
||||||
encoder_state_new_frame(main_state);
|
encoder_state_new_frame(main_state);
|
||||||
PERFORMANCE_MEASURE_END(main_state->encoder_control->threadqueue, "type=new_frame,frame=%d,poc=%d", main_state->global->frame, main_state->global->poc);
|
PERFORMANCE_MEASURE_END(_DEBUG_PERF_FRAME_LEVEL, main_state->encoder_control->threadqueue, "type=new_frame,frame=%d,poc=%d", main_state->global->frame, main_state->global->poc);
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
PERFORMANCE_MEASURE_START();
|
PERFORMANCE_MEASURE_START(_DEBUG_PERF_FRAME_LEVEL);
|
||||||
encoder_state_encode(main_state);
|
encoder_state_encode(main_state);
|
||||||
PERFORMANCE_MEASURE_END(main_state->encoder_control->threadqueue, "type=encode,frame=%d", main_state->global->frame);
|
PERFORMANCE_MEASURE_END(_DEBUG_PERF_FRAME_LEVEL, main_state->encoder_control->threadqueue, "type=encode,frame=%d", main_state->global->frame);
|
||||||
}
|
}
|
||||||
//threadqueue_flush(main_state->encoder_control->threadqueue);
|
//threadqueue_flush(main_state->encoder_control->threadqueue);
|
||||||
{
|
{
|
||||||
|
|
10
src/global.h
10
src/global.h
|
@ -184,6 +184,16 @@ typedef int16_t coefficient;
|
||||||
|
|
||||||
#define MAX_TR_DYNAMIC_RANGE 15
|
#define MAX_TR_DYNAMIC_RANGE 15
|
||||||
|
|
||||||
|
|
||||||
|
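//DEBUG BITMASK: bit flags tested as (_DEBUG & flag) by the PERFORMANCE_MEASURE_* macros to select which measurements are logged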
|
||||||
|
#define _DEBUG_PERF_FRAME_LEVEL 0x0001
|
||||||
|
#define _DEBUG_PERF_JOB 0x0002
|
||||||
|
#define _DEBUG_PERF_ENCODE_LCU 0x0004
|
||||||
|
#define _DEBUG_PERF_SAO_RECONSTRUCT_FRAME 0x0008
|
||||||
|
#define _DEBUG_PERF_WRITE_BITSTREAM_LEAF 0x0010
|
||||||
|
#define _DEBUG_PERF_SEARCH_PIXELS 0x0020
|
||||||
|
#define _DEBUG_PERF_SEARCH_CU 0x0040
|
||||||
|
|
||||||
//Constants
|
//Constants
|
||||||
typedef enum { COLOR_Y = 0, COLOR_U, COLOR_V, NUM_COLORS } color_index;
|
typedef enum { COLOR_Y = 0, COLOR_U, COLOR_V, NUM_COLORS } color_index;
|
||||||
enum { SLICE_B = 0, SLICE_P = 1, SLICE_I = 2 };
|
enum { SLICE_B = 0, SLICE_P = 1, SLICE_I = 2 };
|
||||||
|
|
51
src/search.c
51
src/search.c
|
@ -209,13 +209,18 @@ static unsigned hexagon_search(const encoder_state * const encoder_state, unsign
|
||||||
const vector2d *pattern = &large_hexbs[i];
|
const vector2d *pattern = &large_hexbs[i];
|
||||||
unsigned cost;
|
unsigned cost;
|
||||||
{
|
{
|
||||||
PERFORMANCE_MEASURE_START();
|
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS);
|
||||||
cost = image_calc_sad(pic, ref, orig->x, orig->y,
|
cost = image_calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x,
|
(encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x,
|
||||||
(encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y,
|
(encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y,
|
||||||
block_width, block_width, max_lcu_below);
|
block_width, block_width, max_lcu_below);
|
||||||
cost += calc_mvd_cost(encoder_state, mv.x + pattern->x, mv.y + pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
cost += calc_mvd_cost(encoder_state, mv.x + pattern->x, mv.y + pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
||||||
PERFORMANCE_MEASURE_END(encoder_state->encoder_control->threadqueue, "type=sad,frame=%d,ref=%d,x=%d,y=%d,ref_x=%d,ref_y=%d,width=%d,height=%d", encoder_state->global->frame, ref->poc - encoder_state->global->poc, orig->x, orig->y, (encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x, (encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y, block_width, block_width);
|
|
||||||
|
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, encoder_state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,ref=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", encoder_state->global->frame, ref->poc - encoder_state->global->poc, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
||||||
|
orig->x + mv.x + pattern->x,
|
||||||
|
orig->x + mv.x + pattern->x + block_width,
|
||||||
|
orig->y + mv.y + pattern->y,
|
||||||
|
orig->y + mv.y + pattern->y + block_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cost < best_cost) {
|
if (cost < best_cost) {
|
||||||
|
@ -229,13 +234,17 @@ static unsigned hexagon_search(const encoder_state * const encoder_state, unsign
|
||||||
if (!(mv.x == 0 && mv.y == 0)) {
|
if (!(mv.x == 0 && mv.y == 0)) {
|
||||||
unsigned cost;
|
unsigned cost;
|
||||||
{
|
{
|
||||||
PERFORMANCE_MEASURE_START();
|
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS);
|
||||||
cost = image_calc_sad(pic, ref, orig->x, orig->y,
|
cost = image_calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x,
|
(encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x,
|
||||||
(encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y,
|
(encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y,
|
||||||
block_width, block_width, max_lcu_below);
|
block_width, block_width, max_lcu_below);
|
||||||
cost += calc_mvd_cost(encoder_state, 0, 0, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
cost += calc_mvd_cost(encoder_state, 0, 0, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
||||||
PERFORMANCE_MEASURE_END(encoder_state->encoder_control->threadqueue, "type=sad,frame=%d,ref=%d,x=%d,y=%d,ref_x=%d,ref_y=%d,width=%d,height=%d", encoder_state->global->frame, ref->poc - encoder_state->global->poc, orig->x, orig->y, (encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x, (encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y, block_width, block_width);
|
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, encoder_state->encoder_control->threadqueue, "type=sad,step=00vector,frame=%d,ref=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", encoder_state->global->frame, ref->poc - encoder_state->global->poc, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
||||||
|
orig->x,
|
||||||
|
orig->x + block_width,
|
||||||
|
orig->y,
|
||||||
|
orig->y + block_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the 0,0 is better, redo the hexagon around that point.
|
// If the 0,0 is better, redo the hexagon around that point.
|
||||||
|
@ -250,13 +259,17 @@ static unsigned hexagon_search(const encoder_state * const encoder_state, unsign
|
||||||
const vector2d *pattern = &large_hexbs[i];
|
const vector2d *pattern = &large_hexbs[i];
|
||||||
unsigned cost;
|
unsigned cost;
|
||||||
{
|
{
|
||||||
PERFORMANCE_MEASURE_START();
|
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS);
|
||||||
cost = image_calc_sad(pic, ref, orig->x, orig->y,
|
cost = image_calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + pattern->x,
|
(encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + pattern->x,
|
||||||
(encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + pattern->y,
|
(encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + pattern->y,
|
||||||
block_width, block_width, max_lcu_below);
|
block_width, block_width, max_lcu_below);
|
||||||
cost += calc_mvd_cost(encoder_state, pattern->x, pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
cost += calc_mvd_cost(encoder_state, pattern->x, pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
||||||
PERFORMANCE_MEASURE_END(encoder_state->encoder_control->threadqueue, "type=sad,frame=%d,ref=%d,x=%d,y=%d,ref_x=%d,ref_y=%d,width=%d,height=%d", encoder_state->global->frame, ref->poc - encoder_state->global->poc, orig->x, orig->y, (encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + pattern->x, (encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + pattern->y, block_width, block_width);
|
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, encoder_state->encoder_control->threadqueue, "type=sad,step=large_hexbs_around00,frame=%d,ref=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", encoder_state->global->frame, ref->poc - encoder_state->global->poc, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
||||||
|
orig->x + pattern->x,
|
||||||
|
orig->x + pattern->x + block_width,
|
||||||
|
orig->y + pattern->y,
|
||||||
|
orig->y + pattern->y + block_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cost < best_cost) {
|
if (cost < best_cost) {
|
||||||
|
@ -290,13 +303,17 @@ static unsigned hexagon_search(const encoder_state * const encoder_state, unsign
|
||||||
const vector2d *offset = &large_hexbs[start + i];
|
const vector2d *offset = &large_hexbs[start + i];
|
||||||
unsigned cost;
|
unsigned cost;
|
||||||
{
|
{
|
||||||
PERFORMANCE_MEASURE_START();
|
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS);
|
||||||
cost = image_calc_sad(pic, ref, orig->x, orig->y,
|
cost = image_calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
orig->x + mv.x + offset->x,
|
||||||
(encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
orig->y + mv.y + offset->y,
|
||||||
block_width, block_width, max_lcu_below);
|
block_width, block_width, max_lcu_below);
|
||||||
cost += calc_mvd_cost(encoder_state, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
cost += calc_mvd_cost(encoder_state, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
||||||
PERFORMANCE_MEASURE_END(encoder_state->encoder_control->threadqueue, "type=sad,frame=%d,ref=%d,x=%d,y=%d,ref_x=%d,ref_y=%d,width=%d,height=%d", encoder_state->global->frame, ref->poc - encoder_state->global->poc, orig->x, orig->y, (encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, (encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y, block_width, block_width);
|
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, encoder_state->encoder_control->threadqueue, "type=sad,step=large_hexbs_iterative,frame=%d,ref=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", encoder_state->global->frame, ref->poc - encoder_state->global->poc, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
||||||
|
orig->x + mv.x + offset->x,
|
||||||
|
orig->x + mv.x + offset->x + block_width,
|
||||||
|
orig->y + mv.y + offset->y,
|
||||||
|
orig->y + mv.y + offset->y + block_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cost < best_cost) {
|
if (cost < best_cost) {
|
||||||
|
@ -318,13 +335,17 @@ static unsigned hexagon_search(const encoder_state * const encoder_state, unsign
|
||||||
const vector2d *offset = &small_hexbs[i];
|
const vector2d *offset = &small_hexbs[i];
|
||||||
unsigned cost;
|
unsigned cost;
|
||||||
{
|
{
|
||||||
PERFORMANCE_MEASURE_START();
|
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_PIXELS);
|
||||||
cost = image_calc_sad(pic, ref, orig->x, orig->y,
|
cost = image_calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
(encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
||||||
(encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
(encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
||||||
block_width, block_width, max_lcu_below);
|
block_width, block_width, max_lcu_below);
|
||||||
cost += calc_mvd_cost(encoder_state, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
cost += calc_mvd_cost(encoder_state, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
||||||
PERFORMANCE_MEASURE_END(encoder_state->encoder_control->threadqueue, "type=sad,frame=%d,ref=%d,x=%d,y=%d,ref_x=%d,ref_y=%d,width=%d,height=%d", encoder_state->global->frame, ref->poc - encoder_state->global->poc, orig->x, orig->y, (encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, (encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y, block_width, block_width);
|
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_PIXELS, encoder_state->encoder_control->threadqueue, "type=sad,step=small_hexbs,frame=%d,ref=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", encoder_state->global->frame, ref->poc - encoder_state->global->poc, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
||||||
|
orig->x + mv.x + offset->x,
|
||||||
|
orig->x + mv.x + offset->x + block_width,
|
||||||
|
orig->y + mv.y + offset->y,
|
||||||
|
orig->y + mv.y + offset->y + block_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cost > 0 && cost < best_cost) {
|
if (cost > 0 && cost < best_cost) {
|
||||||
|
@ -1126,10 +1147,10 @@ static int search_cu(encoder_state * const encoder_state, int x, int y, int dept
|
||||||
int cost = MAX_INT;
|
int cost = MAX_INT;
|
||||||
cu_info *cur_cu;
|
cu_info *cur_cu;
|
||||||
int x_local = (x&0x3f), y_local = (y&0x3f);
|
int x_local = (x&0x3f), y_local = (y&0x3f);
|
||||||
#if _DEBUG
|
#ifdef _DEBUG
|
||||||
int debug_split = 0;
|
int debug_split = 0;
|
||||||
#endif
|
#endif
|
||||||
PERFORMANCE_MEASURE_START();
|
PERFORMANCE_MEASURE_START(_DEBUG_PERF_SEARCH_CU);
|
||||||
|
|
||||||
// Stop recursion if the CU is completely outside the frame.
|
// Stop recursion if the CU is completely outside the frame.
|
||||||
if (x >= frame->width || y >= frame->height) {
|
if (x >= frame->width || y >= frame->height) {
|
||||||
|
@ -1237,7 +1258,7 @@ static int search_cu(encoder_state * const encoder_state, int x, int y, int dept
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
PERFORMANCE_MEASURE_END(encoder_state->encoder_control->threadqueue, "type=search_cu,frame=%d,tile=%d,slice=%d,x=%d,y=%d,depth=%d,split=%d,cur_cu_is_intra=%d", encoder_state->global->frame, encoder_state->tile->id, encoder_state->slice->id, x, y, depth, debug_split, (cur_cu->type==CU_INTRA)?1:0);
|
PERFORMANCE_MEASURE_END(_DEBUG_PERF_SEARCH_CU, encoder_state->encoder_control->threadqueue, "type=search_cu,frame=%d,tile=%d,slice=%d,x=%d,y=%d,depth=%d,split=%d,cur_cu_is_intra=%d", encoder_state->global->frame, encoder_state->tile->id, encoder_state->slice->id, x, y, depth, debug_split, (cur_cu->type==CU_INTRA)?1:0);
|
||||||
|
|
||||||
return cost;
|
return cost;
|
||||||
}
|
}
|
||||||
|
|
|
@ -290,6 +290,7 @@ int threadqueue_init(threadqueue_queue * const threadqueue, int thread_count, in
|
||||||
static void threadqueue_free_job(threadqueue_queue * const threadqueue, int i)
|
static void threadqueue_free_job(threadqueue_queue * const threadqueue, int i)
|
||||||
{
|
{
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
|
#if _DEBUG & _DEBUG_PERF_JOB
|
||||||
int j;
|
int j;
|
||||||
GET_TIME(&threadqueue->queue[i]->debug_clock_dequeue);
|
GET_TIME(&threadqueue->queue[i]->debug_clock_dequeue);
|
||||||
fprintf(threadqueue->debug_log, "%p\t%d\t%lf\t+%lf\t+%lf\t+%lf\t%s\n", threadqueue->queue[i], threadqueue->queue[i]->debug_worker_id, CLOCK_T_AS_DOUBLE(threadqueue->queue[i]->debug_clock_enqueue), CLOCK_T_DIFF(threadqueue->queue[i]->debug_clock_enqueue, threadqueue->queue[i]->debug_clock_start), CLOCK_T_DIFF(threadqueue->queue[i]->debug_clock_start, threadqueue->queue[i]->debug_clock_stop), CLOCK_T_DIFF(threadqueue->queue[i]->debug_clock_stop, threadqueue->queue[i]->debug_clock_dequeue), threadqueue->queue[i]->debug_description);
|
fprintf(threadqueue->debug_log, "%p\t%d\t%lf\t+%lf\t+%lf\t+%lf\t%s\n", threadqueue->queue[i], threadqueue->queue[i]->debug_worker_id, CLOCK_T_AS_DOUBLE(threadqueue->queue[i]->debug_clock_enqueue), CLOCK_T_DIFF(threadqueue->queue[i]->debug_clock_enqueue, threadqueue->queue[i]->debug_clock_start), CLOCK_T_DIFF(threadqueue->queue[i]->debug_clock_start, threadqueue->queue[i]->debug_clock_stop), CLOCK_T_DIFF(threadqueue->queue[i]->debug_clock_stop, threadqueue->queue[i]->debug_clock_dequeue), threadqueue->queue[i]->debug_description);
|
||||||
|
@ -299,6 +300,7 @@ static void threadqueue_free_job(threadqueue_queue * const threadqueue, int i)
|
||||||
}
|
}
|
||||||
|
|
||||||
FREE_POINTER(threadqueue->queue[i]->debug_description);
|
FREE_POINTER(threadqueue->queue[i]->debug_description);
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
FREE_POINTER(threadqueue->queue[i]->rdepends);
|
FREE_POINTER(threadqueue->queue[i]->rdepends);
|
||||||
|
|
||||||
|
@ -315,6 +317,7 @@ static void threadqueue_free_jobs(threadqueue_queue * const threadqueue) {
|
||||||
threadqueue->queue_count = 0;
|
threadqueue->queue_count = 0;
|
||||||
threadqueue->queue_start = 0;
|
threadqueue->queue_start = 0;
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
|
#if _DEBUG & _DEBUG_PERF_JOB
|
||||||
{
|
{
|
||||||
CLOCK_T time;
|
CLOCK_T time;
|
||||||
GET_TIME(&time);
|
GET_TIME(&time);
|
||||||
|
@ -322,6 +325,7 @@ static void threadqueue_free_jobs(threadqueue_queue * const threadqueue) {
|
||||||
fprintf(threadqueue->debug_log, "\t\t-\t-\t%lf\t-\tFLUSH\n", CLOCK_T_AS_DOUBLE(time));
|
fprintf(threadqueue->debug_log, "\t\t-\t-\t%lf\t-\tFLUSH\n", CLOCK_T_AS_DOUBLE(time));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
int threadqueue_finalize(threadqueue_queue * const threadqueue) {
|
int threadqueue_finalize(threadqueue_queue * const threadqueue) {
|
||||||
|
@ -491,9 +495,9 @@ threadqueue_job * threadqueue_submit(threadqueue_queue * const threadqueue, void
|
||||||
//No lock here... this should be constant
|
//No lock here... this should be constant
|
||||||
if (threadqueue->threads_count == 0) {
|
if (threadqueue->threads_count == 0) {
|
||||||
//FIXME: This should be improved in order to handle dependencies
|
//FIXME: This should be improved in order to handle dependencies
|
||||||
PERFORMANCE_MEASURE_START();
|
PERFORMANCE_MEASURE_START(_DEBUG_PERF_JOB);
|
||||||
fptr(arg);
|
fptr(arg);
|
||||||
PERFORMANCE_MEASURE_END(threadqueue, "%s", debug_description);
|
PERFORMANCE_MEASURE_END(_DEBUG_PERF_JOB, threadqueue, "%s", debug_description);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -118,12 +118,19 @@ int threadqueue_finalize(threadqueue_queue * threadqueue);
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
int threadqueue_log(threadqueue_queue * threadqueue, const CLOCK_T *start, const CLOCK_T *stop, const char* debug_description);
|
int threadqueue_log(threadqueue_queue * threadqueue, const CLOCK_T *start, const CLOCK_T *stop, const char* debug_description);
|
||||||
|
|
||||||
//This macro HAS TO BE at the beginning of a block
|
// GCC and Clang predefine __GNUC__ (no compiler predefines _GNUC), so test that to enable the attribute.
#ifdef __GNUC__
|
||||||
#define PERFORMANCE_MEASURE_START() CLOCK_T start, stop; GET_TIME(&start)
|
#define ATTR_UNUSED __attribute__((unused))
|
||||||
#define PERFORMANCE_MEASURE_END(threadqueue, str, ...) do {GET_TIME(&stop); {char job_description[256]; sprintf(job_description, (str), __VA_ARGS__); threadqueue_log((threadqueue), &start, &stop, job_description); }} while (0)
|
|
||||||
#else
|
#else
|
||||||
#define PERFORMANCE_MEASURE_START() do {} while (0)
|
#define ATTR_UNUSED
|
||||||
#define PERFORMANCE_MEASURE_END(threadqueue, str, ...) do {} while (0)
|
#endif
|
||||||
|
|
||||||
|
//This macro HAS TO BE at the beginning of a block
|
||||||
|
// Declares the start/stop timestamps and samples the start time when the bit(s) of mask are set in _DEBUG.
// Both operands are parenthesized so that a compound mask such as (A | B) is tested as a whole.
#define PERFORMANCE_MEASURE_START(mask) CLOCK_T start ATTR_UNUSED, stop ATTR_UNUSED; if ((_DEBUG) & (mask)) GET_TIME(&start)
|
||||||
|
// Samples the stop time, formats the description and hands it to threadqueue_log() when the bit(s) of mask are set in _DEBUG.
// Uses the start/stop variables declared by PERFORMANCE_MEASURE_START in the same scope; str needs at least one variadic argument.
// snprintf bounds the write to job_description, so an over-long description is truncated instead of overflowing the buffer.
#define PERFORMANCE_MEASURE_END(mask, threadqueue, str, ...) do {if ((_DEBUG) & (mask)) { GET_TIME(&stop); {char job_description[256]; snprintf(job_description, sizeof(job_description), (str), __VA_ARGS__); threadqueue_log((threadqueue), &start, &stop, job_description);}}} while (0)
|
||||||
|
|
||||||
|
#else
|
||||||
|
#define PERFORMANCE_MEASURE_START(mask) do {} while (0)
|
||||||
|
#define PERFORMANCE_MEASURE_END(mask, threadqueue, str, ...) do {} while (0)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Constraints:
|
/* Constraints:
|
||||||
|
|
Loading…
Reference in a new issue