mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 11:24:05 +00:00
Fix bug causing unnecessary copying of memory
This bug caused a single tiles worth of lcu_info_t structs to be copied unnecessarily for every LCU in the frame. This obviously caused huge memory bandwidth issues when coding large frames without tiles. The effect was minimized somewhat with a large number of tiles, because only the current tile was copied. From context it is clear that this piece of code was supposed to copy a single tile or frame, once the frame was done, but because it was placed in a function which is called for every LCU, it copied the data for the LCU, but also lots of extra stuff. The fix is to copy only the current LCU instead of the whole tile.
This commit is contained in:
parent
d8b50d6951
commit
513e80bcca
|
@ -247,6 +247,8 @@ static void encoder_state_worker_encode_lcu(void * opaque) {
|
|||
// Copy LCU cu_array to main states cu_array, because that is the only one
|
||||
// which is given to the next frame through image_list_t.
|
||||
{
|
||||
PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME);
|
||||
|
||||
encoder_state_t *main_state = state;
|
||||
while (main_state->parent) main_state = main_state->parent;
|
||||
assert(main_state != state);
|
||||
|
@ -256,11 +258,17 @@ static void encoder_state_worker_encode_lcu(void * opaque) {
|
|||
unsigned main_width_in_scu = main_state->tile->frame->width_in_lcu << MAX_DEPTH;
|
||||
unsigned tile_x = state->tile->lcu_offset_x;
|
||||
unsigned tile_y = state->tile->lcu_offset_y;
|
||||
for (unsigned y = 0; y < child_height_in_scu; ++y) {
|
||||
cu_info_t *main_row = &main_state->tile->frame->cu_array->data[tile_x + (tile_y + y) * main_width_in_scu];
|
||||
cu_info_t *child_row = &state->tile->frame->cu_array->data[y * child_width_in_scu];
|
||||
memcpy(main_row, child_row, sizeof(cu_info_t) * child_width_in_scu);
|
||||
|
||||
unsigned x = lcu->position.x << MAX_DEPTH;
|
||||
unsigned y = lcu->position.y << MAX_DEPTH;
|
||||
|
||||
for (unsigned lcu_row = 0; lcu_row < 8; ++lcu_row) {
|
||||
cu_info_t *main_row = &main_state->tile->frame->cu_array->data[x + tile_x + (y + tile_y + lcu_row) * main_width_in_scu];
|
||||
cu_info_t *child_row = &state->tile->frame->cu_array->data[x + (y + lcu_row) * child_width_in_scu];
|
||||
memcpy(main_row, child_row, sizeof(cu_info_t) * 8);
|
||||
}
|
||||
|
||||
PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, state->encoder_control->threadqueue, "type=copy_cuinfo,frame=%d,tile=%d", state->global->frame, state->tile->id);
|
||||
}
|
||||
|
||||
//Now write data to bitstream (required to have a correct CABAC state)
|
||||
|
|
Loading…
Reference in a new issue