Copy pixels more efficiently in lcu recon.

This commit is contained in:
Ari Lemmetti 2016-03-18 20:10:03 +02:00
parent 50923238cc
commit f4538ab474

View file

@ -373,14 +373,24 @@ void kvz_inter_recon_lcu(const encoder_state_t * const state,
}
}
} else { //If no overflow, we can copy without checking boundaries
// CHUNK is the unit type used by the luma copy loop that follows: each
// iteration copies one CHUNK from the reference picture into lcu->rec.y and
// advances x by sizeof(CHUNK)/sizeof(kvz_pixel) pixels. When LCU_WIDTH is 64
// a 64-bit integer is used so several pixels move per assignment; otherwise
// the copy falls back to a single kvz_pixel at a time. The macro is removed
// again with #undef CHUNK further down in the same function.
// NOTE(review): the copy dereferences kvz_pixel pointers cast to CHUNK*.
// That presumably relies on the target tolerating unaligned 64-bit accesses
// (the source offset includes mv[0]), on the compiler not exploiting
// effective-type/aliasing rules, and on width being a multiple of
// sizeof(CHUNK)/sizeof(kvz_pixel) -- none of this is visible here; confirm,
// or consider memcpy for the chunk copy.
#if LCU_WIDTH == 64
#define CHUNK int64_t
#else
#define CHUNK kvz_pixel
#endif
// Copy Luma
for (y = ypos; y < ypos + height; y++) {
int y_in_lcu = (y & ((LCU_WIDTH)-1));
coord_y = ((y + state->tile->lcu_offset_y * LCU_WIDTH) + mv[1]) * ref->width; // pre-calculate
for (x = xpos; x < xpos + width; x++) {
for (x = xpos; x < xpos + width; x+=sizeof(CHUNK)/sizeof(kvz_pixel)) {
int x_in_lcu = (x & ((LCU_WIDTH)-1));
kvz_pixel *dst = &(lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu]);
kvz_pixel *src = &(ref->y[coord_y + (x + state->tile->lcu_offset_x * LCU_WIDTH) + mv[0]]);
lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = ref->y[coord_y + (x + state->tile->lcu_offset_x * LCU_WIDTH) + mv[0]];
//Copy one or many pixels simultaneously
*(CHUNK*)dst = *(CHUNK*)src;
}
}
@ -397,6 +407,8 @@ void kvz_inter_recon_lcu(const encoder_state_t * const state,
}
}
}
#undef CHUNK
}
}
}