Pad the image buffer by 64 bytes from both ends

This will be necessary for an efficient and straightforward
implementation of hor_sad for blocks over 16 pixels wide: they cannot
use the shuffle trick, because inter-lane shuffling is so hard to do.
This commit is contained in:
Pauli Oikkonen 2019-01-30 01:33:39 +02:00
parent c36482a11a
commit 760bd0397d
3 changed files with 12 additions and 5 deletions

View file

@ -239,8 +239,10 @@ typedef int16_t coeff_t;
#ifdef _MSC_VER
// Buggy VS2010 throws intellisense warnings if void* is not cast.
// MALLOC: allocate uninitialized storage for `num` elements of `type`.
#define MALLOC(type, num) (type *)malloc(sizeof(type) * (num))
// MALLOC_SIMD_PADDED: like MALLOC, but the allocation is `padding` bytes
// larger. Note that `padding` is added to the byte count, so it is measured
// in bytes, not in elements of `type`. The macro only enlarges the
// allocation; positioning the usable data inside it is left to the caller.
#define MALLOC_SIMD_PADDED(type, num, padding) (type *)malloc(sizeof(type) * (num) + (padding))
#else
// Other compilers: same sizes as above, but the malloc result is not cast.
#define MALLOC(type, num) malloc(sizeof(type) * (num))
#define MALLOC_SIMD_PADDED(type, num, padding) malloc(sizeof(type) * (num) + (padding))
#endif
// Use memset through FILL and FILL_ARRAY when appropriate, such as when

View file

@ -47,6 +47,8 @@ kvz_picture * kvz_image_alloc(enum kvz_chroma_format chroma_format, const int32_
assert((width % 2) == 0);
assert((height % 2) == 0);
const size_t simd_padding_width = 64;
kvz_picture *im = MALLOC(kvz_picture, 1);
if (!im) return NULL;
@ -56,12 +58,13 @@ kvz_picture * kvz_image_alloc(enum kvz_chroma_format chroma_format, const int32_
im->chroma_format = chroma_format;
//Allocate memory
im->fulldata = MALLOC(kvz_pixel, (luma_size + 2 * chroma_size));
if (!im->fulldata) {
//Allocate memory, pad the full data buffer from both ends
im->fulldata_buf = MALLOC_SIMD_PADDED(kvz_pixel, (luma_size + 2 * chroma_size), simd_padding_width * 2);
if (!im->fulldata_buf) {
free(im);
return NULL;
}
im->fulldata = im->fulldata_buf + simd_padding_width / sizeof(kvz_pixel);
im->base_image = im;
im->refcount = 1; //We give a reference to caller
@ -110,11 +113,12 @@ void kvz_image_free(kvz_picture *const im)
// Free our reference to the base image.
kvz_image_free(im->base_image);
} else {
free(im->fulldata);
free(im->fulldata_buf);
}
// Make sure freed data won't be used.
im->base_image = NULL;
im->fulldata_buf = NULL;
im->fulldata = NULL;
im->y = im->u = im->v = NULL;
im->data[COLOR_Y] = im->data[COLOR_U] = im->data[COLOR_V] = NULL;

View file

@ -392,7 +392,8 @@ typedef struct kvz_config
* Function picture_alloc in kvz_api must be used for allocation.
*/
typedef struct kvz_picture {
kvz_pixel *fulldata; //!< \brief Allocated buffer (only used in the base_image)
kvz_pixel *fulldata_buf; //!< \brief Allocated buffer with padding (only used in the base_image)
kvz_pixel *fulldata; //!< \brief Allocated buffer portion that's actually used
kvz_pixel *y; //!< \brief Pointer to luma pixel array.
kvz_pixel *u; //!< \brief Pointer to chroma U pixel array.