From 760bd0397dc89d0ccddbf293c9139a5727b7d877 Mon Sep 17 00:00:00 2001 From: Pauli Oikkonen Date: Wed, 30 Jan 2019 01:33:39 +0200 Subject: [PATCH] Pad the image buffer by 64 bytes from both ends This will be necessary for an efficient and straightforward implementation of hor_sad for blocks over 16 pixels wide: they cannot use the shuffle trick, because inter-lane shuffling is so hard to do. --- src/global.h | 2 ++ src/image.c | 12 ++++++++---- src/kvazaar.h | 3 ++- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/global.h b/src/global.h index c7e6c8b0..5ced48a1 100644 --- a/src/global.h +++ b/src/global.h @@ -239,8 +239,10 @@ typedef int16_t coeff_t; #ifdef _MSC_VER // Buggy VS2010 throws intellisense warnings if void* is not casted. #define MALLOC(type, num) (type *)malloc(sizeof(type) * (num)) + #define MALLOC_SIMD_PADDED(type, num, padding) (type *)malloc(sizeof(type) * (num) + (padding)) #else #define MALLOC(type, num) malloc(sizeof(type) * (num)) + #define MALLOC_SIMD_PADDED(type, num, padding) malloc(sizeof(type) * (num) + (padding)) #endif // Use memset through FILL and FILL_ARRAY when appropriate, such as when diff --git a/src/image.c b/src/image.c index fe626d72..6d973b59 100644 --- a/src/image.c +++ b/src/image.c @@ -47,6 +47,8 @@ kvz_picture * kvz_image_alloc(enum kvz_chroma_format chroma_format, const int32_ assert((width % 2) == 0); assert((height % 2) == 0); + const size_t simd_padding_width = 64; + kvz_picture *im = MALLOC(kvz_picture, 1); if (!im) return NULL; @@ -56,12 +58,13 @@ kvz_picture * kvz_image_alloc(enum kvz_chroma_format chroma_format, const int32_ im->chroma_format = chroma_format; - //Allocate memory - im->fulldata = MALLOC(kvz_pixel, (luma_size + 2 * chroma_size)); - if (!im->fulldata) { + //Allocate memory, pad the full data buffer from both ends + im->fulldata_buf = MALLOC_SIMD_PADDED(kvz_pixel, (luma_size + 2 * chroma_size), simd_padding_width * 2); + if (!im->fulldata_buf) { free(im); return NULL; } + 
im->fulldata = im->fulldata_buf + simd_padding_width / sizeof(kvz_pixel); im->base_image = im; im->refcount = 1; //We give a reference to caller @@ -110,11 +113,12 @@ void kvz_image_free(kvz_picture *const im) // Free our reference to the base image. kvz_image_free(im->base_image); } else { - free(im->fulldata); + free(im->fulldata_buf); } // Make sure freed data won't be used. im->base_image = NULL; + im->fulldata_buf = NULL; im->fulldata = NULL; im->y = im->u = im->v = NULL; im->data[COLOR_Y] = im->data[COLOR_U] = im->data[COLOR_V] = NULL; diff --git a/src/kvazaar.h b/src/kvazaar.h index 7209b636..f98a818b 100644 --- a/src/kvazaar.h +++ b/src/kvazaar.h @@ -392,7 +392,8 @@ typedef struct kvz_config * Function picture_alloc in kvz_api must be used for allocation. */ typedef struct kvz_picture { - kvz_pixel *fulldata; //!< \brief Allocated buffer (only used in the base_image) + kvz_pixel *fulldata_buf; //!< \brief Allocated buffer with padding (only used in the base_image) + kvz_pixel *fulldata; //!< \brief Allocated buffer portion that's actually used kvz_pixel *y; //!< \brief Pointer to luma pixel array. kvz_pixel *u; //!< \brief Pointer to chroma U pixel array.