mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-12-04 13:54:05 +00:00
627 lines
22 KiB
C
627 lines
22 KiB
C
/*****************************************************************************
|
|
* This file is part of uvg266 VVC encoder.
|
|
*
|
|
* Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without modification,
|
|
* are permitted provided that the following conditions are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright notice, this
|
|
* list of conditions and the following disclaimer.
|
|
*
|
|
* * Redistributions in binary form must reproduce the above copyright notice, this
|
|
* list of conditions and the following disclaimer in the documentation and/or
|
|
* other materials provided with the distribution.
|
|
*
|
|
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
|
* contributors may be used to endorse or promote products derived from
|
|
* this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
|
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
* INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
|
|
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
* INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
****************************************************************************/
|
|
|
|
#include "image.h"
|
|
|
|
#include <limits.h>
|
|
#include <stdlib.h>
|
|
|
|
#include "strategies/strategies-ipol.h"
|
|
#include "strategies/strategies-picture.h"
|
|
#include "threads.h"
|
|
|
|
/**
|
|
* \brief Allocate a new image with 420.
|
|
* This function signature is part of the libkvz API.
|
|
* \return image pointer or NULL on failure
|
|
*/
|
|
uvg_picture * uvg_image_alloc_420(const int32_t width, const int32_t height)
|
|
{
|
|
return uvg_image_alloc(UVG_CSP_420, width, height);
|
|
}
|
|
|
|
/**
|
|
* \brief Allocate a new image.
|
|
* \return image pointer or NULL on failure
|
|
*/
|
|
uvg_picture * uvg_image_alloc(enum uvg_chroma_format chroma_format, const int32_t width, const int32_t height)
|
|
{
|
|
//Assert that we have a well defined image
|
|
assert((width % 2) == 0);
|
|
assert((height % 2) == 0);
|
|
|
|
const size_t simd_padding_width = 64;
|
|
|
|
uvg_picture *im = MALLOC(uvg_picture, 1);
|
|
if (!im) return NULL;
|
|
|
|
//Add 4 pixel boundary to each side of luma for ALF
|
|
//This results also 2 pixel boundary for chroma
|
|
unsigned int luma_size = (width + FRAME_PADDING_LUMA) * (height + FRAME_PADDING_LUMA);
|
|
|
|
unsigned chroma_sizes[] = { 0, luma_size / 4, luma_size / 2, luma_size };
|
|
unsigned chroma_size = chroma_sizes[chroma_format];
|
|
|
|
im->chroma_format = chroma_format;
|
|
|
|
//Allocate memory, pad the full data buffer from both ends
|
|
im->fulldata_buf = MALLOC_SIMD_PADDED(uvg_pixel, (luma_size + 2 * chroma_size), simd_padding_width * 2);
|
|
if (!im->fulldata_buf) {
|
|
free(im);
|
|
return NULL;
|
|
}
|
|
|
|
//Shift the image to allow ALF filtering
|
|
im->refcount = 1; //We give a reference to caller
|
|
im->width = width;
|
|
im->height = height;
|
|
im->stride = width + FRAME_PADDING_LUMA;
|
|
im->chroma_format = chroma_format;
|
|
const int padding_before_first_pixel_luma = (FRAME_PADDING_LUMA / 2) * (im->stride) + FRAME_PADDING_LUMA / 2;
|
|
const int padding_before_first_pixel_chroma = (FRAME_PADDING_CHROMA / 2) * (im->stride/2) + FRAME_PADDING_CHROMA / 2;
|
|
im->fulldata = &im->fulldata_buf[padding_before_first_pixel_luma] + simd_padding_width / sizeof(uvg_pixel);
|
|
im->base_image = im;
|
|
|
|
im->y = im->data[COLOR_Y] = &im->fulldata[0];
|
|
|
|
if (chroma_format == UVG_CSP_400) {
|
|
im->u = im->data[COLOR_U] = NULL;
|
|
im->v = im->data[COLOR_V] = NULL;
|
|
} else {
|
|
im->u = im->data[COLOR_U] = &im->fulldata[luma_size - padding_before_first_pixel_luma + padding_before_first_pixel_chroma];
|
|
im->v = im->data[COLOR_V] = &im->fulldata[luma_size - padding_before_first_pixel_luma + chroma_size + padding_before_first_pixel_chroma];
|
|
}
|
|
|
|
im->pts = 0;
|
|
im->dts = 0;
|
|
|
|
im->interlacing = UVG_INTERLACING_NONE;
|
|
|
|
im->roi.roi_array = NULL;
|
|
im->roi.width = 0;
|
|
im->roi.height = 0;
|
|
|
|
return im;
|
|
}
|
|
|
|
/**
|
|
* \brief Free an image.
|
|
*
|
|
* Decrement reference count of the image and deallocate associated memory
|
|
* if no references exist any more.
|
|
*
|
|
* \param im image to free
|
|
*/
|
|
void uvg_image_free(uvg_picture *const im)
|
|
{
|
|
if (im == NULL) return;
|
|
|
|
int32_t new_refcount = UVG_ATOMIC_DEC(&(im->refcount));
|
|
if (new_refcount > 0) {
|
|
// There are still references so we don't free the data yet.
|
|
return;
|
|
}
|
|
|
|
if (im->base_image != im) {
|
|
// Free our reference to the base image.
|
|
uvg_image_free(im->base_image);
|
|
} else {
|
|
free(im->fulldata_buf);
|
|
if (im->roi.roi_array) FREE_POINTER(im->roi.roi_array);
|
|
}
|
|
|
|
// Make sure freed data won't be used.
|
|
im->base_image = NULL;
|
|
im->fulldata_buf = NULL;
|
|
im->fulldata = NULL;
|
|
im->y = im->u = im->v = NULL;
|
|
im->data[COLOR_Y] = im->data[COLOR_U] = im->data[COLOR_V] = NULL;
|
|
free(im);
|
|
}
|
|
|
|
/**
|
|
* \brief Get a new pointer to an image.
|
|
*
|
|
* Increment reference count and return the image.
|
|
*/
|
|
uvg_picture *uvg_image_copy_ref(uvg_picture *im)
|
|
{
|
|
int32_t new_refcount = UVG_ATOMIC_INC(&im->refcount);
|
|
// The caller should have had another reference and we added one
|
|
// reference so refcount should be at least 2.
|
|
assert(new_refcount >= 2);
|
|
return im;
|
|
}
|
|
|
|
uvg_picture *uvg_image_make_subimage(uvg_picture *const orig_image,
|
|
const unsigned x_offset,
|
|
const unsigned y_offset,
|
|
const unsigned width,
|
|
const unsigned height)
|
|
{
|
|
// Assert that we have a well defined image
|
|
assert((width % 2) == 0);
|
|
assert((height % 2) == 0);
|
|
|
|
assert((x_offset % 2) == 0);
|
|
assert((y_offset % 2) == 0);
|
|
|
|
assert(x_offset + width <= orig_image->width);
|
|
assert(y_offset + height <= orig_image->height);
|
|
|
|
uvg_picture *im = MALLOC(uvg_picture, 1);
|
|
if (!im) return NULL;
|
|
|
|
im->base_image = uvg_image_copy_ref(orig_image->base_image);
|
|
im->refcount = 1; // We give a reference to caller
|
|
im->width = width;
|
|
im->height = height;
|
|
im->stride = orig_image->stride;
|
|
im->chroma_format = orig_image->chroma_format;
|
|
|
|
im->y = im->data[COLOR_Y] = &orig_image->y[x_offset + y_offset * orig_image->stride];
|
|
if (orig_image->chroma_format != UVG_CSP_400) {
|
|
im->u = im->data[COLOR_U] = &orig_image->u[x_offset / 2 + y_offset / 2 * orig_image->stride / 2];
|
|
im->v = im->data[COLOR_V] = &orig_image->v[x_offset / 2 + y_offset / 2 * orig_image->stride / 2];
|
|
}
|
|
|
|
im->pts = 0;
|
|
im->dts = 0;
|
|
|
|
im->roi = orig_image->roi;
|
|
|
|
return im;
|
|
}
|
|
|
|
yuv_t * uvg_yuv_t_alloc(int luma_size, int chroma_size)
|
|
{
|
|
yuv_t *yuv = (yuv_t *)malloc(sizeof(*yuv));
|
|
yuv->size = luma_size;
|
|
|
|
// Get buffers with separate mallocs in order to take advantage of
|
|
// automatic buffer overrun checks.
|
|
yuv->y = (uvg_pixel *)malloc(luma_size * sizeof(*yuv->y));
|
|
if (chroma_size == 0) {
|
|
yuv->u = NULL;
|
|
yuv->v = NULL;
|
|
} else {
|
|
yuv->u = (uvg_pixel *)malloc(chroma_size * sizeof(*yuv->u));
|
|
yuv->v = (uvg_pixel *)malloc(chroma_size * sizeof(*yuv->v));
|
|
}
|
|
|
|
return yuv;
|
|
}
|
|
|
|
void uvg_yuv_t_free(yuv_t *yuv)
|
|
{
|
|
if (yuv) {
|
|
FREE_POINTER(yuv->y);
|
|
FREE_POINTER(yuv->u);
|
|
FREE_POINTER(yuv->v);
|
|
}
|
|
FREE_POINTER(yuv);
|
|
}
|
|
|
|
hi_prec_buf_t * uvg_hi_prec_buf_t_alloc(int luma_size)
|
|
{
|
|
// Get buffers with separate mallocs in order to take advantage of
|
|
// automatic buffer overrun checks.
|
|
hi_prec_buf_t *yuv = (hi_prec_buf_t *)malloc(sizeof(*yuv));
|
|
yuv->y = (int16_t *)malloc(luma_size * sizeof(*yuv->y));
|
|
yuv->u = (int16_t *)malloc(luma_size / 2 * sizeof(*yuv->u));
|
|
yuv->v = (int16_t *)malloc(luma_size / 2 * sizeof(*yuv->v));
|
|
yuv->joint_u = (int16_t *)malloc(luma_size / 2 * sizeof(*yuv->u));
|
|
yuv->joint_v = (int16_t *)malloc(luma_size / 2 * sizeof(*yuv->v));
|
|
yuv->size = luma_size;
|
|
|
|
return yuv;
|
|
}
|
|
|
|
void uvg_hi_prec_buf_t_free(hi_prec_buf_t * yuv)
|
|
{
|
|
free(yuv->y);
|
|
free(yuv->u);
|
|
free(yuv->v);
|
|
free(yuv->joint_v);
|
|
free(yuv->joint_u);
|
|
free(yuv);
|
|
}
|
|
|
|
static INLINE uint32_t reg_sad_maybe_optimized(const uvg_pixel * const data1, const uvg_pixel * const data2,
|
|
const int32_t width, const int32_t height, const uint32_t stride1,
|
|
const uint32_t stride2, optimized_sad_func_ptr_t optimized_sad)
|
|
{
|
|
if (optimized_sad != NULL)
|
|
return optimized_sad(data1, data2, height, stride1, stride2);
|
|
else
|
|
return uvg_reg_sad(data1, data2, width, height, stride1, stride2);
|
|
}
|
|
|
|
/**
|
|
* \brief Diagonally interpolate SAD outside the frame.
|
|
*
|
|
* \param data1 Starting point of the first picture.
|
|
* \param data2 Starting point of the second picture.
|
|
* \param width Width of the region for which SAD is calculated.
|
|
* \param height Height of the region for which SAD is calculated.
|
|
* \param width Width of the pixel array.
|
|
*
|
|
* \returns Sum of Absolute Differences
|
|
*/
|
|
static unsigned cor_sad(const uvg_pixel *pic_data, const uvg_pixel *ref_data,
|
|
int block_width, int block_height, unsigned pic_stride)
|
|
{
|
|
uvg_pixel ref = *ref_data;
|
|
int x, y;
|
|
unsigned sad = 0;
|
|
|
|
for (y = 0; y < block_height; ++y) {
|
|
for (x = 0; x < block_width; ++x) {
|
|
sad += abs(pic_data[y * pic_stride + x] - ref);
|
|
}
|
|
}
|
|
|
|
return sad;
|
|
}
|
|
|
|
|
|
/**
|
|
* \brief Handle special cases of comparing blocks that are not completely
|
|
* inside the frame.
|
|
*
|
|
* \param pic First frame.
|
|
* \param ref Second frame.
|
|
* \param pic_x X coordinate of the first block.
|
|
* \param pic_y Y coordinate of the first block.
|
|
* \param ref_x X coordinate of the second block.
|
|
* \param ref_y Y coordinate of the second block.
|
|
* \param block_width Width of the blocks.
|
|
* \param block_height Height of the blocks.
|
|
*/
|
|
static unsigned image_interpolated_sad(const uvg_picture *pic, const uvg_picture *ref,
|
|
int pic_x, int pic_y, int ref_x, int ref_y,
|
|
int block_width, int block_height,
|
|
optimized_sad_func_ptr_t optimized_sad)
|
|
{
|
|
uvg_pixel *pic_data, *ref_data;
|
|
|
|
int left, right, top, bottom;
|
|
int result = 0;
|
|
|
|
// Change the movement vector to point right next to the frame. This doesn't
|
|
// affect the result but removes some special cases.
|
|
if (ref_x > ref->width) ref_x = ref->width;
|
|
if (ref_y > ref->height) ref_y = ref->height;
|
|
if (ref_x + block_width < 0) ref_x = -block_width;
|
|
if (ref_y + block_height < 0) ref_y = -block_height;
|
|
|
|
// These are the number of pixels by how far the movement vector points
|
|
// outside the frame. They are always >= 0. If all of them are 0, the
|
|
// movement vector doesn't point outside the frame.
|
|
left = (ref_x < 0) ? -ref_x : 0;
|
|
top = (ref_y < 0) ? -ref_y : 0;
|
|
right = (ref_x + block_width > ref->width) ? ref_x + block_width - ref->width : 0;
|
|
bottom = (ref_y + block_height > ref->height) ? ref_y + block_height - ref->height : 0;
|
|
|
|
// Center picture to the current block and reference to the point where
|
|
// movement vector is pointing to. That point might be outside the buffer,
|
|
// but that is ok because we project the movement vector to the buffer
|
|
// before dereferencing the pointer.
|
|
pic_data = &pic->y[pic_y * pic->stride + pic_x];
|
|
ref_data = &ref->y[ref_y * ref->stride + ref_x];
|
|
|
|
// The handling of movement vectors that point outside the picture is done
|
|
// in the following way.
|
|
// - Correct the index of ref_data so that it points to the top-left
|
|
// of the area we want to compare against.
|
|
// - Correct the index of pic_data to point inside the current block, so
|
|
// that we compare the right part of the block to the ref_data.
|
|
// - Reduce block_width and block_height so that the the size of the area
|
|
// being compared is correct.
|
|
//
|
|
// NOTE: No more correct since hor_sad was modified to be a separate
|
|
// strategy
|
|
if (top && left) {
|
|
result += cor_sad(pic_data,
|
|
&ref_data[top * ref->stride + left],
|
|
left, top, pic->stride);
|
|
result += uvg_ver_sad(&pic_data[left],
|
|
&ref_data[top * ref->stride + left],
|
|
block_width - left, top, pic->stride);
|
|
|
|
result += uvg_hor_sad(pic_data + top * pic->stride,
|
|
ref_data + top * ref->stride,
|
|
block_width, block_height - top,
|
|
pic->stride, ref->stride,
|
|
left, right);
|
|
|
|
} else if (top && right) {
|
|
result += uvg_ver_sad(pic_data,
|
|
&ref_data[top * ref->stride],
|
|
block_width - right, top, pic->stride);
|
|
result += cor_sad(&pic_data[block_width - right],
|
|
&ref_data[top * ref->stride + (block_width - right - 1)],
|
|
right, top, pic->stride);
|
|
|
|
result += uvg_hor_sad(pic_data + top * pic->stride,
|
|
ref_data + top * ref->stride,
|
|
block_width, block_height - top,
|
|
pic->stride, ref->stride,
|
|
left, right);
|
|
|
|
} else if (bottom && left) {
|
|
result += uvg_hor_sad(pic_data, ref_data, block_width, block_height - bottom,
|
|
pic->stride, ref->stride, left, right);
|
|
|
|
result += cor_sad(&pic_data[(block_height - bottom) * pic->stride],
|
|
&ref_data[(block_height - bottom - 1) * ref->stride + left],
|
|
left, bottom, pic->stride);
|
|
result += uvg_ver_sad(&pic_data[(block_height - bottom) * pic->stride + left],
|
|
&ref_data[(block_height - bottom - 1) * ref->stride + left],
|
|
block_width - left, bottom, pic->stride);
|
|
} else if (bottom && right) {
|
|
result += uvg_hor_sad(pic_data, ref_data, block_width, block_height - bottom,
|
|
pic->stride, ref->stride, left, right);
|
|
|
|
result += uvg_ver_sad(&pic_data[(block_height - bottom) * pic->stride],
|
|
&ref_data[(block_height - bottom - 1) * ref->stride],
|
|
block_width - right, bottom, pic->stride);
|
|
result += cor_sad(&pic_data[(block_height - bottom) * pic->stride + block_width - right],
|
|
&ref_data[(block_height - bottom - 1) * ref->stride + block_width - right - 1],
|
|
right, bottom, pic->stride);
|
|
} else if (top) {
|
|
result += uvg_ver_sad(pic_data,
|
|
&ref_data[top * ref->stride],
|
|
block_width, top, pic->stride);
|
|
result += reg_sad_maybe_optimized(&pic_data[top * pic->stride],
|
|
&ref_data[top * ref->stride],
|
|
block_width, block_height - top, pic->stride, ref->stride,
|
|
optimized_sad);
|
|
} else if (bottom) {
|
|
result += reg_sad_maybe_optimized(pic_data,
|
|
ref_data,
|
|
block_width, block_height - bottom, pic->stride, ref->stride,
|
|
optimized_sad);
|
|
result += uvg_ver_sad(&pic_data[(block_height - bottom) * pic->stride],
|
|
&ref_data[(block_height - bottom - 1) * ref->stride],
|
|
block_width, bottom, pic->stride);
|
|
} else if (left | right) {
|
|
result += uvg_hor_sad(pic_data, ref_data,
|
|
block_width, block_height, pic->stride,
|
|
ref->stride, left, right);
|
|
} else {
|
|
result += reg_sad_maybe_optimized(pic_data, ref_data,
|
|
block_width, block_height,
|
|
pic->stride, ref->stride,
|
|
optimized_sad);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* \brief Calculate interpolated SAD between two blocks.
|
|
*
|
|
* \param pic Image for the block we are trying to find.
|
|
* \param ref Image where we are trying to find the block.
|
|
*
|
|
* \returns Sum of absolute differences
|
|
*/
|
|
unsigned uvg_image_calc_sad(const uvg_picture *pic,
|
|
const uvg_picture *ref,
|
|
int pic_x,
|
|
int pic_y,
|
|
int ref_x,
|
|
int ref_y,
|
|
int block_width,
|
|
int block_height,
|
|
optimized_sad_func_ptr_t optimized_sad)
|
|
{
|
|
assert(pic_x >= 0 && pic_x <= pic->width - block_width);
|
|
assert(pic_y >= 0 && pic_y <= pic->height - block_height);
|
|
|
|
uint32_t res;
|
|
|
|
if (ref_x >= 0 && ref_x <= ref->width - block_width &&
|
|
ref_y >= 0 && ref_y <= ref->height - block_height)
|
|
{
|
|
// Reference block is completely inside the frame, so just calculate the
|
|
// SAD directly. This is the most common case, which is why it's first.
|
|
const uvg_pixel *pic_data = &pic->y[pic_y * pic->stride + pic_x];
|
|
const uvg_pixel *ref_data = &ref->y[ref_y * ref->stride + ref_x];
|
|
|
|
res = reg_sad_maybe_optimized(pic_data,
|
|
ref_data,
|
|
block_width,
|
|
block_height,
|
|
pic->stride,
|
|
ref->stride,
|
|
optimized_sad);
|
|
} else {
|
|
// Call a routine that knows how to interpolate pixels outside the frame.
|
|
res = image_interpolated_sad(pic, ref, pic_x, pic_y, ref_x, ref_y, block_width, block_height, optimized_sad);
|
|
}
|
|
return res >> (UVG_BIT_DEPTH - 8);
|
|
}
|
|
|
|
|
|
/**
|
|
* \brief Calculate interpolated SATD between two blocks.
|
|
*
|
|
* \param pic Image for the block we are trying to find.
|
|
* \param ref Image where we are trying to find the block.
|
|
*/
|
|
unsigned uvg_image_calc_satd(const uvg_picture *pic,
|
|
const uvg_picture *ref,
|
|
int pic_x,
|
|
int pic_y,
|
|
int ref_x,
|
|
int ref_y,
|
|
int block_width,
|
|
int block_height)
|
|
{
|
|
assert(pic_x >= 0 && pic_x <= pic->width - block_width);
|
|
assert(pic_y >= 0 && pic_y <= pic->height - block_height);
|
|
|
|
if (ref_x >= 0 && ref_x <= ref->width - block_width &&
|
|
ref_y >= 0 && ref_y <= ref->height - block_height)
|
|
{
|
|
// Reference block is completely inside the frame, so just calculate the
|
|
// SAD directly. This is the most common case, which is why it's first.
|
|
const uvg_pixel *pic_data = &pic->y[pic_y * pic->stride + pic_x];
|
|
const uvg_pixel *ref_data = &ref->y[ref_y * ref->stride + ref_x];
|
|
return uvg_satd_any_size(block_width,
|
|
block_height,
|
|
pic_data,
|
|
pic->stride,
|
|
ref_data,
|
|
ref->stride) >> (UVG_BIT_DEPTH - 8);
|
|
} else {
|
|
// Extrapolate pixels from outside the frame.
|
|
|
|
// Space for extrapolated pixels and the part from the picture
|
|
// The extrapolation function will set the pointers and stride.
|
|
uvg_pixel ext_buffer[LCU_LUMA_SIZE];
|
|
uvg_pixel *ext = NULL;
|
|
uvg_pixel *ext_origin = NULL;
|
|
int ext_s = 0;
|
|
uvg_epol_args epol_args = {
|
|
.src = ref->y,
|
|
.src_w = ref->width,
|
|
.src_h = ref->height,
|
|
.src_s = ref->stride,
|
|
.blk_x = ref_x,
|
|
.blk_y = ref_y,
|
|
.blk_w = block_width,
|
|
.blk_h = block_height,
|
|
.pad_l = 0,
|
|
.pad_r = 0,
|
|
.pad_t = 0,
|
|
.pad_b = 0,
|
|
.pad_b_simd = 0,
|
|
};
|
|
|
|
// Initialize separately. Gets rid of warning
|
|
// about using nonstandard extension.
|
|
epol_args.buf = ext_buffer;
|
|
epol_args.ext = &ext;
|
|
epol_args.ext_origin = &ext_origin;
|
|
epol_args.ext_s = &ext_s;
|
|
|
|
uvg_get_extended_block(&epol_args);
|
|
|
|
const uvg_pixel *pic_data = &pic->y[pic_y * pic->stride + pic_x];
|
|
|
|
unsigned satd = uvg_satd_any_size(block_width,
|
|
block_height,
|
|
pic_data,
|
|
pic->stride,
|
|
ext_origin,
|
|
ext_s) >> (UVG_BIT_DEPTH - 8);
|
|
|
|
return satd;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
* \brief BLock Image Transfer from one buffer to another.
|
|
*
|
|
* It's a stupidly simple loop that copies pixels.
|
|
*
|
|
* \param orig Start of the originating buffer.
|
|
* \param dst Start of the destination buffer.
|
|
* \param width Width of the copied region.
|
|
* \param height Height of the copied region.
|
|
* \param orig_stride Width of a row in the originating buffer.
|
|
* \param dst_stride Width of a row in the destination buffer.
|
|
*
|
|
* This should be inlined, but it's defined here for now to see if Visual
|
|
* Studios LTCG will inline it.
|
|
*/
|
|
#define BLIT_PIXELS_CASE(n) case n:\
|
|
for (y = 0; y < n; ++y) {\
|
|
memcpy(&dst[y*dst_stride], &orig[y*orig_stride], n * sizeof(uvg_pixel));\
|
|
}\
|
|
break;
|
|
|
|
void uvg_pixels_blit(const uvg_pixel * const orig, uvg_pixel * const dst,
|
|
const unsigned width, const unsigned height,
|
|
const unsigned orig_stride, const unsigned dst_stride)
|
|
{
|
|
unsigned y;
|
|
//There is absolutely no reason to have a width greater than the source or the destination stride.
|
|
assert(width <= orig_stride);
|
|
assert(width <= dst_stride);
|
|
|
|
#ifdef CHECKPOINTS
|
|
char *buffer = malloc((3 * width + 1) * sizeof(char));
|
|
for (y = 0; y < height; ++y) {
|
|
int p;
|
|
for (p = 0; p < width; ++p) {
|
|
sprintf((buffer + 3*p), "%02X ", orig[y*orig_stride]);
|
|
}
|
|
buffer[3*width] = 0;
|
|
CHECKPOINT("uvg_pixels_blit_avx2: %04d: %s", y, buffer);
|
|
}
|
|
FREE_POINTER(buffer);
|
|
#endif //CHECKPOINTS
|
|
|
|
if (width == orig_stride && width == dst_stride) {
|
|
memcpy(dst, orig, width * height * sizeof(uvg_pixel));
|
|
return;
|
|
}
|
|
|
|
int nxn_width = (width == height) ? width : 0;
|
|
switch (nxn_width) {
|
|
BLIT_PIXELS_CASE(4)
|
|
BLIT_PIXELS_CASE(8)
|
|
BLIT_PIXELS_CASE(16)
|
|
BLIT_PIXELS_CASE(32)
|
|
BLIT_PIXELS_CASE(64)
|
|
default:
|
|
|
|
if (orig == dst) {
|
|
//If we have the same array, then we should have the same stride
|
|
assert(orig_stride == dst_stride);
|
|
return;
|
|
}
|
|
assert(orig != dst || orig_stride == dst_stride);
|
|
|
|
for (y = 0; y < height; ++y) {
|
|
memcpy(&dst[y*dst_stride], &orig[y*orig_stride], width * sizeof(uvg_pixel));
|
|
}
|
|
break;
|
|
}
|
|
}
|