mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-24 02:24:07 +00:00
Add support for 4x4 blocks to SATD_ANY_SIZE.
Makes functions satd_any_size_generic and satd_any_size_8bit_avx2 work on blocks whose width and/or height are not multiples of 8.
This commit is contained in:
parent
2ae260e422
commit
bf26661782
|
@ -31,6 +31,7 @@
|
|||
#include "strategies/strategies-picture.h"
|
||||
#include "strategyselector.h"
|
||||
#include "strategies/strategies-common.h"
|
||||
#include "strategies/generic/picture-generic.h"
|
||||
|
||||
|
||||
/**
|
||||
|
@ -454,6 +455,19 @@ INLINE static void hor_transform_block_dual_avx2(__m256i (*row_diff)[8])
|
|||
hor_transform_row_dual_avx2((*row_diff) + 7);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Calculate SATD between two 4x4 blocks inside bigger arrays.
|
||||
*/
|
||||
/**
 * \brief Calculate SATD between two 4x4 blocks inside bigger arrays.
 *
 * Placeholder: no vectorized version exists yet, so this simply forwards
 * the call to the generic scalar implementation.
 * TODO: AVX2 implementation
 */
static unsigned kvz_satd_4x4_subblock_8bit_avx2(const kvz_pixel *buf1,
                                                const int32_t stride1,
                                                const kvz_pixel *buf2,
                                                const int32_t stride2)
{
  // Delegate to the generic scalar code until an AVX2 kernel is written.
  return kvz_satd_4x4_subblock_generic(buf1, stride1, buf2, stride2);
}
|
||||
|
||||
static unsigned satd_8x8_subblock_8bit_avx2(const kvz_pixel * buf1, unsigned stride1, const kvz_pixel * buf2, unsigned stride2)
|
||||
{
|
||||
__m128i temp[8];
|
||||
|
|
|
@ -99,19 +99,13 @@ static unsigned reg_sad_generic(const kvz_pixel * const data1, const kvz_pixel *
|
|||
return sad;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Calculate SATD between two 4x4 blocks inside bigger arrays.
|
||||
* \brief Transform differences between two 4x4 blocks.
|
||||
* From HM 13.0
|
||||
*/
|
||||
static unsigned satd_4x4_generic(const kvz_pixel *piOrg, const kvz_pixel *piCur)
|
||||
static int32_t hadamard_4x4_generic(int32_t diff[4*4])
|
||||
{
|
||||
int32_t k, satd = 0, diff[16], m[16], d[16];
|
||||
for (k = 0; k < 16; ++k) {
|
||||
diff[k] = piOrg[k] - piCur[k];
|
||||
}
|
||||
|
||||
/*===== hadamard transform =====*/
|
||||
int32_t m[4 * 4];
|
||||
m[0] = diff[0] + diff[12];
|
||||
m[1] = diff[1] + diff[13];
|
||||
m[2] = diff[2] + diff[14];
|
||||
|
@ -129,6 +123,7 @@ static unsigned satd_4x4_generic(const kvz_pixel *piOrg, const kvz_pixel *piCur)
|
|||
m[14] = diff[2] - diff[14];
|
||||
m[15] = diff[3] - diff[15];
|
||||
|
||||
int32_t d[4 * 4];
|
||||
d[0] = m[0] + m[4];
|
||||
d[1] = m[1] + m[5];
|
||||
d[2] = m[2] + m[6];
|
||||
|
@ -180,14 +175,45 @@ static unsigned satd_4x4_generic(const kvz_pixel *piOrg, const kvz_pixel *piCur)
|
|||
d[14] = m[14] + m[15];
|
||||
d[15] = m[15] - m[14];
|
||||
|
||||
for (k = 0; k<16; ++k) {
|
||||
satd += abs(d[k]);
|
||||
int32_t satd = 0;
|
||||
for (int i = 0; i < 16; i++) {
|
||||
satd += abs(d[i]);
|
||||
}
|
||||
satd = ((satd + 1) >> 1);
|
||||
|
||||
return satd;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Calculate SATD between two 4x4 blocks.
|
||||
*/
|
||||
/**
 * \brief Calculate SATD between two contiguous 4x4 blocks.
 */
static unsigned satd_4x4_generic(const kvz_pixel *piOrg, const kvz_pixel *piCur)
{
  // Pixel-wise residual of the two blocks, in raster order.
  int32_t residual[4 * 4];
  int k = 0;
  while (k < 16) {
    residual[k] = piOrg[k] - piCur[k];
    ++k;
  }
  // The Hadamard transform of the residual yields the SATD.
  return hadamard_4x4_generic(residual);
}
|
||||
|
||||
/**
|
||||
* \brief Calculate SATD between two 4x4 blocks inside bigger arrays.
|
||||
*/
|
||||
/**
 * \brief Calculate SATD between two 4x4 blocks inside bigger arrays.
 */
unsigned kvz_satd_4x4_subblock_generic(const kvz_pixel * buf1,
                                       const int32_t stride1,
                                       const kvz_pixel * buf2,
                                       const int32_t stride2)
{
  int32_t diff[4 * 4];
  int32_t *dst = diff;
  const kvz_pixel *row1 = buf1;
  const kvz_pixel *row2 = buf2;

  // Gather the strided 4x4 residual into a dense raster-order buffer.
  for (int y = 0; y < 4; y++) {
    for (int x = 0; x < 4; x++) {
      *dst++ = row1[x] - row2[x];
    }
    row1 += stride1;
    row2 += stride2;
  }

  return hadamard_4x4_generic(diff);
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Calculate SATD between two 8x8 blocks inside bigger arrays.
|
||||
*/
|
||||
|
|
|
@ -39,4 +39,9 @@ kvz_pixel kvz_fast_clip_16bit_to_pixel(int16_t value);
|
|||
// Assumes PIXEL_MAX to be 2^n-1
|
||||
kvz_pixel kvz_fast_clip_32bit_to_pixel(int32_t value);
|
||||
|
||||
unsigned kvz_satd_4x4_subblock_generic(const kvz_pixel * buf1,
|
||||
const int32_t stride1,
|
||||
const kvz_pixel * buf2,
|
||||
const int32_t stride2);
|
||||
|
||||
#endif //STRATEGIES_PICTURE_GENERIC_H_
|
||||
|
|
|
@ -66,11 +66,33 @@ static unsigned satd_ ## n ## x ## n ## _ ## suffix ( \
|
|||
const kvz_pixel *block2, int stride2) \
|
||||
{ \
|
||||
unsigned sum = 0; \
|
||||
if (width % 8 != 0) { \
|
||||
/* Process the first column using 4x4 blocks. */ \
|
||||
for (int y = 0; y < height; y += 4) { \
|
||||
sum += kvz_satd_4x4_subblock_ ## suffix(&block1[y * stride1], stride1, \
|
||||
&block2[y * stride2], stride2); \
|
||||
} \
|
||||
block1 += 4; \
|
||||
block2 += 4; \
|
||||
width -= 4; \
|
||||
} \
|
||||
if (height % 8 != 0) { \
|
||||
/* Process the first row using 4x4 blocks. */ \
|
||||
for (int x = 0; x < width; x += 4) { \
|
||||
sum += kvz_satd_4x4_subblock_ ## suffix(&block1[x], stride1, \
|
||||
&block2[x], stride2); \
|
||||
} \
|
||||
block1 += 4 * stride1; \
|
||||
block2 += 4 * stride2; \
|
||||
height -= 4; \
|
||||
} \
|
||||
/* The rest can now be processed with 8x8 blocks. */ \
|
||||
for (int y = 0; y < height; y += 8) { \
|
||||
const kvz_pixel *row1 = &block1[y * stride1]; \
|
||||
const kvz_pixel *row2 = &block2[y * stride2]; \
|
||||
for (int x = 0; x < width; x += 8) { \
|
||||
sum += satd_8x8_subblock_ ## suffix(&row1[x], stride1, &row2[x], stride2); \
|
||||
sum += satd_8x8_subblock_ ## suffix(&row1[x], stride1, \
|
||||
&row2[x], stride2); \
|
||||
} \
|
||||
} \
|
||||
return sum >> (KVZ_BIT_DEPTH - 8); \
|
||||
|
|
Loading…
Reference in a new issue