Add support for 4x4 blocks to SATD_ANY_SIZE.

Makes functions satd_any_size_generic and satd_any_size_8bit_avx2 work
on blocks whose width and/or height are not multiples of 8.
This commit is contained in:
Arttu Ylä-Outinen 2016-06-15 12:18:32 +09:00
parent 2ae260e422
commit bf26661782
4 changed files with 79 additions and 12 deletions

View file

@@ -31,6 +31,7 @@
#include "strategies/strategies-picture.h" #include "strategies/strategies-picture.h"
#include "strategyselector.h" #include "strategyselector.h"
#include "strategies/strategies-common.h" #include "strategies/strategies-common.h"
#include "strategies/generic/picture-generic.h"
/** /**
@@ -454,6 +455,19 @@ INLINE static void hor_transform_block_dual_avx2(__m256i (*row_diff)[8])
hor_transform_row_dual_avx2((*row_diff) + 7); hor_transform_row_dual_avx2((*row_diff) + 7);
} }
/**
* \brief Calculate SATD between two 4x4 blocks inside bigger arrays.
*/
/**
 * \brief Calculate SATD between two 4x4 blocks inside bigger arrays
 *        (8-bit AVX2 strategy entry point).
 *
 * No vectorized version exists yet, so this simply forwards to the
 * generic scalar routine.
 * TODO: AVX2 implementation
 */
static unsigned kvz_satd_4x4_subblock_8bit_avx2(const kvz_pixel * buf1,
const int32_t stride1,
const kvz_pixel * buf2,
const int32_t stride2)
{
// Delegate to the portable implementation until an AVX2 kernel is written.
const unsigned satd = kvz_satd_4x4_subblock_generic(buf1, stride1,
                                                    buf2, stride2);
return satd;
}
static unsigned satd_8x8_subblock_8bit_avx2(const kvz_pixel * buf1, unsigned stride1, const kvz_pixel * buf2, unsigned stride2) static unsigned satd_8x8_subblock_8bit_avx2(const kvz_pixel * buf1, unsigned stride1, const kvz_pixel * buf2, unsigned stride2)
{ {
__m128i temp[8]; __m128i temp[8];

View file

@@ -99,19 +99,13 @@ static unsigned reg_sad_generic(const kvz_pixel * const data1, const kvz_pixel *
return sad; return sad;
} }
/** /**
* \brief Calculate SATD between two 4x4 blocks inside bigger arrays. * \brief Transform differences between two 4x4 blocks.
* From HM 13.0 * From HM 13.0
*/ */
static unsigned satd_4x4_generic(const kvz_pixel *piOrg, const kvz_pixel *piCur) static int32_t hadamard_4x4_generic(int32_t diff[4*4])
{ {
int32_t k, satd = 0, diff[16], m[16], d[16]; int32_t m[4 * 4];
for (k = 0; k < 16; ++k) {
diff[k] = piOrg[k] - piCur[k];
}
/*===== hadamard transform =====*/
m[0] = diff[0] + diff[12]; m[0] = diff[0] + diff[12];
m[1] = diff[1] + diff[13]; m[1] = diff[1] + diff[13];
m[2] = diff[2] + diff[14]; m[2] = diff[2] + diff[14];
@@ -129,6 +123,7 @@ static unsigned satd_4x4_generic(const kvz_pixel *piOrg, const kvz_pixel *piCur)
m[14] = diff[2] - diff[14]; m[14] = diff[2] - diff[14];
m[15] = diff[3] - diff[15]; m[15] = diff[3] - diff[15];
int32_t d[4 * 4];
d[0] = m[0] + m[4]; d[0] = m[0] + m[4];
d[1] = m[1] + m[5]; d[1] = m[1] + m[5];
d[2] = m[2] + m[6]; d[2] = m[2] + m[6];
@@ -180,14 +175,45 @@ static unsigned satd_4x4_generic(const kvz_pixel *piOrg, const kvz_pixel *piCur)
d[14] = m[14] + m[15]; d[14] = m[14] + m[15];
d[15] = m[15] - m[14]; d[15] = m[15] - m[14];
for (k = 0; k<16; ++k) { int32_t satd = 0;
satd += abs(d[k]); for (int i = 0; i < 16; i++) {
satd += abs(d[i]);
} }
satd = ((satd + 1) >> 1); satd = ((satd + 1) >> 1);
return satd; return satd;
} }
/**
* \brief Calculate SATD between two 4x4 blocks.
*/
/**
 * \brief Calculate SATD between two contiguous 4x4 blocks.
 *
 * Builds the 16-element pixel-difference vector and hands it to the
 * shared Hadamard transform helper.
 */
static unsigned satd_4x4_generic(const kvz_pixel *piOrg, const kvz_pixel *piCur)
{
int32_t diff[4 * 4];
int32_t *out = diff;
const kvz_pixel *org = piOrg;
const kvz_pixel *cur = piCur;
// Both blocks are stored contiguously, so a single linear pass suffices.
for (int remaining = 4 * 4; remaining > 0; remaining--) {
  *out++ = (int32_t)(*org++) - (int32_t)(*cur++);
}
return hadamard_4x4_generic(diff);
}
/**
* \brief Calculate SATD between two 4x4 blocks inside bigger arrays.
*/
/**
 * \brief Calculate SATD between two 4x4 blocks inside bigger arrays.
 *
 * Each input block starts at buf1/buf2 and uses its own row stride;
 * the differences are gathered into a dense 4x4 array before the
 * Hadamard transform is applied.
 */
unsigned kvz_satd_4x4_subblock_generic(const kvz_pixel * buf1,
const int32_t stride1,
const kvz_pixel * buf2,
const int32_t stride2)
{
int32_t diff[4 * 4];
int32_t *out = diff;
// Walk the two strided blocks row by row, writing a packed difference block.
for (int row = 0; row < 4; row++) {
  const kvz_pixel *p1 = buf1 + row * stride1;
  const kvz_pixel *p2 = buf2 + row * stride2;
  for (int col = 0; col < 4; col++) {
    *out++ = (int32_t)p1[col] - (int32_t)p2[col];
  }
}
return hadamard_4x4_generic(diff);
}
/** /**
* \brief Calculate SATD between two 8x8 blocks inside bigger arrays. * \brief Calculate SATD between two 8x8 blocks inside bigger arrays.
*/ */

View file

@@ -39,4 +39,9 @@ kvz_pixel kvz_fast_clip_16bit_to_pixel(int16_t value);
// Assumes PIXEL_MAX to be 2^n-1 // Assumes PIXEL_MAX to be 2^n-1
kvz_pixel kvz_fast_clip_32bit_to_pixel(int32_t value); kvz_pixel kvz_fast_clip_32bit_to_pixel(int32_t value);
unsigned kvz_satd_4x4_subblock_generic(const kvz_pixel * buf1,
const int32_t stride1,
const kvz_pixel * buf2,
const int32_t stride2);
#endif //STRATEGIES_PICTURE_GENERIC_H_ #endif //STRATEGIES_PICTURE_GENERIC_H_

View file

@@ -66,11 +66,33 @@ static unsigned satd_ ## n ## x ## n ## _ ## suffix ( \
const kvz_pixel *block2, int stride2) \ const kvz_pixel *block2, int stride2) \
{ \ { \
unsigned sum = 0; \ unsigned sum = 0; \
if (width % 8 != 0) { \
/* Process the first column using 4x4 blocks. */ \
for (int y = 0; y < height; y += 4) { \
sum += kvz_satd_4x4_subblock_ ## suffix(&block1[y * stride1], stride1, \
&block2[y * stride2], stride2); \
} \
block1 += 4; \
block2 += 4; \
width -= 4; \
} \
if (height % 8 != 0) { \
/* Process the first row using 4x4 blocks. */ \
for (int x = 0; x < width; x += 4) { \
sum += kvz_satd_4x4_subblock_ ## suffix(&block1[x], stride1, \
&block2[x], stride2); \
} \
block1 += 4 * stride1; \
block2 += 4 * stride2; \
height -= 4; \
} \
/* The rest can now be processed with 8x8 blocks. */ \
for (int y = 0; y < height; y += 8) { \ for (int y = 0; y < height; y += 8) { \
const kvz_pixel *row1 = &block1[y * stride1]; \ const kvz_pixel *row1 = &block1[y * stride1]; \
const kvz_pixel *row2 = &block2[y * stride2]; \ const kvz_pixel *row2 = &block2[y * stride2]; \
for (int x = 0; x < width; x += 8) { \ for (int x = 0; x < width; x += 8) { \
sum += satd_8x8_subblock_ ## suffix(&row1[x], stride1, &row2[x], stride2); \ sum += satd_8x8_subblock_ ## suffix(&row1[x], stride1, \
&row2[x], stride2); \
} \ } \
} \ } \
return sum >> (KVZ_BIT_DEPTH - 8); \ return sum >> (KVZ_BIT_DEPTH - 8); \