From e7bcb58846780b168527c8be95678456899a1bf8 Mon Sep 17 00:00:00 2001
From: Ari Lemmetti
Date: Thu, 25 Sep 2014 17:17:47 +0300
Subject: [PATCH] Added 32x32 IDCT

---
 src/strategies/avx2/dct-avx2.c | 89 ++++++----------------------------
 1 file changed, 14 insertions(+), 75 deletions(-)

diff --git a/src/strategies/avx2/dct-avx2.c b/src/strategies/avx2/dct-avx2.c
index 53eb5bac..c83d626e 100644
--- a/src/strategies/avx2/dct-avx2.c
+++ b/src/strategies/avx2/dct-avx2.c
@@ -552,84 +552,16 @@ static void matrix_transform_2d_32x32_avx2(const int16_t *src, int16_t *dst, con
   mul_matrix_32x32_avx2(transform, tmp, dst, shift1);
 }
 
-
-static void partial_butterfly_inverse_32_avx2(int16_t *src, int16_t *dst,
-  int32_t shift)
+static void matrix_itransform_2d_32x32_avx2(const int16_t *src, int16_t *dst, const int16_t *transform, const int16_t shift0, const int16_t shift1)
 {
-  int32_t j, k;
-  int32_t e[16], o[16];
-  int32_t ee[8], eo[8];
-  int32_t eee[4], eeo[4];
-  int32_t eeee[2], eeeo[2];
-  int32_t add = 1 << (shift - 1);
-  const int32_t line = 32;
+  int16_t tmp[32 * 32];
+  int16_t transposed[32 * 32];
 
-  for (j = 0; j> shift));
-      dst[k + 16] = (short)MAX(-32768, MIN(32767, (e[15 - k] - o[15 - k] + add) >> shift));
-    }
-    src++;
-    dst += 32;
-  }
+  transpose_32x32_16bit(transform, transposed);
+  mul_matrix_32x32_avx2(transposed, src, tmp, shift0);
+  mul_matrix_32x32_avx2(tmp, transform, dst, shift1);
 }
 
-#define DCT_NXN_AVX2(n) \
-static void dct_ ## n ## x ## n ## _avx2(int8_t bitdepth, int16_t *block, int16_t *coeff) { \
-  \
-  int16_t tmp[n*n]; \
-  int32_t shift_1st = g_convert_to_bit[n] + 1 + (bitdepth - 8); \
-  int32_t shift_2nd = g_convert_to_bit[n] + 8; \
-  \
-  partial_butterfly_ ## n ## _avx2(block, tmp, shift_1st); \
-  partial_butterfly_ ## n ## _avx2(tmp, coeff, shift_2nd); \
-}
-
-#define IDCT_NXN_AVX2(n) \
-static void idct_ ## n ## x ## n ## _avx2(int8_t bitdepth, int16_t *block, int16_t *coeff) { \
-\
-  int16_t tmp[n*n]; \
-  int32_t shift_1st = 7; \
-  int32_t shift_2nd = 12 - (bitdepth - 8); \
-\
-  partial_butterfly_inverse_ ## n ## _avx2(coeff, tmp, shift_1st); \
-  partial_butterfly_inverse_ ## n ## _avx2(tmp, block, shift_2nd); \
-}
-
-IDCT_NXN_AVX2(32);
-
 static void matrix_dst_4x4_avx2(int8_t bitdepth, int16_t *src, int16_t *dst)
 {
   int32_t shift_1st = g_convert_to_bit[4] + 1 + (bitdepth - 8);
@@ -692,6 +624,13 @@ static void matrix_dct_32x32_avx2(int8_t bitdepth, int16_t *src, int16_t *dst)
   int32_t shift_2nd = g_convert_to_bit[32] + 8;
   matrix_transform_2d_32x32_avx2(src, dst, (const int16_t*)g_t32, shift_1st, shift_2nd);
 }
+
+static void matrix_idct_32x32_avx2(int8_t bitdepth, int16_t *dst, int16_t *src)
+{
+  int32_t shift_1st = 7;
+  int32_t shift_2nd = 12 - (bitdepth - 8);
+  matrix_itransform_2d_32x32_avx2(src, dst, (const int16_t*)g_t32, shift_1st, shift_2nd);
+}
 #endif //COMPILE_INTEL_AVX2
 
 int strategy_register_dct_avx2(void* opaque)
@@ -710,7 +649,7 @@ int strategy_register_dct_avx2(void* opaque)
   success &= strategyselector_register(opaque, "idct_4x4", "avx2", 40, &matrix_idct_4x4_avx2);
   success &= strategyselector_register(opaque, "idct_8x8", "avx2", 40, &matrix_idct_8x8_avx2);
   success &= strategyselector_register(opaque, "idct_16x16", "avx2", 40, &matrix_idct_16x16_avx2);
-  success &= strategyselector_register(opaque, "idct_32x32", "avx2", 40, &idct_32x32_avx2);
+  success &= strategyselector_register(opaque, "idct_32x32", "avx2", 40, &matrix_idct_32x32_avx2);
 #endif //COMPILE_INTEL_AVX2
   return success;
 }