From d6af146a2e18c68bfcf8b4429d0cc663806c2690 Mon Sep 17 00:00:00 2001
From: Ari Lemmetti <ari.lemmetti@gmail.com>
Date: Wed, 17 Sep 2014 16:28:12 +0300
Subject: [PATCH] Added part of the functions 16x16 DCT needs

---
 src/strategies/avx2/dct-avx2.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/src/strategies/avx2/dct-avx2.c b/src/strategies/avx2/dct-avx2.c
index ff5a207a..f80ba01b 100644
--- a/src/strategies/avx2/dct-avx2.c
+++ b/src/strategies/avx2/dct-avx2.c
@@ -109,6 +109,9 @@ static void transpose_8x8_16bit(const int16_t *src, int16_t *dst)
   _mm_storeu_si128((__m128i*)dst + 7, _mm256_extracti128_si256(tmp[3], 1));
 }
 
+static void transpose_16x16_16bit(const int16_t *src, int16_t *dst) // Placeholder; presumably meant to transpose a 16x16 block of int16_t from src into dst (by analogy with transpose_8x8_16bit) - TODO confirm.
+{ // TODO: not implemented yet - the body is empty, so src is never read and dst is never written.
+}
 static void mul_matrix_4x4_avx2(const int16_t *first, const int16_t *second, int16_t *dst, int32_t shift)
 {
   __m256i b[2], a, result,  even[2], odd[2];
@@ -202,6 +205,11 @@ static void mul_matrix_8x8_avx2(const int16_t *first, const int16_t *second, int
   }
 }
 
+static void mul_matrix_16x16_avx2(const int16_t *first, const int16_t *second, int16_t *dst, const int32_t shift) // Placeholder; presumably the 16x16 counterpart of mul_matrix_8x8_avx2 - TODO confirm.
+{
+  // TODO: not implemented yet - the body is empty, so dst is never written and first, second and shift are unused.
+}
+
 static void matrix_transform_2d_4x4_avx2(const int16_t *src, int16_t *dst, const int16_t *transform, const int16_t shift0, const int16_t shift1)
 {
   int16_t tmp[4 * 4];
@@ -242,6 +250,15 @@ static void matrix_itransform_2d_8x8_avx2(const int16_t *src, int16_t *dst, cons
   mul_matrix_8x8_avx2(tmp, transform, dst, shift1);
 }
 
+static void matrix_transform_2d_16x16_avx2(const int16_t *src, int16_t *dst, const int16_t *transform, const int16_t shift0, const int16_t shift1) // Two-pass 16x16 transform of src into dst using the matrix `transform`; shift0 goes to the first pass, shift1 to the second.
+{
+  int16_t tmp[16 * 16];        // Output buffer of the first multiplication pass, input to the second.
+  int16_t transposed[16 * 16]; // Output buffer for the transposed copy of `transform`.
+  // NOTE(review): transpose_16x16_16bit and mul_matrix_16x16_avx2 are still empty stubs in this patch, so tmp, transposed and dst are not filled in yet.
+  transpose_16x16_16bit(transform, transposed);
+  mul_matrix_16x16_avx2(src, transposed, tmp, shift0);   // First pass: src and the transposed matrix -> tmp.
+  mul_matrix_16x16_avx2(transform, tmp, dst, shift1);    // Second pass: transform and tmp -> dst.
+}
 
 static void partial_butterfly_16_avx2(short *src, short *dst,
   int32_t shift)
@@ -512,6 +529,13 @@ static void matrix_idct_8x8_avx2(int8_t bitdepth, int16_t *dst, int16_t *src)
   matrix_itransform_2d_8x8_avx2(src, dst, (const int16_t*)g_t8, shift_1st, shift_2nd);
 }
 
+static void matrix_dct_16x16_avx2(int8_t bitdepth, int16_t *src, int16_t *dst) // 16x16 DCT entry point: derives the two pass shifts and delegates to matrix_transform_2d_16x16_avx2 with g_t16.
+{
+  int32_t shift_1st = g_convert_to_bit[16] + 1 + (bitdepth - 8); // First-pass shift; increases by one for every bit of depth above 8.
+  int32_t shift_2nd = g_convert_to_bit[16] + 8;                  // Second-pass shift; does not depend on bitdepth.
+  matrix_transform_2d_16x16_avx2(src, dst, (const int16_t*)g_t16, shift_1st, shift_2nd); // NOTE(review): the shifts are int32_t here but the callee's parameters are int16_t, so they are implicitly narrowed.
+}
+
 #endif //COMPILE_INTEL_AVX2
 
 int strategy_register_dct_avx2(void* opaque)