Transform functions in dct-avx2.c are now generated with macros.

2024-11-24 02:24:07 +00:00 · 2014-10-02 13:11:46 +03:00 · 2014-10-02 13:11:46 +03:00 · 61e1510480
parent 9407610555
commit 61e1510480
3 changed files with 119 additions and 232 deletions
--- a/src/strategies/avx2/dct-avx2.c
+++ b/src/strategies/avx2/dct-avx2.c
@ -30,17 +30,17 @@
 #if COMPILE_INTEL_AVX2
 #include <immintrin.h>

-extern const int16_t g_dst[4][4];
-extern const int16_t g_t4[4][4];
-extern const int16_t g_t8[8][8];
-extern const int16_t g_t16[16][16];
-extern const int16_t g_t32[32][32];
+extern const int16_t g_dst_4[4][4];
+extern const int16_t g_dct_4[4][4];
+extern const int16_t g_dct_8[8][8];
+extern const int16_t g_dct_16[16][16];
+extern const int16_t g_dct_32[32][32];

-extern const int16_t g_dst_t[4][4];
-extern const int16_t g_t4_t[4][4];
-extern const int16_t g_t8_t[8][8];
-extern const int16_t g_t16_t[16][16];
-extern const int16_t g_t32_t[32][32];
+extern const int16_t g_dst_4_t[4][4];
+extern const int16_t g_dct_4_t[4][4];
+extern const int16_t g_dct_8_t[8][8];
+extern const int16_t g_dct_16_t[16][16];
+extern const int16_t g_dct_32_t[32][32];

 /**
 * \brief AVX2 transform functions
@ -481,155 +481,42 @@ static void mul_clip_matrix_32x32_avx2(const int16_t *first, const int16_t *seco
  }
 }

-static void matrix_dst_2d_4x4_avx2(const int16_t *src, int16_t *dst, const int16_t *transform, const int16_t shift0, const int16_t shift1)
-{
-  int16_t tmp[4 * 4];
+#define TRANSFORM(type, n) /* Defines matrix_<type>_<n>x<n>_avx2(): forward 2D transform of one n x n block. */ \
+/* bitdepth adjusts the first-pass shift; src is the n*n input block, dst receives the n*n result. */ \
+static void matrix_ ## type ## _ ## n ## x ## n ## _avx2(int8_t bitdepth, const int16_t *src, int16_t *dst)\
+{\
+  int32_t shift_1st = g_convert_to_bit[n] + 1 + (bitdepth - 8); \
+  int32_t shift_2nd = g_convert_to_bit[n] + 8; \
+  int16_t tmp[n * n];\
+  /* Pass 1 combines src with the transposed table into tmp; pass 2 combines the table with tmp into dst. */ \
+  mul_clip_matrix_ ## n ## x ## n ## _avx2(src, (const int16_t *)g_ ## type ## _ ## n ## _t, tmp, shift_1st);\
+  mul_clip_matrix_ ## n ## x ## n ## _avx2((const int16_t *)g_ ## type ## _ ## n, tmp, dst, shift_2nd);\
+}

-  mul_clip_matrix_4x4_avx2(src, (int16_t*)g_dst_t, tmp, shift0);
-  mul_clip_matrix_4x4_avx2((int16_t*)g_dst, tmp, dst, shift1);
-}
+#define ITRANSFORM(type, n) /* Defines matrix_i<type>_<n>x<n>_avx2(): inverse 2D transform of one n x n block. */ \
+/* Reads the n*n block at src and writes the n*n result to dst; the (dst, src) argument order is kept from the functions this macro replaces. */ \
+static void matrix_i ## type ## _ ## n ## x ## n ## _avx2(int8_t bitdepth, int16_t *dst, const int16_t *src)\
+{\
+  int32_t shift_1st = 7; \
+  int32_t shift_2nd = 12 - (bitdepth - 8); \
+  int16_t tmp[n * n];\
+  /* Pass 1 combines the transposed table with src into tmp; pass 2 combines tmp with the table into dst. */ \
+  mul_clip_matrix_ ## n ## x ## n ## _avx2((const int16_t *)g_ ## type ## _ ## n ## _t, src, tmp, shift_1st);\
+  mul_clip_matrix_ ## n ## x ## n ## _avx2(tmp, (const int16_t *)g_ ## type ## _ ## n, dst, shift_2nd);\
+}

-static void matrix_idst_2d_4x4_avx2(const int16_t *src, int16_t *dst, const int16_t *transform, const int16_t shift0, const int16_t shift1)
-{
-  int16_t tmp[4 * 4];
+TRANSFORM(dst, 4);
+TRANSFORM(dct, 4);
+TRANSFORM(dct, 8);
+TRANSFORM(dct, 16);
+TRANSFORM(dct, 32);

-  mul_clip_matrix_4x4_avx2((int16_t*)g_dst_t, src, tmp, shift0);
-  mul_clip_matrix_4x4_avx2(tmp, (int16_t*)g_dst, dst, shift1);
-}
+ITRANSFORM(dst, 4);
+ITRANSFORM(dct, 4);
+ITRANSFORM(dct, 8);
+ITRANSFORM(dct, 16);
+ITRANSFORM(dct, 32);

-static void matrix_transform_2d_4x4_avx2(const int16_t *src, int16_t *dst, const int16_t *transform, const int16_t shift0, const int16_t shift1)
-{
-  int16_t tmp[4 * 4];
-
-  mul_clip_matrix_4x4_avx2(src, (int16_t*)g_t4_t, tmp, shift0);
-  mul_clip_matrix_4x4_avx2((int16_t*)g_t4, tmp, dst, shift1);
-}
-
-static void matrix_itransform_2d_4x4_avx2(const int16_t *src, int16_t *dst, const int16_t *transform, const int16_t shift0, const int16_t shift1)
-{
-  int16_t tmp[4*4];
-
-  mul_clip_matrix_4x4_avx2((int16_t*)g_t4_t, src, tmp, shift0);
-  mul_clip_matrix_4x4_avx2(tmp, (int16_t*)g_t4, dst, shift1);
-}
-
-static void matrix_transform_2d_8x8_avx2(const int16_t *src, int16_t *dst, const int16_t *transform, const int16_t shift0, const int16_t shift1)
-{
-  int16_t tmp[8 * 8];
-
-  mul_clip_matrix_8x8_avx2(src, (int16_t*)g_t8_t, tmp, shift0);
-  mul_clip_matrix_8x8_avx2((int16_t*)g_t8, tmp, dst, shift1);
-}
-
-static void matrix_itransform_2d_8x8_avx2(const int16_t *src, int16_t *dst, const int16_t *transform, const int16_t shift0, const int16_t shift1)
-{
-  int16_t tmp[8 * 8];
-
-  mul_clip_matrix_8x8_avx2((int16_t*)g_t8_t, src, tmp, shift0);
-  mul_clip_matrix_8x8_avx2(tmp, (int16_t*)g_t8, dst, shift1);
-}
-
-static void matrix_transform_2d_16x16_avx2(const int16_t *src, int16_t *dst, const int16_t *transform, const int16_t shift0, const int16_t shift1)
-{
-  int16_t tmp[16 * 16];
-
-  mul_clip_matrix_16x16_avx2(src, (int16_t*)g_t16_t, tmp, shift0);
-  mul_clip_matrix_16x16_avx2((int16_t*)g_t16, tmp, dst, shift1);
-}
-
-static void matrix_itransform_2d_16x16_avx2(const int16_t *src, int16_t *dst, const int16_t *transform, const int16_t shift0, const int16_t shift1)
-{
-  int16_t tmp[16 * 16];
-
-  mul_clip_matrix_16x16_avx2((int16_t*)g_t16_t, src, tmp, shift0);
-  mul_clip_matrix_16x16_avx2(tmp, (int16_t*)g_t16, dst, shift1);
-}
-
-static void matrix_transform_2d_32x32_avx2(const int16_t *src, int16_t *dst, const int16_t *transform, const int16_t shift0, const int16_t shift1)
-{
-  int16_t tmp[32 * 32];
-
-  mul_clip_matrix_32x32_avx2(src, (int16_t*)g_t32_t, tmp, shift0);
-  mul_clip_matrix_32x32_avx2((int16_t*)g_t32, tmp, dst, shift1);
-}
-
-static void matrix_itransform_2d_32x32_avx2(const int16_t *src, int16_t *dst, const int16_t *transform, const int16_t shift0, const int16_t shift1)
-{
-  int16_t tmp[32 * 32];
-
-  mul_clip_matrix_32x32_avx2((int16_t*) g_t32_t, src, tmp, shift0);
-  mul_clip_matrix_32x32_avx2(tmp, (int16_t*)g_t32, dst, shift1);
-}
-
-static void matrix_dst_4x4_avx2(int8_t bitdepth, int16_t *src, int16_t *dst)
-{
-  int32_t shift_1st = g_convert_to_bit[4] + 1 + (bitdepth - 8);
-  int32_t shift_2nd = g_convert_to_bit[4] + 8;
-  matrix_dst_2d_4x4_avx2(src, dst, (const int16_t*)g_dst, shift_1st, shift_2nd);
-}
-
-static void matrix_idst_4x4_avx2(int8_t bitdepth, int16_t *dst, int16_t *src)
-{
-  int32_t shift_1st = 7;
-  int32_t shift_2nd = 12 - (bitdepth - 8);
-  matrix_idst_2d_4x4_avx2(src, dst, (const int16_t*)g_dst, shift_1st, shift_2nd);
-}
-
-static void matrix_dct_4x4_avx2(int8_t bitdepth, int16_t *src, int16_t *dst)
-{
-  int32_t shift_1st = g_convert_to_bit[4] + 1 + (bitdepth - 8);
-  int32_t shift_2nd = g_convert_to_bit[4] + 8;
-  matrix_transform_2d_4x4_avx2(src, dst, (const int16_t*)g_t4, shift_1st, shift_2nd);
-}
-
-static void matrix_idct_4x4_avx2(int8_t bitdepth, int16_t *dst, int16_t *src)
-{
-  int32_t shift_1st = 7;
-  int32_t shift_2nd = 12 - (bitdepth - 8);
-  matrix_itransform_2d_4x4_avx2(src, dst, (const int16_t*)g_t4, shift_1st, shift_2nd);
-}
-
-static void matrix_dct_8x8_avx2(int8_t bitdepth, int16_t *src, int16_t *dst)
-{
-  int32_t shift_1st = g_convert_to_bit[8] + 1 + (bitdepth - 8);
-  int32_t shift_2nd = g_convert_to_bit[8] + 8;
-  matrix_transform_2d_8x8_avx2(src, dst, (const int16_t*)g_t8, shift_1st, shift_2nd);
-}
-
-static void matrix_idct_8x8_avx2(int8_t bitdepth, int16_t *dst, int16_t *src)
-{
-  int32_t shift_1st = 7;
-  int32_t shift_2nd = 12 - (bitdepth - 8);
-  matrix_itransform_2d_8x8_avx2(src, dst, (const int16_t*)g_t8_t, shift_1st, shift_2nd);
-}
-
-static void matrix_dct_16x16_avx2(int8_t bitdepth, int16_t *src, int16_t *dst)
-{
-  int32_t shift_1st = g_convert_to_bit[16] + 1 + (bitdepth - 8);
-  int32_t shift_2nd = g_convert_to_bit[16] + 8;
-  matrix_transform_2d_16x16_avx2(src, dst, (const int16_t*)g_t16, shift_1st, shift_2nd);
-}
-
-static void matrix_idct_16x16_avx2(int8_t bitdepth, int16_t *dst, int16_t *src)
-{
-  int32_t shift_1st = 7;
-  int32_t shift_2nd = 12 - (bitdepth - 8);
-  matrix_itransform_2d_16x16_avx2(src, dst, (const int16_t*)g_t16, shift_1st, shift_2nd);
-}
-
-static void matrix_dct_32x32_avx2(int8_t bitdepth, int16_t *src, int16_t *dst)
-{
-  int32_t shift_1st = g_convert_to_bit[32] + 1 + (bitdepth - 8);
-  int32_t shift_2nd = g_convert_to_bit[32] + 8;
-  matrix_transform_2d_32x32_avx2(src, dst, (const int16_t*)g_t32, shift_1st, shift_2nd);
-}
-
-static void matrix_idct_32x32_avx2(int8_t bitdepth, int16_t *dst, int16_t *src)
-{
-  int32_t shift_1st = 7;
-  int32_t shift_2nd = 12 - (bitdepth - 8);
-  matrix_itransform_2d_32x32_avx2(src, dst, (const int16_t*)g_t32, shift_1st, shift_2nd);
-}
 #endif //COMPILE_INTEL_AVX2

 int strategy_register_dct_avx2(void* opaque)
--- a/src/strategies/generic/dct-generic.c
+++ b/src/strategies/generic/dct-generic.c
@ -26,7 +26,7 @@
 #include "strategyselector.h"
 #include "encoder.h"

-const int16_t g_dst[4][4] =
+const int16_t g_dst_4[4][4] =
 {
  { 29, 55, 74, 84 },
  { 74, 74, 0, -74 },
@ -34,7 +34,7 @@ const int16_t g_dst[4][4] =
  { 55, -84, 74, -29 }
 };

-const int16_t g_t4[4][4] =
+const int16_t g_dct_4[4][4] =
 {
  { 64, 64, 64, 64 },
  { 83, 36, -36, -83 },
@ -42,7 +42,7 @@ const int16_t g_t4[4][4] =
  { 36, -83, 83, -36 }
 };

-const int16_t g_t8[8][8] =
+const int16_t g_dct_8[8][8] =
 {
  { 64, 64, 64, 64, 64, 64, 64, 64 },
  { 89, 75, 50, 18, -18, -50, -75, -89 },
@ -54,7 +54,7 @@ const int16_t g_t8[8][8] =
  { 18, -50, 75, -89, 89, -75, 50, -18 }
 };

-const int16_t g_t16[16][16] =
+const int16_t g_dct_16[16][16] =
 {
  { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
  { 90, 87, 80, 70, 57, 43, 25, 9, -9, -25, -43, -57, -70, -80, -87, -90 },
@ -74,7 +74,7 @@ const int16_t g_t16[16][16] =
  { 9, -25, 43, -57, 70, -80, 87, -90, 90, -87, 80, -70, 57, -43, 25, -9 }
 };

-const int16_t g_t32[32][32] =
+const int16_t g_dct_32[32][32] =
 {
  { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
  { 90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4, -4, -13, -22, -31, -38, -46, -54, -61, -67, -73, -78, -82, -85, -88, -90, -90 },
@ -110,7 +110,7 @@ const int16_t g_t32[32][32] =
  { 4, -13, 22, -31, 38, -46, 54, -61, 67, -73, 78, -82, 85, -88, 90, -90, 90, -90, 88, -85, 82, -78, 73, -67, 61, -54, 46, -38, 31, -22, 13, -4 }
 };

-const int16_t g_dst_t[4][4] =
+const int16_t g_dst_4_t[4][4] =
 {
  { 29, 74, 84, 55 },
  { 55, 74, -29, -84 },
@ -118,7 +118,7 @@ const int16_t g_dst_t[4][4] =
  { 84, -74, 55, -29 }
 };

-const int16_t g_t4_t[4][4] =
+const int16_t g_dct_4_t[4][4] =
 {
  { 64, 83, 64, 36, },
  { 64, 36, -64, -83, },
@ -126,7 +126,7 @@ const int16_t g_t4_t[4][4] =
  { 64, -83, 64, -36 }
 };

-const int16_t g_t8_t[8][8] =
+const int16_t g_dct_8_t[8][8] =
 {
  { 64, 89, 83, 75, 64, 50, 36, 18, },
  { 64, 75, 36, -18, -64, -89, -83, -50, },
@ -138,7 +138,7 @@ const int16_t g_t8_t[8][8] =
  { 64, -89, 83, -75, 64, -50, 36, -18 }
 };

-const int16_t g_t16_t[16][16] =
+const int16_t g_dct_16_t[16][16] =
 {
  { 64, 90, 89, 87, 83, 80, 75, 70, 64, 57, 50, 43, 36, 25, 18, 9, },
  { 64, 87, 75, 57, 36, 9, -18, -43, -64, -80, -89, -90, -83, -70, -50, -25, },
@ -158,7 +158,7 @@ const int16_t g_t16_t[16][16] =
  { 64, -90, 89, -87, 83, -80, 75, -70, 64, -57, 50, -43, 36, -25, 18, -9 }
 };

-const int16_t g_t32_t[32][32] =
+const int16_t g_dct_32_t[32][32] =
 {
  { 64, 90, 90, 90, 89, 88, 87, 85, 83, 82, 80, 78, 75, 73, 70, 67, 64, 61, 57, 54, 50, 46, 43, 38, 36, 31, 25, 22, 18, 13, 9, 4, },
  { 64, 90, 87, 82, 75, 67, 57, 46, 36, 22, 9, -4, -18, -31, -43, -54, -64, -73, -80, -85, -89, -90, -90, -88, -83, -78, -70, -61, -50, -38, -25, -13, },
@ -258,10 +258,10 @@ static void partial_butterfly_4_generic(short *src, short *dst,
    e[1] = src[1] + src[2];
    o[1] = src[1] - src[2];

-    dst[0] = (short)((g_t4[0][0] * e[0] + g_t4[0][1] * e[1] + add) >> shift);
-    dst[2 * line] = (short)((g_t4[2][0] * e[0] + g_t4[2][1] * e[1] + add) >> shift);
-    dst[line] = (short)((g_t4[1][0] * o[0] + g_t4[1][1] * o[1] + add) >> shift);
-    dst[3 * line] = (short)((g_t4[3][0] * o[0] + g_t4[3][1] * o[1] + add) >> shift);
+    dst[0] = (short)((g_dct_4[0][0] * e[0] + g_dct_4[0][1] * e[1] + add) >> shift);
+    dst[2 * line] = (short)((g_dct_4[2][0] * e[0] + g_dct_4[2][1] * e[1] + add) >> shift);
+    dst[line] = (short)((g_dct_4[1][0] * o[0] + g_dct_4[1][1] * o[1] + add) >> shift);
+    dst[3 * line] = (short)((g_dct_4[3][0] * o[0] + g_dct_4[3][1] * o[1] + add) >> shift);

    src += 4;
    dst++;
@ -279,10 +279,10 @@ static void partial_butterfly_inverse_4_generic(short *src, short *dst,

  for (j = 0; j < line; j++) {
    // Utilizing symmetry properties to the maximum to minimize the number of multiplications
-    o[0] = g_t4[1][0] * src[line] + g_t4[3][0] * src[3 * line];
-    o[1] = g_t4[1][1] * src[line] + g_t4[3][1] * src[3 * line];
-    e[0] = g_t4[0][0] * src[0] + g_t4[2][0] * src[2 * line];
-    e[1] = g_t4[0][1] * src[0] + g_t4[2][1] * src[2 * line];
+    o[0] = g_dct_4[1][0] * src[line] + g_dct_4[3][0] * src[3 * line];
+    o[1] = g_dct_4[1][1] * src[line] + g_dct_4[3][1] * src[3 * line];
+    e[0] = g_dct_4[0][0] * src[0] + g_dct_4[2][0] * src[2 * line];
+    e[1] = g_dct_4[0][1] * src[0] + g_dct_4[2][1] * src[2 * line];

    // Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector
    dst[0] = (short)CLIP(-32768, 32767, (e[0] + o[0] + add) >> shift);
@ -317,15 +317,15 @@ static void partial_butterfly_8_generic(short *src, short *dst,
    ee[1] = e[1] + e[2];
    eo[1] = e[1] - e[2];

-    dst[0] = (short)((g_t8[0][0] * ee[0] + g_t8[0][1] * ee[1] + add) >> shift);
-    dst[4 * line] = (short)((g_t8[4][0] * ee[0] + g_t8[4][1] * ee[1] + add) >> shift);
-    dst[2 * line] = (short)((g_t8[2][0] * eo[0] + g_t8[2][1] * eo[1] + add) >> shift);
-    dst[6 * line] = (short)((g_t8[6][0] * eo[0] + g_t8[6][1] * eo[1] + add) >> shift);
+    dst[0] = (short)((g_dct_8[0][0] * ee[0] + g_dct_8[0][1] * ee[1] + add) >> shift);
+    dst[4 * line] = (short)((g_dct_8[4][0] * ee[0] + g_dct_8[4][1] * ee[1] + add) >> shift);
+    dst[2 * line] = (short)((g_dct_8[2][0] * eo[0] + g_dct_8[2][1] * eo[1] + add) >> shift);
+    dst[6 * line] = (short)((g_dct_8[6][0] * eo[0] + g_dct_8[6][1] * eo[1] + add) >> shift);

-    dst[line] = (short)((g_t8[1][0] * o[0] + g_t8[1][1] * o[1] + g_t8[1][2] * o[2] + g_t8[1][3] * o[3] + add) >> shift);
-    dst[3 * line] = (short)((g_t8[3][0] * o[0] + g_t8[3][1] * o[1] + g_t8[3][2] * o[2] + g_t8[3][3] * o[3] + add) >> shift);
-    dst[5 * line] = (short)((g_t8[5][0] * o[0] + g_t8[5][1] * o[1] + g_t8[5][2] * o[2] + g_t8[5][3] * o[3] + add) >> shift);
-    dst[7 * line] = (short)((g_t8[7][0] * o[0] + g_t8[7][1] * o[1] + g_t8[7][2] * o[2] + g_t8[7][3] * o[3] + add) >> shift);
+    dst[line] = (short)((g_dct_8[1][0] * o[0] + g_dct_8[1][1] * o[1] + g_dct_8[1][2] * o[2] + g_dct_8[1][3] * o[3] + add) >> shift);
+    dst[3 * line] = (short)((g_dct_8[3][0] * o[0] + g_dct_8[3][1] * o[1] + g_dct_8[3][2] * o[2] + g_dct_8[3][3] * o[3] + add) >> shift);
+    dst[5 * line] = (short)((g_dct_8[5][0] * o[0] + g_dct_8[5][1] * o[1] + g_dct_8[5][2] * o[2] + g_dct_8[5][3] * o[3] + add) >> shift);
+    dst[7 * line] = (short)((g_dct_8[7][0] * o[0] + g_dct_8[7][1] * o[1] + g_dct_8[7][2] * o[2] + g_dct_8[7][3] * o[3] + add) >> shift);

    src += 8;
    dst++;
@ -345,13 +345,13 @@ static void partial_butterfly_inverse_8_generic(int16_t *src, int16_t *dst,
  for (j = 0; j < line; j++) {
    // Utilizing symmetry properties to the maximum to minimize the number of multiplications
    for (k = 0; k < 4; k++) {
-      o[k] = g_t8[1][k] * src[line] + g_t8[3][k] * src[3 * line] + g_t8[5][k] * src[5 * line] + g_t8[7][k] * src[7 * line];
+      o[k] = g_dct_8[1][k] * src[line] + g_dct_8[3][k] * src[3 * line] + g_dct_8[5][k] * src[5 * line] + g_dct_8[7][k] * src[7 * line];
    }

-    eo[0] = g_t8[2][0] * src[2 * line] + g_t8[6][0] * src[6 * line];
-    eo[1] = g_t8[2][1] * src[2 * line] + g_t8[6][1] * src[6 * line];
-    ee[0] = g_t8[0][0] * src[0] + g_t8[4][0] * src[4 * line];
-    ee[1] = g_t8[0][1] * src[0] + g_t8[4][1] * src[4 * line];
+    eo[0] = g_dct_8[2][0] * src[2 * line] + g_dct_8[6][0] * src[6 * line];
+    eo[1] = g_dct_8[2][1] * src[2 * line] + g_dct_8[6][1] * src[6 * line];
+    ee[0] = g_dct_8[0][0] * src[0] + g_dct_8[4][0] * src[4 * line];
+    ee[1] = g_dct_8[0][1] * src[0] + g_dct_8[4][1] * src[4 * line];

    // Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector
    e[0] = ee[0] + eo[0];
@ -395,18 +395,18 @@ static void partial_butterfly_16_generic(short *src, short *dst,
    eee[1] = ee[1] + ee[2];
    eeo[1] = ee[1] - ee[2];

-    dst[0] = (short)((g_t16[0][0] * eee[0] + g_t16[0][1] * eee[1] + add) >> shift);
-    dst[8 * line] = (short)((g_t16[8][0] * eee[0] + g_t16[8][1] * eee[1] + add) >> shift);
-    dst[4 * line] = (short)((g_t16[4][0] * eeo[0] + g_t16[4][1] * eeo[1] + add) >> shift);
-    dst[12 * line] = (short)((g_t16[12][0] * eeo[0] + g_t16[12][1] * eeo[1] + add) >> shift);
+    dst[0] = (short)((g_dct_16[0][0] * eee[0] + g_dct_16[0][1] * eee[1] + add) >> shift);
+    dst[8 * line] = (short)((g_dct_16[8][0] * eee[0] + g_dct_16[8][1] * eee[1] + add) >> shift);
+    dst[4 * line] = (short)((g_dct_16[4][0] * eeo[0] + g_dct_16[4][1] * eeo[1] + add) >> shift);
+    dst[12 * line] = (short)((g_dct_16[12][0] * eeo[0] + g_dct_16[12][1] * eeo[1] + add) >> shift);

    for (k = 2; k < 16; k += 4) {
-      dst[k*line] = (short)((g_t16[k][0] * eo[0] + g_t16[k][1] * eo[1] + g_t16[k][2] * eo[2] + g_t16[k][3] * eo[3] + add) >> shift);
+      dst[k*line] = (short)((g_dct_16[k][0] * eo[0] + g_dct_16[k][1] * eo[1] + g_dct_16[k][2] * eo[2] + g_dct_16[k][3] * eo[3] + add) >> shift);
    }

    for (k = 1; k < 16; k += 2) {
-      dst[k*line] = (short)((g_t16[k][0] * o[0] + g_t16[k][1] * o[1] + g_t16[k][2] * o[2] + g_t16[k][3] * o[3] +
-        g_t16[k][4] * o[4] + g_t16[k][5] * o[5] + g_t16[k][6] * o[6] + g_t16[k][7] * o[7] + add) >> shift);
+      dst[k*line] = (short)((g_dct_16[k][0] * o[0] + g_dct_16[k][1] * o[1] + g_dct_16[k][2] * o[2] + g_dct_16[k][3] * o[3] +
+        g_dct_16[k][4] * o[4] + g_dct_16[k][5] * o[5] + g_dct_16[k][6] * o[6] + g_dct_16[k][7] * o[7] + add) >> shift);
    }

    src += 16;
@ -428,16 +428,16 @@ static void partial_butterfly_inverse_16_generic(int16_t *src, int16_t *dst,
  for (j = 0; j < line; j++) {
    // Utilizing symmetry properties to the maximum to minimize the number of multiplications
    for (k = 0; k < 8; k++)  {
-      o[k] = g_t16[1][k] * src[line] + g_t16[3][k] * src[3 * line] + g_t16[5][k] * src[5 * line] + g_t16[7][k] * src[7 * line] +
-        g_t16[9][k] * src[9 * line] + g_t16[11][k] * src[11 * line] + g_t16[13][k] * src[13 * line] + g_t16[15][k] * src[15 * line];
+      o[k] = g_dct_16[1][k] * src[line] + g_dct_16[3][k] * src[3 * line] + g_dct_16[5][k] * src[5 * line] + g_dct_16[7][k] * src[7 * line] +
+        g_dct_16[9][k] * src[9 * line] + g_dct_16[11][k] * src[11 * line] + g_dct_16[13][k] * src[13 * line] + g_dct_16[15][k] * src[15 * line];
    }
    for (k = 0; k < 4; k++) {
-      eo[k] = g_t16[2][k] * src[2 * line] + g_t16[6][k] * src[6 * line] + g_t16[10][k] * src[10 * line] + g_t16[14][k] * src[14 * line];
+      eo[k] = g_dct_16[2][k] * src[2 * line] + g_dct_16[6][k] * src[6 * line] + g_dct_16[10][k] * src[10 * line] + g_dct_16[14][k] * src[14 * line];
    }
-    eeo[0] = g_t16[4][0] * src[4 * line] + g_t16[12][0] * src[12 * line];
-    eee[0] = g_t16[0][0] * src[0] + g_t16[8][0] * src[8 * line];
-    eeo[1] = g_t16[4][1] * src[4 * line] + g_t16[12][1] * src[12 * line];
-    eee[1] = g_t16[0][1] * src[0] + g_t16[8][1] * src[8 * line];
+    eeo[0] = g_dct_16[4][0] * src[4 * line] + g_dct_16[12][0] * src[12 * line];
+    eee[0] = g_dct_16[0][0] * src[0] + g_dct_16[8][0] * src[8 * line];
+    eeo[1] = g_dct_16[4][1] * src[4 * line] + g_dct_16[12][1] * src[12 * line];
+    eee[1] = g_dct_16[0][1] * src[0] + g_dct_16[8][1] * src[8 * line];

    // Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector
    for (k = 0; k < 2; k++) {
@ -491,22 +491,22 @@ static void partial_butterfly_32_generic(short *src, short *dst,
    eeee[1] = eee[1] + eee[2];
    eeeo[1] = eee[1] - eee[2];

-    dst[0] = (short)((g_t32[0][0] * eeee[0] + g_t32[0][1] * eeee[1] + add) >> shift);
-    dst[16 * line] = (short)((g_t32[16][0] * eeee[0] + g_t32[16][1] * eeee[1] + add) >> shift);
-    dst[8 * line] = (short)((g_t32[8][0] * eeeo[0] + g_t32[8][1] * eeeo[1] + add) >> shift);
-    dst[24 * line] = (short)((g_t32[24][0] * eeeo[0] + g_t32[24][1] * eeeo[1] + add) >> shift);
+    dst[0] = (short)((g_dct_32[0][0] * eeee[0] + g_dct_32[0][1] * eeee[1] + add) >> shift);
+    dst[16 * line] = (short)((g_dct_32[16][0] * eeee[0] + g_dct_32[16][1] * eeee[1] + add) >> shift);
+    dst[8 * line] = (short)((g_dct_32[8][0] * eeeo[0] + g_dct_32[8][1] * eeeo[1] + add) >> shift);
+    dst[24 * line] = (short)((g_dct_32[24][0] * eeeo[0] + g_dct_32[24][1] * eeeo[1] + add) >> shift);
    for (k = 4; k < 32; k += 8) {
-      dst[k*line] = (short)((g_t32[k][0] * eeo[0] + g_t32[k][1] * eeo[1] + g_t32[k][2] * eeo[2] + g_t32[k][3] * eeo[3] + add) >> shift);
+      dst[k*line] = (short)((g_dct_32[k][0] * eeo[0] + g_dct_32[k][1] * eeo[1] + g_dct_32[k][2] * eeo[2] + g_dct_32[k][3] * eeo[3] + add) >> shift);
    }
    for (k = 2; k < 32; k += 4) {
-      dst[k*line] = (short)((g_t32[k][0] * eo[0] + g_t32[k][1] * eo[1] + g_t32[k][2] * eo[2] + g_t32[k][3] * eo[3] +
-        g_t32[k][4] * eo[4] + g_t32[k][5] * eo[5] + g_t32[k][6] * eo[6] + g_t32[k][7] * eo[7] + add) >> shift);
+      dst[k*line] = (short)((g_dct_32[k][0] * eo[0] + g_dct_32[k][1] * eo[1] + g_dct_32[k][2] * eo[2] + g_dct_32[k][3] * eo[3] +
+        g_dct_32[k][4] * eo[4] + g_dct_32[k][5] * eo[5] + g_dct_32[k][6] * eo[6] + g_dct_32[k][7] * eo[7] + add) >> shift);
    }
    for (k = 1; k < 32; k += 2) {
-      dst[k*line] = (short)((g_t32[k][0] * o[0] + g_t32[k][1] * o[1] + g_t32[k][2] * o[2] + g_t32[k][3] * o[3] +
-        g_t32[k][4] * o[4] + g_t32[k][5] * o[5] + g_t32[k][6] * o[6] + g_t32[k][7] * o[7] +
-        g_t32[k][8] * o[8] + g_t32[k][9] * o[9] + g_t32[k][10] * o[10] + g_t32[k][11] * o[11] +
-        g_t32[k][12] * o[12] + g_t32[k][13] * o[13] + g_t32[k][14] * o[14] + g_t32[k][15] * o[15] + add) >> shift);
+      dst[k*line] = (short)((g_dct_32[k][0] * o[0] + g_dct_32[k][1] * o[1] + g_dct_32[k][2] * o[2] + g_dct_32[k][3] * o[3] +
+        g_dct_32[k][4] * o[4] + g_dct_32[k][5] * o[5] + g_dct_32[k][6] * o[6] + g_dct_32[k][7] * o[7] +
+        g_dct_32[k][8] * o[8] + g_dct_32[k][9] * o[9] + g_dct_32[k][10] * o[10] + g_dct_32[k][11] * o[11] +
+        g_dct_32[k][12] * o[12] + g_dct_32[k][13] * o[13] + g_dct_32[k][14] * o[14] + g_dct_32[k][15] * o[15] + add) >> shift);
    }
    src += 32;
    dst++;
@ -528,22 +528,22 @@ static void partial_butterfly_inverse_32_generic(int16_t *src, int16_t *dst,
  for (j = 0; j<line; j++) {
    // Utilizing symmetry properties to the maximum to minimize the number of multiplications
    for (k = 0; k < 16; k++) {
-      o[k] = g_t32[1][k] * src[line] + g_t32[3][k] * src[3 * line] + g_t32[5][k] * src[5 * line] + g_t32[7][k] * src[7 * line] +
-        g_t32[9][k] * src[9 * line] + g_t32[11][k] * src[11 * line] + g_t32[13][k] * src[13 * line] + g_t32[15][k] * src[15 * line] +
-        g_t32[17][k] * src[17 * line] + g_t32[19][k] * src[19 * line] + g_t32[21][k] * src[21 * line] + g_t32[23][k] * src[23 * line] +
-        g_t32[25][k] * src[25 * line] + g_t32[27][k] * src[27 * line] + g_t32[29][k] * src[29 * line] + g_t32[31][k] * src[31 * line];
+      o[k] = g_dct_32[1][k] * src[line] + g_dct_32[3][k] * src[3 * line] + g_dct_32[5][k] * src[5 * line] + g_dct_32[7][k] * src[7 * line] +
+        g_dct_32[9][k] * src[9 * line] + g_dct_32[11][k] * src[11 * line] + g_dct_32[13][k] * src[13 * line] + g_dct_32[15][k] * src[15 * line] +
+        g_dct_32[17][k] * src[17 * line] + g_dct_32[19][k] * src[19 * line] + g_dct_32[21][k] * src[21 * line] + g_dct_32[23][k] * src[23 * line] +
+        g_dct_32[25][k] * src[25 * line] + g_dct_32[27][k] * src[27 * line] + g_dct_32[29][k] * src[29 * line] + g_dct_32[31][k] * src[31 * line];
    }
    for (k = 0; k < 8; k++) {
-      eo[k] = g_t32[2][k] * src[2 * line] + g_t32[6][k] * src[6 * line] + g_t32[10][k] * src[10 * line] + g_t32[14][k] * src[14 * line] +
-        g_t32[18][k] * src[18 * line] + g_t32[22][k] * src[22 * line] + g_t32[26][k] * src[26 * line] + g_t32[30][k] * src[30 * line];
+      eo[k] = g_dct_32[2][k] * src[2 * line] + g_dct_32[6][k] * src[6 * line] + g_dct_32[10][k] * src[10 * line] + g_dct_32[14][k] * src[14 * line] +
+        g_dct_32[18][k] * src[18 * line] + g_dct_32[22][k] * src[22 * line] + g_dct_32[26][k] * src[26 * line] + g_dct_32[30][k] * src[30 * line];
    }
    for (k = 0; k < 4; k++) {
-      eeo[k] = g_t32[4][k] * src[4 * line] + g_t32[12][k] * src[12 * line] + g_t32[20][k] * src[20 * line] + g_t32[28][k] * src[28 * line];
+      eeo[k] = g_dct_32[4][k] * src[4 * line] + g_dct_32[12][k] * src[12 * line] + g_dct_32[20][k] * src[20 * line] + g_dct_32[28][k] * src[28 * line];
    }
-    eeeo[0] = g_t32[8][0] * src[8 * line] + g_t32[24][0] * src[24 * line];
-    eeeo[1] = g_t32[8][1] * src[8 * line] + g_t32[24][1] * src[24 * line];
-    eeee[0] = g_t32[0][0] * src[0] + g_t32[16][0] * src[16 * line];
-    eeee[1] = g_t32[0][1] * src[0] + g_t32[16][1] * src[16 * line];
+    eeeo[0] = g_dct_32[8][0] * src[8 * line] + g_dct_32[24][0] * src[24 * line];
+    eeeo[1] = g_dct_32[8][1] * src[8 * line] + g_dct_32[24][1] * src[24 * line];
+    eeee[0] = g_dct_32[0][0] * src[0] + g_dct_32[16][0] * src[16 * line];
+    eeee[1] = g_dct_32[0][1] * src[0] + g_dct_32[16][1] * src[16 * line];

    // Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector
    eee[0] = eeee[0] + eeeo[0];
--- a/src/strategies/generic/dct-generic.h
+++ b/src/strategies/generic/dct-generic.h
@ -19,17 +19,17 @@
 * along with Kvazaar.  If not, see <http://www.gnu.org/licenses/>.
 ****************************************************************************/

-extern const int16_t g_dst[4][4];
-extern const int16_t g_t4[4][4];
-extern const int16_t g_t8[8][8];
-extern const int16_t g_t16[16][16];
-extern const int16_t g_t32[32][32];
+extern const int16_t g_dst_4[4][4];
+extern const int16_t g_dct_4[4][4];
+extern const int16_t g_dct_8[8][8];
+extern const int16_t g_dct_16[16][16];
+extern const int16_t g_dct_32[32][32];

-extern const int16_t g_dst_t[4][4];
-extern const int16_t g_t4_t[4][4];
-extern const int16_t g_t8_t[8][8];
-extern const int16_t g_t16_t[16][16];
-extern const int16_t g_t32_t[32][32];
+extern const int16_t g_dst_4_t[4][4];
+extern const int16_t g_dct_4_t[4][4];
+extern const int16_t g_dct_8_t[8][8];
+extern const int16_t g_dct_16_t[16][16];
+extern const int16_t g_dct_32_t[32][32];

 int strategy_register_dct_generic(void* opaque);