diff --git a/src/strategies/avx2/dct-avx2.c b/src/strategies/avx2/dct-avx2.c
index ed465b8b..dbdcc8e5 100644
--- a/src/strategies/avx2/dct-avx2.c
+++ b/src/strategies/avx2/dct-avx2.c
@@ -30,17 +30,17 @@
 #if COMPILE_INTEL_AVX2
 #include <immintrin.h>
 
-extern const int16_t g_dst[4][4];
-extern const int16_t g_t4[4][4];
-extern const int16_t g_t8[8][8];
-extern const int16_t g_t16[16][16];
-extern const int16_t g_t32[32][32];
+extern const int16_t g_dst_4[4][4];
+extern const int16_t g_dct_4[4][4];
+extern const int16_t g_dct_8[8][8];
+extern const int16_t g_dct_16[16][16];
+extern const int16_t g_dct_32[32][32];
 
-extern const int16_t g_dst_t[4][4];
-extern const int16_t g_t4_t[4][4];
-extern const int16_t g_t8_t[8][8];
-extern const int16_t g_t16_t[16][16];
-extern const int16_t g_t32_t[32][32];
+extern const int16_t g_dst_4_t[4][4];
+extern const int16_t g_dct_4_t[4][4];
+extern const int16_t g_dct_8_t[8][8];
+extern const int16_t g_dct_16_t[16][16];
+extern const int16_t g_dct_32_t[32][32];
 
 /**
  * \brief AVX2 transform functions
@@ -481,155 +481,64 @@ static void mul_clip_matrix_32x32_avx2(const int16_t *first, const int16_t *seco
   }
 }
 
-static void matrix_dst_2d_4x4_avx2(const int16_t *src, int16_t *dst, const int16_t *transform, const int16_t shift0, const int16_t shift1)
-{
-  int16_t tmp[4 * 4];
+/**
+ * \brief Define the forward transform function matrix_<type>_<n>x<n>_avx2.
+ *
+ * The generated function multiplies src by the table g_<type>_<n>_t into a
+ * temporary n*n block, then multiplies g_<type>_<n> by that block into dst.
+ * Both shifts are derived from g_convert_to_bit[n]; the first also adds
+ * (bitdepth - 8).
+ *
+ * Note: nothing may be pasted (##) onto the last token of a table name. The
+ * token after it is ',', and an identifier pasted with ',' is not a valid
+ * preprocessing token.
+ */
+#define TRANSFORM(type, n) \
+\
+static void matrix_ ## type ## _ ## n ## x ## n ## _avx2(int8_t bitdepth, const int16_t *src, int16_t *dst)\
+{\
+  int32_t shift_1st = g_convert_to_bit[n] + 1 + (bitdepth - 8); \
+  int32_t shift_2nd = g_convert_to_bit[n] + 8; \
+  int16_t tmp[n * n];\
+\
+  mul_clip_matrix_ ## n ## x ## n ## _avx2(src, (int16_t*)g_ ## type ## _ ## n ## _t, tmp, shift_1st);\
+  mul_clip_matrix_ ## n ## x ## n ## _avx2((int16_t*)g_ ## type ## _ ## n, tmp, dst, shift_2nd);\
+}
 
-  mul_clip_matrix_4x4_avx2(src, (int16_t*)g_dst_t, tmp, shift0);
-  mul_clip_matrix_4x4_avx2((int16_t*)g_dst, tmp, dst, shift1);
-}
+/**
+ * \brief Define the inverse transform function matrix_i<type>_<n>x<n>_avx2.
+ *
+ * The generated function multiplies g_<type>_<n>_t by src into a temporary
+ * n*n block with a shift of 7, then multiplies that block by g_<type>_<n>
+ * into dst with a shift of 12 - (bitdepth - 8).
+ *
+ * Note the parameter order: dst is the output and comes before src, so dst
+ * must stay non-const.
+ */
+#define ITRANSFORM(type, n) \
+\
+static void matrix_i ## type ## _## n ## x ## n ## _avx2(int8_t bitdepth, int16_t *dst, const int16_t *src)\
+{\
+  int32_t shift_1st = 7; \
+  int32_t shift_2nd = 12 - (bitdepth - 8); \
+  int16_t tmp[n * n];\
+\
+  mul_clip_matrix_ ## n ## x ## n ## _avx2((int16_t*)g_ ## type ## _ ## n ## _t, src, tmp, shift_1st);\
+  mul_clip_matrix_ ## n ## x ## n ## _avx2(tmp, (int16_t*)g_ ## type ## _ ## n, dst, shift_2nd);\
+}
 
-static void matrix_idst_2d_4x4_avx2(const int16_t *src, int16_t *dst, const int16_t *transform, const int16_t shift0, const int16_t shift1)
-{
-  int16_t tmp[4 * 4];
+TRANSFORM(dst, 4);
+TRANSFORM(dct, 4);
+TRANSFORM(dct, 8);
+TRANSFORM(dct, 16);
+TRANSFORM(dct, 32);
 
-  mul_clip_matrix_4x4_avx2((int16_t*)g_dst_t, src, tmp, shift0);
-  mul_clip_matrix_4x4_avx2(tmp, (int16_t*)g_dst, dst, shift1);
-}
+ITRANSFORM(dst, 4);
+ITRANSFORM(dct, 4);
+ITRANSFORM(dct, 8);
+ITRANSFORM(dct, 16);
+ITRANSFORM(dct, 32);
 
-static void matrix_transform_2d_4x4_avx2(const int16_t *src, int16_t *dst, const int16_t *transform, const int16_t shift0, const int16_t shift1)
-{
-  int16_t tmp[4 * 4];
-
-  mul_clip_matrix_4x4_avx2(src, (int16_t*)g_t4_t, tmp, shift0);
-  mul_clip_matrix_4x4_avx2((int16_t*)g_t4, tmp, dst, shift1);
-}
-
-static void matrix_itransform_2d_4x4_avx2(const int16_t *src, int16_t *dst, const int16_t *transform, const int16_t shift0, const int16_t shift1)
-{
-  int16_t tmp[4*4];
-
-  mul_clip_matrix_4x4_avx2((int16_t*)g_t4_t, src, tmp, shift0);
-  mul_clip_matrix_4x4_avx2(tmp, (int16_t*)g_t4, dst, shift1);
-}
-
-static void matrix_transform_2d_8x8_avx2(const int16_t *src, int16_t *dst, const int16_t *transform, const int16_t shift0, const int16_t shift1)
-{
-  int16_t tmp[8 * 8];
-
-  mul_clip_matrix_8x8_avx2(src, (int16_t*)g_t8_t, tmp, shift0);
-  mul_clip_matrix_8x8_avx2((int16_t*)g_t8, tmp, dst, shift1);
-}
-
-static void matrix_itransform_2d_8x8_avx2(const int16_t *src, int16_t *dst, const int16_t *transform, const int16_t shift0, const int16_t shift1)
-{
-  int16_t tmp[8 * 8];
-
-  
mul_clip_matrix_8x8_avx2((int16_t*)g_t8_t, src, tmp, shift0); - mul_clip_matrix_8x8_avx2(tmp, (int16_t*)g_t8, dst, shift1); -} - -static void matrix_transform_2d_16x16_avx2(const int16_t *src, int16_t *dst, const int16_t *transform, const int16_t shift0, const int16_t shift1) -{ - int16_t tmp[16 * 16]; - - mul_clip_matrix_16x16_avx2(src, (int16_t*)g_t16_t, tmp, shift0); - mul_clip_matrix_16x16_avx2((int16_t*)g_t16, tmp, dst, shift1); -} - -static void matrix_itransform_2d_16x16_avx2(const int16_t *src, int16_t *dst, const int16_t *transform, const int16_t shift0, const int16_t shift1) -{ - int16_t tmp[16 * 16]; - - mul_clip_matrix_16x16_avx2((int16_t*)g_t16_t, src, tmp, shift0); - mul_clip_matrix_16x16_avx2(tmp, (int16_t*)g_t16, dst, shift1); -} - -static void matrix_transform_2d_32x32_avx2(const int16_t *src, int16_t *dst, const int16_t *transform, const int16_t shift0, const int16_t shift1) -{ - int16_t tmp[32 * 32]; - - mul_clip_matrix_32x32_avx2(src, (int16_t*)g_t32_t, tmp, shift0); - mul_clip_matrix_32x32_avx2((int16_t*)g_t32, tmp, dst, shift1); -} - -static void matrix_itransform_2d_32x32_avx2(const int16_t *src, int16_t *dst, const int16_t *transform, const int16_t shift0, const int16_t shift1) -{ - int16_t tmp[32 * 32]; - - mul_clip_matrix_32x32_avx2((int16_t*) g_t32_t, src, tmp, shift0); - mul_clip_matrix_32x32_avx2(tmp, (int16_t*)g_t32, dst, shift1); -} - -static void matrix_dst_4x4_avx2(int8_t bitdepth, int16_t *src, int16_t *dst) -{ - int32_t shift_1st = g_convert_to_bit[4] + 1 + (bitdepth - 8); - int32_t shift_2nd = g_convert_to_bit[4] + 8; - matrix_dst_2d_4x4_avx2(src, dst, (const int16_t*)g_dst, shift_1st, shift_2nd); -} - -static void matrix_idst_4x4_avx2(int8_t bitdepth, int16_t *dst, int16_t *src) -{ - int32_t shift_1st = 7; - int32_t shift_2nd = 12 - (bitdepth - 8); - matrix_idst_2d_4x4_avx2(src, dst, (const int16_t*)g_dst, shift_1st, shift_2nd); -} - -static void matrix_dct_4x4_avx2(int8_t bitdepth, int16_t *src, int16_t *dst) -{ - int32_t 
shift_1st = g_convert_to_bit[4] + 1 + (bitdepth - 8); - int32_t shift_2nd = g_convert_to_bit[4] + 8; - matrix_transform_2d_4x4_avx2(src, dst, (const int16_t*)g_t4, shift_1st, shift_2nd); -} - -static void matrix_idct_4x4_avx2(int8_t bitdepth, int16_t *dst, int16_t *src) -{ - int32_t shift_1st = 7; - int32_t shift_2nd = 12 - (bitdepth - 8); - matrix_itransform_2d_4x4_avx2(src, dst, (const int16_t*)g_t4, shift_1st, shift_2nd); -} - -static void matrix_dct_8x8_avx2(int8_t bitdepth, int16_t *src, int16_t *dst) -{ - int32_t shift_1st = g_convert_to_bit[8] + 1 + (bitdepth - 8); - int32_t shift_2nd = g_convert_to_bit[8] + 8; - matrix_transform_2d_8x8_avx2(src, dst, (const int16_t*)g_t8, shift_1st, shift_2nd); -} - -static void matrix_idct_8x8_avx2(int8_t bitdepth, int16_t *dst, int16_t *src) -{ - int32_t shift_1st = 7; - int32_t shift_2nd = 12 - (bitdepth - 8); - matrix_itransform_2d_8x8_avx2(src, dst, (const int16_t*)g_t8_t, shift_1st, shift_2nd); -} - -static void matrix_dct_16x16_avx2(int8_t bitdepth, int16_t *src, int16_t *dst) -{ - int32_t shift_1st = g_convert_to_bit[16] + 1 + (bitdepth - 8); - int32_t shift_2nd = g_convert_to_bit[16] + 8; - matrix_transform_2d_16x16_avx2(src, dst, (const int16_t*)g_t16, shift_1st, shift_2nd); -} - -static void matrix_idct_16x16_avx2(int8_t bitdepth, int16_t *dst, int16_t *src) -{ - int32_t shift_1st = 7; - int32_t shift_2nd = 12 - (bitdepth - 8); - matrix_itransform_2d_16x16_avx2(src, dst, (const int16_t*)g_t16, shift_1st, shift_2nd); -} - -static void matrix_dct_32x32_avx2(int8_t bitdepth, int16_t *src, int16_t *dst) -{ - int32_t shift_1st = g_convert_to_bit[32] + 1 + (bitdepth - 8); - int32_t shift_2nd = g_convert_to_bit[32] + 8; - matrix_transform_2d_32x32_avx2(src, dst, (const int16_t*)g_t32, shift_1st, shift_2nd); -} - -static void matrix_idct_32x32_avx2(int8_t bitdepth, int16_t *dst, int16_t *src) -{ - int32_t shift_1st = 7; - int32_t shift_2nd = 12 - (bitdepth - 8); - matrix_itransform_2d_32x32_avx2(src, dst, (const 
int16_t*)g_t32, shift_1st, shift_2nd); -} #endif //COMPILE_INTEL_AVX2 int strategy_register_dct_avx2(void* opaque) diff --git a/src/strategies/generic/dct-generic.c b/src/strategies/generic/dct-generic.c index 57067ae1..c7afd13b 100644 --- a/src/strategies/generic/dct-generic.c +++ b/src/strategies/generic/dct-generic.c @@ -26,7 +26,7 @@ #include "strategyselector.h" #include "encoder.h" -const int16_t g_dst[4][4] = +const int16_t g_dst_4[4][4] = { { 29, 55, 74, 84 }, { 74, 74, 0, -74 }, @@ -34,7 +34,7 @@ const int16_t g_dst[4][4] = { 55, -84, 74, -29 } }; -const int16_t g_t4[4][4] = +const int16_t g_dct_4[4][4] = { { 64, 64, 64, 64 }, { 83, 36, -36, -83 }, @@ -42,7 +42,7 @@ const int16_t g_t4[4][4] = { 36, -83, 83, -36 } }; -const int16_t g_t8[8][8] = +const int16_t g_dct_8[8][8] = { { 64, 64, 64, 64, 64, 64, 64, 64 }, { 89, 75, 50, 18, -18, -50, -75, -89 }, @@ -54,7 +54,7 @@ const int16_t g_t8[8][8] = { 18, -50, 75, -89, 89, -75, 50, -18 } }; -const int16_t g_t16[16][16] = +const int16_t g_dct_16[16][16] = { { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 }, { 90, 87, 80, 70, 57, 43, 25, 9, -9, -25, -43, -57, -70, -80, -87, -90 }, @@ -74,7 +74,7 @@ const int16_t g_t16[16][16] = { 9, -25, 43, -57, 70, -80, 87, -90, 90, -87, 80, -70, 57, -43, 25, -9 } }; -const int16_t g_t32[32][32] = +const int16_t g_dct_32[32][32] = { { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 }, { 90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4, -4, -13, -22, -31, -38, -46, -54, -61, -67, -73, -78, -82, -85, -88, -90, -90 }, @@ -110,7 +110,7 @@ const int16_t g_t32[32][32] = { 4, -13, 22, -31, 38, -46, 54, -61, 67, -73, 78, -82, 85, -88, 90, -90, 90, -90, 88, -85, 82, -78, 73, -67, 61, -54, 46, -38, 31, -22, 13, -4 } }; -const int16_t g_dst_t[4][4] = +const int16_t g_dst_4_t[4][4] = { { 29, 74, 84, 55 }, { 55, 74, -29, -84 }, @@ -118,7 +118,7 @@ const int16_t g_dst_t[4][4] = { 84, 
-74, 55, -29 } }; -const int16_t g_t4_t[4][4] = +const int16_t g_dct_4_t[4][4] = { { 64, 83, 64, 36, }, { 64, 36, -64, -83, }, @@ -126,7 +126,7 @@ const int16_t g_t4_t[4][4] = { 64, -83, 64, -36 } }; -const int16_t g_t8_t[8][8] = +const int16_t g_dct_8_t[8][8] = { { 64, 89, 83, 75, 64, 50, 36, 18, }, { 64, 75, 36, -18, -64, -89, -83, -50, }, @@ -138,7 +138,7 @@ const int16_t g_t8_t[8][8] = { 64, -89, 83, -75, 64, -50, 36, -18 } }; -const int16_t g_t16_t[16][16] = +const int16_t g_dct_16_t[16][16] = { { 64, 90, 89, 87, 83, 80, 75, 70, 64, 57, 50, 43, 36, 25, 18, 9, }, { 64, 87, 75, 57, 36, 9, -18, -43, -64, -80, -89, -90, -83, -70, -50, -25, }, @@ -158,7 +158,7 @@ const int16_t g_t16_t[16][16] = { 64, -90, 89, -87, 83, -80, 75, -70, 64, -57, 50, -43, 36, -25, 18, -9 } }; -const int16_t g_t32_t[32][32] = +const int16_t g_dct_32_t[32][32] = { { 64, 90, 90, 90, 89, 88, 87, 85, 83, 82, 80, 78, 75, 73, 70, 67, 64, 61, 57, 54, 50, 46, 43, 38, 36, 31, 25, 22, 18, 13, 9, 4, }, { 64, 90, 87, 82, 75, 67, 57, 46, 36, 22, 9, -4, -18, -31, -43, -54, -64, -73, -80, -85, -89, -90, -90, -88, -83, -78, -70, -61, -50, -38, -25, -13, }, @@ -258,10 +258,10 @@ static void partial_butterfly_4_generic(short *src, short *dst, e[1] = src[1] + src[2]; o[1] = src[1] - src[2]; - dst[0] = (short)((g_t4[0][0] * e[0] + g_t4[0][1] * e[1] + add) >> shift); - dst[2 * line] = (short)((g_t4[2][0] * e[0] + g_t4[2][1] * e[1] + add) >> shift); - dst[line] = (short)((g_t4[1][0] * o[0] + g_t4[1][1] * o[1] + add) >> shift); - dst[3 * line] = (short)((g_t4[3][0] * o[0] + g_t4[3][1] * o[1] + add) >> shift); + dst[0] = (short)((g_dct_4[0][0] * e[0] + g_dct_4[0][1] * e[1] + add) >> shift); + dst[2 * line] = (short)((g_dct_4[2][0] * e[0] + g_dct_4[2][1] * e[1] + add) >> shift); + dst[line] = (short)((g_dct_4[1][0] * o[0] + g_dct_4[1][1] * o[1] + add) >> shift); + dst[3 * line] = (short)((g_dct_4[3][0] * o[0] + g_dct_4[3][1] * o[1] + add) >> shift); src += 4; dst++; @@ -279,10 +279,10 @@ static void 
partial_butterfly_inverse_4_generic(short *src, short *dst, for (j = 0; j < line; j++) { // Utilizing symmetry properties to the maximum to minimize the number of multiplications - o[0] = g_t4[1][0] * src[line] + g_t4[3][0] * src[3 * line]; - o[1] = g_t4[1][1] * src[line] + g_t4[3][1] * src[3 * line]; - e[0] = g_t4[0][0] * src[0] + g_t4[2][0] * src[2 * line]; - e[1] = g_t4[0][1] * src[0] + g_t4[2][1] * src[2 * line]; + o[0] = g_dct_4[1][0] * src[line] + g_dct_4[3][0] * src[3 * line]; + o[1] = g_dct_4[1][1] * src[line] + g_dct_4[3][1] * src[3 * line]; + e[0] = g_dct_4[0][0] * src[0] + g_dct_4[2][0] * src[2 * line]; + e[1] = g_dct_4[0][1] * src[0] + g_dct_4[2][1] * src[2 * line]; // Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector dst[0] = (short)CLIP(-32768, 32767, (e[0] + o[0] + add) >> shift); @@ -317,15 +317,15 @@ static void partial_butterfly_8_generic(short *src, short *dst, ee[1] = e[1] + e[2]; eo[1] = e[1] - e[2]; - dst[0] = (short)((g_t8[0][0] * ee[0] + g_t8[0][1] * ee[1] + add) >> shift); - dst[4 * line] = (short)((g_t8[4][0] * ee[0] + g_t8[4][1] * ee[1] + add) >> shift); - dst[2 * line] = (short)((g_t8[2][0] * eo[0] + g_t8[2][1] * eo[1] + add) >> shift); - dst[6 * line] = (short)((g_t8[6][0] * eo[0] + g_t8[6][1] * eo[1] + add) >> shift); + dst[0] = (short)((g_dct_8[0][0] * ee[0] + g_dct_8[0][1] * ee[1] + add) >> shift); + dst[4 * line] = (short)((g_dct_8[4][0] * ee[0] + g_dct_8[4][1] * ee[1] + add) >> shift); + dst[2 * line] = (short)((g_dct_8[2][0] * eo[0] + g_dct_8[2][1] * eo[1] + add) >> shift); + dst[6 * line] = (short)((g_dct_8[6][0] * eo[0] + g_dct_8[6][1] * eo[1] + add) >> shift); - dst[line] = (short)((g_t8[1][0] * o[0] + g_t8[1][1] * o[1] + g_t8[1][2] * o[2] + g_t8[1][3] * o[3] + add) >> shift); - dst[3 * line] = (short)((g_t8[3][0] * o[0] + g_t8[3][1] * o[1] + g_t8[3][2] * o[2] + g_t8[3][3] * o[3] + add) >> shift); - dst[5 * line] = (short)((g_t8[5][0] * o[0] + g_t8[5][1] * o[1] + g_t8[5][2] * 
o[2] + g_t8[5][3] * o[3] + add) >> shift); - dst[7 * line] = (short)((g_t8[7][0] * o[0] + g_t8[7][1] * o[1] + g_t8[7][2] * o[2] + g_t8[7][3] * o[3] + add) >> shift); + dst[line] = (short)((g_dct_8[1][0] * o[0] + g_dct_8[1][1] * o[1] + g_dct_8[1][2] * o[2] + g_dct_8[1][3] * o[3] + add) >> shift); + dst[3 * line] = (short)((g_dct_8[3][0] * o[0] + g_dct_8[3][1] * o[1] + g_dct_8[3][2] * o[2] + g_dct_8[3][3] * o[3] + add) >> shift); + dst[5 * line] = (short)((g_dct_8[5][0] * o[0] + g_dct_8[5][1] * o[1] + g_dct_8[5][2] * o[2] + g_dct_8[5][3] * o[3] + add) >> shift); + dst[7 * line] = (short)((g_dct_8[7][0] * o[0] + g_dct_8[7][1] * o[1] + g_dct_8[7][2] * o[2] + g_dct_8[7][3] * o[3] + add) >> shift); src += 8; dst++; @@ -345,13 +345,13 @@ static void partial_butterfly_inverse_8_generic(int16_t *src, int16_t *dst, for (j = 0; j < line; j++) { // Utilizing symmetry properties to the maximum to minimize the number of multiplications for (k = 0; k < 4; k++) { - o[k] = g_t8[1][k] * src[line] + g_t8[3][k] * src[3 * line] + g_t8[5][k] * src[5 * line] + g_t8[7][k] * src[7 * line]; + o[k] = g_dct_8[1][k] * src[line] + g_dct_8[3][k] * src[3 * line] + g_dct_8[5][k] * src[5 * line] + g_dct_8[7][k] * src[7 * line]; } - eo[0] = g_t8[2][0] * src[2 * line] + g_t8[6][0] * src[6 * line]; - eo[1] = g_t8[2][1] * src[2 * line] + g_t8[6][1] * src[6 * line]; - ee[0] = g_t8[0][0] * src[0] + g_t8[4][0] * src[4 * line]; - ee[1] = g_t8[0][1] * src[0] + g_t8[4][1] * src[4 * line]; + eo[0] = g_dct_8[2][0] * src[2 * line] + g_dct_8[6][0] * src[6 * line]; + eo[1] = g_dct_8[2][1] * src[2 * line] + g_dct_8[6][1] * src[6 * line]; + ee[0] = g_dct_8[0][0] * src[0] + g_dct_8[4][0] * src[4 * line]; + ee[1] = g_dct_8[0][1] * src[0] + g_dct_8[4][1] * src[4 * line]; // Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector e[0] = ee[0] + eo[0]; @@ -395,18 +395,18 @@ static void partial_butterfly_16_generic(short *src, short *dst, eee[1] = ee[1] + ee[2]; eeo[1] = ee[1] - 
ee[2]; - dst[0] = (short)((g_t16[0][0] * eee[0] + g_t16[0][1] * eee[1] + add) >> shift); - dst[8 * line] = (short)((g_t16[8][0] * eee[0] + g_t16[8][1] * eee[1] + add) >> shift); - dst[4 * line] = (short)((g_t16[4][0] * eeo[0] + g_t16[4][1] * eeo[1] + add) >> shift); - dst[12 * line] = (short)((g_t16[12][0] * eeo[0] + g_t16[12][1] * eeo[1] + add) >> shift); + dst[0] = (short)((g_dct_16[0][0] * eee[0] + g_dct_16[0][1] * eee[1] + add) >> shift); + dst[8 * line] = (short)((g_dct_16[8][0] * eee[0] + g_dct_16[8][1] * eee[1] + add) >> shift); + dst[4 * line] = (short)((g_dct_16[4][0] * eeo[0] + g_dct_16[4][1] * eeo[1] + add) >> shift); + dst[12 * line] = (short)((g_dct_16[12][0] * eeo[0] + g_dct_16[12][1] * eeo[1] + add) >> shift); for (k = 2; k < 16; k += 4) { - dst[k*line] = (short)((g_t16[k][0] * eo[0] + g_t16[k][1] * eo[1] + g_t16[k][2] * eo[2] + g_t16[k][3] * eo[3] + add) >> shift); + dst[k*line] = (short)((g_dct_16[k][0] * eo[0] + g_dct_16[k][1] * eo[1] + g_dct_16[k][2] * eo[2] + g_dct_16[k][3] * eo[3] + add) >> shift); } for (k = 1; k < 16; k += 2) { - dst[k*line] = (short)((g_t16[k][0] * o[0] + g_t16[k][1] * o[1] + g_t16[k][2] * o[2] + g_t16[k][3] * o[3] + - g_t16[k][4] * o[4] + g_t16[k][5] * o[5] + g_t16[k][6] * o[6] + g_t16[k][7] * o[7] + add) >> shift); + dst[k*line] = (short)((g_dct_16[k][0] * o[0] + g_dct_16[k][1] * o[1] + g_dct_16[k][2] * o[2] + g_dct_16[k][3] * o[3] + + g_dct_16[k][4] * o[4] + g_dct_16[k][5] * o[5] + g_dct_16[k][6] * o[6] + g_dct_16[k][7] * o[7] + add) >> shift); } src += 16; @@ -428,16 +428,16 @@ static void partial_butterfly_inverse_16_generic(int16_t *src, int16_t *dst, for (j = 0; j < line; j++) { // Utilizing symmetry properties to the maximum to minimize the number of multiplications for (k = 0; k < 8; k++) { - o[k] = g_t16[1][k] * src[line] + g_t16[3][k] * src[3 * line] + g_t16[5][k] * src[5 * line] + g_t16[7][k] * src[7 * line] + - g_t16[9][k] * src[9 * line] + g_t16[11][k] * src[11 * line] + g_t16[13][k] * src[13 * line] + 
g_t16[15][k] * src[15 * line]; + o[k] = g_dct_16[1][k] * src[line] + g_dct_16[3][k] * src[3 * line] + g_dct_16[5][k] * src[5 * line] + g_dct_16[7][k] * src[7 * line] + + g_dct_16[9][k] * src[9 * line] + g_dct_16[11][k] * src[11 * line] + g_dct_16[13][k] * src[13 * line] + g_dct_16[15][k] * src[15 * line]; } for (k = 0; k < 4; k++) { - eo[k] = g_t16[2][k] * src[2 * line] + g_t16[6][k] * src[6 * line] + g_t16[10][k] * src[10 * line] + g_t16[14][k] * src[14 * line]; + eo[k] = g_dct_16[2][k] * src[2 * line] + g_dct_16[6][k] * src[6 * line] + g_dct_16[10][k] * src[10 * line] + g_dct_16[14][k] * src[14 * line]; } - eeo[0] = g_t16[4][0] * src[4 * line] + g_t16[12][0] * src[12 * line]; - eee[0] = g_t16[0][0] * src[0] + g_t16[8][0] * src[8 * line]; - eeo[1] = g_t16[4][1] * src[4 * line] + g_t16[12][1] * src[12 * line]; - eee[1] = g_t16[0][1] * src[0] + g_t16[8][1] * src[8 * line]; + eeo[0] = g_dct_16[4][0] * src[4 * line] + g_dct_16[12][0] * src[12 * line]; + eee[0] = g_dct_16[0][0] * src[0] + g_dct_16[8][0] * src[8 * line]; + eeo[1] = g_dct_16[4][1] * src[4 * line] + g_dct_16[12][1] * src[12 * line]; + eee[1] = g_dct_16[0][1] * src[0] + g_dct_16[8][1] * src[8 * line]; // Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector for (k = 0; k < 2; k++) { @@ -491,22 +491,22 @@ static void partial_butterfly_32_generic(short *src, short *dst, eeee[1] = eee[1] + eee[2]; eeeo[1] = eee[1] - eee[2]; - dst[0] = (short)((g_t32[0][0] * eeee[0] + g_t32[0][1] * eeee[1] + add) >> shift); - dst[16 * line] = (short)((g_t32[16][0] * eeee[0] + g_t32[16][1] * eeee[1] + add) >> shift); - dst[8 * line] = (short)((g_t32[8][0] * eeeo[0] + g_t32[8][1] * eeeo[1] + add) >> shift); - dst[24 * line] = (short)((g_t32[24][0] * eeeo[0] + g_t32[24][1] * eeeo[1] + add) >> shift); + dst[0] = (short)((g_dct_32[0][0] * eeee[0] + g_dct_32[0][1] * eeee[1] + add) >> shift); + dst[16 * line] = (short)((g_dct_32[16][0] * eeee[0] + g_dct_32[16][1] * eeee[1] + add) >> 
shift); + dst[8 * line] = (short)((g_dct_32[8][0] * eeeo[0] + g_dct_32[8][1] * eeeo[1] + add) >> shift); + dst[24 * line] = (short)((g_dct_32[24][0] * eeeo[0] + g_dct_32[24][1] * eeeo[1] + add) >> shift); for (k = 4; k < 32; k += 8) { - dst[k*line] = (short)((g_t32[k][0] * eeo[0] + g_t32[k][1] * eeo[1] + g_t32[k][2] * eeo[2] + g_t32[k][3] * eeo[3] + add) >> shift); + dst[k*line] = (short)((g_dct_32[k][0] * eeo[0] + g_dct_32[k][1] * eeo[1] + g_dct_32[k][2] * eeo[2] + g_dct_32[k][3] * eeo[3] + add) >> shift); } for (k = 2; k < 32; k += 4) { - dst[k*line] = (short)((g_t32[k][0] * eo[0] + g_t32[k][1] * eo[1] + g_t32[k][2] * eo[2] + g_t32[k][3] * eo[3] + - g_t32[k][4] * eo[4] + g_t32[k][5] * eo[5] + g_t32[k][6] * eo[6] + g_t32[k][7] * eo[7] + add) >> shift); + dst[k*line] = (short)((g_dct_32[k][0] * eo[0] + g_dct_32[k][1] * eo[1] + g_dct_32[k][2] * eo[2] + g_dct_32[k][3] * eo[3] + + g_dct_32[k][4] * eo[4] + g_dct_32[k][5] * eo[5] + g_dct_32[k][6] * eo[6] + g_dct_32[k][7] * eo[7] + add) >> shift); } for (k = 1; k < 32; k += 2) { - dst[k*line] = (short)((g_t32[k][0] * o[0] + g_t32[k][1] * o[1] + g_t32[k][2] * o[2] + g_t32[k][3] * o[3] + - g_t32[k][4] * o[4] + g_t32[k][5] * o[5] + g_t32[k][6] * o[6] + g_t32[k][7] * o[7] + - g_t32[k][8] * o[8] + g_t32[k][9] * o[9] + g_t32[k][10] * o[10] + g_t32[k][11] * o[11] + - g_t32[k][12] * o[12] + g_t32[k][13] * o[13] + g_t32[k][14] * o[14] + g_t32[k][15] * o[15] + add) >> shift); + dst[k*line] = (short)((g_dct_32[k][0] * o[0] + g_dct_32[k][1] * o[1] + g_dct_32[k][2] * o[2] + g_dct_32[k][3] * o[3] + + g_dct_32[k][4] * o[4] + g_dct_32[k][5] * o[5] + g_dct_32[k][6] * o[6] + g_dct_32[k][7] * o[7] + + g_dct_32[k][8] * o[8] + g_dct_32[k][9] * o[9] + g_dct_32[k][10] * o[10] + g_dct_32[k][11] * o[11] + + g_dct_32[k][12] * o[12] + g_dct_32[k][13] * o[13] + g_dct_32[k][14] * o[14] + g_dct_32[k][15] * o[15] + add) >> shift); } src += 32; dst++; @@ -528,22 +528,22 @@ static void partial_butterfly_inverse_32_generic(int16_t *src, int16_t *dst, for 
(j = 0; j. ****************************************************************************/ -extern const int16_t g_dst[4][4]; -extern const int16_t g_t4[4][4]; -extern const int16_t g_t8[8][8]; -extern const int16_t g_t16[16][16]; -extern const int16_t g_t32[32][32]; +extern const int16_t g_dst_4[4][4]; +extern const int16_t g_dct_4[4][4]; +extern const int16_t g_dct_8[8][8]; +extern const int16_t g_dct_16[16][16]; +extern const int16_t g_dct_32[32][32]; -extern const int16_t g_dst_t[4][4]; -extern const int16_t g_t4_t[4][4]; -extern const int16_t g_t8_t[8][8]; -extern const int16_t g_t16_t[16][16]; -extern const int16_t g_t32_t[32][32]; +extern const int16_t g_dst_4_t[4][4]; +extern const int16_t g_dct_4_t[4][4]; +extern const int16_t g_dct_8_t[8][8]; +extern const int16_t g_dct_16_t[16][16]; +extern const int16_t g_dct_32_t[32][32]; int strategy_register_dct_generic(void* opaque);