Mirror of https://github.com/ultravideo/uvg266.git, synced 2024-11-23 10:04:07 +00:00
[avx2] Fix compilation errors
parent 13d4313e02
commit 1f9955bdda
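All of the changes below follow one pattern: coefficient tables such as uvg_g_dct_16, uvg_g_dst7_16, uvg_g_dct_32_t and uvg_g_dct_4 are presumably two-dimensional arrays, as the new [0][0] indexing implies, so assigning the bare array name to a const int16_t* decays it to a pointer-to-row (for example const int16_t (*)[16]), which compilers diagnose as an incompatible pointer type (a hard error in C++ and under stricter C settings). Taking the address of the first element yields the flat const int16_t* the transform helpers expect; the 32x2 vertical pass gets the related fix of casting the address of an element, &fast_forward_dct2_b2_coeff[0], rather than the element itself. A minimal sketch of the failure and the fix, using a hypothetical table instead of the real uvg266 declarations:

#include <stdint.h>

/* Hypothetical 16x16 coefficient table standing in for e.g. uvg_g_dct_16. */
static const int16_t table[16][16] = { { 64 } };

static const int16_t* pick_coeffs(void)
{
  /* const int16_t* p = table;       // decays to const int16_t (*)[16]: incompatible pointer type */
  const int16_t* p = &table[0][0];   /* address of the first element: a plain const int16_t* */
  return p;
}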
@@ -2198,9 +2198,9 @@ void fast_forward_tr_2x16_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
   const int32_t shift_2nd = log2_height_minus1 + 7;
 
   const int16_t* hor_coeff = ff_dct2_2xN_coeff_hor;
-  const int16_t* ver_coeff = uvg_g_dct_16;
+  const int16_t* ver_coeff = &uvg_g_dct_16[0][0];
   if (ver == DST7) {
-    ver_coeff = uvg_g_dst7_16;
+    ver_coeff = &uvg_g_dst7_16[0][0];
   }
   const __m256i v_res_shuffle = _mm256_load_si256((const __m256i*)ff_dct2_2x16_ver_result_shuffle);
   // No coeffs for DCT8 and DST7 transforms since they do not exist for this block size
@@ -2389,7 +2389,7 @@ void fast_forward_tr_2x32_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
   const int32_t shift_2nd = log2_height_minus1 + 7;
 
   const int16_t* hor_coeff = ff_dct2_2xN_coeff_hor;
-  const int16_t* ver_coeff = uvg_g_dct_32;
+  const int16_t* ver_coeff = &uvg_g_dct_32[0][0];
   // For result shuffling, can use existing shuffle vector
   const __m256i v_res_shuffle = _mm256_load_si256((const __m256i*)ff_dct2_2x16_ver_result_shuffle);
   // No coeffs for DCT8 and DST7 transforms since they do not exist for this block size
@@ -2562,7 +2562,7 @@ void fast_inverse_tr_2x32_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
   const int32_t shift_1st = INVERSE_SHIFT_1ST;
   const int32_t shift_2nd = INVERSE_SHIFT_2ND;
 
-  const int16_t* ver_coeff = uvg_g_dct_32_t; // rename
+  const int16_t* ver_coeff = &uvg_g_dct_32_t[0][0]; // rename
   const int16_t* hor_coeff = fi_dct2_32x2_coeff_ver; // TODO: rename
   // No coeffs for DCT8 and DST7 transforms since they do not exist for this block size
 
@@ -2986,16 +2986,16 @@ void fast_forward_tr_4x16_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
   const int32_t shift_2nd = log2_height_minus1 + 7;
 
   const int16_t* hor_coeff = fast_forward_dct2_b4_coeff;
-  const int16_t* ver_coeff = uvg_g_dct_16;
+  const int16_t* ver_coeff = &uvg_g_dct_16[0][0];
   if (hor == DST7) {
     hor_coeff = fast_forward_dst7_b4_coeff;
   } else if (hor == DCT8) {
     hor_coeff = fast_forward_dct8_b4_coeff;
   }
   if (ver == DST7) {
-    ver_coeff = uvg_g_dst7_16;
+    ver_coeff = &uvg_g_dst7_16[0][0];
   } else if (ver == DCT8) {
-    ver_coeff = uvg_g_dct8_16;
+    ver_coeff = &uvg_g_dct8_16[0][0];
   }
 
   __m256i v_hor_pass_out[4];
@@ -3415,7 +3415,7 @@ void fast_inverse_tr_4x32_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
   const int32_t shift_1st = INVERSE_SHIFT_1ST;
   const int32_t shift_2nd = INVERSE_SHIFT_2ND;
 
-  const int16_t* ver_coeff = uvg_g_dct_32_t;
+  const int16_t* ver_coeff = &uvg_g_dct_32_t[0][0];
   const int16_t* hor_coeff = fi_dct2_32x4_coeff_ver; // TODO: rename
   if (hor == DST7) {
     hor_coeff = fi_dst7_32x4_coeff_ver; // TODO: rename
@@ -3423,9 +3423,9 @@ void fast_inverse_tr_4x32_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
     hor_coeff = fi_dct8_32x4_coeff_ver; // TODO: rename
   }
   if (ver == DST7) {
-    ver_coeff = uvg_g_dst7_32_t;
+    ver_coeff = &uvg_g_dst7_32_t[0][0];
   } else if (ver == DCT8) {
-    ver_coeff = uvg_g_dct8_32;
+    ver_coeff = &uvg_g_dct8_32[0][0];
   }
 
   __m256i v_ver_pass_out[8];
@@ -4587,7 +4587,7 @@ void fast_inverse_tr_8x32_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
   const int32_t shift_1st = INVERSE_SHIFT_1ST;
   const int32_t shift_2nd = INVERSE_SHIFT_2ND;
 
-  const int16_t* ver_coeff = uvg_g_dct_32_t;
+  const int16_t* ver_coeff = &uvg_g_dct_32_t[0][0];
   const int16_t* hor_coeff = fi_dct2_32x8_coeff_ver; // TODO: rename table
   if (hor == DST7) {
     hor_coeff = fi_dst7_32x8_coeff_ver; // TODO: rename
@@ -4595,9 +4595,9 @@ void fast_inverse_tr_8x32_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
     hor_coeff = fi_dct8_32x8_coeff_ver; // TODO: rename
   }
   if (ver == DST7) {
-    ver_coeff = uvg_g_dst7_32_t;
+    ver_coeff = &uvg_g_dst7_32_t[0][0];
   } else if (ver == DCT8) {
-    ver_coeff = uvg_g_dct8_32;
+    ver_coeff = &uvg_g_dct8_32[0][0];
   }
 
   __m256i v_ver_pass_out[16];
@@ -5949,7 +5949,7 @@ void fast_inverse_tr_16x32_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
   const int32_t shift_1st = INVERSE_SHIFT_1ST;
   const int32_t shift_2nd = INVERSE_SHIFT_2ND;
 
-  const int16_t* ver_coeff = uvg_g_dct_32_t;
+  const int16_t* ver_coeff = &uvg_g_dct_32_t[0][0];
   const int16_t* hor_coeff = fi_dct2_16x16_coeff_hor;
   if (hor == DST7) {
     hor_coeff = fi_dst7_16x32_coeff_hor; // TODO: coeffs
@@ -5957,9 +5957,9 @@ void fast_inverse_tr_16x32_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
     hor_coeff = fi_dct8_16x32_coeff_hor;
   }
   if (ver == DST7) {
-    ver_coeff = uvg_g_dst7_32_t;
+    ver_coeff = &uvg_g_dst7_32_t[0][0];
   } else if (ver == DCT8) {
-    ver_coeff = uvg_g_dct8_32;
+    ver_coeff = &uvg_g_dct8_32[0][0];
   }
 
   __m256i v_ver_pass_out[32];
@@ -6108,8 +6108,8 @@ static void fast_forward_DCT2_32x2_avx2_ver(const __m256i* src, int16_t* dst, in
   // Prepare coeffs
   // TODO: either rename these old coeff tables to be consistent with other new avx2 functions
   // or construct them here in place. Should be ease to accomplish with set1_epi32, just use a int32_t combined from two int16_t
-  const __m256i v_coeff_0 = _mm256_load_si256((const __m256i*)fast_forward_dct2_b2_coeff[0]);
-  const __m256i v_coeff_1 = _mm256_load_si256((const __m256i*)fast_forward_dct2_b2_coeff[16]);
+  const __m256i v_coeff_0 = _mm256_load_si256((const __m256i*)&fast_forward_dct2_b2_coeff[0]);
+  const __m256i v_coeff_1 = _mm256_load_si256((const __m256i*)&fast_forward_dct2_b2_coeff[16]);
 
   // Got data for 4 vectors, 32 lines with 2 samples each
   __m256i v_result_e[4];
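The TODO in the hunk above suggests building the coefficient vector in place instead of loading a pre-arranged table. A hedged sketch of that idea, assuming (as the comment implies) that each 32-bit lane should hold two adjacent int16_t coefficients so they can be consumed pairwise, for example by _mm256_madd_epi16; the helper name is made up for illustration:

#include <immintrin.h>
#include <stdint.h>

/* Illustration of the TODO: broadcast a pair of 16-bit coefficients into
 * every 32-bit lane with a single set1 instead of an aligned table load. */
static inline __m256i broadcast_coeff_pair(int16_t c0, int16_t c1)
{
  /* Pack c0 into the low half and c1 into the high half of one int32_t. */
  const int32_t packed = (int32_t)(((uint32_t)(uint16_t)c1 << 16) | (uint16_t)c0);
  return _mm256_set1_epi32(packed);  /* repeat the pair across all 8 lanes */
}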
@@ -6147,7 +6147,7 @@ static void fast_forward_DCT2_32x4_avx2_ver(const __m256i* src, int16_t* dst, in
   // Got data for 8 vectors, 32 lines with 4 samples each
 
   // Prepare coeffs
-  const int16_t* coeff = uvg_g_dct_4;
+  const int16_t* coeff = &uvg_g_dct_4[0][0];
   const int a = coeff[0];
   const int b = coeff[1 * 4 + 0];
   const int c = coeff[1 * 4 + 1];
@@ -6891,11 +6891,11 @@ void fast_inverse_tr_32x4_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
   const int32_t shift_2nd = INVERSE_SHIFT_2ND;
 
   const int16_t* ver_coeff = fi_dct2_4x32_coeff_hor; // TODO: rename
-  const int16_t* hor_coeff = uvg_g_dct_32_t;
+  const int16_t* hor_coeff = &uvg_g_dct_32_t[0][0];
   if (hor == DST7) {
-    hor_coeff = uvg_g_dst7_32_t;
+    hor_coeff = &uvg_g_dst7_32_t[0][0];
   } else if (hor == DCT8) {
-    hor_coeff = uvg_g_dct8_32;
+    hor_coeff = &uvg_g_dct8_32[0][0];
   }
   if (ver == DST7) {
     ver_coeff = fi_dst7_4x32_coeff_hor; // TODO: rename
@@ -8023,7 +8023,7 @@ void fast_inverse_tr_32x32_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
   const int32_t shift_1st = INVERSE_SHIFT_1ST;
   const int32_t shift_2nd = INVERSE_SHIFT_2ND;
 
-  const int16_t* ver_coeff = uvg_g_dct_32_t;
+  const int16_t* ver_coeff = &uvg_g_dct_32_t[0][0];
   const int16_t* hor_coeff = fi_dct2_32xN_coeff_hor;
   if (hor == DST7) {
     hor_coeff = fi_dst7_32xN_coeff_hor;
@@ -8031,9 +8031,9 @@ void fast_inverse_tr_32x32_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
     hor_coeff = fi_dct8_32xN_coeff_hor;
   }
   if (ver == DST7) {
-    ver_coeff = uvg_g_dst7_32_t;
+    ver_coeff = &uvg_g_dst7_32_t[0][0];
   } else if (ver == DCT8) {
-    ver_coeff = uvg_g_dct8_32;
+    ver_coeff = &uvg_g_dct8_32[0][0];
  }
 
   __m256i v_ver_pass_out[64];

@@ -4830,101 +4830,5 @@ typedef int16_t TMatrixCoeff;
   { b, -d, f, -h, j, -l, n, -p, r, -t, v, -x, z, -B, D, -F, E, -C, A, -y, w, -u, s, -q, o, -m, k, -i, g, -e, c, -a,}, \
 }
 
-#define TRANSFORM_NUMBER_OF_DIRECTIONS 1
-#define ALIGN_DATA(nBytes,v) __declspec(align(nBytes)) v
-#define MEMORY_ALIGN_DEF_SIZE 32 // for use with avx2 (256 bit)
-//--------------------------------------------------------------------------------------------------
-// DCT-2
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDCT2P2[TRANSFORM_NUMBER_OF_DIRECTIONS][2][2]) =
-{
-  DEFINE_DCT2_P2_MATRIX(64),
-  //DEFINE_DCT2_P2_MATRIX(64)
-};
-
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDCT2P4[TRANSFORM_NUMBER_OF_DIRECTIONS][4][4]) =
-{
-  DEFINE_DCT2_P4_MATRIX(64, 83, 36),
-  //DEFINE_DCT2_P4_MATRIX(64, 83, 36)
-};
-
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDCT2P8[TRANSFORM_NUMBER_OF_DIRECTIONS][8][8]) =
-{
-  DEFINE_DCT2_P8_MATRIX(64, 83, 36, 89, 75, 50, 18),
-  //DEFINE_DCT2_P8_MATRIX(64, 83, 36, 89, 75, 50, 18)
-};
-
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDCT2P16[TRANSFORM_NUMBER_OF_DIRECTIONS][16][16]) =
-{
-  DEFINE_DCT2_P16_MATRIX(64, 83, 36, 89, 75, 50, 18, 90, 87, 80, 70, 57, 43, 25, 9),
-  //DEFINE_DCT2_P16_MATRIX(64, 83, 36, 89, 75, 50, 18, 90, 87, 80, 70, 57, 43, 25, 9)
-};
-
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDCT2P32[TRANSFORM_NUMBER_OF_DIRECTIONS][32][32]) =
-{
-  DEFINE_DCT2_P32_MATRIX(64, 83, 36, 89, 75, 50, 18, 90, 87, 80, 70, 57, 43, 25, 9, 90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4),
-  //DEFINE_DCT2_P32_MATRIX(64, 83, 36, 89, 75, 50, 18, 90, 87, 80, 70, 57, 43, 25, 9, 90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4)
-};
-
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDCT2P64[TRANSFORM_NUMBER_OF_DIRECTIONS][64][64]) =
-{
-  DEFINE_DCT2_P64_MATRIX(64, 83, 36, 89, 75, 50, 18, 90, 87, 80, 70, 57, 43, 25, 9, 90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4, 91, 90, 90, 90, 88, 87, 86, 84, 83, 81, 79, 77, 73, 71, 69, 65, 62, 59, 56, 52, 48, 44, 41, 37, 33, 28, 24, 20, 15, 11, 7, 2),
-  //DEFINE_DCT2_P64_MATRIX(64, 83, 36, 89, 75, 50, 18, 90, 87, 80, 70, 57, 43, 25, 9, 90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4, 91, 90, 90, 90, 88, 87, 86, 84, 83, 81, 79, 77, 73, 71, 69, 65, 62, 59, 56, 52, 48, 44, 41, 37, 33, 28, 24, 20, 15, 11, 7, 2)
-};
-
-// DCT-8
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDCT8P4[TRANSFORM_NUMBER_OF_DIRECTIONS][4][4]) =
-{
-  DEFINE_DCT8_P4_MATRIX(84, 74, 55, 29),
-  //DEFINE_DCT8_P4_MATRIX(84, 74, 55, 29)
-};
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDCT8P8[TRANSFORM_NUMBER_OF_DIRECTIONS][8][8]) =
-{
-  DEFINE_DCT8_P8_MATRIX(86, 85, 78, 71, 60, 46, 32, 17),
-  //DEFINE_DCT8_P8_MATRIX(86, 85, 78, 71, 60, 46, 32, 17)
-};
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDCT8P16[TRANSFORM_NUMBER_OF_DIRECTIONS][16][16]) =
-{
-  DEFINE_DCT8_P16_MATRIX(88, 88, 87, 85, 81, 77, 73, 68, 62, 55, 48, 40, 33, 25, 17, 8),
-  //DEFINE_DCT8_P16_MATRIX(88, 88, 87, 85, 81, 77, 73, 68, 62, 55, 48, 40, 33, 25, 17, 8)
-};
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDCT8P32[TRANSFORM_NUMBER_OF_DIRECTIONS][32][32]) =
-{
-  DEFINE_DCT8_P32_MATRIX(90, 90, 89, 88, 87, 86, 85, 84, 82, 80, 78, 77, 74, 72, 68, 66, 63, 60, 56, 53, 50, 46, 42, 38, 34, 30, 26, 21, 17, 13, 9, 4),
-  //DEFINE_DCT8_P32_MATRIX(90, 90, 89, 88, 87, 86, 85, 84, 82, 80, 78, 77, 74, 72, 68, 66, 63, 60, 56, 53, 50, 46, 42, 38, 34, 30, 26, 21, 17, 13, 9, 4)
-};
-
-// DST-7
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDST7P4[TRANSFORM_NUMBER_OF_DIRECTIONS][4][4]) =
-{
-  DEFINE_DST7_P4_MATRIX(29, 55, 74, 84),
-  //DEFINE_DST7_P4_MATRIX(29, 55, 74, 84)
-};
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDST7P8[TRANSFORM_NUMBER_OF_DIRECTIONS][8][8]) =
-{
-  DEFINE_DST7_P8_MATRIX(17, 32, 46, 60, 71, 78, 85, 86),
-  //DEFINE_DST7_P8_MATRIX(17, 32, 46, 60, 71, 78, 85, 86)
-};
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDST7P16[TRANSFORM_NUMBER_OF_DIRECTIONS][16][16]) =
-{
-  DEFINE_DST7_P16_MATRIX(8, 17, 25, 33, 40, 48, 55, 62, 68, 73, 77, 81, 85, 87, 88, 88),
-  //DEFINE_DST7_P16_MATRIX(8, 17, 25, 33, 40, 48, 55, 62, 68, 73, 77, 81, 85, 87, 88, 88)
-};
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDST7P32[TRANSFORM_NUMBER_OF_DIRECTIONS][32][32]) =
-{
-  DEFINE_DST7_P32_MATRIX(4, 9, 13, 17, 21, 26, 30, 34, 38, 42, 46, 50, 53, 56, 60, 63, 66, 68, 72, 74, 77, 78, 80, 82, 84, 85, 86, 87, 88, 89, 90, 90),
-  //DEFINE_DST7_P32_MATRIX(4, 9, 13, 17, 21, 26, 30, 34, 38, 42, 46, 50, 53, 56, 60, 63, 66, 68, 72, 74, 77, 78, 80, 82, 84, 85, 86, 87, 88, 89, 90, 90)
-};
-
-//--------------------------------------------------------------------------------------------------
-
-static const int16_t* vvenc_matrix_coeffs[3][6] = {
-  {g_trCoreDCT2P2[0][0], g_trCoreDCT2P4[0][0], g_trCoreDCT2P8[0][0], g_trCoreDCT2P16[0][0], g_trCoreDCT2P32[0][0], g_trCoreDCT2P64[0][0]},
-  {NULL, g_trCoreDCT8P4[0][0], g_trCoreDCT8P8[0][0], g_trCoreDCT8P16[0][0], g_trCoreDCT8P32[0][0], NULL},
-  {NULL, g_trCoreDST7P4[0][0], g_trCoreDST7P8[0][0], g_trCoreDST7P16[0][0], g_trCoreDST7P32[0][0], NULL},
-};
-
-//! \}
-
-
 
 #endif DCT_AVX2_TABLES_H
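One plausible source of the compilation errors this commit targets is visible in the block removed above: ALIGN_DATA is defined with __declspec(align(nBytes)), which only MSVC accepts, so the vvenc-derived tables would not build with GCC or Clang. A compiler-portable version of such a macro could look like the sketch below; this is only an illustration, not the helper uvg266 actually uses:

/* Portable stand-in for the removed ALIGN_DATA macro (illustrative only). */
#if defined(_MSC_VER)
  #define ALIGN_DATA(nBytes, v) __declspec(align(nBytes)) v
#else
  #define ALIGN_DATA(nBytes, v) v __attribute__((aligned(nBytes)))
#endif

/* Usage mirroring the removed tables: */
ALIGN_DATA(32, const int16_t example_coeffs[4]) = { 64, 83, 36, 18 };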