[avx2] Fix compilation errors

Joose Sainio 2023-07-26 15:20:33 +03:00
parent 13d4313e02
commit 1f9955bdda
2 changed files with 25 additions and 121 deletions
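
The recurring fix in the first file is a pointer-type correction: the coefficient tables are two-dimensional arrays, and a 2-D array name decays to a pointer to its first row (e.g. int16_t (*)[16]), not to const int16_t *, so the old assignments fail to compile under strict settings. Taking &table[0][0] yields a plain pointer to the first element. A minimal sketch of the error and the fix (the [16][16] shape is assumed for illustration):

#include <stdint.h>

static const int16_t table[16][16];   /* stands in for uvg_g_dct_16 */

const int16_t* get_coeffs(void)
{
    /* return table;        error: int16_t (*)[16] is not int16_t *  */
    return &table[0][0];    /* ok: address of the first element      */
}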

@@ -2198,9 +2198,9 @@ void fast_forward_tr_2x16_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
   const int32_t shift_2nd = log2_height_minus1 + 7;
   const int16_t* hor_coeff = ff_dct2_2xN_coeff_hor;
-  const int16_t* ver_coeff = uvg_g_dct_16;
+  const int16_t* ver_coeff = &uvg_g_dct_16[0][0];
   if (ver == DST7) {
-    ver_coeff = uvg_g_dst7_16;
+    ver_coeff = &uvg_g_dst7_16[0][0];
   }
   const __m256i v_res_shuffle = _mm256_load_si256((const __m256i*)ff_dct2_2x16_ver_result_shuffle);
   // No coeffs for DCT8 and DST7 transforms since they do not exist for this block size
@@ -2389,7 +2389,7 @@ void fast_forward_tr_2x32_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
   const int32_t shift_2nd = log2_height_minus1 + 7;
   const int16_t* hor_coeff = ff_dct2_2xN_coeff_hor;
-  const int16_t* ver_coeff = uvg_g_dct_32;
+  const int16_t* ver_coeff = &uvg_g_dct_32[0][0];
   // For result shuffling, can use existing shuffle vector
   const __m256i v_res_shuffle = _mm256_load_si256((const __m256i*)ff_dct2_2x16_ver_result_shuffle);
   // No coeffs for DCT8 and DST7 transforms since they do not exist for this block size
@@ -2562,7 +2562,7 @@ void fast_inverse_tr_2x32_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
   const int32_t shift_1st = INVERSE_SHIFT_1ST;
   const int32_t shift_2nd = INVERSE_SHIFT_2ND;
-  const int16_t* ver_coeff = uvg_g_dct_32_t; // rename
+  const int16_t* ver_coeff = &uvg_g_dct_32_t[0][0]; // rename
   const int16_t* hor_coeff = fi_dct2_32x2_coeff_ver; // TODO: rename
   // No coeffs for DCT8 and DST7 transforms since they do not exist for this block size
@@ -2986,16 +2986,16 @@ void fast_forward_tr_4x16_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
   const int32_t shift_2nd = log2_height_minus1 + 7;
   const int16_t* hor_coeff = fast_forward_dct2_b4_coeff;
-  const int16_t* ver_coeff = uvg_g_dct_16;
+  const int16_t* ver_coeff = &uvg_g_dct_16[0][0];
   if (hor == DST7) {
     hor_coeff = fast_forward_dst7_b4_coeff;
   } else if (hor == DCT8) {
     hor_coeff = fast_forward_dct8_b4_coeff;
   }
   if (ver == DST7) {
-    ver_coeff = uvg_g_dst7_16;
+    ver_coeff = &uvg_g_dst7_16[0][0];
   } else if (ver == DCT8) {
-    ver_coeff = uvg_g_dct8_16;
+    ver_coeff = &uvg_g_dct8_16[0][0];
   }
   __m256i v_hor_pass_out[4];
@@ -3415,7 +3415,7 @@ void fast_inverse_tr_4x32_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
   const int32_t shift_1st = INVERSE_SHIFT_1ST;
   const int32_t shift_2nd = INVERSE_SHIFT_2ND;
-  const int16_t* ver_coeff = uvg_g_dct_32_t;
+  const int16_t* ver_coeff = &uvg_g_dct_32_t[0][0];
   const int16_t* hor_coeff = fi_dct2_32x4_coeff_ver; // TODO: rename
   if (hor == DST7) {
     hor_coeff = fi_dst7_32x4_coeff_ver; // TODO: rename
@@ -3423,9 +3423,9 @@ void fast_inverse_tr_4x32_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
     hor_coeff = fi_dct8_32x4_coeff_ver; // TODO: rename
   }
   if (ver == DST7) {
-    ver_coeff = uvg_g_dst7_32_t;
+    ver_coeff = &uvg_g_dst7_32_t[0][0];
   } else if (ver == DCT8) {
-    ver_coeff = uvg_g_dct8_32;
+    ver_coeff = &uvg_g_dct8_32[0][0];
   }
   __m256i v_ver_pass_out[8];
@@ -4587,7 +4587,7 @@ void fast_inverse_tr_8x32_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
   const int32_t shift_1st = INVERSE_SHIFT_1ST;
   const int32_t shift_2nd = INVERSE_SHIFT_2ND;
-  const int16_t* ver_coeff = uvg_g_dct_32_t;
+  const int16_t* ver_coeff = &uvg_g_dct_32_t[0][0];
   const int16_t* hor_coeff = fi_dct2_32x8_coeff_ver; // TODO: rename table
   if (hor == DST7) {
     hor_coeff = fi_dst7_32x8_coeff_ver; // TODO: rename
@@ -4595,9 +4595,9 @@ void fast_inverse_tr_8x32_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
     hor_coeff = fi_dct8_32x8_coeff_ver; // TODO: rename
   }
   if (ver == DST7) {
-    ver_coeff = uvg_g_dst7_32_t;
+    ver_coeff = &uvg_g_dst7_32_t[0][0];
   } else if (ver == DCT8) {
-    ver_coeff = uvg_g_dct8_32;
+    ver_coeff = &uvg_g_dct8_32[0][0];
   }
   __m256i v_ver_pass_out[16];
@@ -5949,7 +5949,7 @@ void fast_inverse_tr_16x32_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
   const int32_t shift_1st = INVERSE_SHIFT_1ST;
   const int32_t shift_2nd = INVERSE_SHIFT_2ND;
-  const int16_t* ver_coeff = uvg_g_dct_32_t;
+  const int16_t* ver_coeff = &uvg_g_dct_32_t[0][0];
   const int16_t* hor_coeff = fi_dct2_16x16_coeff_hor;
   if (hor == DST7) {
     hor_coeff = fi_dst7_16x32_coeff_hor; // TODO: coeffs
@@ -5957,9 +5957,9 @@ void fast_inverse_tr_16x32_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
     hor_coeff = fi_dct8_16x32_coeff_hor;
   }
   if (ver == DST7) {
-    ver_coeff = uvg_g_dst7_32_t;
+    ver_coeff = &uvg_g_dst7_32_t[0][0];
   } else if (ver == DCT8) {
-    ver_coeff = uvg_g_dct8_32;
+    ver_coeff = &uvg_g_dct8_32[0][0];
   }
   __m256i v_ver_pass_out[32];
@@ -6108,8 +6108,8 @@ static void fast_forward_DCT2_32x2_avx2_ver(const __m256i* src, int16_t* dst, in
   // Prepare coeffs
   // TODO: either rename these old coeff tables to be consistent with other new avx2 functions
   // or construct them here in place. Should be easy to accomplish with set1_epi32, just use an int32_t combined from two int16_t
-  const __m256i v_coeff_0 = _mm256_load_si256((const __m256i*)fast_forward_dct2_b2_coeff[0]);
-  const __m256i v_coeff_1 = _mm256_load_si256((const __m256i*)fast_forward_dct2_b2_coeff[16]);
+  const __m256i v_coeff_0 = _mm256_load_si256((const __m256i*)&fast_forward_dct2_b2_coeff[0]);
+  const __m256i v_coeff_1 = _mm256_load_si256((const __m256i*)&fast_forward_dct2_b2_coeff[16]);
   // Got data for 4 vectors, 32 lines with 2 samples each
   __m256i v_result_e[4];
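
The TODO in the hunk above suggests constructing these coefficient vectors in place instead of loading them from tables. A sketch of that idea, not what the code currently does (the helper name is invented; little-endian lane layout assumed): pack the two 16-bit coefficients into one int32_t and broadcast it, so every 32-bit lane holds the pair that _mm256_madd_epi16-style instructions consume.

#include <stdint.h>
#include <immintrin.h>

/* Hypothetical helper: replicate the coefficient pair (a, b) across all
 * sixteen int16_t lanes of a 256-bit vector with one set1_epi32. */
static inline __m256i broadcast_coeff_pair(int16_t a, int16_t b)
{
    const int32_t packed = (int32_t)(((uint32_t)(uint16_t)b << 16) | (uint16_t)a);
    return _mm256_set1_epi32(packed);   /* lanes: a, b, a, b, ... */
}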
@@ -6147,7 +6147,7 @@ static void fast_forward_DCT2_32x4_avx2_ver(const __m256i* src, int16_t* dst, in
   // Got data for 8 vectors, 32 lines with 4 samples each
   // Prepare coeffs
-  const int16_t* coeff = uvg_g_dct_4;
+  const int16_t* coeff = &uvg_g_dct_4[0][0];
   const int a = coeff[0];
   const int b = coeff[1 * 4 + 0];
   const int c = coeff[1 * 4 + 1];
@@ -6891,11 +6891,11 @@ void fast_inverse_tr_32x4_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
   const int32_t shift_2nd = INVERSE_SHIFT_2ND;
   const int16_t* ver_coeff = fi_dct2_4x32_coeff_hor; // TODO: rename
-  const int16_t* hor_coeff = uvg_g_dct_32_t;
+  const int16_t* hor_coeff = &uvg_g_dct_32_t[0][0];
   if (hor == DST7) {
-    hor_coeff = uvg_g_dst7_32_t;
+    hor_coeff = &uvg_g_dst7_32_t[0][0];
   } else if (hor == DCT8) {
-    hor_coeff = uvg_g_dct8_32;
+    hor_coeff = &uvg_g_dct8_32[0][0];
   }
   if (ver == DST7) {
     ver_coeff = fi_dst7_4x32_coeff_hor; // TODO: rename
@@ -8023,7 +8023,7 @@ void fast_inverse_tr_32x32_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
   const int32_t shift_1st = INVERSE_SHIFT_1ST;
   const int32_t shift_2nd = INVERSE_SHIFT_2ND;
-  const int16_t* ver_coeff = uvg_g_dct_32_t;
+  const int16_t* ver_coeff = &uvg_g_dct_32_t[0][0];
   const int16_t* hor_coeff = fi_dct2_32xN_coeff_hor;
   if (hor == DST7) {
     hor_coeff = fi_dst7_32xN_coeff_hor;
@@ -8031,9 +8031,9 @@ void fast_inverse_tr_32x32_avx2(const int16_t* src, int16_t* dst, tr_type_t hor,
     hor_coeff = fi_dct8_32xN_coeff_hor;
   }
   if (ver == DST7) {
-    ver_coeff = uvg_g_dst7_32_t;
+    ver_coeff = &uvg_g_dst7_32_t[0][0];
   } else if (ver == DCT8) {
-    ver_coeff = uvg_g_dct8_32;
+    ver_coeff = &uvg_g_dct8_32[0][0];
   }
   __m256i v_ver_pass_out[64];

@@ -4830,101 +4830,5 @@ typedef int16_t TMatrixCoeff;
   { b, -d, f, -h, j, -l, n, -p, r, -t, v, -x, z, -B, D, -F, E, -C, A, -y, w, -u, s, -q, o, -m, k, -i, g, -e, c, -a,}, \
 }
-#define TRANSFORM_NUMBER_OF_DIRECTIONS 1
-#define ALIGN_DATA(nBytes,v) __declspec(align(nBytes)) v
-#define MEMORY_ALIGN_DEF_SIZE 32 // for use with avx2 (256 bit)
-//--------------------------------------------------------------------------------------------------
-// DCT-2
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDCT2P2[TRANSFORM_NUMBER_OF_DIRECTIONS][2][2]) =
-{
-  DEFINE_DCT2_P2_MATRIX(64),
-  //DEFINE_DCT2_P2_MATRIX(64)
-};
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDCT2P4[TRANSFORM_NUMBER_OF_DIRECTIONS][4][4]) =
-{
-  DEFINE_DCT2_P4_MATRIX(64, 83, 36),
-  //DEFINE_DCT2_P4_MATRIX(64, 83, 36)
-};
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDCT2P8[TRANSFORM_NUMBER_OF_DIRECTIONS][8][8]) =
-{
-  DEFINE_DCT2_P8_MATRIX(64, 83, 36, 89, 75, 50, 18),
-  //DEFINE_DCT2_P8_MATRIX(64, 83, 36, 89, 75, 50, 18)
-};
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDCT2P16[TRANSFORM_NUMBER_OF_DIRECTIONS][16][16]) =
-{
-  DEFINE_DCT2_P16_MATRIX(64, 83, 36, 89, 75, 50, 18, 90, 87, 80, 70, 57, 43, 25, 9),
-  //DEFINE_DCT2_P16_MATRIX(64, 83, 36, 89, 75, 50, 18, 90, 87, 80, 70, 57, 43, 25, 9)
-};
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDCT2P32[TRANSFORM_NUMBER_OF_DIRECTIONS][32][32]) =
-{
-  DEFINE_DCT2_P32_MATRIX(64, 83, 36, 89, 75, 50, 18, 90, 87, 80, 70, 57, 43, 25, 9, 90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4),
-  //DEFINE_DCT2_P32_MATRIX(64, 83, 36, 89, 75, 50, 18, 90, 87, 80, 70, 57, 43, 25, 9, 90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4)
-};
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDCT2P64[TRANSFORM_NUMBER_OF_DIRECTIONS][64][64]) =
-{
-  DEFINE_DCT2_P64_MATRIX(64, 83, 36, 89, 75, 50, 18, 90, 87, 80, 70, 57, 43, 25, 9, 90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4, 91, 90, 90, 90, 88, 87, 86, 84, 83, 81, 79, 77, 73, 71, 69, 65, 62, 59, 56, 52, 48, 44, 41, 37, 33, 28, 24, 20, 15, 11, 7, 2),
-  //DEFINE_DCT2_P64_MATRIX(64, 83, 36, 89, 75, 50, 18, 90, 87, 80, 70, 57, 43, 25, 9, 90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4, 91, 90, 90, 90, 88, 87, 86, 84, 83, 81, 79, 77, 73, 71, 69, 65, 62, 59, 56, 52, 48, 44, 41, 37, 33, 28, 24, 20, 15, 11, 7, 2)
-};
-// DCT-8
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDCT8P4[TRANSFORM_NUMBER_OF_DIRECTIONS][4][4]) =
-{
-  DEFINE_DCT8_P4_MATRIX(84, 74, 55, 29),
-  //DEFINE_DCT8_P4_MATRIX(84, 74, 55, 29)
-};
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDCT8P8[TRANSFORM_NUMBER_OF_DIRECTIONS][8][8]) =
-{
-  DEFINE_DCT8_P8_MATRIX(86, 85, 78, 71, 60, 46, 32, 17),
-  //DEFINE_DCT8_P8_MATRIX(86, 85, 78, 71, 60, 46, 32, 17)
-};
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDCT8P16[TRANSFORM_NUMBER_OF_DIRECTIONS][16][16]) =
-{
-  DEFINE_DCT8_P16_MATRIX(88, 88, 87, 85, 81, 77, 73, 68, 62, 55, 48, 40, 33, 25, 17, 8),
-  //DEFINE_DCT8_P16_MATRIX(88, 88, 87, 85, 81, 77, 73, 68, 62, 55, 48, 40, 33, 25, 17, 8)
-};
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDCT8P32[TRANSFORM_NUMBER_OF_DIRECTIONS][32][32]) =
-{
-  DEFINE_DCT8_P32_MATRIX(90, 90, 89, 88, 87, 86, 85, 84, 82, 80, 78, 77, 74, 72, 68, 66, 63, 60, 56, 53, 50, 46, 42, 38, 34, 30, 26, 21, 17, 13, 9, 4),
-  //DEFINE_DCT8_P32_MATRIX(90, 90, 89, 88, 87, 86, 85, 84, 82, 80, 78, 77, 74, 72, 68, 66, 63, 60, 56, 53, 50, 46, 42, 38, 34, 30, 26, 21, 17, 13, 9, 4)
-};
-// DST-7
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDST7P4[TRANSFORM_NUMBER_OF_DIRECTIONS][4][4]) =
-{
-  DEFINE_DST7_P4_MATRIX(29, 55, 74, 84),
-  //DEFINE_DST7_P4_MATRIX(29, 55, 74, 84)
-};
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDST7P8[TRANSFORM_NUMBER_OF_DIRECTIONS][8][8]) =
-{
-  DEFINE_DST7_P8_MATRIX(17, 32, 46, 60, 71, 78, 85, 86),
-  //DEFINE_DST7_P8_MATRIX(17, 32, 46, 60, 71, 78, 85, 86)
-};
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDST7P16[TRANSFORM_NUMBER_OF_DIRECTIONS][16][16]) =
-{
-  DEFINE_DST7_P16_MATRIX(8, 17, 25, 33, 40, 48, 55, 62, 68, 73, 77, 81, 85, 87, 88, 88),
-  //DEFINE_DST7_P16_MATRIX(8, 17, 25, 33, 40, 48, 55, 62, 68, 73, 77, 81, 85, 87, 88, 88)
-};
-ALIGN_DATA(MEMORY_ALIGN_DEF_SIZE, const TMatrixCoeff g_trCoreDST7P32[TRANSFORM_NUMBER_OF_DIRECTIONS][32][32]) =
-{
-  DEFINE_DST7_P32_MATRIX(4, 9, 13, 17, 21, 26, 30, 34, 38, 42, 46, 50, 53, 56, 60, 63, 66, 68, 72, 74, 77, 78, 80, 82, 84, 85, 86, 87, 88, 89, 90, 90),
-  //DEFINE_DST7_P32_MATRIX(4, 9, 13, 17, 21, 26, 30, 34, 38, 42, 46, 50, 53, 56, 60, 63, 66, 68, 72, 74, 77, 78, 80, 82, 84, 85, 86, 87, 88, 89, 90, 90)
-};
-//--------------------------------------------------------------------------------------------------
-static const int16_t* vvenc_matrix_coeffs[3][6] = {
-  {g_trCoreDCT2P2[0][0], g_trCoreDCT2P4[0][0], g_trCoreDCT2P8[0][0], g_trCoreDCT2P16[0][0], g_trCoreDCT2P32[0][0], g_trCoreDCT2P64[0][0]},
-  {NULL, g_trCoreDCT8P4[0][0], g_trCoreDCT8P8[0][0], g_trCoreDCT8P16[0][0], g_trCoreDCT8P32[0][0], NULL},
-  {NULL, g_trCoreDST7P4[0][0], g_trCoreDST7P8[0][0], g_trCoreDST7P16[0][0], g_trCoreDST7P32[0][0], NULL},
-};
 //! \}
 #endif // DCT_AVX2_TABLES_H
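
For reference, the vvenc_matrix_coeffs table deleted above mapped a transform family and block size to a coefficient matrix, with NULL marking combinations that do not exist. A hedged usage sketch (the accessor name is invented; the initializer suggests the first index is 0 = DCT-2, 1 = DCT-8, 2 = DST-7 and the second is log2(size) - 1):

/* Hypothetical accessor for the removed table; index scheme assumed from
 * the initializer: family 0/1/2 = DCT-2/DCT-8/DST-7, column = log2(size) - 1. */
static const int16_t* get_vvenc_matrix(int family, int log2_size)
{
    return vvenc_matrix_coeffs[family][log2_size - 1]; /* NULL if unsupported */
}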