From d7f7a2d99bd66fbcbe3ac53f6de2a5943d0c68b8 Mon Sep 17 00:00:00 2001
From: Joose Sainio <joose.sainio@tuni.fi>
Date: Thu, 16 Jun 2022 08:34:38 +0300
Subject: [PATCH] [lfnst] working for 32x32

---
 src/search_intra.c                     | 13 ++--
 src/strategies/avx2/dct-avx2.c         | 51 +++++++++++-----
 src/strategies/avx2/quant-avx2.c       |  2 +-
 src/strategies/generic/dct-generic.c   | 23 ++++++-
 src/strategies/generic/quant-generic.c | 84 ++++++++++++++++++++------
 src/strategies/generic/quant-generic.h | 13 +++-
 src/strategies/strategies-quant.h      |  2 +-
 src/transform.c                        | 15 +++--
 tests/mts_tests.c                      |  1 +
 9 files changed, 151 insertions(+), 53 deletions(-)

diff --git a/src/search_intra.c b/src/search_intra.c
index 2f04552f..292267ce 100644
--- a/src/search_intra.c
+++ b/src/search_intra.c
@@ -347,8 +347,7 @@ static double search_intra_trdepth(
     for (int i = start_idx; i < end_idx + 1; ++i) {
       search_data->lfnst_costs[i] = MAX_DOUBLE;
     }
-    bool constraints[2] = { pred_cu->violates_lfnst_constrained_luma,
-                     pred_cu->lfnst_last_scan_pos };
+
 
     for (int lfnst_idx = start_idx; lfnst_idx <= end_idx; lfnst_idx++) {
       // Initialize lfnst variables
@@ -359,6 +358,8 @@ static double search_intra_trdepth(
 
       for (trafo = mts_start; trafo < num_transforms; trafo++) {
         pred_cu->tr_idx = trafo;
+        bool constraints[2] = { pred_cu->violates_lfnst_constrained_luma,
+                 pred_cu->lfnst_last_scan_pos };
         if (mts_enabled) {
           pred_cu->mts_last_scan_pos = 0;
           pred_cu->violates_mts_coeff_constraint = 0;
@@ -520,8 +521,8 @@ static double search_intra_trdepth(
     pred_cu->tr_skip = best_tr_idx == MTS_SKIP;
     pred_cu->tr_idx = best_tr_idx;
     pred_cu->lfnst_idx = best_lfnst_idx;
-    pred_cu->lfnst_last_scan_pos = constraints[1];
-    pred_cu->violates_lfnst_constrained_luma = constraints[0];
+    pred_cu->lfnst_last_scan_pos = false;
+    pred_cu->violates_lfnst_constrained_luma = false;
     nosplit_cost += best_rd_cost;
 
     // Early stop condition for the recursive search.
@@ -1435,7 +1436,7 @@ int8_t uvg_search_intra_chroma_rdo(
     int lfnst_modes_to_check[3];
     if(chroma_data->pred_cu.lfnst_idx) {
       lfnst_modes_to_check[0] = chroma_data->pred_cu.lfnst_idx;
-      lfnst_modes_to_check[1] = 0;
+      lfnst_modes_to_check[1] = -1;
       lfnst_modes_to_check[2] = -1;
     }
     else {
@@ -1457,10 +1458,10 @@ int8_t uvg_search_intra_chroma_rdo(
       uint8_t best_lfnst_index = 0;
       for (int lfnst_i = 0; lfnst_i < 3; ++lfnst_i) {
         const int lfnst = lfnst_modes_to_check[lfnst_i];
-        chroma_data[mode_i].lfnst_costs[lfnst] += mode_bits * state->lambda;
         if (lfnst == -1) {
           continue;
         }
+        chroma_data[mode_i].lfnst_costs[lfnst] += mode_bits * state->lambda;
         if (pred_cu->tr_depth == pred_cu->depth) {
           uvg_intra_predict(
             state,
diff --git a/src/strategies/avx2/dct-avx2.c b/src/strategies/avx2/dct-avx2.c
index adaec7ed..b695273b 100644
--- a/src/strategies/avx2/dct-avx2.c
+++ b/src/strategies/avx2/dct-avx2.c
@@ -1206,7 +1206,7 @@ const int16_t* uvg_g_mts_input[2][3][5] = {
   },
 };
 
-static void mts_dct_4x4_avx2(const int16_t* input, int16_t* output, tr_type_t type_hor, tr_type_t type_ver, uint8_t bitdepth)
+static void mts_dct_4x4_avx2(const int16_t* input, int16_t* output, tr_type_t type_hor, tr_type_t type_ver, uint8_t bitdepth, uint8_t lfnst_idx)
 {
   //const int height = 4;
   const int width = 4;
@@ -1229,7 +1229,7 @@ static void mts_dct_4x4_avx2(const int16_t* input, int16_t* output, tr_type_t ty
   _mm256_store_si256((__m256i*)output, result);
 }
 
-static void mts_idct_4x4_avx2(const int16_t* input, int16_t* output, tr_type_t type_hor, tr_type_t type_ver, uint8_t bitdepth)
+static void mts_idct_4x4_avx2(const int16_t* input, int16_t* output, tr_type_t type_hor, tr_type_t type_ver, uint8_t bitdepth, uint8_t lfnst_idx)
 {
   int32_t shift_1st = 7;
   int32_t shift_2nd = 12 - (bitdepth - 8);
@@ -1247,7 +1247,7 @@ static void mts_idct_4x4_avx2(const int16_t* input, int16_t* output, tr_type_t t
   _mm256_store_si256((__m256i*)output, result);
 }
 
-static void mts_dct_8x8_avx2(const int16_t* input, int16_t* output, tr_type_t type_hor, tr_type_t type_ver, uint8_t bitdepth)
+static void mts_dct_8x8_avx2(const int16_t* input, int16_t* output, tr_type_t type_hor, tr_type_t type_ver, uint8_t bitdepth, uint8_t lfnst_idx)
 {
   int32_t shift_1st = uvg_g_convert_to_bit[8] + 1 + (bitdepth - 8);
   int32_t shift_2nd = uvg_g_convert_to_bit[8] + 8;
@@ -1261,7 +1261,7 @@ static void mts_dct_8x8_avx2(const int16_t* input, int16_t* output, tr_type_t ty
   matmul_8x8_a_bt(dct2, tmpres, output, shift_2nd);
 }
 
-static void mts_idct_8x8_avx2(const int16_t* input, int16_t* output, tr_type_t type_hor, tr_type_t type_ver, uint8_t bitdepth)
+static void mts_idct_8x8_avx2(const int16_t* input, int16_t* output, tr_type_t type_hor, tr_type_t type_ver, uint8_t bitdepth, uint8_t lfnst_idx)
 {
   int32_t shift_1st = 7;
   int32_t shift_2nd = 12 - (bitdepth - 8);
@@ -1275,7 +1275,7 @@ static void mts_idct_8x8_avx2(const int16_t* input, int16_t* output, tr_type_t t
 }
 
 
-static void mts_dct_16x16_avx2(const int16_t* input, int16_t* output, tr_type_t type_hor, tr_type_t type_ver, uint8_t bitdepth)
+static void mts_dct_16x16_avx2(const int16_t* input, int16_t* output, tr_type_t type_hor, tr_type_t type_ver, uint8_t bitdepth, uint8_t lfnst_idx)
 {
   int32_t shift_1st = uvg_g_convert_to_bit[16] + 1 + (bitdepth - 8);
   int32_t shift_2nd = uvg_g_convert_to_bit[16] + 8;
@@ -1306,6 +1306,25 @@ static void mts_dct_16x16_avx2(const int16_t* input, int16_t* output, tr_type_t
   // multiply completely
   matmul_16x16_a_bt(d_v, i_v, tmp, shift_1st);
   matmul_16x16_a_bt(d_v2, tmp, o_v, shift_2nd);
+
+  const int skip_line = lfnst_idx ? 8 : 0;
+  const int skip_line2 = lfnst_idx ? 8 : 0;
+  if (skip_line)
+  {
+    const int reduced_line = 8, cutoff = 8;
+    int16_t* dst2 = output + reduced_line;
+    for (int j = 0; j < cutoff; j++)
+    {
+      memset(dst2, 0, sizeof(int16_t) * skip_line);
+      dst2 += 16;
+    }
+  }
+
+  if (skip_line2)
+  {
+    int16_t* dst2 = output + 16 * 8;
+    memset(dst2, 0, sizeof(int16_t) * 16 * skip_line2);
+  }
 }
 
 /**********/
@@ -1395,7 +1414,7 @@ static void partial_butterfly_inverse_16_mts_avx2(const int16_t* src, int16_t* d
   }
 }
 
-static void mts_idct_16x16_avx2(const int16_t* input, int16_t* output, tr_type_t type_hor, tr_type_t type_ver, uint8_t bitdepth)
+static void mts_idct_16x16_avx2(const int16_t* input, int16_t* output, tr_type_t type_hor, tr_type_t type_ver, uint8_t bitdepth, uint8_t lfnst_idx)
 {
   int32_t shift_1st = 7;
   int32_t shift_2nd = 12 - (bitdepth - 8);
@@ -1510,7 +1529,7 @@ static void mul_clip_matrix_32x32_mts_avx2(const int16_t* left,
   }
 }
 
-static void mts_dct_32x32_avx2(const int16_t* input, int16_t* output, tr_type_t type_hor, tr_type_t type_ver, uint8_t bitdepth)
+static void mts_dct_32x32_avx2(const int16_t* input, int16_t* output, tr_type_t type_hor, tr_type_t type_ver, uint8_t bitdepth, uint8_t lfnst_idx)
 {
   int32_t shift_1st = uvg_g_convert_to_bit[32] + 1 + (bitdepth - 8); 
   int32_t shift_2nd = uvg_g_convert_to_bit[32] + 8; 
@@ -1519,15 +1538,19 @@ static void mts_dct_32x32_avx2(const int16_t* input, int16_t* output, tr_type_t
   const int16_t* tdct = uvg_g_mts_input[1][type_hor][3];
   const int16_t* dct = uvg_g_mts_input[0][type_ver][3];
 
-  const int skip_width = (type_hor != DCT2) ? 16 : 0;
-  const int skip_height = (type_ver != DCT2) ? 16 : 0;
+  int skip_width = (type_hor != DCT2) ? 16 : 0;
+  int skip_height = (type_ver != DCT2) ? 16 : 0;
+  if(lfnst_idx) {
+    skip_width = 24;
+    skip_height = 24;
+  }
 
   mul_clip_matrix_32x32_mts_avx2(input, tdct, tmp, shift_1st, skip_width, 0 );
   mul_clip_matrix_32x32_mts_avx2(dct, tmp, output, shift_2nd, skip_width, skip_height);
 }
 
 
-static void mts_idct_32x32_avx2(const int16_t* input, int16_t* output, tr_type_t type_hor, tr_type_t type_ver, uint8_t bitdepth)
+static void mts_idct_32x32_avx2(const int16_t* input, int16_t* output, tr_type_t type_hor, tr_type_t type_ver, uint8_t bitdepth, uint8_t lfnst_idx)
 {
   int32_t shift_1st = 7; 
   int32_t shift_2nd = 12 - (bitdepth - 8); 
@@ -1542,7 +1565,7 @@ static void mts_idct_32x32_avx2(const int16_t* input, int16_t* output, tr_type_t
   mul_clip_matrix_32x32_mts_avx2(tmp, dct, output, shift_2nd, 0, 0);
 }
 
-typedef void tr_func(const int16_t*, int16_t*, tr_type_t , tr_type_t , uint8_t);
+typedef void tr_func(const int16_t*, int16_t*, tr_type_t , tr_type_t , uint8_t, uint8_t);
 
 // ToDo: Enable MTS 2x2 and 64x64 transforms
 static tr_func* dct_table[5] = {
@@ -1576,7 +1599,7 @@ static void mts_dct_avx2(
 
   uvg_get_tr_type(width, color, tu, &type_hor, &type_ver, mts_idx);
 
-  if (type_hor == DCT2 && type_ver == DCT2)
+  if (type_hor == DCT2 && type_ver == DCT2 && !tu->lfnst_idx)
   {
     dct_func* dct_func = uvg_get_dct_func(width, color, tu->type);
     dct_func(bitdepth, input, output);
@@ -1587,7 +1610,7 @@ static void mts_dct_avx2(
 
     tr_func* dct = dct_table[log2_width_minus2];
 
-    dct(input, output, type_hor, type_ver, bitdepth);
+    dct(input, output, type_hor, type_ver, bitdepth, tu->lfnst_idx);
   }
 }
 
@@ -1617,7 +1640,7 @@ static void mts_idct_avx2(
 
     tr_func* idct = idct_table[log2_width_minus2];
 
-    idct(input, output, type_hor, type_ver, bitdepth);
+    idct(input, output, type_hor, type_ver, bitdepth, tu->lfnst_idx);
   }
 }
 
diff --git a/src/strategies/avx2/quant-avx2.c b/src/strategies/avx2/quant-avx2.c
index e4945f08..7509735d 100644
--- a/src/strategies/avx2/quant-avx2.c
+++ b/src/strategies/avx2/quant-avx2.c
@@ -669,7 +669,7 @@ int uvg_quantize_residual_avx2(encoder_state_t *const state,
   }
   else {
     uvg_quant(state, coeff, coeff_out, width, width, color,
-      scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && color == COLOR_Y);
+      scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && color == COLOR_Y, cur_cu->lfnst_idx);
   }
 
   // Check if there are any non-zero coefficients.
diff --git a/src/strategies/generic/dct-generic.c b/src/strategies/generic/dct-generic.c
index a8285990..8745e641 100644
--- a/src/strategies/generic/dct-generic.c
+++ b/src/strategies/generic/dct-generic.c
@@ -1366,6 +1366,11 @@ static void fastForwardDCT2_B32(const int16_t* src, int16_t* dst, int32_t shift,
       dst += line;
     }
   }
+  if (skip_line2) {
+    const int  reduced_line = line - skip_line2;
+    dst = p_coef + reduced_line * 32;
+    memset(dst, 0, skip_line2 * 32 * sizeof(coeff_t));
+  }
 }
 
 static void fastInverseDCT2_B32(const int16_t* src, int16_t* dst, int32_t shift, int line, int skip_line, int skip_line2)
@@ -2491,7 +2496,7 @@ static void mts_dct_generic(
 
   uvg_get_tr_type(width, color, tu, &type_hor, &type_ver, mts_idx);
 
-  if (type_hor == DCT2 && type_ver == DCT2)
+  if (type_hor == DCT2 && type_ver == DCT2 && !tu->lfnst_idx)
   {
     dct_func *dct_func = uvg_get_dct_func(width, color, tu->type);
     dct_func(bitdepth, input, output);
@@ -2499,9 +2504,21 @@ static void mts_dct_generic(
   else
   {
     const int height = width;
-    const int skip_width = (type_hor != DCT2 && width == 32) ? 16 : (width > 32 ? width - 32 : 0);
-    const int skip_height = (type_ver != DCT2 && height == 32) ? 16 : (height > 32 ? height - 32 : 0);
+    int skip_width = (type_hor != DCT2 && width == 32) ? 16 : (width > 32 ? width - 32 : 0);
+    int skip_height = (type_ver != DCT2 && height == 32) ? 16 : (height > 32 ? height - 32 : 0);
     const int log2_width_minus2 = uvg_g_convert_to_bit[width];
+    if(tu->lfnst_idx) {
+      if ((width == 4 && height > 4) || (width > 4 && height == 4))
+      {
+        skip_width = width - 4;
+        skip_height = height - 4;
+      }
+      else if ((width >= 8 && height >= 8))
+      {
+        skip_width = width - 8;
+        skip_height = height - 8;
+      }
+    }
 
     partial_tr_func* dct_hor = dct_table[type_hor][log2_width_minus2];
     partial_tr_func* dct_ver = dct_table[type_ver][log2_width_minus2];
diff --git a/src/strategies/generic/quant-generic.c b/src/strategies/generic/quant-generic.c
index 096a610d..1cbc5d4e 100644
--- a/src/strategies/generic/quant-generic.c
+++ b/src/strategies/generic/quant-generic.c
@@ -49,8 +49,17 @@
 * \brief quantize transformed coefficents
 *
 */
-void uvg_quant_generic(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width,
-  int32_t height, color_t color, int8_t scan_idx, int8_t block_type, int8_t transform_skip)
+void uvg_quant_generic(
+  const encoder_state_t * const state,
+  coeff_t *coef,
+  coeff_t *q_coef,
+  int32_t width,
+  int32_t height,
+  color_t color,
+  int8_t scan_idx,
+  int8_t block_type,
+  int8_t transform_skip,
+  uint8_t lfnst_idx)
 {
   const encoder_control_t * const encoder = state->encoder_control;
   const uint32_t log2_block_size = uvg_g_convert_to_bit[width] + 2;
@@ -69,20 +78,41 @@ void uvg_quant_generic(const encoder_state_t * const state, coeff_t *coef, coeff
 
   uint32_t ac_sum = 0;
 
-  for (int32_t n = 0; n < width * height; n++) {
-    int32_t level = coef[n];
-    int64_t abs_level = (int64_t)abs(level);
-    int32_t  sign;
+  if(lfnst_idx == 0){
+    for (int32_t n = 0; n < width * height; n++) {
+      int32_t level = coef[n];
+      int64_t abs_level = (int64_t)abs(level);
+      int32_t sign;
 
-    sign = (level < 0 ? -1 : 1);
+      sign = (level < 0 ? -1 : 1);
 
-    int32_t curr_quant_coeff = quant_coeff[n];
-    level = (int32_t)((abs_level * curr_quant_coeff + add) >> q_bits);
-    ac_sum += level;
+      int32_t curr_quant_coeff = quant_coeff[n];
+      level = (int32_t)((abs_level * curr_quant_coeff + add) >> q_bits);
+      ac_sum += level;
 
-    level *= sign;
-    q_coef[n] = (coeff_t)(CLIP(-32768, 32767, level));
+      level *= sign;
+      q_coef[n] = (coeff_t)(CLIP(-32768, 32767, level));
 
+    }
+  }
+  else {
+    const int max_number_of_coeffs = ((width == 4 && height == 4) || (width == 8 && height == 8)) ? 8 : 16;
+    memset(q_coef, 0, width * height * sizeof(coeff_t));
+    for (int32_t n = 0; n < max_number_of_coeffs; n++) {
+      const uint32_t idx = scan[n];
+      int32_t level = coef[idx];
+      int64_t abs_level = (int64_t)abs(level);
+      int32_t sign;
+
+      sign = (level < 0 ? -1 : 1);
+
+      int32_t curr_quant_coeff = quant_coeff[n];
+      level = (abs_level * curr_quant_coeff + add) >> q_bits;
+      ac_sum += level;
+
+      level *= sign;
+      q_coef[idx] = (coeff_t)(CLIP(-32768, 32767, level));
+    }
   }
 
   // Signhiding
@@ -90,13 +120,27 @@ void uvg_quant_generic(const encoder_state_t * const state, coeff_t *coef, coeff
 
   int32_t delta_u[LCU_WIDTH*LCU_WIDTH >> 2];
 
-  for (int32_t n = 0; n < width * height; n++) {
-    int32_t level = coef[n];
-    int64_t abs_level = (int64_t)abs(level);
-    int32_t curr_quant_coeff = quant_coeff[n];
+  if(lfnst_idx == 0) {
+    for (int32_t n = 0; n < width * height; n++) {
+      int32_t level = coef[n];
+      int64_t abs_level = (int64_t)abs(level);
+      int32_t curr_quant_coeff = quant_coeff[n];
 
-    level = (int32_t)((abs_level * curr_quant_coeff + add) >> q_bits);
-    delta_u[n] = (int32_t)((abs_level * curr_quant_coeff - (level << q_bits)) >> q_bits8);
+      level = (int32_t)((abs_level * curr_quant_coeff + add) >> q_bits);
+      delta_u[n] = (int32_t)((abs_level * curr_quant_coeff - (level << q_bits)) >> q_bits8);
+    }
+  }
+  else {
+    const int max_number_of_coeffs = ((width == 4 && height == 4) || (width == 8 && height == 8)) ? 8 : 16;
+    for (int32_t n = 0; n < max_number_of_coeffs; n++) {
+      const uint32_t idx = scan[n];
+      int32_t level = coef[idx];
+      int64_t abs_level = (int64_t)abs(level);
+      int32_t curr_quant_coeff = quant_coeff[idx];
+
+      level = (abs_level * curr_quant_coeff + add) >> q_bits;
+      delta_u[idx] = (int32_t)((abs_level * curr_quant_coeff - (level << q_bits)) >> q_bits8);
+    }
   }
 
   if (ac_sum >= 2) {
@@ -277,7 +321,7 @@ int uvg_quant_cbcr_residual_generic(
   }
   else {
     uvg_quant(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
-      scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false);
+      scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false, cur_cu->lfnst_idx);
   }
 
   int8_t has_coeffs = 0;
@@ -455,7 +499,7 @@ int uvg_quantize_residual_generic(encoder_state_t *const state,
   } else {
   
     uvg_quant(state, coeff, coeff_out, width, width, color,
-      scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && color == COLOR_Y);
+      scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && color == COLOR_Y, cur_cu->lfnst_idx);
   }
 
   // Check if there are any non-zero coefficients.
diff --git a/src/strategies/generic/quant-generic.h b/src/strategies/generic/quant-generic.h
index 41b5ccdb..e4dad821 100644
--- a/src/strategies/generic/quant-generic.h
+++ b/src/strategies/generic/quant-generic.h
@@ -47,8 +47,17 @@
 #define QUANT_SHIFT 14
 
 int uvg_strategy_register_quant_generic(void* opaque, uint8_t bitdepth);
-void uvg_quant_generic(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width,
-  int32_t height, color_t color, int8_t scan_idx, int8_t block_type, int8_t transform_skip);
+void uvg_quant_generic(
+  const encoder_state_t * const state,
+  coeff_t *coef,
+  coeff_t *q_coef,
+  int32_t width,
+  int32_t height,
+  color_t color,
+  int8_t scan_idx,
+  int8_t block_type,
+  int8_t transform_skip,
+  uint8_t lfnst_idx);
 
 int uvg_quantize_residual_generic(encoder_state_t *const state,
   const cu_info_t *const cur_cu, const int width, const color_t color,
diff --git a/src/strategies/strategies-quant.h b/src/strategies/strategies-quant.h
index f64b1eac..8a60dca0 100644
--- a/src/strategies/strategies-quant.h
+++ b/src/strategies/strategies-quant.h
@@ -46,7 +46,7 @@
 
 // Declare function pointers.
 typedef unsigned (quant_func)(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width,
-  int32_t height, color_t color, int8_t scan_idx, int8_t block_type, int8_t transform_skip);
+  int32_t height, color_t color, int8_t scan_idx, int8_t block_type, int8_t transform_skip, uint8_t lfnst_idx);
 typedef unsigned (quant_cbcr_func)(
   encoder_state_t* const state,
   const cu_info_t* const cur_cu,
diff --git a/src/transform.c b/src/transform.c
index c4fbbf3f..4b069d4f 100644
--- a/src/transform.c
+++ b/src/transform.c
@@ -246,7 +246,7 @@ void uvg_transform2d(const encoder_control_t * const encoder,
                      color_t color,
                      const cu_info_t *tu)
 {
-  if (encoder->cfg.mts)
+  if (encoder->cfg.mts || tu->lfnst_idx)
   {
     uvg_mts_dct(encoder->bitdepth, color, tu, block_size, block, coeff, encoder->cfg.mts);
   }
@@ -412,7 +412,8 @@ static void quantize_chroma(
   coeff_t v_quant_coeff[1024],
   const coeff_scan_order_t scan_order,
   bool* u_has_coeffs,
-  bool* v_has_coeffs)
+  bool* v_has_coeffs,
+  uint8_t lfnst_idx)
 {
   if (state->encoder_control->cfg.rdoq_enable &&
     (transforms[i] != CHROMA_TS || !state->encoder_control->cfg.rdoq_skip))
@@ -442,11 +443,11 @@ static void quantize_chroma(
   }
   else {
     uvg_quant(state, &u_coeff[i * trans_offset], u_quant_coeff, width, height, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V,
-      scan_order, CU_INTRA, transforms[i] == CHROMA_TS);
+      scan_order, CU_INTRA, transforms[i] == CHROMA_TS, lfnst_idx);
 
     if (!IS_JCCR_MODE(transforms[i])) {
       uvg_quant(state, &v_coeff[i * trans_offset], v_quant_coeff, width, height, COLOR_V,
-        scan_order, CU_INTRA, transforms[i] == CHROMA_TS);
+        scan_order, CU_INTRA, transforms[i] == CHROMA_TS, lfnst_idx);
     }
   }
 
@@ -550,7 +551,9 @@ void uvg_chroma_transform_search(
       v_quant_coeff,
       scan_order,
       &u_has_coeffs,
-      &v_has_coeffs);
+      &v_has_coeffs,
+      pred_cu->lfnst_idx);
+    /*
     if(pred_cu->type == CU_INTRA && transforms[i] != CHROMA_TS) {
       bool constraints[2] = { false, false };
       uvg_derive_lfnst_constraints(pred_cu, depth, constraints, &u_coeff[i * trans_offset], width, height);
@@ -558,7 +561,7 @@ void uvg_chroma_transform_search(
         uvg_derive_lfnst_constraints(pred_cu, depth, constraints, &v_coeff[i * trans_offset], width, height);        
       }
       if ((constraints[0] || !constraints[1]) && pred_cu->lfnst_idx != 0) continue;
-    }
+    }*/
 
     if (IS_JCCR_MODE(transforms[i]) && !u_has_coeffs) continue;
 
diff --git a/tests/mts_tests.c b/tests/mts_tests.c
index 4244069e..4fe7dad9 100644
--- a/tests/mts_tests.c
+++ b/tests/mts_tests.c
@@ -157,6 +157,7 @@ TEST dct(void)
       cu_info_t tu;
       tu.type = CU_INTRA;
       tu.tr_idx = MTS_DST7_DST7 + trafo;
+      tu.lfnst_idx = 0;
 
       int16_t* buf = dct_bufs[trafo * NUM_SIZES + blocksize];
       ALIGNED(32) int16_t test_result[LCU_WIDTH * LCU_WIDTH] = { 0 };