[lfnst] Add early termination to mts/lfnst search if MTS_SKIP is selected. Resolve TODOs.

This commit is contained in:
siivonek 2022-05-25 13:19:32 +03:00 committed by Joose Sainio
parent e056e87ca0
commit cf4343989f
3 changed files with 156 additions and 142 deletions

View file

@ -101,7 +101,7 @@ static void encode_mts_idx(encoder_state_t * const state,
}
}
// TODO: move these defines to a proper place when ISP is implemented
// ISP_TODO: move these defines to a proper place when ISP is implemented
// As of now, these are only needed in lfnst checks
#define NOT_INTRA_SUBPARTITIONS 0
#define HOR_INTRA_SUBPARTITIONS 1
@ -111,12 +111,9 @@ static void encode_mts_idx(encoder_state_t * const state,
#define TU_1D_HOR_SPLIT 8
#define TU_1D_VER_SPLIT 9
// TODO: check if these are defined somewhere else
#define MIN_TB_SIZE_X 4
#define MIN_TB_SIZE_Y 4
#define MAX_TB_SIZE 32
static int get_isp_split_dim(const int width, const int height, const int isp_split_type)
{
bool divide_tu_in_rows = isp_split_type == TU_1D_HOR_SPLIT;
@ -164,20 +161,21 @@ static bool is_lfnst_allowed(encoder_state_t* const state, const cu_info_t* cons
const int width, const int height)
{
if (state->encoder_control->cfg.lfnst && pred_cu->type == CU_INTRA) {
const int isp_mode = 0; // LFNST_TODO: assign proper ISP mode when ISP is implemented
const int isp_mode = 0; // ISP_TODO: assign proper ISP mode when ISP is implemented
const int isp_split_type = 0;
const int chroma_width = width >> 1;
const int chroma_height = height >> 1;
const int cu_width = color == COLOR_Y ? width : chroma_width;
const int cu_height = color == COLOR_Y ? height : chroma_height;
bool can_use_lfnst_with_mip = (width >= 16 && height >= 16);
bool is_sep_tree = false; // LFNST_TODO: if/when separate tree structure is implemented, add proper boolean here
const int depth = pred_cu->depth;
bool is_sep_tree = depth == 4; // TODO: if/when separate tree structure is implemented, add proper boolean here
bool mip_flag = pred_cu->type == CU_INTRA ? pred_cu->intra.mip_flag : false;
if ((isp_mode && !can_use_lfnst_with_isp(width, height, isp_split_type, color)) ||
(pred_cu->type == CU_INTRA && mip_flag && !can_use_lfnst_with_mip) ||
(is_sep_tree && color != COLOR_Y && MIN(chroma_width, chroma_height) < 4) ||
(cu_width > MAX_TB_SIZE || cu_height > MAX_TB_SIZE)) {
(cu_width > TR_MAX_WIDTH || cu_height > TR_MAX_WIDTH)) {
return false;
}
@ -196,7 +194,7 @@ static bool encode_lfnst_idx(encoder_state_t * const state, cabac_data_t * const
if (is_lfnst_allowed(state, pred_cu, color, width, height)) {
// Getting separate tree bool from block size is a temporary fix until a proper dual tree check is possible (there is no dual tree structure at time of writing this).
// VTM seems to force explicit dual tree structure for small 4x4 blocks
bool is_separate_tree = depth == 4; // LFNST_TODO: if/when separate/dual tree structure is implemented, get proper value for this
bool is_separate_tree = depth == 4; // TODO: if/when separate/dual tree structure is implemented, get proper value for this
bool luma_flag = is_separate_tree ? (color == COLOR_Y ? true: false) : true;
bool chroma_flag = is_separate_tree ? (color != COLOR_Y ? true : false) : true;
bool non_zero_coeff_non_ts_corner_8x8 = (luma_flag && pred_cu->violates_lfnst_constrained[0]) || (chroma_flag && pred_cu->violates_lfnst_constrained[1]);
@ -211,7 +209,7 @@ static bool encode_lfnst_idx(encoder_state_t * const state, cabac_data_t * const
const int tu_row_length = 1 << (tr_depth - depth);
const int tu_width = cu_width >> (tr_depth - depth);
const int tu_height = tu_width; // TODO: height for non-square blocks
const int isp_mode = 0; // LFNST_TODO:get isp_mode from cu when ISP is implemented
const int isp_mode = 0; // ISP_TODO:get isp_mode from cu when ISP is implemented
// TODO: chroma transform skip
if (color == COLOR_Y) {

View file

@ -383,10 +383,8 @@ static double search_intra_trdepth(
pred_cu->intra.mode_chroma = -1;
pred_cu->joint_cb_cr = 4;
const int max_tb_size = 32; // LFNST_TODO: use define instead for max transform block size
const int max_tb_size = TR_MAX_WIDTH;
// LFNST search params
// bool is_separate_tree = (width == 4 && height == 4) ? true : false; // LFNST_TODO: if/when separate/dual tree structure is implemented, get proper value for this
// const int max_lfnst_idx = (is_separate_tree /*&& color != COLOR_Y*/ && (width < 8 || height < 8)) || (width > max_tb_size || height > max_tb_size) ? 0 : 2;
const int max_lfnst_idx = width > max_tb_size || height > max_tb_size ? 0 : 2;
int start_idx = 0;
@ -449,8 +447,9 @@ static double search_intra_trdepth(
best_rd_cost = rd_cost;
best_lfnst_idx = pred_cu->lfnst_idx;
best_tr_idx = pred_cu->tr_idx;
if (best_tr_idx == MTS_SKIP) break; // Very unlikely that further search is necessary if skip seems best option
}
}
} // end mts index loop (tr_idx)
if (reconstruct_chroma) {
int8_t luma_mode = pred_cu->intra.mode;
pred_cu->intra.mode = -1;
@ -481,7 +480,8 @@ static double search_intra_trdepth(
}
}
}
}
if (best_tr_idx == MTS_SKIP) break; // Very unlikely that further search is necessary if skip seems best option
} // end lfnst_index loop
pred_cu->tr_skip = best_tr_idx == MTS_SKIP;
pred_cu->tr_idx = best_tr_idx;

View file

@ -253,8 +253,22 @@ void kvz_fwd_lfnst_NxN(coeff_t *src, coeff_t *dst, const int8_t mode, const int8
tr_mat += tr_size;
}
// LFNST_TODO: implement fill. Use fill macros present in Kvazaar. Use FILL with switch case if there are only few possible fill cases, it's faster
FILL_ARRAY(out, 0, tr_size - zero_out_size);
// Possible tr_size values 16, 48. Possible zero_out_size values 8, 16
switch (tr_size - zero_out_size) {
case 0:
break;
case 8:
FILL_ARRAY(out, 0, 8);
break;
case 32:
FILL_ARRAY(out, 0, 32);
break;
case 40:
FILL_ARRAY(out, 0, 40);
break;
default:
assert(false && "LFNST: This should never trip.");
}
}
static inline bool get_transpose_flag(const int8_t intra_mode)
@ -272,21 +286,23 @@ void kvz_fwd_lfnst(const cu_info_t* const cur_cu,
const uint16_t lfnst_index = lfnst_idx;
int8_t intra_mode = (color == COLOR_Y) ? cur_cu->intra.mode : cur_cu->intra.mode_chroma;
bool mts_skip = cur_cu->tr_skip;
// this should probably never trigger
bool is_separate_tree = color == COLOR_Y ? width == 4 && height == 4 : width == 2 && height == 2; // LFNST_TODO: proper dual tree check when that structure is implemented
const int depth = cur_cu->depth;
bool is_separate_tree = depth == 4; // TODO: proper dual tree check when that structure is implemented
bool is_cclm_mode = (intra_mode >= 81 && intra_mode <= 83); // CCLM modes are in [81, 83]
bool is_mip = cur_cu->type == CU_INTRA ? cur_cu->intra.mip_flag : false;
bool is_wide_angle = false; // TODO: get wide angle mode when implemented
// LFNST_TODO: use kvz_get_scan_order to get scan mode instead of using SCAN_DIAG define.
const int cu_type = cur_cu->type;
const int scan_order = kvz_get_scan_order(cu_type, intra_mode, depth);
if (lfnst_index && !mts_skip && (is_separate_tree || color == COLOR_Y))
{
const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2;
assert(log2_block_size != -1 && "LFNST: invalid block width.");
const bool whge3 = width >= 8 && height >= 8;
const uint32_t* scan = whge3 ? kvz_coef_top_left_diag_scan_8x8[log2_block_size] : kvz_g_sig_last_scan[SCAN_DIAG][log2_block_size - 1];
const uint32_t* scan = whge3 ? kvz_coef_top_left_diag_scan_8x8[log2_block_size] : kvz_g_sig_last_scan[scan_order][log2_block_size - 1];
if (is_cclm_mode) {
intra_mode = cur_cu->intra.mode;
@ -295,76 +311,75 @@ void kvz_fwd_lfnst(const cu_info_t* const cur_cu,
intra_mode = 0; // Set to planar mode
}
assert(intra_mode < NUM_INTRA_MODE && "LFNST: Invalid intra mode.");
assert(lfnst_index < 3 && lfnst_index >= 0 && "LFNST: Invalid LFNST index. Must be in [0, 2]");
if (lfnst_index < 3) {
if (is_wide_angle) {
// Transform wide angle mode to intra mode
intra_mode = intra_mode; // TODO: wide angle modes not implemented yet. Do nothing.
}
if (is_wide_angle) {
// Transform wide angle mode to intra mode
intra_mode = intra_mode; // TODO: wide angle modes not implemented yet. Do nothing.
}
bool transpose = get_transpose_flag(intra_mode);
const int sb_size = whge3 ? 8 : 4;
bool tu_4x4 = (width == 4 && height == 4);
bool tu_8x8 = (width == 8 && height == 8);
bool transpose = get_transpose_flag(intra_mode);
const int sb_size = whge3 ? 8 : 4;
bool tu_4x4 = (width == 4 && height == 4);
bool tu_8x8 = (width == 8 && height == 8);
coeff_t tmp_in_matrix[48];
coeff_t tmp_out_matrix[48];
coeff_t *lfnst_tmp = tmp_in_matrix; // forward low frequency non-separable transform
coeff_t tmp_in_matrix[48];
coeff_t tmp_out_matrix[48];
coeff_t *lfnst_tmp = tmp_in_matrix; // forward low frequency non-separable transform
coeff_t *coeff_tmp = coeffs;
coeff_t *coeff_tmp = coeffs;
int y;
if (transpose) {
if (sb_size == 4) {
for (y = 0; y < 4; y++) {
lfnst_tmp[0] = coeff_tmp[0];
lfnst_tmp[4] = coeff_tmp[1];
lfnst_tmp[8] = coeff_tmp[2];
lfnst_tmp[12] = coeff_tmp[3];
lfnst_tmp++;
coeff_tmp += width;
}
}
else { // ( sb_size == 8 )
for (y = 0; y < 8; y++) {
lfnst_tmp[0] = coeff_tmp[0];
lfnst_tmp[8] = coeff_tmp[1];
lfnst_tmp[16] = coeff_tmp[2];
lfnst_tmp[24] = coeff_tmp[3];
if (y < 4) {
lfnst_tmp[32] = coeff_tmp[4];
lfnst_tmp[36] = coeff_tmp[5];
lfnst_tmp[40] = coeff_tmp[6];
lfnst_tmp[44] = coeff_tmp[7];
}
lfnst_tmp++;
coeff_tmp += width;
}
}
}
else {
for (y = 0; y < sb_size; y++) {
uint32_t stride = (y < 4) ? sb_size : 4;
memcpy(lfnst_tmp, coeff_tmp, stride * sizeof(coeff_t));
lfnst_tmp += stride;
int y;
if (transpose) {
if (sb_size == 4) {
for (y = 0; y < 4; y++) {
lfnst_tmp[0] = coeff_tmp[0];
lfnst_tmp[4] = coeff_tmp[1];
lfnst_tmp[8] = coeff_tmp[2];
lfnst_tmp[12] = coeff_tmp[3];
lfnst_tmp++;
coeff_tmp += width;
}
}
kvz_fwd_lfnst_NxN(tmp_in_matrix, tmp_out_matrix, kvz_lfnst_lut[intra_mode], lfnst_index - 1, sb_size,
(tu_4x4 || tu_8x8) ? 8 : 16);
lfnst_tmp = tmp_out_matrix; // forward spectral rearrangement
coeff_tmp = coeffs;
int lfnst_coeff_num = (sb_size == 4) ? sb_size * sb_size : 48;
const uint32_t *scan_ptr = scan;
for (y = 0; y < lfnst_coeff_num; y++) {
coeff_tmp[*scan_ptr] = *lfnst_tmp++;
scan_ptr++;
else { // ( sb_size == 8 )
for (y = 0; y < 8; y++) {
lfnst_tmp[0] = coeff_tmp[0];
lfnst_tmp[8] = coeff_tmp[1];
lfnst_tmp[16] = coeff_tmp[2];
lfnst_tmp[24] = coeff_tmp[3];
if (y < 4) {
lfnst_tmp[32] = coeff_tmp[4];
lfnst_tmp[36] = coeff_tmp[5];
lfnst_tmp[40] = coeff_tmp[6];
lfnst_tmp[44] = coeff_tmp[7];
}
lfnst_tmp++;
coeff_tmp += width;
}
}
}
else {
for (y = 0; y < sb_size; y++) {
uint32_t stride = (y < 4) ? sb_size : 4;
memcpy(lfnst_tmp, coeff_tmp, stride * sizeof(coeff_t));
lfnst_tmp += stride;
coeff_tmp += width;
}
}
kvz_fwd_lfnst_NxN(tmp_in_matrix, tmp_out_matrix, kvz_lfnst_lut[intra_mode], lfnst_index - 1, sb_size,
(tu_4x4 || tu_8x8) ? 8 : 16);
lfnst_tmp = tmp_out_matrix; // forward spectral rearrangement
coeff_tmp = coeffs;
int lfnst_coeff_num = (sb_size == 4) ? sb_size * sb_size : 48;
const uint32_t *scan_ptr = scan;
for (y = 0; y < lfnst_coeff_num; y++) {
coeff_tmp[*scan_ptr] = *lfnst_tmp++;
scan_ptr++;
}
}
}
@ -405,19 +420,21 @@ void kvz_inv_lfnst(const cu_info_t *cur_cu,
const uint32_t lfnst_index = lfnst_idx;
int8_t intra_mode = (color == COLOR_Y) ? cur_cu->intra.mode : cur_cu->intra.mode_chroma;
bool mts_skip = cur_cu->tr_skip;
// this should probably never trigger
bool is_separate_tree = color == COLOR_Y ? width == 4 && height == 4 : width == 2 && height == 2; // LFNST_TODO: proper dual tree check when that structure is implemented
const int depth = cur_cu->depth;
bool is_separate_tree = depth == 4; // TODO: proper dual tree check when that structure is implemented
bool is_cclm_mode = (intra_mode >= 81 && intra_mode <= 83); // CCLM modes are in [81, 83]
bool is_mip = cur_cu->type == CU_INTRA ? cur_cu->intra.mip_flag : false;
bool is_wide_angle = false; // TODO: get wide angle mode when implemented
// LFNST_TODO: use kvz_get_scan_order to get scan mode instead of using SCAN_DIAG define.
const int cu_type = cur_cu->type;
const int scan_order = kvz_get_scan_order(cu_type, intra_mode, depth);
if (lfnst_index && !mts_skip && (is_separate_tree || color == COLOR_Y)) {
const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2;
const bool whge3 = width >= 8 && height >= 8;
const uint32_t* scan = whge3 ? kvz_coef_top_left_diag_scan_8x8[log2_block_size] : kvz_g_sig_last_scan[SCAN_DIAG][log2_block_size - 1];
const uint32_t* scan = whge3 ? kvz_coef_top_left_diag_scan_8x8[log2_block_size] : kvz_g_sig_last_scan[scan_order][log2_block_size - 1];
if (is_cclm_mode) {
intra_mode = cur_cu->intra.mode;
@ -426,72 +443,71 @@ void kvz_inv_lfnst(const cu_info_t *cur_cu,
intra_mode = 0; // Set to planar mode
}
assert(intra_mode < NUM_INTRA_MODE && "LFNST: Invalid intra mode.");
assert(lfnst_index < 3 && lfnst_index >= 0 && "LFNST: Invalid LFNST index. Must be in [0, 2]");
if (lfnst_index < 3) {
if (is_wide_angle) {
// Transform wide angle mode to intra mode
intra_mode = intra_mode; // LFNST_TODO: wide angle modes not implemented yet. Do nothing.
}
if (is_wide_angle) {
// Transform wide angle mode to intra mode
intra_mode = intra_mode; // TODO: wide angle modes not implemented yet. Do nothing.
}
bool transpose_flag = get_transpose_flag(intra_mode);
const int sb_size = whge3 ? 8 : 4;
bool tu_4x4_flag = (width == 4 && height == 4);
bool tu_8x8_flag = (width == 8 && height == 8);
coeff_t tmp_in_matrix[48];
coeff_t tmp_out_matrix[48];
coeff_t *lfnst_tmp;
coeff_t *coeff_tmp;
int y;
lfnst_tmp = tmp_in_matrix; // inverse spectral rearrangement
coeff_tmp = coeffs;
coeff_t *dst = lfnst_tmp;
bool transpose_flag = get_transpose_flag(intra_mode);
const int sb_size = whge3 ? 8 : 4;
bool tu_4x4_flag = (width == 4 && height == 4);
bool tu_8x8_flag = (width == 8 && height == 8);
coeff_t tmp_in_matrix[48];
coeff_t tmp_out_matrix[48];
coeff_t *lfnst_tmp;
coeff_t *coeff_tmp;
int y;
lfnst_tmp = tmp_in_matrix; // inverse spectral rearrangement
coeff_tmp = coeffs;
coeff_t *dst = lfnst_tmp;
const uint32_t *scan_ptr = scan;
for (y = 0; y < 16; y++) {
*dst++ = coeff_tmp[*scan_ptr];
scan_ptr++;
}
const uint32_t *scan_ptr = scan;
for (y = 0; y < 16; y++) {
*dst++ = coeff_tmp[*scan_ptr];
scan_ptr++;
}
kvz_inv_lfnst_NxN(tmp_in_matrix, tmp_out_matrix, kvz_lfnst_lut[intra_mode], lfnst_index - 1, sb_size,
(tu_4x4_flag || tu_8x8_flag) ? 8 : 16, max_log2_dyn_range);
lfnst_tmp = tmp_out_matrix; // inverse low frequency non-separable transform
kvz_inv_lfnst_NxN(tmp_in_matrix, tmp_out_matrix, kvz_lfnst_lut[intra_mode], lfnst_index - 1, sb_size,
(tu_4x4_flag || tu_8x8_flag) ? 8 : 16, max_log2_dyn_range);
lfnst_tmp = tmp_out_matrix; // inverse low frequency non-separable transform
if (transpose_flag) {
if (sb_size == 4) {
for (y = 0; y < 4; y++) {
coeff_tmp[0] = lfnst_tmp[0];
coeff_tmp[1] = lfnst_tmp[4];
coeff_tmp[2] = lfnst_tmp[8];
coeff_tmp[3] = lfnst_tmp[12];
lfnst_tmp++;
coeff_tmp += width;
}
}
else { // ( sb_size == 8 )
for (y = 0; y < 8; y++) {
coeff_tmp[0] = lfnst_tmp[0];
coeff_tmp[1] = lfnst_tmp[8];
coeff_tmp[2] = lfnst_tmp[16];
coeff_tmp[3] = lfnst_tmp[24];
if (y < 4) {
coeff_tmp[4] = lfnst_tmp[32];
coeff_tmp[5] = lfnst_tmp[36];
coeff_tmp[6] = lfnst_tmp[40];
coeff_tmp[7] = lfnst_tmp[44];
}
lfnst_tmp++;
coeff_tmp += width;
}
}
}
else {
for (y = 0; y < sb_size; y++) {
uint32_t uiStride = (y < 4) ? sb_size : 4;
memcpy(coeff_tmp, lfnst_tmp, uiStride * sizeof(coeff_t));
lfnst_tmp += uiStride;
if (transpose_flag) {
if (sb_size == 4) {
for (y = 0; y < 4; y++) {
coeff_tmp[0] = lfnst_tmp[0];
coeff_tmp[1] = lfnst_tmp[4];
coeff_tmp[2] = lfnst_tmp[8];
coeff_tmp[3] = lfnst_tmp[12];
lfnst_tmp++;
coeff_tmp += width;
}
}
else { // ( sb_size == 8 )
for (y = 0; y < 8; y++) {
coeff_tmp[0] = lfnst_tmp[0];
coeff_tmp[1] = lfnst_tmp[8];
coeff_tmp[2] = lfnst_tmp[16];
coeff_tmp[3] = lfnst_tmp[24];
if (y < 4) {
coeff_tmp[4] = lfnst_tmp[32];
coeff_tmp[5] = lfnst_tmp[36];
coeff_tmp[6] = lfnst_tmp[40];
coeff_tmp[7] = lfnst_tmp[44];
}
lfnst_tmp++;
coeff_tmp += width;
}
}
}
else {
for (y = 0; y < sb_size; y++) {
uint32_t uiStride = (y < 4) ? sb_size : 4;
memcpy(coeff_tmp, lfnst_tmp, uiStride * sizeof(coeff_t));
lfnst_tmp += uiStride;
coeff_tmp += width;
}
}
}
}