Implemented transform skipping (for 4x4 blocks)

The selection between transform skip and the normal transform might need more work: currently both are computed for each 4x4 block, and the one with the lower cost (SAD of the reconstruction plus the sum of squared quantized coefficients) is chosen.
2024-11-27 19:24:06 +00:00 · 2014-04-02 10:54:03 +03:00 · 2014-04-02 10:54:03 +03:00 · cfb21c0e4c
parent b9f8950cf7
commit cfb21c0e4c
7 changed files with 109 additions and 8 deletions
--- a/src/context.c
+++ b/src/context.c
@ -166,6 +166,13 @@ const uint8_t INIT_ABS_FLAG[3][6] =
  { 138,153,136,167,152,152},
 };

+static const uint8_t INIT_TRANSFORMSKIP_FLAG[3][2] = // transform_skip_flag context init values: [slice][0 = luma, 1 = chroma]
+{
+  { 139,  139},
+  { 139,  139},
+  { 139,  139},
+};
+

 // CONTEXTS
 cabac_ctx g_sao_merge_flag_model;
@ -196,6 +203,8 @@ cabac_ctx g_cu_mvd_model[2];
 cabac_ctx g_cu_ref_pic_model[2];
 cabac_ctx g_mvp_idx_model[2];
 cabac_ctx g_cu_qt_root_cbf_model;
+cabac_ctx g_transform_skip_model_luma;
+cabac_ctx g_transform_skip_model_chroma;



@ -227,6 +236,9 @@ void init_contexts(encoder_control *encoder, int8_t slice)
  uint16_t i;

  // Initialize contexts
+  ctx_init(&g_transform_skip_model_luma, encoder->QP, INIT_TRANSFORMSKIP_FLAG[slice][0]);
+  ctx_init(&g_transform_skip_model_chroma, encoder->QP, INIT_TRANSFORMSKIP_FLAG[slice][1]);
+
  ctx_init(&g_sao_merge_flag_model, encoder->QP, INIT_SAO_MERGE_FLAG[slice]);
  ctx_init(&g_sao_type_idx_model, encoder->QP, INIT_SAO_TYPE_IDX[slice]);

--- a/src/context.h
+++ b/src/context.h
@ -77,6 +77,8 @@ extern cabac_ctx g_cu_mvd_model[2];
 extern cabac_ctx g_cu_ref_pic_model[2];
 extern cabac_ctx g_mvp_idx_model[2];
 extern cabac_ctx g_cu_qt_root_cbf_model;
+extern cabac_ctx g_transform_skip_model_luma;
+extern cabac_ctx g_transform_skip_model_chroma;
 #define CNU 154

 #endif
--- a/src/encoder.c
+++ b/src/encoder.c
@ -751,7 +751,7 @@ void encode_pic_parameter_set(encoder_control* encoder)
  WRITE_UE(encoder->stream, 0, "num_ref_idx_l1_default_active_minus1");
  WRITE_SE(encoder->stream, ((int8_t)encoder->QP)-26, "pic_init_qp_minus26");
  WRITE_U(encoder->stream, 0, 1, "constrained_intra_pred_flag");
-  WRITE_U(encoder->stream, 0, 1, "transform_skip_enabled_flag");
+  WRITE_U(encoder->stream, 1, 1, "transform_skip_enabled_flag");
  WRITE_U(encoder->stream, 0, 1, "cu_qp_delta_enabled_flag");
  //if cu_qp_delta_enabled_flag
  //WRITE_UE(encoder->stream, 0, "diff_cu_qp_delta_depth");
@ -1927,7 +1927,43 @@ void encode_transform_tree(encoder_control* encoder, int32_t x, int32_t y, uint8
    #endif

    // Transform and quant residual to coeffs
-    transform2d(block,pre_quant_coeff,width,0);
+    if(width == 4) {
+      int i;
+      coefficient temp_block[16];
+      coefficient temp_coeff[16];
+      coefficient temp_block2[16];
+      coefficient temp_coeff2[16];
+      uint32_t cost = 0,cost2 = 0;
+
+      // Test for transform skip
+      transformskip(block,pre_quant_coeff,width);
+      quant(encoder, pre_quant_coeff, temp_coeff, 4, 4, &ac_sum, 0, scan_idx_luma, cur_cu->type);
+      dequant(encoder, temp_coeff, pre_quant_coeff, 4, 4, 0, cur_cu->type);
+      itransformskip(temp_block,pre_quant_coeff,width);
+
+      transform2d(block,pre_quant_coeff,width,0);
+      quant(encoder, pre_quant_coeff, temp_coeff2, 4, 4, &ac_sum, 0, scan_idx_luma, cur_cu->type);
+      dequant(encoder, temp_coeff2, pre_quant_coeff, 4, 4, 0, cur_cu->type);
+      itransform2d(temp_block2,pre_quant_coeff,width,0);
+
+
+      // SAD between reconstruction and original + sum of squared quantized coeffs
+      for (i = 0; i < 16; i++) {
+        cost += abs((int)temp_block[i] - (int)block[i]);
+        cost += temp_coeff[i]*temp_coeff[i];
+
+        cost2 += abs((int)temp_block2[i] - (int)block[i]);
+        cost2 += temp_coeff2[i]*temp_coeff2[i];
+      }
+
+      cur_cu->intra[PU_INDEX(x_pu, y_pu)].tr_skip = (cost < cost2);
+    }
+
+    if(width == 4 && cur_cu->intra[PU_INDEX(x_pu, y_pu)].tr_skip) {
+      transformskip(block,pre_quant_coeff,width);
+    } else {
+      transform2d(block,pre_quant_coeff,width,0);
+    }

    if (encoder->rdoq_enable) {
      rdoq(encoder, pre_quant_coeff, coeff_y, width, width, &ac_sum, 0,
@ -1972,7 +2008,11 @@ void encode_transform_tree(encoder_control* encoder, int32_t x, int32_t y, uint8
      }

      dequant(encoder, coeff_y, pre_quant_coeff, width, width, 0, cur_cu->type);
-      itransform2d(block,pre_quant_coeff,width,0);
+      if(width == 4 && cur_cu->intra[PU_INDEX(x_pu, y_pu)].tr_skip) {
+        itransformskip(block,pre_quant_coeff,width);
+      } else {
+        itransform2d(block,pre_quant_coeff,width,0);
+      }

      i = 0;

@ -2135,7 +2175,7 @@ static void encode_transform_unit(encoder_control *encoder,
      }
    }

-    encode_coeff_nxn(encoder, coeff_y, width, 0, scan_idx);
+    encode_coeff_nxn(encoder, coeff_y, width, 0, scan_idx, cur_cu->intra[PU_INDEX(x_pu, y_pu)].tr_skip);
  }

  if (depth == MAX_DEPTH + 1 && !(x_pu % 2 && y_pu % 2)) {
@ -2191,11 +2231,11 @@ static void encode_transform_unit(encoder_control *encoder,
    }

    if (cur_cu->coeff_top_u[depth]) {
-      encode_coeff_nxn(encoder, coeff_u, width_c, 2, scan_idx);
+      encode_coeff_nxn(encoder, coeff_u, width_c, 2, scan_idx, 0);
    }

    if (cur_cu->coeff_top_v[depth]) {
-      encode_coeff_nxn(encoder, coeff_v, width_c, 2, scan_idx);
+      encode_coeff_nxn(encoder, coeff_v, width_c, 2, scan_idx, 0);
    }
  }
 }
@ -2290,7 +2330,7 @@ void encode_transform_coeff(encoder_control *encoder, int32_t x_pu,int32_t y_pu,
 }

 void encode_coeff_nxn(encoder_control *encoder, coefficient *coeff, uint8_t width,
-                      uint8_t type, int8_t scan_mode)
+                      uint8_t type, int8_t scan_mode, int8_t tr_skip)
 {
  int c1 = 1;
  uint8_t last_coeff_x = 0;
@ -2321,6 +2361,12 @@ void encode_coeff_nxn(encoder_control *encoder, coefficient *coeff, uint8_t widt
                                 &g_cu_sig_model_chroma[0];
  memset(sig_coeffgroup_flag,0,sizeof(uint32_t)*64);

+  // transform skip flag
+  if(width == 4) {
+    cabac.ctx = (type == 0) ? &g_transform_skip_model_luma : &g_transform_skip_model_chroma;
+    CABAC_BIN(&cabac, tr_skip, "transform_skip_flag");
+  }
+
  // Count non-zero coeffs
  for (i = 0; i < width * width; i++) {
    if (coeff[i] != 0) {
--- a/src/encoder.h
+++ b/src/encoder.h
@ -118,7 +118,7 @@ void encode_last_significant_xy(encoder_control *encoder, uint8_t lastpos_x,
                                uint8_t lastpos_y, uint8_t width, uint8_t height,
                                uint8_t type, uint8_t scan);
 void encode_coeff_nxn(encoder_control *encoder, int16_t *coeff, uint8_t width,
-                      uint8_t type, int8_t scan_mode);
+                      uint8_t type, int8_t scan_mode, int8_t tr_skip);
 void encode_transform_tree(encoder_control* encoder, int32_t x, int32_t y, uint8_t depth, lcu_t *lcu);
 void encode_transform_coeff(encoder_control *encoder, int32_t x_cu, int32_t y_cu,
                            int8_t depth, int8_t tr_depth, uint8_t parent_coeff_u, uint8_t parent_coeff_v);
--- a/src/picture.h
+++ b/src/picture.h
@ -56,6 +56,7 @@ typedef struct
  uint32_t bitcost;
  int8_t mode;
  int8_t mode_chroma;
+  int8_t tr_skip;    //!< \brief transform skip flag
 } cu_info_intra;

 /**
--- a/src/transform.c
+++ b/src/transform.c
@ -729,6 +729,43 @@ static void partial_butterfly_inverse_32(int16_t *src, int16_t *dst,
  }
 }

+/**
+ * \brief forward transform skip: copies the residual to coeff, scaled up by 2^shift
+ * \param block input data (residual)
+ * \param coeff output data (scaled residual, used in place of transform coefficients)
+ * \param block_size width of the square block
+ */
+void transformskip(int16_t *block,int16_t *coeff, int8_t block_size)
+{
+  uint32_t log2_tr_size = g_convert_to_bit[block_size] + 2;
+  int32_t  shift = MAX_TR_DYNAMIC_RANGE - g_bitdepth - log2_tr_size;
+  int32_t  scale = 1 << shift;
+  int32_t  i;
+  // Multiply instead of '<<': left-shifting a negative residual is undefined behavior in C.
+  for (i = 0; i < block_size * block_size; i++) {
+    coeff[i] = (int16_t)(block[i] * scale);
+  }
+}
+
+/**
+ * \brief Inverse of transformskip(): scale coefficients back down to residual samples.
+ * \param block      output data (residual), block_size * block_size entries
+ * \param coeff      input data (transform coefficients), same layout as block
+ * \param block_size width of the square block
+ */
+void itransformskip(int16_t *block,int16_t *coeff, int8_t block_size)
+{
+  const uint32_t log2_size   = g_convert_to_bit[block_size] + 2;
+  const int32_t  down_shift  = MAX_TR_DYNAMIC_RANGE - g_bitdepth - log2_size;
+  const int32_t  rounding    = 1 << (down_shift - 1); // Half of the divisor, for rounding
+  const int32_t  num_samples = block_size * block_size;
+  int32_t idx;
+
+  // Rows are stored contiguously, so the 2D walk is a single linear pass.
+  for (idx = 0; idx < num_samples; idx++) {
+    block[idx] = (coeff[idx] + rounding) >> down_shift;
+  }
+}

 /**
 * \brief forward transform (2D)
--- a/src/transform.h
+++ b/src/transform.h
@ -54,6 +54,9 @@ void quant(encoder_control *encoder, int16_t *coef, int16_t *q_coef, int32_t wid
           int32_t height, uint32_t *ac_sum, int8_t type, int8_t scan_idx, int8_t block_type);
 void dequant(encoder_control *encoder, int16_t *q_coef, int16_t *coef, int32_t width, int32_t height,int8_t type, int8_t block_type);

+void transformskip(int16_t *block,int16_t *coeff, int8_t block_size);
+void itransformskip(int16_t *block,int16_t *coeff, int8_t block_size);
+
 void transform2d(int16_t *block,int16_t *coeff, int8_t block_size, int32_t mode);
 void itransform2d(int16_t *block,int16_t *coeff, int8_t block_size, int32_t mode);