Mirror of https://github.com/ultravideo/uvg266.git
Improve cu_info coded block flag data structure a bit.
- It works just like the old structure, except that the flags are checked with bitmasks instead of propagating the flag value upwards. There isn't really any benefit to this on its own, because the flags still have to be propagated to parent CUs.
- Wrapped the flags inside a struct to make copying them easier: just copy the struct instead of making individual copies.
This commit is contained in:
parent d123b98aea
commit bdc16d2612
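For illustration, a minimal standalone sketch of how the new flags are meant to be used. The cu_cbf_t struct and the bodies of cbf_is_set/cbf_set are copied from the diff below; the plain C `inline` keyword (the tree uses its own INLINE macro) and the small driver in main are assumptions added only for this example.

#include <assert.h>
#include <stdint.h>

/* Per-component coded block flags, one bit per depth (struct from the diff below). */
typedef struct {
  uint8_t y;
  uint8_t u;
  uint8_t v;
} cu_cbf_t;

/* Check if CBF is set at the given depth or deeper (helper from the diff below). */
static inline int cbf_is_set(uint8_t cbf_flags, int depth)
{
  /* Depths 4-7 hold the four 4x4 luma PUs, so their masks test only a single bit. */
  static const uint8_t masks[8] = { 0xff, 0x7f, 0x3f, 0x1f, 0x8, 0x4, 0x2, 0x1 };
  return (cbf_flags & masks[depth]) != 0;
}

/* Set the CBF bit for one depth (helper from the diff below). */
static inline void cbf_set(uint8_t *cbf_flags, int depth)
{
  *cbf_flags |= 1 << (7 - depth);
}

int main(void)
{
  cu_cbf_t parent = { 0, 0, 0 };
  cu_cbf_t child  = { 0, 0, 0 };

  /* A child CU at depth 2 has coded luma residual. */
  cbf_set(&child.y, 2);

  /* The child's flag is not visible through the parent's own bitmask... */
  assert(!cbf_is_set(parent.y, 1));

  /* ...so, as the commit message notes, it still has to be propagated upwards. */
  if (cbf_is_set(child.y, 2)) {
    cbf_set(&parent.y, 1);
  }
  assert(cbf_is_set(parent.y, 1));

  /* Copying all three components is now a single struct assignment. */
  cu_cbf_t copy = parent;
  assert(cbf_is_set(copy.y, 1) && !cbf_is_set(copy.u, 1));
  return 0;
}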
@@ -1836,19 +1836,23 @@ void encode_coding_tree(encoder_state * const encoder_state,
       } // for ref_list
     } // if !merge

+    {
+      int cbf = (cbf_is_set(cur_cu->cbf.y, depth) ||
+                 cbf_is_set(cur_cu->cbf.u, depth) ||
+                 cbf_is_set(cur_cu->cbf.v, depth));
+
       // Only need to signal coded block flag if not skipped or merged
       // skip = no coded residual, merge = coded residual
       if (!cur_cu->merged) {
         cabac->ctx = &(cabac->ctx_cu_qt_root_cbf_model);
-        CABAC_BIN(cabac, cur_cu->coeff_top_y[depth] | cur_cu->coeff_top_u[depth] | cur_cu->coeff_top_v[depth], "rqt_root_cbf");
+        CABAC_BIN(cabac, cbf, "rqt_root_cbf");
       }
       // Code (possible) coeffs to bitstream

-      if(cur_cu->coeff_top_y[depth] | cur_cu->coeff_top_u[depth] | cur_cu->coeff_top_v[depth]) {
+      if (cbf) {
         encode_transform_coeff(encoder_state, x_ctb * 2, y_ctb * 2, depth, 0, 0, 0);
       }
+    }

     // END for each part
   } else if (cur_cu->type == CU_INTRA) {
@@ -2114,21 +2118,20 @@ void encode_transform_tree(encoder_state * const encoder_state, int32_t x, int32
     encode_transform_tree(encoder_state, x, y + offset, depth+1, lcu);
     encode_transform_tree(encoder_state, x + offset, y + offset, depth+1, lcu);

-    // Derive coded coeff flags from the next depth
-    if (depth == MAX_DEPTH) {
-      cur_cu->coeff_top_y[depth] = cur_cu->coeff_top_y[depth+1] | cur_cu->coeff_top_y[depth+2] | cur_cu->coeff_top_y[depth+3] | cur_cu->coeff_top_y[depth+4];
-      cur_cu->coeff_top_u[depth] = cur_cu->coeff_top_u[depth+1];
-      cur_cu->coeff_top_v[depth] = cur_cu->coeff_top_v[depth+1];
-    } else {
+    // Propagate coded block flags from child CUs to parent CU.
+    if (depth < MAX_DEPTH) {
       cu_info *cu_a = &lcu->cu[LCU_CU_OFFSET + ((x_local + offset)>>3) + (y_local>>3) *LCU_T_CU_WIDTH];
       cu_info *cu_b = &lcu->cu[LCU_CU_OFFSET + (x_local>>3) + ((y_local+offset)>>3)*LCU_T_CU_WIDTH];
       cu_info *cu_c = &lcu->cu[LCU_CU_OFFSET + ((x_local + offset)>>3) + ((y_local+offset)>>3)*LCU_T_CU_WIDTH];
-      cur_cu->coeff_top_y[depth] = cur_cu->coeff_top_y[depth+1] | cu_a->coeff_top_y[depth+1] | cu_b->coeff_top_y[depth+1]
-                                   | cu_c->coeff_top_y[depth+1];
-      cur_cu->coeff_top_u[depth] = cur_cu->coeff_top_u[depth+1] | cu_a->coeff_top_u[depth+1] | cu_b->coeff_top_u[depth+1]
-                                   | cu_c->coeff_top_u[depth+1];
-      cur_cu->coeff_top_v[depth] = cur_cu->coeff_top_v[depth+1] | cu_a->coeff_top_v[depth+1] | cu_b->coeff_top_v[depth+1]
-                                   | cu_c->coeff_top_v[depth+1];
+      if (cbf_is_set(cu_a->cbf.y, depth+1) || cbf_is_set(cu_b->cbf.y, depth+1) || cbf_is_set(cu_c->cbf.y, depth+1)) {
+        cbf_set(&cur_cu->cbf.y, depth);
+      }
+      if (cbf_is_set(cu_a->cbf.u, depth+1) || cbf_is_set(cu_b->cbf.u, depth+1) || cbf_is_set(cu_c->cbf.u, depth+1)) {
+        cbf_set(&cur_cu->cbf.u, depth);
+      }
+      if (cbf_is_set(cu_a->cbf.v, depth+1) || cbf_is_set(cu_b->cbf.v, depth+1) || cbf_is_set(cu_c->cbf.v, depth+1)) {
+        cbf_set(&cur_cu->cbf.v, depth);
+      }
     }

     return;
@@ -2317,19 +2320,7 @@ void encode_transform_tree(encoder_state * const encoder_state, int32_t x, int32
       if (coeff_y[i] != 0) {
         // Found one, we can break here
         cbf_y = 1;
-        if (depth <= MAX_DEPTH) {
-          int d;
-          for (d = 0; d <= depth; ++d) {
-            cur_cu->coeff_top_y[d] = 1;
-          }
-        } else {
-          int pu_index = (x_pu & 1) + 2 * (y_pu & 1);
-          int d;
-          cur_cu->coeff_top_y[depth + pu_index] = 1;
-          for (d = 0; d < depth; ++d) {
-            cur_cu->coeff_top_y[d] = 1;
-          }
-        }
+        cbf_set(&cur_cu->cbf.y, depth + PU_INDEX(x_pu, y_pu));
         break;
       }
     }
@@ -2392,26 +2383,19 @@ void encode_transform_tree(encoder_state * const encoder_state, int32_t x, int32
     transform_chroma(encoder_state, cur_cu, chroma_depth, base_u, pred_u, coeff_u, scan_idx_chroma, pre_quant_coeff, block);
     for (i = 0; i < chroma_size; i++) {
       if (coeff_u[i] != 0) {
-        int d;
-        for (d = 0; d <= depth; ++d) {
-          cur_cu->coeff_top_u[d] = 1;
-        }
+        cbf_set(&cur_cu->cbf.u, depth);
         break;
       }
     }
     transform_chroma(encoder_state, cur_cu, chroma_depth, base_v, pred_v, coeff_v, scan_idx_chroma, pre_quant_coeff, block);
     for (i = 0; i < chroma_size; i++) {
       if (coeff_v[i] != 0) {
-        int d;
-        for (d = 0; d <= depth; ++d) {
-          cur_cu->coeff_top_v[d] = 1;
-        }
+        cbf_set(&cur_cu->cbf.v, depth);
        break;
       }
     }

     // Save coefficients to cu.
-    if (cur_cu->coeff_top_u[depth] || cur_cu->coeff_top_v[depth]) {
+    if (cbf_is_set(cur_cu->cbf.u, depth) || cbf_is_set(cur_cu->cbf.v, depth)) {
       i = 0;
       for (y = 0; y < width_c; y++) {
         for (x = 0; x < width_c; x++) {
@@ -2423,11 +2407,11 @@ void encode_transform_tree(encoder_state * const encoder_state, int32_t x, int32
     }

     reconstruct_chroma(encoder_state, cur_cu, chroma_depth,
-                       cur_cu->coeff_top_u[depth],
+                       cbf_is_set(cur_cu->cbf.u, depth),
                        coeff_u, recbase_u, pred_u, color_type_u,
                        pre_quant_coeff, block);
     reconstruct_chroma(encoder_state, cur_cu, chroma_depth,
-                       cur_cu->coeff_top_v[depth],
+                       cbf_is_set(cur_cu->cbf.v, depth),
                        coeff_v, recbase_v, pred_v, color_type_v,
                        pre_quant_coeff, block);
   }
@@ -2458,13 +2442,7 @@ static void encode_transform_unit(encoder_state * const encoder_state,
   int8_t scan_idx = SCAN_DIAG;
   uint32_t dir_mode;

-  int cbf_y;
-  if (depth <= MAX_DEPTH) {
-    cbf_y = cur_cu->coeff_top_y[depth];
-  } else {
-    int pu_index = x_pu % 2 + 2 * (y_pu % 2);
-    cbf_y = cur_cu->coeff_top_y[depth + pu_index];
-  }
+  int cbf_y = cbf_is_set(cur_cu->cbf.y, depth + PU_INDEX(x_pu, y_pu));

   if (cbf_y) {
     int x = x_pu * (LCU_WIDTH >> MAX_PU_DEPTH);
@@ -2525,7 +2503,7 @@ static void encode_transform_unit(encoder_state * const encoder_state,
     return;
   }

-  if (cur_cu->coeff_top_u[depth] || cur_cu->coeff_top_v[depth]) {
+  if (cbf_is_set(cur_cu->cbf.u, depth) || cbf_is_set(cur_cu->cbf.v, depth)) {
     int x, y;
     coefficient *orig_pos_u, *orig_pos_v;
@@ -2570,11 +2548,11 @@ static void encode_transform_unit(encoder_state * const encoder_state,
       }
     }

-    if (cur_cu->coeff_top_u[depth]) {
+    if (cbf_is_set(cur_cu->cbf.u, depth)) {
       encode_coeff_nxn(encoder_state, coeff_u, width_c, 2, scan_idx, 0);
     }

-    if (cur_cu->coeff_top_v[depth]) {
+    if (cbf_is_set(cur_cu->cbf.v, depth)) {
       encode_coeff_nxn(encoder_state, coeff_v, width_c, 2, scan_idx, 0);
     }
   }
@@ -2608,15 +2586,9 @@ void encode_transform_coeff(encoder_state * const encoder_state, int32_t x_pu,in

   int8_t split = (cur_cu->tr_depth > depth);

-  int8_t cb_flag_u = cur_cu->coeff_top_u[depth];
-  int8_t cb_flag_v = cur_cu->coeff_top_v[depth];
-  int cb_flag_y;
-  if (depth <= MAX_DEPTH) {
-    cb_flag_y = cur_cu->coeff_top_y[depth];
-  } else {
-    int pu_index = x_pu % 2 + 2 * (y_pu % 2);
-    cb_flag_y = cur_cu->coeff_top_y[depth + pu_index];
-  }
+  const int cb_flag_y = cbf_is_set(cur_cu->cbf.y, depth + PU_INDEX(x_pu, y_pu));
+  const int cb_flag_u = cbf_is_set(cur_cu->cbf.u, depth);
+  const int cb_flag_v = cbf_is_set(cur_cu->cbf.v, depth);

   // The split_transform_flag is not signaled when:
   // - transform size is greater than 32 (depth == 0)
@@ -232,14 +232,14 @@ void filter_deblock_edge_luma(encoder_state * const encoder_state,
                            * (cur_pic->width_in_lcu << MAX_DEPTH)];
     // Filter strength
     strength = 0;
     // Intra blocks have strength 2
     if(cu_q->type == CU_INTRA || cu_p->type == CU_INTRA) {
       strength = 2;
     // Non-zero residual/coeffs and transform boundary
-    } else if(cu_q->coeff_top_y[cu_q->tr_depth] || cu_p->coeff_top_y[cu_p->tr_depth]) {
+    } else if(cbf_is_set(cu_q->cbf.y, cu_q->tr_depth) || cbf_is_set(cu_p->cbf.y, cu_p->tr_depth)) {
+      // Neither CU is intra so tr_depth <= MAX_DEPTH.
       strength = 1;
     // Absolute motion vector diff between blocks >= 1 (Integer pixel)
     } else if((abs(cu_q->inter.mv[0] - cu_p->inter.mv[0]) >= 4) || (abs(cu_q->inter.mv[1] - cu_p->inter.mv[1]) >= 4)) {
       strength = 1;
     } else if(cu_q->inter.mv_ref != cu_p->inter.mv_ref) {
       strength = 1;
@@ -74,6 +74,13 @@ typedef struct
   int8_t mode;
 } cu_info_inter;

+typedef struct
+{
+  uint8_t y;
+  uint8_t u;
+  uint8_t v;
+} cu_cbf_t;
+
 /**
  * \brief Struct for CU info
  */
@@ -88,10 +95,7 @@ typedef struct
   int8_t merged;     //!< \brief flag to indicate this block is merged
   int8_t merge_idx;  //!< \brief merge index

-  // MAX_DEPTH+4 for the 4 PUs at the last level.
-  int8_t coeff_top_y[MAX_DEPTH+5]; //!< \brief is there coded coeffs Y in top level
-  int8_t coeff_top_u[MAX_DEPTH+5]; //!< \brief is there coded coeffs U in top level
-  int8_t coeff_top_v[MAX_DEPTH+5]; //!< \brief is there coded coeffs V in top level
+  cu_cbf_t cbf;

   cu_info_intra intra[4];
   cu_info_inter inter;
 } cu_info;
@@ -207,6 +211,27 @@ typedef struct {
 //////////////////////////////////////////////////////////////////////////
 // FUNCTIONS

+/**
+ * Check if CBF in a given level >= depth is true.
+ */
+static INLINE int cbf_is_set(uint8_t cbf_flags, int depth)
+{
+  // Transform data for 4x4 blocks is stored at depths 4-8 for luma, so masks
+  // for those levels don't include the other ones.
+  static const uint8_t masks[8] = { 0xff, 0x7f, 0x3f, 0x1f, 0x8, 0x4, 0x2, 0x1 };
+
+  return (cbf_flags & masks[depth]) != 0;
+}
+
+/**
+ * Set CBF in a level to true.
+ */
+static INLINE void cbf_set(uint8_t *cbf_flags, int depth)
+{
+  // Return value of the bit corresponding to the level.
+  *cbf_flags |= 1 << (7 - depth);
+}
+
 yuv_t * yuv_t_alloc(int luma_size);
 void yuv_t_free(yuv_t * yuv);
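To make the mask table concrete, here is a small self-check that could be compiled against the two helpers above. The function name and the asserts are illustrative additions, not part of the commit; only the bit layout they exercise comes from the code in the hunk.

#include <assert.h>
#include <stdint.h>

/* Depth d sets bit (7 - d). The masks for depths 0-3 also cover all deeper
 * bits, while the masks for depths 4-7 (the four 4x4 luma PUs) test only
 * their own bit. */
static void cbf_mask_self_check(void)
{
  uint8_t flags = 0;
  cbf_set(&flags, 3);             /* flags == 0x10                  */
  assert( cbf_is_set(flags, 0));  /* masks[0] = 0xff: any depth     */
  assert( cbf_is_set(flags, 3));  /* masks[3] = 0x1f: depths 3..7   */
  assert(!cbf_is_set(flags, 4));  /* masks[4] = 0x08: depth 4 only  */
}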
src/search.c
@@ -655,7 +655,7 @@ static void lcu_set_coeff(lcu_t *lcu, int x_px, int y_px, int depth, cu_info *cu
       cu_info *cu_from = &lcu_cu[(x & mask) + (y & mask) * LCU_T_CU_WIDTH];
       if (cu != cu_from) {
         // Chroma coeff data is not used, luma is needed for deblocking
-        memcpy(cu->coeff_top_y, cu_from->coeff_top_y, 8);
+        cu->cbf.y = cu_from->cbf.y;
       }
     }
   }
@@ -910,10 +910,13 @@ static int search_cu(encoder_state * const encoder_state, int x, int y, int dept
     lcu_set_intra_mode(&work_tree[depth], x, y, depth, cur_cu->intra[PU_INDEX(x >> 2, y >> 2)].mode, cur_cu->part_size);
     intra_recon_lcu(encoder_state, x, y, depth,&work_tree[depth], cur_pic->width, cur_pic->height);
   } else if (cur_cu->type == CU_INTER) {
+    int cbf;
     inter_recon_lcu(encoder_state, encoder_state->ref->pics[cur_cu->inter.mv_ref], x, y, LCU_WIDTH>>depth, cur_cu->inter.mv, &work_tree[depth]);
     encode_transform_tree(encoder_state, x, y, depth, &work_tree[depth]);

-    if(cur_cu->merged && !cur_cu->coeff_top_y[depth] && !cur_cu->coeff_top_u[depth] && !cur_cu->coeff_top_v[depth]) {
+    cbf = cbf_is_set(cur_cu->cbf.y, depth) || cbf_is_set(cur_cu->cbf.u, depth) || cbf_is_set(cur_cu->cbf.v, depth);
+
+    if(cur_cu->merged && !cbf) {
       cur_cu->merged = 0;
       cur_cu->skipped = 1;
       // Selecting skip reduces bits needed to code the CU
@@ -931,12 +934,12 @@ static int search_cu(encoder_state * const encoder_state, int x, int y, int dept
   if (depth < MAX_INTRA_SEARCH_DEPTH || depth < MAX_INTER_SEARCH_DEPTH) {
     int half_cu = cu_width / 2;
     int split_cost = (int)(4.5 * encoder_state->cur_lambda_cost);
+    int cbf = cbf_is_set(cur_cu->cbf.y, depth) || cbf_is_set(cur_cu->cbf.u, depth) || cbf_is_set(cur_cu->cbf.v, depth);

     // If skip mode was selected for the block, skip further search.
     // Skip mode means there's no coefficients in the block, so splitting
     // might not give any better results but takes more time to do.
-    if(cur_cu->type == CU_NOTSET || cur_cu->coeff_top_y[depth] ||
-       cur_cu->coeff_top_u[depth] || cur_cu->coeff_top_v[depth]) {
+    if(cur_cu->type == CU_NOTSET || cbf) {
       split_cost += search_cu(encoder_state, x, y, depth + 1, work_tree);
       split_cost += search_cu(encoder_state, x + half_cu, y, depth + 1, work_tree);
       split_cost += search_cu(encoder_state, x, y + half_cu, depth + 1, work_tree);