diff --git a/src/encoder.c b/src/encoder.c
index 2b76abe1..05a459f9 100644
--- a/src/encoder.c
+++ b/src/encoder.c
@@ -1836,19 +1836,23 @@ void encode_coding_tree(encoder_state * const encoder_state,
         } // for ref_list
     } // if !merge
 
+    {
+      int cbf = (cbf_is_set(cur_cu->cbf.y, depth) ||
+                 cbf_is_set(cur_cu->cbf.u, depth) ||
+                 cbf_is_set(cur_cu->cbf.v, depth));
 
-    // Only need to signal coded block flag if not skipped or merged
-    // skip = no coded residual, merge = coded residual
-    if (!cur_cu->merged) {
-      cabac->ctx = &(cabac->ctx_cu_qt_root_cbf_model);
-      CABAC_BIN(cabac, cur_cu->coeff_top_y[depth] | cur_cu->coeff_top_u[depth] | cur_cu->coeff_top_v[depth], "rqt_root_cbf");
+      // Only need to signal coded block flag if not skipped or merged
+      // skip = no coded residual, merge = coded residual
+      if (!cur_cu->merged) {
+        cabac->ctx = &(cabac->ctx_cu_qt_root_cbf_model);
+        CABAC_BIN(cabac, cbf, "rqt_root_cbf");
+      }
+      // Code (possible) coeffs to bitstream
+
+      if (cbf) {
+        encode_transform_coeff(encoder_state, x_ctb * 2, y_ctb * 2, depth, 0, 0, 0);
+      }
     }
-    // Code (possible) coeffs to bitstream
-
-    if(cur_cu->coeff_top_y[depth] | cur_cu->coeff_top_u[depth] | cur_cu->coeff_top_v[depth]) {
-      encode_transform_coeff(encoder_state, x_ctb * 2, y_ctb * 2, depth, 0, 0, 0);
-    }
-
 
     // END for each part
   } else if (cur_cu->type == CU_INTRA) {
@@ -2114,21 +2118,20 @@ void encode_transform_tree(encoder_state * const encoder_state, int32_t x, int32
     encode_transform_tree(encoder_state, x,          y + offset, depth+1, lcu);
     encode_transform_tree(encoder_state, x + offset, y + offset, depth+1, lcu);
 
-    // Derive coded coeff flags from the next depth
-    if (depth == MAX_DEPTH) {
-      cur_cu->coeff_top_y[depth] = cur_cu->coeff_top_y[depth+1] | cur_cu->coeff_top_y[depth+2] | cur_cu->coeff_top_y[depth+3] | cur_cu->coeff_top_y[depth+4];
-      cur_cu->coeff_top_u[depth] = cur_cu->coeff_top_u[depth+1];
-      cur_cu->coeff_top_v[depth] = cur_cu->coeff_top_v[depth+1];
-    } else {
+    // Propagate coded block flags from child CUs to parent CU.
+    if (depth < MAX_DEPTH) {
       cu_info *cu_a =  &lcu->cu[LCU_CU_OFFSET + ((x_local + offset)>>3) +  (y_local>>3)        *LCU_T_CU_WIDTH];
       cu_info *cu_b =  &lcu->cu[LCU_CU_OFFSET +  (x_local>>3)           + ((y_local+offset)>>3)*LCU_T_CU_WIDTH];
       cu_info *cu_c =  &lcu->cu[LCU_CU_OFFSET + ((x_local + offset)>>3) + ((y_local+offset)>>3)*LCU_T_CU_WIDTH];
-      cur_cu->coeff_top_y[depth] = cur_cu->coeff_top_y[depth+1] | cu_a->coeff_top_y[depth+1] | cu_b->coeff_top_y[depth+1]
-                                    | cu_c->coeff_top_y[depth+1];
-      cur_cu->coeff_top_u[depth] = cur_cu->coeff_top_u[depth+1] | cu_a->coeff_top_u[depth+1] | cu_b->coeff_top_u[depth+1]
-                                    | cu_c->coeff_top_u[depth+1];
-      cur_cu->coeff_top_v[depth] = cur_cu->coeff_top_v[depth+1] | cu_a->coeff_top_v[depth+1] | cu_b->coeff_top_v[depth+1]
-                                    | cu_c->coeff_top_v[depth+1];
+      if (cbf_is_set(cu_a->cbf.y, depth+1) || cbf_is_set(cu_b->cbf.y, depth+1) || cbf_is_set(cu_c->cbf.y, depth+1)) {
+        cbf_set(&cur_cu->cbf.y, depth);
+      }
+      if (cbf_is_set(cu_a->cbf.u, depth+1) || cbf_is_set(cu_b->cbf.u, depth+1) || cbf_is_set(cu_c->cbf.u, depth+1)) {
+        cbf_set(&cur_cu->cbf.u, depth);
+      }
+      if (cbf_is_set(cu_a->cbf.v, depth+1) || cbf_is_set(cu_b->cbf.v, depth+1) || cbf_is_set(cu_c->cbf.v, depth+1)) {
+        cbf_set(&cur_cu->cbf.v, depth);
+      }
     }
 
     return;
@@ -2317,19 +2320,7 @@ void encode_transform_tree(encoder_state * const encoder_state, int32_t x, int32
       if (coeff_y[i] != 0) {
         // Found one, we can break here
         cbf_y = 1;
-        if (depth <= MAX_DEPTH) {
-          int d;
-          for (d = 0; d <= depth; ++d) {
-            cur_cu->coeff_top_y[d] = 1;
-          }
-        } else {
-          int pu_index = (x_pu & 1) + 2 * (y_pu & 1);
-          int d;
-          cur_cu->coeff_top_y[depth + pu_index] = 1;
-          for (d = 0; d < depth; ++d) {
-            cur_cu->coeff_top_y[d] = 1;
-          }
-        }
+        cbf_set(&cur_cu->cbf.y, depth + PU_INDEX(x_pu, y_pu));
         break;
       }
     }
@@ -2392,26 +2383,19 @@ void encode_transform_tree(encoder_state * const encoder_state, int32_t x, int32
       transform_chroma(encoder_state, cur_cu, chroma_depth, base_u, pred_u, coeff_u, scan_idx_chroma, pre_quant_coeff, block);
       for (i = 0; i < chroma_size; i++) {
         if (coeff_u[i] != 0) {
-          int d;
-          for (d = 0; d <= depth; ++d) {
-            cur_cu->coeff_top_u[d] = 1;
-          }
+          cbf_set(&cur_cu->cbf.u, depth);
           break;
         }
       }
       transform_chroma(encoder_state, cur_cu, chroma_depth, base_v, pred_v, coeff_v, scan_idx_chroma, pre_quant_coeff, block);
       for (i = 0; i < chroma_size; i++) {
         if (coeff_v[i] != 0) {
-          int d;
-          for (d = 0; d <= depth; ++d) {
-            cur_cu->coeff_top_v[d] = 1;
-          }
+          cbf_set(&cur_cu->cbf.v, depth);
           break;
         }
       }
 
-      // Save coefficients to cu.
-      if (cur_cu->coeff_top_u[depth] || cur_cu->coeff_top_v[depth]) {
+      if (cbf_is_set(cur_cu->cbf.u, depth) || cbf_is_set(cur_cu->cbf.v, depth)) {
         i = 0;
         for (y = 0; y < width_c; y++) {
           for (x = 0; x < width_c; x++) {
@@ -2423,11 +2407,11 @@ void encode_transform_tree(encoder_state * const encoder_state, int32_t x, int32
       }
 
       reconstruct_chroma(encoder_state, cur_cu, chroma_depth,
-                         cur_cu->coeff_top_u[depth],
+                         cbf_is_set(cur_cu->cbf.u, depth),
                          coeff_u, recbase_u, pred_u, color_type_u,
                          pre_quant_coeff, block);
       reconstruct_chroma(encoder_state, cur_cu, chroma_depth,
-                         cur_cu->coeff_top_v[depth],
+                         cbf_is_set(cur_cu->cbf.v, depth),
                          coeff_v, recbase_v, pred_v, color_type_v,
                          pre_quant_coeff, block);
     }
@@ -2458,13 +2442,7 @@ static void encode_transform_unit(encoder_state * const encoder_state,
   int8_t scan_idx = SCAN_DIAG;
   uint32_t dir_mode;
 
-  int cbf_y;
-  if (depth <= MAX_DEPTH) {
-    cbf_y = cur_cu->coeff_top_y[depth];
-  } else {
-    int pu_index = x_pu % 2 + 2 * (y_pu % 2);
-    cbf_y = cur_cu->coeff_top_y[depth + pu_index];
-  }
+  int cbf_y = cbf_is_set(cur_cu->cbf.y, depth + PU_INDEX(x_pu, y_pu));
 
   if (cbf_y) {
     int x = x_pu * (LCU_WIDTH >> MAX_PU_DEPTH);
@@ -2525,7 +2503,7 @@ static void encode_transform_unit(encoder_state * const encoder_state,
     return;
   }
 
-  if (cur_cu->coeff_top_u[depth] || cur_cu->coeff_top_v[depth]) {
+  if (cbf_is_set(cur_cu->cbf.u, depth) || cbf_is_set(cur_cu->cbf.v, depth)) {
     int x, y;
     coefficient *orig_pos_u, *orig_pos_v;
 
@@ -2570,11 +2548,11 @@ static void encode_transform_unit(encoder_state * const encoder_state,
       }
     }
 
-    if (cur_cu->coeff_top_u[depth]) {
+    if (cbf_is_set(cur_cu->cbf.u, depth)) {
       encode_coeff_nxn(encoder_state, coeff_u, width_c, 2, scan_idx, 0);
     }
 
-    if (cur_cu->coeff_top_v[depth]) {
+    if (cbf_is_set(cur_cu->cbf.v, depth)) {
       encode_coeff_nxn(encoder_state, coeff_v, width_c, 2, scan_idx, 0);
     }
   }
@@ -2608,15 +2586,9 @@ void encode_transform_coeff(encoder_state * const encoder_state, int32_t x_pu,in
 
   int8_t split = (cur_cu->tr_depth > depth);
 
-  int8_t cb_flag_u = cur_cu->coeff_top_u[depth];
-  int8_t cb_flag_v = cur_cu->coeff_top_v[depth];
-  int cb_flag_y;
-  if (depth <= MAX_DEPTH) {
-    cb_flag_y = cur_cu->coeff_top_y[depth];
-  } else {
-    int pu_index = x_pu % 2 + 2 * (y_pu % 2);
-    cb_flag_y = cur_cu->coeff_top_y[depth + pu_index];
-  }
+  const int cb_flag_y = cbf_is_set(cur_cu->cbf.y, depth + PU_INDEX(x_pu, y_pu));
+  const int cb_flag_u = cbf_is_set(cur_cu->cbf.u, depth);
+  const int cb_flag_v = cbf_is_set(cur_cu->cbf.v, depth);
 
   // The split_transform_flag is not signaled when:
   // - transform size is greater than 32 (depth == 0)
diff --git a/src/filter.c b/src/filter.c
index 18bc09a9..20ac1c4f 100644
--- a/src/filter.c
+++ b/src/filter.c
@@ -232,14 +232,14 @@ void filter_deblock_edge_luma(encoder_state * const encoder_state,
                                                           * (cur_pic->width_in_lcu << MAX_DEPTH)];
         // Filter strength
         strength = 0;
-        // Intra blocks have strength 2
         if(cu_q->type == CU_INTRA || cu_p->type == CU_INTRA) {
           strength = 2;
+        } else if(cbf_is_set(cu_q->cbf.y, cu_q->tr_depth) || cbf_is_set(cu_p->cbf.y, cu_p->tr_depth)) {
           // Non-zero residual/coeffs and transform boundary
-        } else if(cu_q->coeff_top_y[cu_q->tr_depth] || cu_p->coeff_top_y[cu_p->tr_depth]) {
+          // Neither CU is intra so tr_depth <= MAX_DEPTH.
           strength = 1;
-          // Absolute motion vector diff between blocks >= 1 (Integer pixel)
         } else if((abs(cu_q->inter.mv[0] - cu_p->inter.mv[0]) >= 4) || (abs(cu_q->inter.mv[1] - cu_p->inter.mv[1]) >= 4)) {
+          // Absolute motion vector diff between blocks >= 1 (Integer pixel)
           strength = 1;
         } else if(cu_q->inter.mv_ref != cu_p->inter.mv_ref) {
           strength = 1;
diff --git a/src/picture.h b/src/picture.h
index b61d36f1..2fc6d2bd 100644
--- a/src/picture.h
+++ b/src/picture.h
@@ -74,6 +74,13 @@ typedef struct
   int8_t mode;
 } cu_info_inter;
 
+typedef struct  // Coded block flag (CBF) bitfields of a CU, one byte per colour plane.
+{
+  uint8_t y;  //!< \brief luma CBF bits; bit (7 - depth) is the one written by cbf_set()
+  uint8_t u;  //!< \brief chroma U CBF bits, same bit layout
+  uint8_t v;  //!< \brief chroma V CBF bits, same bit layout
+} cu_cbf_t;
+
 /**
  * \brief Struct for CU info
  */
@@ -88,10 +95,7 @@ typedef struct
   int8_t merged;     //!< \brief flag to indicate this block is merged
   int8_t merge_idx;  //!< \brief merge index
 
-  // MAX_DEPTH+4 for the 4 PUs at the last level.
-  int8_t coeff_top_y[MAX_DEPTH+5];  //!< \brief is there coded coeffs Y in top level
-  int8_t coeff_top_u[MAX_DEPTH+5];  //!< \brief is there coded coeffs U in top level
-  int8_t coeff_top_v[MAX_DEPTH+5];  //!< \brief is there coded coeffs V in top level
+  cu_cbf_t cbf;
   cu_info_intra intra[4];
   cu_info_inter inter;
 } cu_info;
@@ -207,6 +211,27 @@ typedef struct {
 //////////////////////////////////////////////////////////////////////////
 // FUNCTIONS
 
+/**
+ * Return 1 if a CBF bit is set at depth (for depth 0-3: at depth or any deeper level), else 0.
+ */
+static INLINE int cbf_is_set(uint8_t cbf_flags, int depth)
+{
+  // Transform data for 4x4 blocks is stored at depths 4-7 for luma, so masks
+  // for those levels select a single bit and don't include the other ones.
+  static const uint8_t masks[8] = { 0xff, 0x7f, 0x3f, 0x1f, 0x8, 0x4, 0x2, 0x1 };
+  // NOTE(review): depth is not range-checked here; values outside 0-7 would index past masks[].
+  return (cbf_flags & masks[depth]) != 0;
+}
+
+/**
+ * Set the CBF bit of a single level (depth) in *cbf_flags; all other bits are left unchanged.
+ */
+static INLINE void cbf_set(uint8_t *cbf_flags, int depth)
+{
+  // Depth 0 maps to the most significant bit (0x80), depth 7 to the least significant (0x01).
+  *cbf_flags |= 1 << (7 - depth);
+}
+
 yuv_t * yuv_t_alloc(int luma_size);
 void yuv_t_free(yuv_t * yuv);
 
diff --git a/src/search.c b/src/search.c
index 66e4c3f4..c3deae4e 100644
--- a/src/search.c
+++ b/src/search.c
@@ -655,7 +655,7 @@ static void lcu_set_coeff(lcu_t *lcu, int x_px, int y_px, int depth, cu_info *cu
       cu_info *cu_from = &lcu_cu[(x & mask) + (y & mask) * LCU_T_CU_WIDTH];
       if (cu != cu_from) {
         // Chroma coeff data is not used, luma is needed for deblocking
-        memcpy(cu->coeff_top_y, cu_from->coeff_top_y, 8);
+        cu->cbf.y = cu_from->cbf.y;
       }
     }
   }
@@ -910,10 +910,13 @@ static int search_cu(encoder_state * const encoder_state, int x, int y, int dept
       lcu_set_intra_mode(&work_tree[depth], x, y, depth, cur_cu->intra[PU_INDEX(x >> 2, y >> 2)].mode, cur_cu->part_size);
       intra_recon_lcu(encoder_state, x, y, depth,&work_tree[depth], cur_pic->width, cur_pic->height);
     } else if (cur_cu->type == CU_INTER) {
+      int cbf;
       inter_recon_lcu(encoder_state, encoder_state->ref->pics[cur_cu->inter.mv_ref], x, y, LCU_WIDTH>>depth, cur_cu->inter.mv, &work_tree[depth]);
       encode_transform_tree(encoder_state, x, y, depth, &work_tree[depth]);
 
-      if(cur_cu->merged && !cur_cu->coeff_top_y[depth] && !cur_cu->coeff_top_u[depth] && !cur_cu->coeff_top_v[depth]) {
+      cbf = cbf_is_set(cur_cu->cbf.y, depth) || cbf_is_set(cur_cu->cbf.u, depth) || cbf_is_set(cur_cu->cbf.v, depth);
+
+      if(cur_cu->merged && !cbf) {
         cur_cu->merged = 0;
         cur_cu->skipped = 1;
         // Selecting skip reduces bits needed to code the CU
@@ -931,12 +934,12 @@ static int search_cu(encoder_state * const encoder_state, int x, int y, int dept
   if (depth < MAX_INTRA_SEARCH_DEPTH || depth < MAX_INTER_SEARCH_DEPTH) {
     int half_cu = cu_width / 2;
     int split_cost = (int)(4.5 * encoder_state->cur_lambda_cost);
+    int cbf = cbf_is_set(cur_cu->cbf.y, depth) || cbf_is_set(cur_cu->cbf.u, depth) || cbf_is_set(cur_cu->cbf.v, depth);
 
     // If skip mode was selected for the block, skip further search.
     // Skip mode means there's no coefficients in the block, so splitting
     // might not give any better results but takes more time to do.
-    if(cur_cu->type == CU_NOTSET || cur_cu->coeff_top_y[depth] ||
-       cur_cu->coeff_top_u[depth] || cur_cu->coeff_top_v[depth]) {
+    if(cur_cu->type == CU_NOTSET || cbf) {
       split_cost += search_cu(encoder_state, x,           y,           depth + 1, work_tree);
       split_cost += search_cu(encoder_state, x + half_cu, y,           depth + 1, work_tree);
       split_cost += search_cu(encoder_state, x,           y + half_cu, depth + 1, work_tree);