diff --git a/src/encoder.c b/src/encoder.c index 2b76abe1..05a459f9 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -1836,19 +1836,23 @@ void encode_coding_tree(encoder_state * const encoder_state, } // for ref_list } // if !merge + { + int cbf = (cbf_is_set(cur_cu->cbf.y, depth) || + cbf_is_set(cur_cu->cbf.u, depth) || + cbf_is_set(cur_cu->cbf.v, depth)); - // Only need to signal coded block flag if not skipped or merged - // skip = no coded residual, merge = coded residual - if (!cur_cu->merged) { - cabac->ctx = &(cabac->ctx_cu_qt_root_cbf_model); - CABAC_BIN(cabac, cur_cu->coeff_top_y[depth] | cur_cu->coeff_top_u[depth] | cur_cu->coeff_top_v[depth], "rqt_root_cbf"); + // Only need to signal coded block flag if not skipped or merged + // skip = no coded residual, merge = coded residual + if (!cur_cu->merged) { + cabac->ctx = &(cabac->ctx_cu_qt_root_cbf_model); + CABAC_BIN(cabac, cbf, "rqt_root_cbf"); + } + // Code (possible) coeffs to bitstream + + if (cbf) { + encode_transform_coeff(encoder_state, x_ctb * 2, y_ctb * 2, depth, 0, 0, 0); + } } - // Code (possible) coeffs to bitstream - - if(cur_cu->coeff_top_y[depth] | cur_cu->coeff_top_u[depth] | cur_cu->coeff_top_v[depth]) { - encode_transform_coeff(encoder_state, x_ctb * 2, y_ctb * 2, depth, 0, 0, 0); - } - // END for each part } else if (cur_cu->type == CU_INTRA) { @@ -2114,21 +2118,20 @@ void encode_transform_tree(encoder_state * const encoder_state, int32_t x, int32 encode_transform_tree(encoder_state, x, y + offset, depth+1, lcu); encode_transform_tree(encoder_state, x + offset, y + offset, depth+1, lcu); - // Derive coded coeff flags from the next depth - if (depth == MAX_DEPTH) { - cur_cu->coeff_top_y[depth] = cur_cu->coeff_top_y[depth+1] | cur_cu->coeff_top_y[depth+2] | cur_cu->coeff_top_y[depth+3] | cur_cu->coeff_top_y[depth+4]; - cur_cu->coeff_top_u[depth] = cur_cu->coeff_top_u[depth+1]; - cur_cu->coeff_top_v[depth] = cur_cu->coeff_top_v[depth+1]; - } else { + // Propagate coded block flags from 
child CUs to parent CU. + if (depth < MAX_DEPTH) { cu_info *cu_a = &lcu->cu[LCU_CU_OFFSET + ((x_local + offset)>>3) + (y_local>>3) *LCU_T_CU_WIDTH]; cu_info *cu_b = &lcu->cu[LCU_CU_OFFSET + (x_local>>3) + ((y_local+offset)>>3)*LCU_T_CU_WIDTH]; cu_info *cu_c = &lcu->cu[LCU_CU_OFFSET + ((x_local + offset)>>3) + ((y_local+offset)>>3)*LCU_T_CU_WIDTH]; - cur_cu->coeff_top_y[depth] = cur_cu->coeff_top_y[depth+1] | cu_a->coeff_top_y[depth+1] | cu_b->coeff_top_y[depth+1] - | cu_c->coeff_top_y[depth+1]; - cur_cu->coeff_top_u[depth] = cur_cu->coeff_top_u[depth+1] | cu_a->coeff_top_u[depth+1] | cu_b->coeff_top_u[depth+1] - | cu_c->coeff_top_u[depth+1]; - cur_cu->coeff_top_v[depth] = cur_cu->coeff_top_v[depth+1] | cu_a->coeff_top_v[depth+1] | cu_b->coeff_top_v[depth+1] - | cu_c->coeff_top_v[depth+1]; + if (cbf_is_set(cu_a->cbf.y, depth+1) || cbf_is_set(cu_b->cbf.y, depth+1) || cbf_is_set(cu_c->cbf.y, depth+1)) { + cbf_set(&cur_cu->cbf.y, depth); + } + if (cbf_is_set(cu_a->cbf.u, depth+1) || cbf_is_set(cu_b->cbf.u, depth+1) || cbf_is_set(cu_c->cbf.u, depth+1)) { + cbf_set(&cur_cu->cbf.u, depth); + } + if (cbf_is_set(cu_a->cbf.v, depth+1) || cbf_is_set(cu_b->cbf.v, depth+1) || cbf_is_set(cu_c->cbf.v, depth+1)) { + cbf_set(&cur_cu->cbf.v, depth); + } } return; @@ -2317,19 +2320,7 @@ void encode_transform_tree(encoder_state * const encoder_state, int32_t x, int32 if (coeff_y[i] != 0) { // Found one, we can break here cbf_y = 1; - if (depth <= MAX_DEPTH) { - int d; - for (d = 0; d <= depth; ++d) { - cur_cu->coeff_top_y[d] = 1; - } - } else { - int pu_index = (x_pu & 1) + 2 * (y_pu & 1); - int d; - cur_cu->coeff_top_y[depth + pu_index] = 1; - for (d = 0; d < depth; ++d) { - cur_cu->coeff_top_y[d] = 1; - } - } + cbf_set(&cur_cu->cbf.y, depth + PU_INDEX(x_pu, y_pu)); break; } } @@ -2392,26 +2383,19 @@ void encode_transform_tree(encoder_state * const encoder_state, int32_t x, int32 transform_chroma(encoder_state, cur_cu, chroma_depth, base_u, pred_u, coeff_u, scan_idx_chroma, 
pre_quant_coeff, block); for (i = 0; i < chroma_size; i++) { if (coeff_u[i] != 0) { - int d; - for (d = 0; d <= depth; ++d) { - cur_cu->coeff_top_u[d] = 1; - } + cbf_set(&cur_cu->cbf.u, depth); break; } } transform_chroma(encoder_state, cur_cu, chroma_depth, base_v, pred_v, coeff_v, scan_idx_chroma, pre_quant_coeff, block); for (i = 0; i < chroma_size; i++) { if (coeff_v[i] != 0) { - int d; - for (d = 0; d <= depth; ++d) { - cur_cu->coeff_top_v[d] = 1; - } + cbf_set(&cur_cu->cbf.v, depth); break; } } - // Save coefficients to cu. - if (cur_cu->coeff_top_u[depth] || cur_cu->coeff_top_v[depth]) { + if (cbf_is_set(cur_cu->cbf.u, depth) || cbf_is_set(cur_cu->cbf.v, depth)) { i = 0; for (y = 0; y < width_c; y++) { for (x = 0; x < width_c; x++) { @@ -2423,11 +2407,11 @@ void encode_transform_tree(encoder_state * const encoder_state, int32_t x, int32 } reconstruct_chroma(encoder_state, cur_cu, chroma_depth, - cur_cu->coeff_top_u[depth], + cbf_is_set(cur_cu->cbf.u, depth), coeff_u, recbase_u, pred_u, color_type_u, pre_quant_coeff, block); reconstruct_chroma(encoder_state, cur_cu, chroma_depth, - cur_cu->coeff_top_v[depth], + cbf_is_set(cur_cu->cbf.v, depth), coeff_v, recbase_v, pred_v, color_type_v, pre_quant_coeff, block); } @@ -2458,13 +2442,7 @@ static void encode_transform_unit(encoder_state * const encoder_state, int8_t scan_idx = SCAN_DIAG; uint32_t dir_mode; - int cbf_y; - if (depth <= MAX_DEPTH) { - cbf_y = cur_cu->coeff_top_y[depth]; - } else { - int pu_index = x_pu % 2 + 2 * (y_pu % 2); - cbf_y = cur_cu->coeff_top_y[depth + pu_index]; - } + int cbf_y = cbf_is_set(cur_cu->cbf.y, depth + PU_INDEX(x_pu, y_pu)); if (cbf_y) { int x = x_pu * (LCU_WIDTH >> MAX_PU_DEPTH); @@ -2525,7 +2503,7 @@ static void encode_transform_unit(encoder_state * const encoder_state, return; } - if (cur_cu->coeff_top_u[depth] || cur_cu->coeff_top_v[depth]) { + if (cbf_is_set(cur_cu->cbf.u, depth) || cbf_is_set(cur_cu->cbf.v, depth)) { int x, y; coefficient *orig_pos_u, *orig_pos_v; @@ 
-2570,11 +2548,11 @@ static void encode_transform_unit(encoder_state * const encoder_state, } } - if (cur_cu->coeff_top_u[depth]) { + if (cbf_is_set(cur_cu->cbf.u, depth)) { encode_coeff_nxn(encoder_state, coeff_u, width_c, 2, scan_idx, 0); } - if (cur_cu->coeff_top_v[depth]) { + if (cbf_is_set(cur_cu->cbf.v, depth)) { encode_coeff_nxn(encoder_state, coeff_v, width_c, 2, scan_idx, 0); } } @@ -2608,15 +2586,9 @@ void encode_transform_coeff(encoder_state * const encoder_state, int32_t x_pu,in int8_t split = (cur_cu->tr_depth > depth); - int8_t cb_flag_u = cur_cu->coeff_top_u[depth]; - int8_t cb_flag_v = cur_cu->coeff_top_v[depth]; - int cb_flag_y; - if (depth <= MAX_DEPTH) { - cb_flag_y = cur_cu->coeff_top_y[depth]; - } else { - int pu_index = x_pu % 2 + 2 * (y_pu % 2); - cb_flag_y = cur_cu->coeff_top_y[depth + pu_index]; - } + const int cb_flag_y = cbf_is_set(cur_cu->cbf.y, depth + PU_INDEX(x_pu, y_pu)); + const int cb_flag_u = cbf_is_set(cur_cu->cbf.u, depth); + const int cb_flag_v = cbf_is_set(cur_cu->cbf.v, depth); // The split_transform_flag is not signaled when: // - transform size is greater than 32 (depth == 0) diff --git a/src/filter.c b/src/filter.c index 18bc09a9..20ac1c4f 100644 --- a/src/filter.c +++ b/src/filter.c @@ -232,14 +232,14 @@ void filter_deblock_edge_luma(encoder_state * const encoder_state, * (cur_pic->width_in_lcu << MAX_DEPTH)]; // Filter strength strength = 0; - // Intra blocks have strength 2 if(cu_q->type == CU_INTRA || cu_p->type == CU_INTRA) { strength = 2; + } else if(cbf_is_set(cu_q->cbf.y, cu_q->tr_depth) || cbf_is_set(cu_p->cbf.y, cu_p->tr_depth)) { // Non-zero residual/coeffs and transform boundary - } else if(cu_q->coeff_top_y[cu_q->tr_depth] || cu_p->coeff_top_y[cu_p->tr_depth]) { + // Neither CU is intra so tr_depth <= MAX_DEPTH. 
strength = 1; - // Absolute motion vector diff between blocks >= 1 (Integer pixel) } else if((abs(cu_q->inter.mv[0] - cu_p->inter.mv[0]) >= 4) || (abs(cu_q->inter.mv[1] - cu_p->inter.mv[1]) >= 4)) { + // Absolute motion vector diff between blocks >= 1 (Integer pixel) strength = 1; } else if(cu_q->inter.mv_ref != cu_p->inter.mv_ref) { strength = 1; diff --git a/src/picture.h b/src/picture.h index b61d36f1..2fc6d2bd 100644 --- a/src/picture.h +++ b/src/picture.h @@ -74,6 +74,13 @@ typedef struct int8_t mode; } cu_info_inter; +typedef struct +{ + uint8_t y; + uint8_t u; + uint8_t v; +} cu_cbf_t; + /** * \brief Struct for CU info */ @@ -88,10 +95,7 @@ typedef struct int8_t merged; //!< \brief flag to indicate this block is merged int8_t merge_idx; //!< \brief merge index - // MAX_DEPTH+4 for the 4 PUs at the last level. - int8_t coeff_top_y[MAX_DEPTH+5]; //!< \brief is there coded coeffs Y in top level - int8_t coeff_top_u[MAX_DEPTH+5]; //!< \brief is there coded coeffs U in top level - int8_t coeff_top_v[MAX_DEPTH+5]; //!< \brief is there coded coeffs V in top level + cu_cbf_t cbf; cu_info_intra intra[4]; cu_info_inter inter; } cu_info; @@ -207,6 +211,27 @@ typedef struct { ////////////////////////////////////////////////////////////////////////// // FUNCTIONS +/** + * Check if CBF in a given level >= depth is true. + */ +static INLINE int cbf_is_set(uint8_t cbf_flags, int depth) +{ + // Transform data for 4x4 blocks is stored at depths 4-7 for luma, so masks + // for those levels don't include the other ones. + static const uint8_t masks[8] = { 0xff, 0x7f, 0x3f, 0x1f, 0x8, 0x4, 0x2, 0x1 }; + + return (cbf_flags & masks[depth]) != 0; +} + +/** + * Set CBF in a level to true. + */ +static INLINE void cbf_set(uint8_t *cbf_flags, int depth) +{ + // Set the bit corresponding to the level (bit 7 - depth). 
+ *cbf_flags |= 1 << (7 - depth); +} + yuv_t * yuv_t_alloc(int luma_size); void yuv_t_free(yuv_t * yuv); diff --git a/src/search.c b/src/search.c index 66e4c3f4..c3deae4e 100644 --- a/src/search.c +++ b/src/search.c @@ -655,7 +655,7 @@ static void lcu_set_coeff(lcu_t *lcu, int x_px, int y_px, int depth, cu_info *cu cu_info *cu_from = &lcu_cu[(x & mask) + (y & mask) * LCU_T_CU_WIDTH]; if (cu != cu_from) { // Chroma coeff data is not used, luma is needed for deblocking - memcpy(cu->coeff_top_y, cu_from->coeff_top_y, 8); + cu->cbf.y = cu_from->cbf.y; } } } @@ -910,10 +910,13 @@ static int search_cu(encoder_state * const encoder_state, int x, int y, int dept lcu_set_intra_mode(&work_tree[depth], x, y, depth, cur_cu->intra[PU_INDEX(x >> 2, y >> 2)].mode, cur_cu->part_size); intra_recon_lcu(encoder_state, x, y, depth,&work_tree[depth], cur_pic->width, cur_pic->height); } else if (cur_cu->type == CU_INTER) { + int cbf; inter_recon_lcu(encoder_state, encoder_state->ref->pics[cur_cu->inter.mv_ref], x, y, LCU_WIDTH>>depth, cur_cu->inter.mv, &work_tree[depth]); encode_transform_tree(encoder_state, x, y, depth, &work_tree[depth]); - if(cur_cu->merged && !cur_cu->coeff_top_y[depth] && !cur_cu->coeff_top_u[depth] && !cur_cu->coeff_top_v[depth]) { + cbf = cbf_is_set(cur_cu->cbf.y, depth) || cbf_is_set(cur_cu->cbf.u, depth) || cbf_is_set(cur_cu->cbf.v, depth); + + if(cur_cu->merged && !cbf) { cur_cu->merged = 0; cur_cu->skipped = 1; // Selecting skip reduces bits needed to code the CU @@ -931,12 +934,12 @@ static int search_cu(encoder_state * const encoder_state, int x, int y, int dept if (depth < MAX_INTRA_SEARCH_DEPTH || depth < MAX_INTER_SEARCH_DEPTH) { int half_cu = cu_width / 2; int split_cost = (int)(4.5 * encoder_state->cur_lambda_cost); + int cbf = cbf_is_set(cur_cu->cbf.y, depth) || cbf_is_set(cur_cu->cbf.u, depth) || cbf_is_set(cur_cu->cbf.v, depth); // If skip mode was selected for the block, skip further search. 
// Skip mode means there's no coefficients in the block, so splitting // might not give any better results but takes more time to do. - if(cur_cu->type == CU_NOTSET || cur_cu->coeff_top_y[depth] || - cur_cu->coeff_top_u[depth] || cur_cu->coeff_top_v[depth]) { + if(cur_cu->type == CU_NOTSET || cbf) { split_cost += search_cu(encoder_state, x, y, depth + 1, work_tree); split_cost += search_cu(encoder_state, x + half_cu, y, depth + 1, work_tree); split_cost += search_cu(encoder_state, x, y + half_cu, depth + 1, work_tree);