From bdc16d26129c0ae5052c675a3341149f1c468dca Mon Sep 17 00:00:00 2001 From: Ari Koivula Date: Tue, 6 May 2014 14:43:12 +0300 Subject: [PATCH] Improve cu_info coded block flag data structure a bit. - It works just like the old structure except that the flags are checked with bitmasks instead of having the flag value be propagated upwards. There isn't really any benefit to this because the flags still have to be propagated to parent CUs. - Wrapped them inside a struct to make copying them easier. (Just need to copy the struct instead of making individual copies) --- src/encoder.c | 106 +++++++++++++++++++------------------------------- src/filter.c | 6 +-- src/picture.h | 33 ++++++++++++++-- src/search.c | 11 ++++-- 4 files changed, 78 insertions(+), 78 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index 2b76abe1..05a459f9 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -1836,19 +1836,23 @@ void encode_coding_tree(encoder_state * const encoder_state, } // for ref_list } // if !merge + { + int cbf = (cbf_is_set(cur_cu->cbf.y, depth) || + cbf_is_set(cur_cu->cbf.u, depth) || + cbf_is_set(cur_cu->cbf.v, depth)); - // Only need to signal coded block flag if not skipped or merged - // skip = no coded residual, merge = coded residual - if (!cur_cu->merged) { - cabac->ctx = &(cabac->ctx_cu_qt_root_cbf_model); - CABAC_BIN(cabac, cur_cu->coeff_top_y[depth] | cur_cu->coeff_top_u[depth] | cur_cu->coeff_top_v[depth], "rqt_root_cbf"); + // Only need to signal coded block flag if not skipped or merged + // skip = no coded residual, merge = coded residual + if (!cur_cu->merged) { + cabac->ctx = &(cabac->ctx_cu_qt_root_cbf_model); + CABAC_BIN(cabac, cbf, "rqt_root_cbf"); + } + // Code (possible) coeffs to bitstream + + if (cbf) { + encode_transform_coeff(encoder_state, x_ctb * 2, y_ctb * 2, depth, 0, 0, 0); + } } - // Code (possible) coeffs to bitstream - - if(cur_cu->coeff_top_y[depth] | cur_cu->coeff_top_u[depth] | cur_cu->coeff_top_v[depth]) { - 
encode_transform_coeff(encoder_state, x_ctb * 2, y_ctb * 2, depth, 0, 0, 0); - } - // END for each part } else if (cur_cu->type == CU_INTRA) { @@ -2114,21 +2118,20 @@ void encode_transform_tree(encoder_state * const encoder_state, int32_t x, int32 encode_transform_tree(encoder_state, x, y + offset, depth+1, lcu); encode_transform_tree(encoder_state, x + offset, y + offset, depth+1, lcu); - // Derive coded coeff flags from the next depth - if (depth == MAX_DEPTH) { - cur_cu->coeff_top_y[depth] = cur_cu->coeff_top_y[depth+1] | cur_cu->coeff_top_y[depth+2] | cur_cu->coeff_top_y[depth+3] | cur_cu->coeff_top_y[depth+4]; - cur_cu->coeff_top_u[depth] = cur_cu->coeff_top_u[depth+1]; - cur_cu->coeff_top_v[depth] = cur_cu->coeff_top_v[depth+1]; - } else { + // Propagate coded block flags from child CUs to parent CU. + if (depth < MAX_DEPTH) { cu_info *cu_a = &lcu->cu[LCU_CU_OFFSET + ((x_local + offset)>>3) + (y_local>>3) *LCU_T_CU_WIDTH]; cu_info *cu_b = &lcu->cu[LCU_CU_OFFSET + (x_local>>3) + ((y_local+offset)>>3)*LCU_T_CU_WIDTH]; cu_info *cu_c = &lcu->cu[LCU_CU_OFFSET + ((x_local + offset)>>3) + ((y_local+offset)>>3)*LCU_T_CU_WIDTH]; - cur_cu->coeff_top_y[depth] = cur_cu->coeff_top_y[depth+1] | cu_a->coeff_top_y[depth+1] | cu_b->coeff_top_y[depth+1] - | cu_c->coeff_top_y[depth+1]; - cur_cu->coeff_top_u[depth] = cur_cu->coeff_top_u[depth+1] | cu_a->coeff_top_u[depth+1] | cu_b->coeff_top_u[depth+1] - | cu_c->coeff_top_u[depth+1]; - cur_cu->coeff_top_v[depth] = cur_cu->coeff_top_v[depth+1] | cu_a->coeff_top_v[depth+1] | cu_b->coeff_top_v[depth+1] - | cu_c->coeff_top_v[depth+1]; + if (cbf_is_set(cu_a->cbf.y, depth+1) || cbf_is_set(cu_b->cbf.y, depth+1) || cbf_is_set(cu_c->cbf.y, depth+1)) { + cbf_set(&cur_cu->cbf.y, depth); + } + if (cbf_is_set(cu_a->cbf.u, depth+1) || cbf_is_set(cu_b->cbf.u, depth+1) || cbf_is_set(cu_c->cbf.u, depth+1)) { + cbf_set(&cur_cu->cbf.u, depth); + } + if (cbf_is_set(cu_a->cbf.v, depth+1) || cbf_is_set(cu_b->cbf.v, depth+1) || cbf_is_set(cu_c->cbf.v, 
depth+1)) { + cbf_set(&cur_cu->cbf.v, depth); + } } return; @@ -2317,19 +2320,7 @@ void encode_transform_tree(encoder_state * const encoder_state, int32_t x, int32 if (coeff_y[i] != 0) { // Found one, we can break here cbf_y = 1; - if (depth <= MAX_DEPTH) { - int d; - for (d = 0; d <= depth; ++d) { - cur_cu->coeff_top_y[d] = 1; - } - } else { - int pu_index = (x_pu & 1) + 2 * (y_pu & 1); - int d; - cur_cu->coeff_top_y[depth + pu_index] = 1; - for (d = 0; d < depth; ++d) { - cur_cu->coeff_top_y[d] = 1; - } - } + cbf_set(&cur_cu->cbf.y, depth + PU_INDEX(x_pu, y_pu)); break; } } @@ -2392,26 +2383,19 @@ void encode_transform_tree(encoder_state * const encoder_state, int32_t x, int32 transform_chroma(encoder_state, cur_cu, chroma_depth, base_u, pred_u, coeff_u, scan_idx_chroma, pre_quant_coeff, block); for (i = 0; i < chroma_size; i++) { if (coeff_u[i] != 0) { - int d; - for (d = 0; d <= depth; ++d) { - cur_cu->coeff_top_u[d] = 1; - } + cbf_set(&cur_cu->cbf.u, depth); break; } } transform_chroma(encoder_state, cur_cu, chroma_depth, base_v, pred_v, coeff_v, scan_idx_chroma, pre_quant_coeff, block); for (i = 0; i < chroma_size; i++) { if (coeff_v[i] != 0) { - int d; - for (d = 0; d <= depth; ++d) { - cur_cu->coeff_top_v[d] = 1; - } + cbf_set(&cur_cu->cbf.v, depth); break; } } - // Save coefficients to cu. 
- if (cur_cu->coeff_top_u[depth] || cur_cu->coeff_top_v[depth]) { + if (cbf_is_set(cur_cu->cbf.u, depth) || cbf_is_set(cur_cu->cbf.v, depth)) { i = 0; for (y = 0; y < width_c; y++) { for (x = 0; x < width_c; x++) { @@ -2423,11 +2407,11 @@ void encode_transform_tree(encoder_state * const encoder_state, int32_t x, int32 } reconstruct_chroma(encoder_state, cur_cu, chroma_depth, - cur_cu->coeff_top_u[depth], + cbf_is_set(cur_cu->cbf.u, depth), coeff_u, recbase_u, pred_u, color_type_u, pre_quant_coeff, block); reconstruct_chroma(encoder_state, cur_cu, chroma_depth, - cur_cu->coeff_top_v[depth], + cbf_is_set(cur_cu->cbf.v, depth), coeff_v, recbase_v, pred_v, color_type_v, pre_quant_coeff, block); } @@ -2458,13 +2442,7 @@ static void encode_transform_unit(encoder_state * const encoder_state, int8_t scan_idx = SCAN_DIAG; uint32_t dir_mode; - int cbf_y; - if (depth <= MAX_DEPTH) { - cbf_y = cur_cu->coeff_top_y[depth]; - } else { - int pu_index = x_pu % 2 + 2 * (y_pu % 2); - cbf_y = cur_cu->coeff_top_y[depth + pu_index]; - } + int cbf_y = cbf_is_set(cur_cu->cbf.y, depth + PU_INDEX(x_pu, y_pu)); if (cbf_y) { int x = x_pu * (LCU_WIDTH >> MAX_PU_DEPTH); @@ -2525,7 +2503,7 @@ static void encode_transform_unit(encoder_state * const encoder_state, return; } - if (cur_cu->coeff_top_u[depth] || cur_cu->coeff_top_v[depth]) { + if (cbf_is_set(cur_cu->cbf.u, depth) || cbf_is_set(cur_cu->cbf.v, depth)) { int x, y; coefficient *orig_pos_u, *orig_pos_v; @@ -2570,11 +2548,11 @@ static void encode_transform_unit(encoder_state * const encoder_state, } } - if (cur_cu->coeff_top_u[depth]) { + if (cbf_is_set(cur_cu->cbf.u, depth)) { encode_coeff_nxn(encoder_state, coeff_u, width_c, 2, scan_idx, 0); } - if (cur_cu->coeff_top_v[depth]) { + if (cbf_is_set(cur_cu->cbf.v, depth)) { encode_coeff_nxn(encoder_state, coeff_v, width_c, 2, scan_idx, 0); } } @@ -2608,15 +2586,9 @@ void encode_transform_coeff(encoder_state * const encoder_state, int32_t x_pu,in int8_t split = (cur_cu->tr_depth > depth); - 
int8_t cb_flag_u = cur_cu->coeff_top_u[depth]; - int8_t cb_flag_v = cur_cu->coeff_top_v[depth]; - int cb_flag_y; - if (depth <= MAX_DEPTH) { - cb_flag_y = cur_cu->coeff_top_y[depth]; - } else { - int pu_index = x_pu % 2 + 2 * (y_pu % 2); - cb_flag_y = cur_cu->coeff_top_y[depth + pu_index]; - } + const int cb_flag_y = cbf_is_set(cur_cu->cbf.y, depth + PU_INDEX(x_pu, y_pu)); + const int cb_flag_u = cbf_is_set(cur_cu->cbf.u, depth); + const int cb_flag_v = cbf_is_set(cur_cu->cbf.v, depth); // The split_transform_flag is not signaled when: // - transform size is greater than 32 (depth == 0) diff --git a/src/filter.c b/src/filter.c index 18bc09a9..20ac1c4f 100644 --- a/src/filter.c +++ b/src/filter.c @@ -232,14 +232,14 @@ void filter_deblock_edge_luma(encoder_state * const encoder_state, * (cur_pic->width_in_lcu << MAX_DEPTH)]; // Filter strength strength = 0; - // Intra blocks have strength 2 if(cu_q->type == CU_INTRA || cu_p->type == CU_INTRA) { strength = 2; + } else if(cbf_is_set(cu_q->cbf.y, cu_q->tr_depth) || cbf_is_set(cu_p->cbf.y, cu_p->tr_depth)) { // Non-zero residual/coeffs and transform boundary - } else if(cu_q->coeff_top_y[cu_q->tr_depth] || cu_p->coeff_top_y[cu_p->tr_depth]) { + // Neither CU is intra so tr_depth <= MAX_DEPTH. 
strength = 1; - // Absolute motion vector diff between blocks >= 1 (Integer pixel) } else if((abs(cu_q->inter.mv[0] - cu_p->inter.mv[0]) >= 4) || (abs(cu_q->inter.mv[1] - cu_p->inter.mv[1]) >= 4)) { + // Absolute motion vector diff between blocks >= 1 (Integer pixel) strength = 1; } else if(cu_q->inter.mv_ref != cu_p->inter.mv_ref) { strength = 1; diff --git a/src/picture.h b/src/picture.h index b61d36f1..2fc6d2bd 100644 --- a/src/picture.h +++ b/src/picture.h @@ -74,6 +74,13 @@ typedef struct int8_t mode; } cu_info_inter; +typedef struct +{ + uint8_t y; + uint8_t u; + uint8_t v; +} cu_cbf_t; + /** * \brief Struct for CU info */ @@ -88,10 +95,7 @@ typedef struct int8_t merged; //!< \brief flag to indicate this block is merged int8_t merge_idx; //!< \brief merge index - // MAX_DEPTH+4 for the 4 PUs at the last level. - int8_t coeff_top_y[MAX_DEPTH+5]; //!< \brief is there coded coeffs Y in top level - int8_t coeff_top_u[MAX_DEPTH+5]; //!< \brief is there coded coeffs U in top level - int8_t coeff_top_v[MAX_DEPTH+5]; //!< \brief is there coded coeffs V in top level + cu_cbf_t cbf; cu_info_intra intra[4]; cu_info_inter inter; } cu_info; @@ -207,6 +211,27 @@ typedef struct { ////////////////////////////////////////////////////////////////////////// // FUNCTIONS +/** + * Check if CBF in a given level >= depth is true. + */ +static INLINE int cbf_is_set(uint8_t cbf_flags, int depth) +{ + // Transform data for 4x4 blocks is stored at depths 4-7 for luma, so masks + // for those levels don't include the other ones. + static const uint8_t masks[8] = { 0xff, 0x7f, 0x3f, 0x1f, 0x8, 0x4, 0x2, 0x1 }; + + return (cbf_flags & masks[depth]) != 0; +} + +/** + * Set CBF in a level to true. + */ +static INLINE void cbf_set(uint8_t *cbf_flags, int depth) +{ + // Set the bit corresponding to the level (depth 0 is the most significant bit).
+ *cbf_flags |= 1 << (7 - depth); +} + yuv_t * yuv_t_alloc(int luma_size); void yuv_t_free(yuv_t * yuv); diff --git a/src/search.c b/src/search.c index 66e4c3f4..c3deae4e 100644 --- a/src/search.c +++ b/src/search.c @@ -655,7 +655,7 @@ static void lcu_set_coeff(lcu_t *lcu, int x_px, int y_px, int depth, cu_info *cu cu_info *cu_from = &lcu_cu[(x & mask) + (y & mask) * LCU_T_CU_WIDTH]; if (cu != cu_from) { // Chroma coeff data is not used, luma is needed for deblocking - memcpy(cu->coeff_top_y, cu_from->coeff_top_y, 8); + cu->cbf.y = cu_from->cbf.y; } } } @@ -910,10 +910,13 @@ static int search_cu(encoder_state * const encoder_state, int x, int y, int dept lcu_set_intra_mode(&work_tree[depth], x, y, depth, cur_cu->intra[PU_INDEX(x >> 2, y >> 2)].mode, cur_cu->part_size); intra_recon_lcu(encoder_state, x, y, depth,&work_tree[depth], cur_pic->width, cur_pic->height); } else if (cur_cu->type == CU_INTER) { + int cbf; inter_recon_lcu(encoder_state, encoder_state->ref->pics[cur_cu->inter.mv_ref], x, y, LCU_WIDTH>>depth, cur_cu->inter.mv, &work_tree[depth]); encode_transform_tree(encoder_state, x, y, depth, &work_tree[depth]); - if(cur_cu->merged && !cur_cu->coeff_top_y[depth] && !cur_cu->coeff_top_u[depth] && !cur_cu->coeff_top_v[depth]) { + cbf = cbf_is_set(cur_cu->cbf.y, depth) || cbf_is_set(cur_cu->cbf.u, depth) || cbf_is_set(cur_cu->cbf.v, depth); + + if(cur_cu->merged && !cbf) { cur_cu->merged = 0; cur_cu->skipped = 1; // Selecting skip reduces bits needed to code the CU @@ -931,12 +934,12 @@ static int search_cu(encoder_state * const encoder_state, int x, int y, int dept if (depth < MAX_INTRA_SEARCH_DEPTH || depth < MAX_INTER_SEARCH_DEPTH) { int half_cu = cu_width / 2; int split_cost = (int)(4.5 * encoder_state->cur_lambda_cost); + int cbf = cbf_is_set(cur_cu->cbf.y, depth) || cbf_is_set(cur_cu->cbf.u, depth) || cbf_is_set(cur_cu->cbf.v, depth); // If skip mode was selected for the block, skip further search. 
// Skip mode means there's no coefficients in the block, so splitting // might not give any better results but takes more time to do. - if(cur_cu->type == CU_NOTSET || cur_cu->coeff_top_y[depth] || - cur_cu->coeff_top_u[depth] || cur_cu->coeff_top_v[depth]) { + if(cur_cu->type == CU_NOTSET || cbf) { split_cost += search_cu(encoder_state, x, y, depth + 1, work_tree); split_cost += search_cu(encoder_state, x + half_cu, y, depth + 1, work_tree); split_cost += search_cu(encoder_state, x, y + half_cu, depth + 1, work_tree);