diff --git a/src/cu.h b/src/cu.h index 3f70df25..96897b62 100644 --- a/src/cu.h +++ b/src/cu.h @@ -597,5 +597,6 @@ static INLINE void cbf_copy(uint16_t *cbf, uint16_t src, color_t plane) } #define GET_SPLITDATA(CU,curDepth) (((CU)->split_tree >> (curDepth)) & 7) +#define PU_IS_TU(cu) ((cu)->log2_width <= TR_MAX_LOG2_SIZE && (cu)->log2_height <= TR_MAX_LOG2_SIZE) #endif diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index 2556c950..4ee820c0 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -109,86 +109,32 @@ static void encode_mts_idx( bool uvg_is_lfnst_allowed( const encoder_state_t* const state, const cu_info_t* const pred_cu, - const int width, - const int height, - const int x, - const int y, enum uvg_tree_type tree_type, const color_t color, - const lcu_t* lcu) + const cu_loc_t* const cu_loc) { - if (state->encoder_control->cfg.lfnst && pred_cu->type == CU_INTRA && pred_cu->depth == pred_cu->tr_depth) { + if (state->encoder_control->cfg.lfnst && pred_cu->type == CU_INTRA && PU_IS_TU(pred_cu)) { const int isp_mode = pred_cu->intra.isp_mode; - const int depth = pred_cu->depth; - const int chroma_width = width >> 1; - const int chroma_height = height >> 1; - const int cu_width = tree_type != UVG_LUMA_T || depth == 4 ? width : chroma_width; - const int cu_height = tree_type != UVG_LUMA_T || depth == 4 ? height : chroma_height; - bool can_use_lfnst_with_mip = (width >= 16 && height >= 16); - bool is_sep_tree = depth == 4 || tree_type != UVG_BOTH_T; + const int cu_width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width; + const int cu_height = tree_type != UVG_CHROMA_T ? cu_loc->height : cu_loc->chroma_height; + bool can_use_lfnst_with_mip = (cu_width >= 16 && cu_height >= 16); + bool is_sep_tree = tree_type != UVG_BOTH_T; bool mip_flag = pred_cu->type == CU_INTRA && color == COLOR_Y ? pred_cu->intra.mip_flag : false; - if ((isp_mode && !uvg_can_use_isp_with_lfnst(width, height, isp_mode, tree_type)) || + if ((isp_mode && !uvg_can_use_isp_with_lfnst(cu_width, cu_height, isp_mode, tree_type)) || (pred_cu->type == CU_INTRA && mip_flag && !can_use_lfnst_with_mip) || (is_sep_tree && MIN(cu_width, cu_height) < 4) || (cu_width > TR_MAX_WIDTH || cu_height > TR_MAX_WIDTH)) { return false; } - bool luma_flag = (depth == 4 && color == COLOR_Y) || (tree_type != UVG_CHROMA_T && depth != 4); - bool chroma_flag = (depth == 4 && color != COLOR_Y) || tree_type != UVG_LUMA_T; + bool luma_flag = tree_type != UVG_CHROMA_T; + bool chroma_flag = tree_type != UVG_LUMA_T; bool non_zero_coeff_non_ts_corner_8x8 = false; bool last_scan_pos = false; bool is_tr_skip = false; - - int split_num = color == COLOR_Y && isp_mode ? uvg_get_isp_split_num(width, height, isp_mode, false) : 0; - const videoframe_t* const frame = state->tile->frame; - if (split_num) { - // Constraints for ISP split blocks - for (int i = 0; i < split_num; ++i) { - cu_loc_t split_loc; - uvg_get_isp_split_loc(&split_loc, x, y, width, height, i, isp_mode, false); - int local_split_x = split_loc.x; - int local_split_y = split_loc.y; - uvg_get_isp_cu_arr_coords(&local_split_x, &local_split_y); - const cu_info_t* split_cu = lcu ? LCU_GET_CU_AT_PX(lcu, local_split_x, local_split_y) : - uvg_cu_array_at_const(frame->cu_array, local_split_x, local_split_y); - - //if (cbf_is_set(split_cu->cbf, depth, COLOR_Y)) { - // ISP_TODO: remove this if clause altogether if it seems it is not needed - if (true) { - non_zero_coeff_non_ts_corner_8x8 |= (luma_flag && split_cu->violates_lfnst_constrained_luma) || (chroma_flag && split_cu->violates_lfnst_constrained_chroma); - //last_scan_pos |= split_cu->lfnst_last_scan_pos; - last_scan_pos |= true; - } - } - } else { - non_zero_coeff_non_ts_corner_8x8 |= (luma_flag && pred_cu->violates_lfnst_constrained_luma) || (chroma_flag && pred_cu->violates_lfnst_constrained_chroma); - last_scan_pos |= pred_cu->lfnst_last_scan_pos; - } - - //const int num_pred_units = kvz_part_mode_num_parts[pred_cu->part_size]; - const int tr_depth = pred_cu->tr_depth; - assert(depth <= tr_depth && "Depth greater than transform depth. This should never trigger."); - const int num_transform_units = 1 << (2 * (tr_depth - depth)); - const int tu_row_length = 1 << (tr_depth - depth); - const int tu_width = cu_width >> (tr_depth - depth); - const int tu_height = tu_width; // TODO: height for non-square blocks - - // TODO: chroma transform skip - if (color == COLOR_Y) { - for (int i = 0; i < num_transform_units; i++) { - // TODO: this works only for square blocks - const int tu_x = x + ((i % tu_row_length) * tu_width); - const int tu_y = y + ((i / tu_row_length) * tu_height); - const cu_info_t* cur_tu = lcu ? LCU_GET_CU_AT_PX(lcu, tu_x, tu_y) : uvg_cu_array_at_const(frame->cu_array, tu_x, tu_y); - assert(cur_tu != NULL && "NULL transform unit."); - bool cbf_set = cbf_is_set(cur_tu->cbf, tr_depth, COLOR_Y); - - if (cur_tu != NULL && cbf_set && cur_tu->tr_idx == MTS_SKIP) { - is_tr_skip = true; - } - } + if (color == COLOR_Y && pred_cu->tr_idx == MTS_SKIP) { + is_tr_skip = true; } if ((!pred_cu->lfnst_last_scan_pos && !isp_mode) || non_zero_coeff_non_ts_corner_8x8 || is_tr_skip) { @@ -205,19 +151,15 @@ static bool encode_lfnst_idx( const encoder_state_t* const state, cabac_data_t * const cabac, const cu_info_t * const pred_cu, - const int x, - const int y, - const int depth, - const int width, - const int height, enum uvg_tree_type tree_type, - const color_t color) + const color_t color, + const cu_loc_t* const cu_loc) { - if (uvg_is_lfnst_allowed(state, pred_cu, width, height, x, y, tree_type, color, NULL)) { + if (uvg_is_lfnst_allowed(state, pred_cu, tree_type, color, cu_loc)) { // Getting separate tree bool from block size is a temporary fix until a proper dual tree check is possible (there is no dual tree structure at time of writing this). // VTM seems to force explicit dual tree structure for small 4x4 blocks - bool is_separate_tree = depth == 4 || tree_type != UVG_BOTH_T; + bool is_separate_tree = tree_type != UVG_BOTH_T; const int lfnst_index = !is_separate_tree || color == COLOR_Y ? pred_cu->lfnst_idx : pred_cu->cr_lfnst_idx; assert((lfnst_index >= 0 && lfnst_index < 3) && "Invalid LFNST index."); @@ -1752,6 +1694,8 @@ void uvg_encode_coding_tree( uvg_encode_intra_luma_coding_unit(state, cabac, cur_cu, cu_loc, NULL, NULL); } + const bool is_local_dual_tree = cu_height * cu_width < 64 && tree_type == UVG_BOTH_T; + // Code chroma prediction mode. if (state->encoder_control->chroma_format != UVG_CSP_400 && depth != 4 && tree_type == UVG_BOTH_T) { encode_chroma_intra_cu(cabac, cur_cu, state->encoder_control->cfg.cclm, NULL); @@ -1779,7 +1723,7 @@ void uvg_encode_coding_tree( } if (tree_type != UVG_CHROMA_T) { - bool lfnst_written = encode_lfnst_idx(state, cabac, cur_cu, x, y, depth, cu_width, cu_height, tree_type, COLOR_Y); + bool lfnst_written = encode_lfnst_idx(state, cabac, cur_cu, is_local_dual_tree ? UVG_LUMA_T : tree_type, COLOR_Y, cu_loc); } encode_mts_idx(state, cabac, cur_cu, cu_loc); @@ -1795,7 +1739,7 @@ void uvg_encode_coding_tree( tmp->lfnst_last_scan_pos = false; encode_transform_coeff(state, &cu_loc, depth, 0, 1, coeff, tree_type, true, false, &luma_cbf_ctx, cu_loc); // Write LFNST only once for single tree structure - encode_lfnst_idx(state, cabac, tmp, x, y, depth, cu_width, cu_height, tree_type, COLOR_UV); + encode_lfnst_idx(state, cabac, tmp, is_local_dual_tree ? UVG_CHROMA_T : tree_type, COLOR_UV, cu_loc); } } diff --git a/src/encode_coding_tree.h b/src/encode_coding_tree.h index e75ad46a..a7fe896b 100644 --- a/src/encode_coding_tree.h +++ b/src/encode_coding_tree.h @@ -45,13 +45,9 @@ bool uvg_is_mts_allowed(const encoder_state_t* const state, cu_info_t* const pre bool uvg_is_lfnst_allowed( const encoder_state_t* const state, const cu_info_t* const pred_cu, - const int width, - const int height, - const int x, - const int y, enum uvg_tree_type tree_type, const color_t color, - const lcu_t* lcu); + const cu_loc_t* const cu_loc); void uvg_encode_coding_tree( encoder_state_t * const state, diff --git a/src/search.c b/src/search.c index 883c9d59..51de64f0 100644 --- a/src/search.c +++ b/src/search.c @@ -673,8 +673,11 @@ static double cu_rd_cost_tr_split_accurate( } } - if(cu_loc->width == 4 || tree_type == UVG_LUMA_T) { - if (uvg_is_lfnst_allowed(state, tr_cu, width, height, cu_loc->local_x, cu_loc->local_y, tree_type, COLOR_Y, lcu)) { + const bool is_local_sep_tree = pred_cu->log2_width + pred_cu->log2_height < 6 && tree_type == UVG_BOTH_T; + + if(is_local_sep_tree || tree_type == UVG_LUMA_T) { + + if (uvg_is_lfnst_allowed(state, tr_cu, is_local_sep_tree ? UVG_LUMA_T : tree_type, COLOR_Y, cu_loc)) { const int lfnst_idx = tr_cu->lfnst_idx; CABAC_FBITS_UPDATE( cabac, @@ -746,11 +749,12 @@ static double cu_rd_cost_tr_split_accurate( } } - if (uvg_is_lfnst_allowed(state, tr_cu, width, height, cu_loc->local_x, cu_loc->local_y, tree_type, cu_loc->width == 4 || tree_type == UVG_CHROMA_T ? COLOR_UV : COLOR_Y, lcu)) { - const int lfnst_idx = (cu_loc->width != 4 && tree_type != UVG_CHROMA_T) ? tr_cu->lfnst_idx : tr_cu->cr_lfnst_idx; + const bool is_chroma_tree = is_local_sep_tree || tree_type == UVG_CHROMA_T; + if (uvg_is_lfnst_allowed(state, tr_cu, is_local_sep_tree ? UVG_CHROMA_T : tree_type, is_chroma_tree ? COLOR_UV : COLOR_Y, cu_loc)) { + const int lfnst_idx = is_chroma_tree ? tr_cu->cr_lfnst_idx : tr_cu->lfnst_idx; CABAC_FBITS_UPDATE( cabac, - &cabac->ctx.lfnst_idx_model[tr_cu->depth == 4 || tree_type != UVG_BOTH_T], + &cabac->ctx.lfnst_idx_model[is_chroma_tree], lfnst_idx != 0, tr_tree_bits, "lfnst_idx"); @@ -971,6 +975,8 @@ static double search_cu( cur_cu->lfnst_idx = 0; cur_cu->joint_cb_cr = 0; cur_cu->split_tree = split_tree.split_tree; + cur_cu->log2_width = uvg_g_convert_to_log2[cu_width]; + cur_cu->log2_height = uvg_g_convert_to_log2[cu_height]; // If the CU is completely inside the frame at this depth, search for // prediction modes at this depth. diff --git a/src/search_intra.c b/src/search_intra.c index 2355fad1..e40df0df 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -300,7 +300,7 @@ static double search_intra_trdepth( pred_cu->tr_depth = depth; const bool mts_enabled = (state->encoder_control->cfg.mts == UVG_MTS_INTRA || state->encoder_control->cfg.mts == UVG_MTS_BOTH) - && tr_cu->depth == tr_cu->tr_depth; + && PU_IS_TU(pred_cu); nosplit_cost = 0.0; @@ -330,10 +330,10 @@ static double search_intra_trdepth( num_transforms = pred_cu->intra.isp_mode == ISP_MODE_NO_ISP ? num_transforms : 1; } const int mts_start = trafo; - //TODO: height - if (state->encoder_control->cfg.trskip_enable && - width <= (1 << state->encoder_control->cfg.trskip_max_size) /*&& height == 4*/ && - pred_cu->intra.isp_mode == ISP_MODE_NO_ISP) { // tr_skip cannot be used wit ISP + if (state->encoder_control->cfg.trskip_enable + && width <= (1 << state->encoder_control->cfg.trskip_max_size) + && height <= (1 << state->encoder_control->cfg.trskip_max_size) + && pred_cu->intra.isp_mode == ISP_MODE_NO_ISP) { num_transforms = MAX(num_transforms, 2); } pred_cu->intra.mode_chroma = -1; @@ -346,9 +346,10 @@ static double search_intra_trdepth( max_lfnst_idx = 0; } + const bool is_local_dual_tree = pred_cu->log2_width + pred_cu->log2_height < 6 && tree_type == UVG_BOTH_T; + int start_idx = 0; - int end_idx = state->encoder_control->cfg.lfnst && - depth == pred_cu->tr_depth && + int end_idx = state->encoder_control->cfg.lfnst && PU_IS_TU(pred_cu) && uvg_can_use_isp_with_lfnst(width, height, pred_cu->intra.isp_mode, tree_type) ? max_lfnst_idx : 0; for (int i = start_idx; i < end_idx + 1; ++i) { search_data->lfnst_costs[i] = MAX_DOUBLE; @@ -436,11 +437,11 @@ static double search_intra_trdepth( lcu, search_data->best_isp_cbfs); double transform_bits = 0; - if (state->encoder_control->cfg.lfnst && depth == pred_cu->tr_depth && + if (state->encoder_control->cfg.lfnst && PU_IS_TU(pred_cu) && trafo != MTS_SKIP) { if (!constraints[0] && constraints[1]) { transform_bits += CTX_ENTROPY_FBITS( - &state->search_cabac.ctx.lfnst_idx_model[tr_cu->depth == 4 || + &state->search_cabac.ctx.lfnst_idx_model[is_local_dual_tree || tree_type == UVG_LUMA_T], lfnst_idx != 0); if (lfnst_idx > 0) { @@ -566,30 +567,6 @@ static double search_intra_trdepth( } nosplit_cbf = pred_cu->cbf; - - uvg_pixels_blit( - lcu->rec.y, - nosplit_pixels.y, - width, - width, - LCU_WIDTH, - width); - if (reconstruct_chroma) { - uvg_pixels_blit( - lcu->rec.u, - nosplit_pixels.u, - width_c, - width_c, - LCU_WIDTH_C, - width_c); - uvg_pixels_blit( - lcu->rec.v, - nosplit_pixels.v, - width_c, - width_c, - LCU_WIDTH_C, - width_c); - } } @@ -619,31 +596,7 @@ static double search_intra_trdepth( uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y + half_height, half_width, half_height); split_cost += search_intra_trdepth(state, &split_cu_loc, max_depth, nosplit_cost, search_data, lcu, tree_type); } - - double cbf_bits = 0.0; - - // Add cost of cbf chroma bits on transform tree. - // All cbf bits are accumulated to pred_cu.cbf and cbf_is_set returns true - // if cbf is set at any level >= depth, so cbf chroma is assumed to be 0 - // if this and any previous transform block has no chroma coefficients. - // When searching the first block we don't actually know the real values, - // so this will code cbf as 0 and not code the cbf at all for descendants. - if (state->encoder_control->chroma_format != UVG_CSP_400) { - const uint8_t tr_depth = depth - pred_cu->depth; - cabac_data_t* cabac = (cabac_data_t *)&state->search_cabac; - - cabac_ctx_t* ctx = &(cabac->ctx.qt_cbf_model_cb[0]); - if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) { - CABAC_FBITS_UPDATE(cabac, ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_U), cbf_bits, "cbf_cb"); - } - ctx = &(state->cabac.ctx.qt_cbf_model_cr[cbf_is_set(pred_cu->cbf, depth, COLOR_U)]); - if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_V)) { - CABAC_FBITS_UPDATE(cabac, ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_V), cbf_bits, "cbf_cr"); - } - } - - double bits = cbf_bits; - split_cost += bits * state->lambda; + } else { assert(width <= TR_MAX_WIDTH); } @@ -652,17 +605,6 @@ static double search_intra_trdepth( return split_cost; } else { uvg_lcu_fill_trdepth(lcu, cu_loc, depth, tree_type); - - pred_cu->cbf = nosplit_cbf; - - // We only restore the pixel data and not coefficients or cbf data. - // The only thing we really need are the border pixels.uvg_intra_get_dir_luma_predictor - uvg_pixels_blit(nosplit_pixels.y, lcu->rec.y, width, width, width, LCU_WIDTH); - if (reconstruct_chroma) { - uvg_pixels_blit(nosplit_pixels.u, lcu->rec.u, width_c, width_c, width_c, LCU_WIDTH_C); - uvg_pixels_blit(nosplit_pixels.v, lcu->rec.v, width_c, width_c, width_c, LCU_WIDTH_C); - } - return nosplit_cost; } } diff --git a/src/transform.c b/src/transform.c index 86ff515b..2a532715 100644 --- a/src/transform.c +++ b/src/transform.c @@ -723,7 +723,7 @@ void uvg_chroma_transform_search( COEFF_ORDER_LINEAR); } if((depth == 4 || tree_type == UVG_CHROMA_T) && state->encoder_control->cfg.lfnst && 0) { - if(uvg_is_lfnst_allowed(state, pred_cu, width, height, 0, 0 , UVG_CHROMA_T, COLOR_UV, lcu)) { + if(uvg_is_lfnst_allowed(state, pred_cu, UVG_CHROMA_T, COLOR_UV, cu_loc)) { const int lfnst_idx = pred_cu->cr_lfnst_idx; CABAC_FBITS_UPDATE( &state->search_cabac,