diff --git a/src/dep_quant.c b/src/dep_quant.c index cd461815..80a5c179 100644 --- a/src/dep_quant.c +++ b/src/dep_quant.c @@ -450,7 +450,7 @@ static void xSetLastCoeffOffset( bool lastCbfIsInferred = false; bool useIntraSubPartitions = cur_tu->type == CU_INTRA && cur_tu->intra.isp_mode && compID == COLOR_Y; if (useIntraSubPartitions) { - uint32_t nTus = uvg_get_isp_split_num(width, height, cur_tu->intra.isp_mode, true); + uint32_t nTus = uvg_get_isp_split_num(1 << cur_tu->log2_width, 1 << cur_tu->log2_height, cur_tu->intra.isp_mode, true); bool isLastSubPartition = cur_tu->intra.isp_index +1 == nTus; //TODO: isp check if (isLastSubPartition) { lastCbfIsInferred = cur_tu->intra.isp_cbfs == 0; @@ -479,7 +479,7 @@ static void xSetLastCoeffOffset( } -static const unsigned prefixCtx[] = {0, 0, 0, 3, 6, 10, 15, 21}; + static const unsigned prefixCtx[] = {0, 0, 0, 3, 6, 10, 15, 21}; uint32_t ctxBits[14]; for (unsigned xy = 0; xy < 2; xy++) { int32_t bitOffset = (xy ? cbfDeltaBits : 0); @@ -1143,7 +1143,7 @@ int uvg_dep_quant( *absSum = 0; const bool is_mts = compID == COLOR_Y && cur_tu->tr_idx > MTS_SKIP; - const bool is_ts = cur_tu->tr_skip >> compID & 1; + const bool is_ts = (cur_tu->tr_skip >> compID) & 1; const uint32_t log2_tr_width = uvg_g_convert_to_log2[width]; const uint32_t log2_tr_height = uvg_g_convert_to_log2[height]; diff --git a/src/intra.c b/src/intra.c index d3241b34..026254e1 100644 --- a/src/intra.c +++ b/src/intra.c @@ -37,8 +37,10 @@ #include "image.h" #include "uvg_math.h" #include "mip_data.h" +#include "rdo.h" #include "search.h" #include "search_intra.h" +#include "strategies-picture.h" #include "strategies/strategies-intra.h" #include "tables.h" #include "transform.h" @@ -1693,6 +1695,8 @@ int8_t uvg_get_co_located_luma_mode( } + + /** * \brief Returns ISP split partition size based on block dimensions and split type. * @@ -1788,8 +1792,6 @@ static void intra_recon_tb_leaf( const int width = color == COLOR_Y ? pu_loc->width : pu_loc->chroma_width; const int height = color == COLOR_Y ? pu_loc->height : pu_loc->chroma_height; - int log2_width = uvg_g_convert_to_log2[width]; - int log2_height = uvg_g_convert_to_log2[height]; const int lcu_width = LCU_WIDTH >> shift; @@ -2026,3 +2028,58 @@ bool uvg_can_use_isp_with_lfnst(const int width, const int height, const int isp } return true; } + + +double uvg_recon_and_estimate_cost_isp(encoder_state_t* const state, + const cu_loc_t* const cu_loc, + double cost_treshold, + intra_search_data_t* const search_data, + lcu_t* const lcu) { + assert(state->search_cabac.update && "ISP reconstruction must be done with CABAC update"); + double cost = 0; + + const int width = cu_loc->width; + const int height = cu_loc->height; + + search_data->best_isp_cbfs = 0; + // ISP split is done horizontally or vertically depending on ISP mode, 2 or 4 times depending on block dimensions. + // Small blocks are split only twice. + int split_type = search_data->pred_cu.intra.isp_mode; + int split_limit = uvg_get_isp_split_num(width, height, split_type, true); + + int cbf_context = 2; + + for (int i = 0; i < split_limit; ++i) { + cu_loc_t tu_loc; + uvg_get_isp_split_loc(&tu_loc, cu_loc->x, cu_loc->y, width, height, i, split_type, true); + cu_loc_t pu_loc; + uvg_get_isp_split_loc(&pu_loc, cu_loc->x, cu_loc->y, width, height, i, split_type, false); + search_data->pred_cu.intra.isp_index = 0; + if (tu_loc.x % 4 == 0) { + intra_recon_tb_leaf(state, &pu_loc, cu_loc, lcu, COLOR_Y, search_data, UVG_LUMA_T); + } + uvg_quantize_lcu_residual(state, true, false, false, + &tu_loc, &search_data->pred_cu, lcu, + false, UVG_LUMA_T); + + int index = cu_loc->local_y * LCU_WIDTH + cu_loc->local_x; + int ssd = uvg_pixels_calc_ssd(&lcu->ref.y[index], &lcu->rec.y[index], + LCU_WIDTH, LCU_WIDTH, + tu_loc.width, tu_loc.height); + double coeff_bits = uvg_get_coeff_cost(state, lcu->coeff.y, NULL, &tu_loc, 0, SCAN_DIAG, false, COEFF_ORDER_CU); + + + int cbf = cbf_is_set(search_data->pred_cu.cbf, COLOR_Y); + if (i + 1 != split_limit && search_data->best_isp_cbfs != 0) { + CABAC_FBITS_UPDATE(&state->search_cabac, &state->search_cabac.ctx.qt_cbf_model_luma[cbf_context], cbf, coeff_bits, "cbf_luma_isp_recon"); + } + cost += ssd + coeff_bits * state->lambda; + + cbf_context = 2 + cbf; + + search_data->best_isp_cbfs |= cbf << i; + search_data->pred_cu.intra.isp_cbfs = search_data->best_isp_cbfs; + + } + return cost; +} \ No newline at end of file diff --git a/src/intra.h b/src/intra.h index 515abc85..71de9a6a 100644 --- a/src/intra.h +++ b/src/intra.h @@ -152,6 +152,12 @@ void uvg_intra_recon_cu( bool recon_luma, bool recon_chroma); +double uvg_recon_and_estimate_cost_isp(encoder_state_t* const state, + const cu_loc_t* const cu_loc, + double cost_treshold, + intra_search_data_t* const search_data, + lcu_t* const lcu); + int8_t uvg_get_co_located_luma_mode( const cu_loc_t* const chroma_loc, const cu_loc_t* const cu_loc, diff --git a/src/search.c b/src/search.c index b8bb7a63..40cc012a 100644 --- a/src/search.c +++ b/src/search.c @@ -1364,12 +1364,27 @@ static double search_cu( } #endif if (state->encoder_control->cfg.cclm && tree_type != UVG_CHROMA_T && state->encoder_control->chroma_format != UVG_CSP_400) { - uvg_intra_recon_cu(state, - &intra_search, cu_loc, - &intra_search.pred_cu, lcu, - tree_type, - true, - false); + if(intra_search.pred_cu.intra.isp_mode == ISP_MODE_NO_ISP) { + uvg_intra_recon_cu(state, + &intra_search, cu_loc, + &intra_search.pred_cu, lcu, + tree_type, + true, + false); + } + else { + cabac_data_t temp_cabac; + memcpy(&temp_cabac, &state->search_cabac, sizeof(cabac_data_t)); + state->search_cabac.update = 1; + uvg_recon_and_estimate_cost_isp( + state, + cu_loc, + 0, + &intra_search, + lcu + ); + memcpy(&state->search_cabac, &temp_cabac, sizeof(cabac_data_t)); + } downsample_cclm_rec( state, x, y, cu_width / 2, cu_height / 2, lcu->rec.y, lcu->left_ref.y[64] @@ -1461,7 +1476,7 @@ static double search_cu( if (cur_cu->type == CU_INTRA) { bool recon_chroma = true; - bool recon_luma = tree_type != UVG_CHROMA_T; + bool recon_luma = tree_type != UVG_CHROMA_T && cur_cu->intra.isp_mode == ISP_MODE_NO_ISP; if (is_separate_tree || !has_chroma || state->encoder_control->chroma_format == UVG_CSP_400 || tree_type == UVG_LUMA_T || cu_loc->chroma_height % 4 == 2) { recon_chroma = false; } @@ -1471,6 +1486,15 @@ static double search_cu( NULL, lcu, tree_type, recon_luma, recon_chroma); + if (!state->encoder_control->cfg.cclm && cur_cu->intra.isp_mode != ISP_MODE_NO_ISP) { + uvg_recon_and_estimate_cost_isp( + state, + cu_loc, + 0, + &intra_search, + lcu + ); + } if((!recon_chroma && state->encoder_control->chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) diff --git a/src/search_intra.c b/src/search_intra.c index 8067772a..6a488952 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -264,6 +264,7 @@ static void derive_mts_constraints(cu_info_t *const pred_cu, } + /** * \brief Perform search for best intra transform split configuration. * @@ -353,14 +354,14 @@ static double search_intra_trdepth( } int start_idx = 0; - int end_idx = state->encoder_control->cfg.lfnst && PU_IS_TU(pred_cu) && + int end_lfnst_idx = state->encoder_control->cfg.lfnst && PU_IS_TU(pred_cu) && uvg_can_use_isp_with_lfnst(width, height, pred_cu->intra.isp_mode, tree_type) ? max_lfnst_idx : 0; - for (int i = start_idx; i < end_idx + 1; ++i) { + for (int i = start_idx; i < end_lfnst_idx + 1; ++i) { search_data->lfnst_costs[i] = MAX_DOUBLE; } - for (int lfnst_idx = start_idx; lfnst_idx <= end_idx; lfnst_idx++) { + for (int lfnst_idx = start_idx; lfnst_idx <= end_lfnst_idx; lfnst_idx++) { // Initialize lfnst variables pred_cu->lfnst_idx = lfnst_idx; pred_cu->violates_lfnst_constrained_luma = false; @@ -391,21 +392,32 @@ static double search_intra_trdepth( continue; } - if (!has_been_split) { + if (!has_been_split && (lfnst_idx != 0 || trafo != 0)) { memcpy(&state->search_cabac, &cabac_data, sizeof(cabac_data)); state->search_cabac.update = 1; } - - uvg_intra_recon_cu( - state, - search_data, - cu_loc, - pred_cu, - lcu, - UVG_LUMA_T, - true, - false + double rd_cost; + if (pred_cu->intra.isp_mode != ISP_MODE_NO_ISP) { + rd_cost = uvg_recon_and_estimate_cost_isp( + state, + cu_loc, + cost_treshold, + search_data, + lcu ); + } + else { + uvg_intra_recon_cu( + state, + search_data, + cu_loc, + pred_cu, + lcu, + UVG_LUMA_T, + true, + false + ); + } if (pred_cu->intra.isp_mode != ISP_MODE_NO_ISP && search_data->best_isp_cbfs == 0) continue; if (trafo != 0 && !cbf_is_set(pred_cu->cbf, COLOR_Y)) continue; @@ -417,13 +429,8 @@ static double search_intra_trdepth( continue; } } - - const unsigned scan_offset = xy_to_zorder( - LCU_WIDTH, - lcu_px.x, - lcu_px.y); - - if (trafo != MTS_SKIP && end_idx != 0) { + + if (trafo != MTS_SKIP && end_lfnst_idx != 0) { uvg_derive_lfnst_constraints( pred_cu, constraints, @@ -434,22 +441,25 @@ static double search_intra_trdepth( COLOR_Y); } - if (!constraints[1] && cbf_is_set(pred_cu->cbf, COLOR_Y)) { + if (!constraints[1] && (cbf_is_set(pred_cu->cbf, COLOR_Y) || pred_cu->intra.isp_mode != ISP_MODE_NO_ISP)) { //end_idx = 0; if (pred_cu->lfnst_idx > 0) { continue; } } - - double rd_cost = uvg_cu_rd_cost_luma( - state, - cu_loc, - pred_cu, - lcu, - search_data->best_isp_cbfs); + + + if (pred_cu->intra.isp_mode == ISP_MODE_NO_ISP) { + rd_cost = uvg_cu_rd_cost_luma( + state, + cu_loc, + pred_cu, + lcu, + search_data->best_isp_cbfs); + } double transform_bits = 0; if (state->encoder_control->cfg.lfnst && PU_IS_TU(pred_cu) && - trafo != MTS_SKIP) { + trafo != MTS_SKIP && end_lfnst_idx != 0) { if (!constraints[0] && constraints[1]) { transform_bits += CTX_ENTROPY_FBITS( &state->search_cabac.ctx.lfnst_idx_model[tree_type == UVG_LUMA_T], @@ -462,9 +472,9 @@ static double search_intra_trdepth( } } if (num_transforms > 2 && trafo != MTS_SKIP && width <= 32 - /*&& height <= 32*/ + && height <= 32 && !pred_cu->violates_mts_coeff_constraint && pred_cu-> - mts_last_scan_pos && lfnst_idx == 0) { + mts_last_scan_pos) { bool symbol = trafo != 0; int ctx_idx = 0; @@ -1320,12 +1330,12 @@ static int8_t search_intra_rdo( can_do_isp_search = search_data[mode].pred_cu.intra.multi_ref_idx == 0 ? can_do_isp_search : false; // Cannot use ISP with MRL double best_isp_cost = MAX_DOUBLE; double best_bits = MAX_DOUBLE; - int8_t best_isp_mode = -1; + int8_t best_isp_mode = 0; int max_isp_modes = can_do_isp_search && uvg_can_use_isp(width, height) && state->encoder_control->cfg.isp ? NUM_ISP_MODES : 1; // - int best_mts_mode_for_isp[NUM_ISP_MODES] = {0}; - int best_lfnst_mode_for_isp[NUM_ISP_MODES] = {0}; + uint8_t best_mts_mode_for_isp[NUM_ISP_MODES] = {0}; + uint8_t best_lfnst_mode_for_isp[NUM_ISP_MODES] = {0}; for (int isp_mode = 0; isp_mode < max_isp_modes; ++isp_mode) { @@ -1353,6 +1363,7 @@ static int8_t search_intra_rdo( search_data[mode].bits = best_bits; search_data[mode].pred_cu.intra.isp_mode = best_isp_mode; search_data[mode].pred_cu.tr_idx = best_mts_mode_for_isp[best_isp_mode]; + search_data[mode].pred_cu.tr_skip = best_mts_mode_for_isp[best_isp_mode] == MTS_SKIP; search_data[mode].pred_cu.lfnst_idx = best_lfnst_mode_for_isp[best_isp_mode]; }