diff --git a/src/intra.c b/src/intra.c index e65acfea..5712b40a 100644 --- a/src/intra.c +++ b/src/intra.c @@ -1665,12 +1665,15 @@ void uvg_intra_recon_cu( return; } if (search_data->pred_cu.intra.isp_mode != ISP_MODE_NO_ISP && recon_luma ) { + search_data->best_isp_cbfs = 0; // ISP split is done horizontally or vertically depending on ISP mode, 2 or 4 times depending on block dimensions. // Small blocks are split only twice. int split_type = search_data->pred_cu.intra.isp_mode; int part_dim = uvg_get_isp_split_dim(width, height, split_type); int limit = split_type == ISP_MODE_HOR ? height : width; + int split_num = 0; for (int part = 0; part < limit; part += part_dim) { + cbf_clear(&cur_cu->cbf, depth, COLOR_Y); const int part_x = split_type == ISP_MODE_HOR ? x : x + part; const int part_y = split_type == ISP_MODE_HOR ? y + part: y; const int part_w = split_type == ISP_MODE_HOR ? part_dim : width; @@ -1683,6 +1686,8 @@ void uvg_intra_recon_cu( uvg_quantize_lcu_residual(state, true, false, false, &loc, depth, cur_cu, lcu, false, tree_type); + search_data->best_isp_cbfs |= cbf_is_set(cur_cu->cbf, depth, COLOR_Y) << (split_num++); + } } const bool has_luma = recon_luma && search_data->pred_cu.intra.isp_mode == ISP_MODE_NO_ISP; @@ -1700,6 +1705,7 @@ void uvg_intra_recon_cu( intra_recon_tb_leaf(state, &loc, lcu, COLOR_V, search_data, tree_type); } + // TODO: not necessary to call if only luma and ISP is on uvg_quantize_lcu_residual(state, has_luma, has_chroma && !(search_data->pred_cu.joint_cb_cr & 3), search_data->pred_cu.joint_cb_cr & 3 && state->encoder_control->cfg.jccr && has_chroma, &loc, depth, cur_cu, lcu, diff --git a/src/intra.h b/src/intra.h index 75b969b3..6fee8f1f 100644 --- a/src/intra.h +++ b/src/intra.h @@ -71,6 +71,7 @@ typedef struct { double coeff_bits; double distortion; double lfnst_costs[3]; + uint8_t best_isp_cbfs; } intra_search_data_t ; @@ -168,5 +169,6 @@ int uvg_get_mip_flag_context(int x, int y, int width, int height, const lcu_t* l #define 
SPLIT_TYPE_HOR 1 #define SPLIT_TYPE_VER 2 +int uvg_get_isp_split_dim(const int width, const int height, const int split_type); bool uvg_can_use_isp(const int width, const int height, const int max_tr_size); bool uvg_can_use_isp_with_lfnst(const int width, const int height, const int isp_mode, const enum uvg_tree_type tree_type); diff --git a/src/search.c b/src/search.c index c549fa39..b09a835f 100644 --- a/src/search.c +++ b/src/search.c @@ -306,7 +306,8 @@ static void downsample_cclm_rec(encoder_state_t *state, int x, int y, int width, double uvg_cu_rd_cost_luma(const encoder_state_t *const state, const int x_px, const int y_px, const int depth, const cu_info_t *const pred_cu, - lcu_t *const lcu) + lcu_t *const lcu, + uint8_t isp_cbf) { const int width = LCU_WIDTH >> depth; const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0); @@ -328,29 +329,40 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state, int offset = width / 2; double sum = 0; - sum += uvg_cu_rd_cost_luma(state, x_px, y_px, depth + 1, pred_cu, lcu); - sum += uvg_cu_rd_cost_luma(state, x_px + offset, y_px, depth + 1, pred_cu, lcu); - sum += uvg_cu_rd_cost_luma(state, x_px, y_px + offset, depth + 1, pred_cu, lcu); - sum += uvg_cu_rd_cost_luma(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu); + sum += uvg_cu_rd_cost_luma(state, x_px, y_px, depth + 1, pred_cu, lcu, isp_cbf); + sum += uvg_cu_rd_cost_luma(state, x_px + offset, y_px, depth + 1, pred_cu, lcu, isp_cbf); + sum += uvg_cu_rd_cost_luma(state, x_px, y_px + offset, depth + 1, pred_cu, lcu, isp_cbf); + sum += uvg_cu_rd_cost_luma(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu, isp_cbf); return sum + tr_tree_bits * state->lambda; } // Add transform_tree cbf_luma bit cost. 
- const int is_tr_split = tr_cu->tr_depth - tr_cu->depth; - int is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_Y); - if (pred_cu->type == CU_INTRA || + if (pred_cu->type != CU_INTRA || pred_cu->intra.isp_mode == ISP_MODE_NO_ISP) { + const int is_tr_split = tr_cu->tr_depth - tr_cu->depth; + int is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_Y); + if (pred_cu->type == CU_INTRA || is_tr_split || cbf_is_set(tr_cu->cbf, depth, COLOR_U) || cbf_is_set(tr_cu->cbf, depth, COLOR_V)) - { - cabac_ctx_t *ctx = &(cabac->ctx.qt_cbf_model_luma[0]); + { + cabac_ctx_t* ctx = &(cabac->ctx.qt_cbf_model_luma[0]); - CABAC_FBITS_UPDATE(cabac, ctx, is_set, tr_tree_bits, "cbf_y_search"); + CABAC_FBITS_UPDATE(cabac, ctx, is_set, tr_tree_bits, "cbf_y_search"); + } + + if (is_set && state->encoder_control->cfg.trskip_enable && width <= (1 << state->encoder_control->cfg.trskip_max_size)) { + CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_luma, pred_cu->tr_idx == MTS_SKIP, tr_tree_bits, "transform_skip_flag"); + } } - - if (is_set && state->encoder_control->cfg.trskip_enable && width <= (1 << state->encoder_control->cfg.trskip_max_size)) { - CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_luma, pred_cu->tr_idx == MTS_SKIP, tr_tree_bits, "transform_skip_flag"); + else { + cabac_ctx_t* ctx = &(cabac->ctx.qt_cbf_model_luma[0]); + // TODO: 8x4 CUs. The 4th cbf is skipped only when it is the sole one set (isp_cbf == 0x8). + for (int i = 0; i < 4; i++) { + if (i != 3 || isp_cbf != 0x8) { + CABAC_FBITS_UPDATE(cabac, ctx, (isp_cbf >> i) & 1, tr_tree_bits, "cbf_y_search"); + } + } } // SSD between reconstruction and original @@ -477,7 +489,8 @@ static double cu_rd_cost_tr_split_accurate( const int depth, const cu_info_t* const pred_cu, lcu_t* const lcu, - enum uvg_tree_type tree_type) { + enum uvg_tree_type tree_type, + uint8_t isp_cbf) { const int width = LCU_WIDTH >> depth; const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0); @@ -522,25 +535,37 @@ static double cu_rd_cost_tr_split_accurate( int offset = LCU_WIDTH >> (depth + 1); 
double sum = 0; - sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px, depth + 1, pred_cu, lcu, tree_type); - sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px, depth + 1, pred_cu, lcu, tree_type); - sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px + offset, depth + 1, pred_cu, lcu, tree_type); - sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu, tree_type); + sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px, depth + 1, pred_cu, lcu, tree_type, isp_cbf); + sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px, depth + 1, pred_cu, lcu, tree_type, isp_cbf); + sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px + offset, depth + 1, pred_cu, lcu, tree_type, isp_cbf); + sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu, tree_type, isp_cbf); return sum + tr_tree_bits * state->lambda; } const int cb_flag_y = cbf_is_set(tr_cu->cbf, depth, COLOR_Y) && tree_type != UVG_CHROMA_T; + const bool is_isp = !(pred_cu->type == CU_INTER || pred_cu->intra.isp_mode == ISP_MODE_NO_ISP); // Add transform_tree cbf_luma bit cost. 
- const int is_tr_split = depth - tr_cu->depth; - if ((pred_cu->type == CU_INTRA || - is_tr_split || - cb_flag_u || - cb_flag_v) + if (!is_isp) { + const int is_tr_split = depth - tr_cu->depth; + if ((pred_cu->type == CU_INTRA || + is_tr_split || + cb_flag_u || + cb_flag_v) && !skip_residual_coding && tree_type != UVG_CHROMA_T) - { - cabac_ctx_t* ctx = &(cabac->ctx.qt_cbf_model_luma[0]); + { + cabac_ctx_t* ctx = &(cabac->ctx.qt_cbf_model_luma[0]); - CABAC_FBITS_UPDATE(cabac, ctx, cb_flag_y, tr_tree_bits, "cbf_y_search"); + CABAC_FBITS_UPDATE(cabac, ctx, cb_flag_y, tr_tree_bits, "cbf_y_search"); + } + } + else { + cabac_ctx_t* ctx = &(cabac->ctx.qt_cbf_model_luma[0]); + // TODO: 8x4 CUs. The 4th cbf is skipped only when it is the sole one set (isp_cbf == 0x8). + for (int i = 0; i < 4; i++) { + if (i != 3 || isp_cbf != 0x8) { + CABAC_FBITS_UPDATE(cabac, ctx, (isp_cbf >> i) & 1, tr_tree_bits, "cbf_y_search"); + } + } } if (cb_flag_y || cb_flag_u || cb_flag_v) { @@ -562,7 +587,7 @@ static double cu_rd_cost_tr_split_accurate( } // Chroma transform skip enable/disable is non-normative, so we need to count the chroma // tr-skip bits even when we are never using it. 
- const bool can_use_tr_skip = state->encoder_control->cfg.trskip_enable && width <= (1 << state->encoder_control->cfg.trskip_max_size); + const bool can_use_tr_skip = state->encoder_control->cfg.trskip_enable && width <= (1 << state->encoder_control->cfg.trskip_max_size) && !is_isp; if(cb_flag_y){ if (can_use_tr_skip) { @@ -1111,7 +1136,7 @@ static double search_cu( cost = bits * state->lambda; - cost += cu_rd_cost_tr_split_accurate(state, x_local, y_local, depth, cur_cu, lcu, tree_type); + cost += cu_rd_cost_tr_split_accurate(state, x_local, y_local, depth, cur_cu, lcu, tree_type, 0); if (ctrl->cfg.zero_coeff_rdo && inter_zero_coeff_cost <= cost) { cost = inter_zero_coeff_cost; @@ -1268,7 +1293,7 @@ static double search_cu( double mode_bits = calc_mode_bits(state, lcu, cur_cu, x, y, depth) + bits; cost += mode_bits * state->lambda; - cost += cu_rd_cost_tr_split_accurate(state, x_local, y_local, depth, cur_cu, lcu, tree_type); + cost += cu_rd_cost_tr_split_accurate(state, x_local, y_local, depth, cur_cu, lcu, tree_type, 0); memcpy(&post_seach_cabac, &state->search_cabac, sizeof(post_seach_cabac)); memcpy(&state->search_cabac, &temp_cabac, sizeof(temp_cabac)); diff --git a/src/search.h b/src/search.h index 7566fb96..1a013670 100644 --- a/src/search.h +++ b/src/search.h @@ -87,7 +87,8 @@ void uvg_search_lcu(encoder_state_t *state, int x, int y, const yuv_t *hor_buf, double uvg_cu_rd_cost_luma(const encoder_state_t *const state, const int x_px, const int y_px, const int depth, const cu_info_t *const pred_cu, - lcu_t *const lcu); + lcu_t *const lcu, + uint8_t isp_cbf); double uvg_cu_rd_cost_chroma(const encoder_state_t *const state, const int x_px, const int y_px, const int depth, cu_info_t *const pred_cu, diff --git a/src/search_inter.c b/src/search_inter.c index 11df3c8c..44164684 100644 --- a/src/search_inter.c +++ b/src/search_inter.c @@ -2237,7 +2237,7 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state, int cbf = cbf_is_set_any(cur_cu->cbf, depth); 
if(cbf) { - *inter_cost = uvg_cu_rd_cost_luma(state, x_px, y_px, depth, cur_cu, lcu); + *inter_cost = uvg_cu_rd_cost_luma(state, x_px, y_px, depth, cur_cu, lcu, 0); if (reconstruct_chroma) { if (cur_cu->depth != cur_cu->tr_depth || !state->encoder_control->cfg.jccr) { *inter_cost += uvg_cu_rd_cost_chroma(state, x_px, y_px, depth, cur_cu, lcu); diff --git a/src/search_intra.c b/src/search_intra.c index 614f32d5..6e2fa36a 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -406,6 +406,7 @@ static double search_intra_trdepth( UVG_LUMA_T, true, false); + if (pred_cu->intra.isp_mode != ISP_MODE_NO_ISP && search_data->best_isp_cbfs == 0) continue; if (trafo != 0 && !cbf_is_set(pred_cu->cbf, depth, COLOR_Y)) continue; @@ -445,7 +446,8 @@ static double search_intra_trdepth( lcu_px.y, depth, pred_cu, - lcu); + lcu, + search_data->best_isp_cbfs); double transform_bits = 0; if (state->encoder_control->cfg.lfnst && depth == pred_cu->tr_depth && trafo != MTS_SKIP) { diff --git a/src/transform.c b/src/transform.c index 10031f7b..01f6289f 100644 --- a/src/transform.c +++ b/src/transform.c @@ -1269,7 +1269,6 @@ static void quantize_tr_residual( } - // ISP_TODO: when other ISP things work, ask Joose about this cbf_clear(&cur_pu->cbf, depth, color); if (has_coeffs) { cbf_set(&cur_pu->cbf, depth, color);