From 658d639f8ba0528f05885b82944ac431bab12daa Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Mon, 6 Jun 2022 12:10:31 +0300 Subject: [PATCH] [jccr] Chroma tr_search for inter --- src/context.c | 1 + src/search.c | 12 +----- src/search_inter.c | 94 ++++++++++++++++++++++++++++++++++++++++------ src/search_intra.c | 2 +- src/transform.c | 10 ++--- src/transform.h | 3 +- 6 files changed, 93 insertions(+), 29 deletions(-) diff --git a/src/context.c b/src/context.c index b05d579d..8e042cc2 100644 --- a/src/context.c +++ b/src/context.c @@ -471,6 +471,7 @@ void uvg_ctx_init(cabac_ctx_t *ctx, int32_t qp, int32_t init_value, uint8_t rate void uvg_init_contexts(encoder_state_t *state, int8_t QP, int8_t slice) { cabac_data_t * const cabac = &state->cabac; + memset(&state->cabac.ctx, 0, sizeof(state->cabac.ctx)); uint16_t i, ii; // Initialize contexts diff --git a/src/search.c b/src/search.c index 52911e2c..4289579a 100644 --- a/src/search.c +++ b/src/search.c @@ -1077,20 +1077,12 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, } uvg_quantize_lcu_residual(state, - true, has_chroma, - state->encoder_control->cfg.jccr, x, y, + true, has_chroma && !cur_cu->joint_cb_cr, + cur_cu->joint_cb_cr, x, y, depth, NULL, lcu, false); - if (cur_cu->depth == cur_cu->tr_depth && state->encoder_control->cfg.jccr && cur_cu->joint_cb_cr) { - uvg_select_jccr_mode(state, - x, y, - depth, - NULL, - lcu, - NULL); - } int cbf = cbf_is_set_any(cur_cu->cbf, depth); diff --git a/src/search_inter.c b/src/search_inter.c index bf340285..af122d8e 100644 --- a/src/search_inter.c +++ b/src/search_inter.c @@ -2101,7 +2101,8 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state, const int width = LCU_WIDTH >> depth; cabac_data_t cabac_copy; memcpy(&cabac_copy, &state->search_cabac, sizeof(cabac_copy)); - cabac_copy.update = 1; + cabac_data_t* cabac = &state->search_cabac; + state->search_cabac.update = 1; cu_info_t* cur_pu = LCU_GET_CU_AT_PX(lcu, x_px, y_px); *cur_pu = *cur_cu; @@ -2128,21 +2129,90 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state, const int skip_context = uvg_get_skip_context(x, y, lcu, NULL, NULL); if (cur_cu->merged && cur_cu->part_size == SIZE_2Nx2N) { no_cbf_bits = CTX_ENTROPY_FBITS(&state->cabac.ctx.cu_skip_flag_model[skip_context], 1) + *inter_bitcost; - bits += uvg_mock_encode_coding_unit(state, &cabac_copy, x, y, depth, lcu, cur_cu); + bits += uvg_mock_encode_coding_unit(state, cabac, x, y, depth, lcu, cur_cu); } else { - no_cbf_bits = uvg_mock_encode_coding_unit(state, &cabac_copy, x, y, depth, lcu, cur_cu); - bits += no_cbf_bits - CTX_ENTROPY_FBITS(&cabac_copy.ctx.cu_qt_root_cbf_model, 0) + CTX_ENTROPY_FBITS(&cabac_copy.ctx.cu_qt_root_cbf_model, 1); + no_cbf_bits = uvg_mock_encode_coding_unit(state, cabac, x, y, depth, lcu, cur_cu); + bits += no_cbf_bits - CTX_ENTROPY_FBITS(&cabac->ctx.cu_qt_root_cbf_model, 0) + CTX_ENTROPY_FBITS(&cabac->ctx.cu_qt_root_cbf_model, 1); } double no_cbf_cost = ssd + no_cbf_bits * state->lambda; - uvg_quantize_lcu_residual(state, - true, reconstruct_chroma, - reconstruct_chroma && state->encoder_control->cfg.jccr, x, y, - depth, - cur_cu, - lcu, - false); + const int can_use_chroma_tr_skip = state->encoder_control->cfg.trskip_enable && + (1 << state->encoder_control->cfg.trskip_max_size) >= width && + state->encoder_control->cfg.chroma_trskip_enable; + + double chroma_cost = 0; + if((state->encoder_control->cfg.jccr || can_use_chroma_tr_skip) && cur_cu->depth == cur_cu->tr_depth && reconstruct_chroma) { + uvg_quantize_lcu_residual(state, + true, false,false, x, y, + depth, + cur_cu, + lcu, + false); + ALIGNED(64) kvz_pixel u_pred[LCU_WIDTH_C * LCU_WIDTH_C]; + ALIGNED(64) kvz_pixel v_pred[LCU_WIDTH_C * LCU_WIDTH_C]; + kvz_pixels_blit(&lcu->ref.u[index], u_pred, width, width, LCU_WIDTH_C, width); + kvz_pixels_blit(&lcu->ref.v[index], v_pred, width, width, LCU_WIDTH_C, width); + ALIGNED(64) int16_t u_resi[LCU_WIDTH_C * LCU_WIDTH_C]; + ALIGNED(64) int16_t v_resi[LCU_WIDTH_C * LCU_WIDTH_C]; + + uvg_generate_residual( + &lcu->ref.u[index], + u_pred, + u_resi, + width, + LCU_WIDTH_C, + width); + uvg_generate_residual( + &lcu->ref.v[index], + v_pred, + v_resi, + width, + LCU_WIDTH_C, + width); + + kvz_chorma_ts_out_t chorma_ts_out; + kvz_chroma_transform_search( + state, + depth, + lcu, + &cabac_copy, + width, + width, + index, + 0, + cur_cu, + u_pred, + v_pred, + u_resi, + v_resi, + &chorma_ts_out); + cbf_clear(&cur_cu->cbf, depth, COLOR_U); + cbf_clear(&cur_cu->cbf, depth, COLOR_V); + if (chorma_ts_out.best_u_cost + chorma_ts_out.best_v_cost < chorma_ts_out.best_combined_cost) { + cur_cu->joint_cb_cr = 0; + cur_cu->tr_skip |= (chorma_ts_out.best_u_index == CHROMA_TS) << COLOR_U; + cur_cu->tr_skip |= (chorma_ts_out.best_v_index == CHROMA_TS) << COLOR_V; + if(chorma_ts_out.best_u_index != NO_RESIDUAL) cbf_set(&cur_cu->cbf, depth, COLOR_U); + if(chorma_ts_out.best_v_index != NO_RESIDUAL) cbf_set(&cur_cu->cbf, depth, COLOR_V); + chroma_cost += chorma_ts_out.best_u_cost + chorma_ts_out.best_v_cost; + } + else { + cur_cu->joint_cb_cr = chorma_ts_out.best_combined_index; + if (chorma_ts_out.best_combined_index & 2) cbf_set(&cur_cu->cbf, depth, COLOR_U); + if (chorma_ts_out.best_combined_index & 1) cbf_set(&cur_cu->cbf, depth, COLOR_V); + chroma_cost += chorma_ts_out.best_combined_cost; + } + } + else { + uvg_quantize_lcu_residual(state, + true, reconstruct_chroma, + reconstruct_chroma && state->encoder_control->cfg.jccr, x, y, + depth, + cur_cu, + lcu, + false); + } int cbf = cbf_is_set_any(cur_cu->cbf, depth); @@ -2153,7 +2223,7 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state, *inter_cost += uvg_cu_rd_cost_chroma(state, x_px, y_px, depth, cur_cu, lcu); } else { - uvg_select_jccr_mode(state, x_px, y_px, depth, cur_cu, lcu, inter_cost); + *inter_cost += chroma_cost; } } } diff --git a/src/search_intra.c b/src/search_intra.c index f977b6b5..04935328 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -1436,7 +1436,7 @@ int8_t uvg_search_intra_chroma_rdo( state, depth, lcu, - temp_cabac, + &temp_cabac, width, height, offset, diff --git a/src/transform.c b/src/transform.c index 43620918..468a7532 100644 --- a/src/transform.c +++ b/src/transform.c @@ -256,7 +256,7 @@ static void generate_jccr_transforms( int64_t d1 = 0; int64_t d2 = 0; const int cbf_mask = jccr * (state->frame->jccr_sign ? -1 : 1); - int16_t* current_resi = &temp_resi[(jccr - 1) * trans_offset]; + int16_t* current_resi = &temp_resi[MAX((jccr - 1) , 0) * trans_offset]; for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) @@ -428,7 +428,7 @@ void kvz_chroma_transform_search( encoder_state_t* const state, int depth, lcu_t* const lcu, - cabac_data_t temp_cabac, + cabac_data_t* temp_cabac, int8_t width, int8_t height, const int offset, @@ -634,11 +634,11 @@ void kvz_chroma_transform_search( double v_cost = KVZ_CHROMA_MULT * ssd_v + v_bits * state->frame->lambda; if (u_cost < chorma_ts_out->best_u_cost) { chorma_ts_out->best_u_cost = u_cost; - chorma_ts_out->best_u_index = transforms[i]; + chorma_ts_out->best_u_index = u_has_coeffs ? transforms[i] : NO_RESIDUAL; } if (v_cost < chorma_ts_out->best_v_cost) { chorma_ts_out->best_v_cost = v_cost; - chorma_ts_out->best_v_index = transforms[i]; + chorma_ts_out->best_v_index = v_has_coeffs ? transforms[i] : NO_RESIDUAL; } } else { @@ -648,7 +648,7 @@ void kvz_chroma_transform_search( chorma_ts_out->best_combined_index = transforms[i]; } } - memcpy(&state->search_cabac, &temp_cabac, sizeof(cabac_data_t)); + memcpy(&state->search_cabac, temp_cabac, sizeof(cabac_data_t)); } } diff --git a/src/transform.h b/src/transform.h index 69aeebe5..7ccc7146 100644 --- a/src/transform.h +++ b/src/transform.h @@ -92,7 +92,7 @@ void kvz_chroma_transform_search( encoder_state_t* const state, int depth, lcu_t* const lcu, - cabac_data_t temp_cabac, + cabac_data_t* temp_cabac, int8_t width, int8_t height, const int offset, @@ -107,6 +107,7 @@ void kvz_chroma_transform_search( enum kvz_chroma_transforms { DCT7_CHROMA = 0, CHROMA_TS = 4, + NO_RESIDUAL = 8, JCCR_1 = 1, JCCR_2 = 2, JCCR_3 = 3,