diff --git a/src/rdo.c b/src/rdo.c index bad372a9..c9f2db05 100644 --- a/src/rdo.c +++ b/src/rdo.c @@ -301,7 +301,8 @@ static INLINE double get_coeff_cabac_cost( color_t color, int8_t scan_mode, int8_t tr_skip, - cu_info_t* cur_tu) + cu_info_t* cur_tu, + int coeff_order) { const int width = cu_loc->width; const int height = cu_loc->height; @@ -315,11 +316,20 @@ static INLINE double get_coeff_cabac_cost( // Make sure there are coeffs present bool found = false; + coeff_t* coeff_ptr = NULL; coeff_t sub_coeff[TR_MAX_WIDTH * TR_MAX_WIDTH]; - uvg_get_sub_coeff(sub_coeff, coeff, x_local, y_local, sub_coeff_w, sub_coeff_h, lcu_width); + + if (coeff_order == COEFF_ORDER_LINEAR) { + coeff_ptr = coeff; + } + else { + // Coeff order CU + uvg_get_sub_coeff(sub_coeff, coeff, x_local, y_local, sub_coeff_w, sub_coeff_h, lcu_width); + coeff_ptr = sub_coeff; + } for (int i = 0; i < sub_coeff_w * sub_coeff_h; i++) { - if (sub_coeff[i] != 0) { + if (coeff_ptr[i] != 0) { found = 1; break; } @@ -342,7 +352,7 @@ static INLINE double get_coeff_cabac_cost( if(!tr_skip) { uvg_encode_coeff_nxn((encoder_state_t*) state, &cabac_copy, - sub_coeff, + coeff_ptr, cu_loc, color, scan_mode, @@ -352,7 +362,7 @@ static INLINE double get_coeff_cabac_cost( else { uvg_encode_ts_residual((encoder_state_t* const)state, &cabac_copy, - sub_coeff, + coeff_ptr, width, height, color, @@ -408,7 +418,8 @@ double uvg_get_coeff_cost( cu_loc_t *cu_loc, color_t color, int8_t scan_mode, - int8_t tr_skip) + int8_t tr_skip, + int coeff_order) { uint8_t save_cccs = state->encoder_control->cfg.fastrd_sampling_on; uint8_t check_accuracy = state->encoder_control->cfg.fastrd_accuracy_check_on; @@ -428,13 +439,13 @@ double uvg_get_coeff_cost( uint64_t weights = uvg_fast_coeff_get_weights(state); uint32_t fast_cost = uvg_fast_coeff_cost(coeff, width, height, weights); if (check_accuracy) { - double ccc = get_coeff_cabac_cost(state, coeff, cu_loc, color, scan_mode, tr_skip, cur_tu); + double ccc = get_coeff_cabac_cost(state, coeff, cu_loc, color, scan_mode, tr_skip, cur_tu, coeff_order); save_accuracy(state->qp, ccc, fast_cost); } return fast_cost; } } else { - double ccc = get_coeff_cabac_cost(state, coeff, cu_loc, color, scan_mode, tr_skip, cur_tu); + double ccc = get_coeff_cabac_cost(state, coeff, cu_loc, color, scan_mode, tr_skip, cur_tu, coeff_order); if (save_cccs) { save_ccc(state->qp, coeff, width * width, ccc); } diff --git a/src/rdo.h b/src/rdo.h index c9b88df3..eb9714f6 100644 --- a/src/rdo.h +++ b/src/rdo.h @@ -76,7 +76,8 @@ double uvg_get_coeff_cost( cu_loc_t *cu_loc, color_t color, int8_t scan_mode, - int8_t tr_skip); + int8_t tr_skip, + int coeff_order); int32_t uvg_get_ic_rate(encoder_state_t *state, uint32_t abs_level, uint16_t ctx_num_gt1, uint16_t ctx_num_gt2, uint16_t ctx_num_par, uint16_t abs_go_rice, uint32_t reg_bins, int8_t type, int use_limited_prefix_length); diff --git a/src/search.c b/src/search.c index 633f95f7..66eac299 100644 --- a/src/search.c +++ b/src/search.c @@ -393,9 +393,10 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state, if (!skip_residual_coding) { int8_t luma_scan_mode = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth); if (pred_cu->type == CU_INTER || pred_cu->intra.isp_mode == ISP_MODE_NO_ISP) { - const coeff_t* coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)]; + //const coeff_t* coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)]; + const coeff_t* coeffs = lcu->coeff.y; - coeff_bits += uvg_get_coeff_cost(state, coeffs, NULL, &loc, 0, luma_scan_mode, pred_cu->tr_idx == MTS_SKIP); + coeff_bits += uvg_get_coeff_cost(state, coeffs, NULL, &loc, 0, luma_scan_mode, pred_cu->tr_idx == MTS_SKIP, COEFF_ORDER_CU); } else { int split_type = pred_cu->intra.isp_mode; @@ -408,9 +409,10 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state, const int part_y = split_loc.y; // TODO: maybe just pass the cu_loc_t to these functions - const coeff_t* coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, part_x, part_y)]; + //const coeff_t* coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, part_x, part_y)]; + const coeff_t* coeffs = lcu->coeff.y; - coeff_bits += uvg_get_coeff_cost(state, coeffs, NULL, &split_loc, 0, luma_scan_mode, pred_cu->tr_idx == MTS_SKIP); + coeff_bits += uvg_get_coeff_cost(state, coeffs, NULL, &split_loc, 0, luma_scan_mode, pred_cu->tr_idx == MTS_SKIP, COEFF_ORDER_CU); } } } @@ -501,14 +503,14 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state, if (!skip_residual_coding) { int8_t scan_order = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth); - const int index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y); + //const int index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y); if((pred_cu->joint_cb_cr & 3) == 0){ - coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.u[index], NULL, &loc, 2, scan_order, 0); - coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.v[index], NULL, &loc, 2, scan_order, 0); + coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.u, NULL, &loc, 2, scan_order, 0, COEFF_ORDER_CU); + coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.v, NULL, &loc, 2, scan_order, 0, COEFF_ORDER_CU); } else { - coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.joint_uv[index], NULL, &loc, 2, scan_order, 0); + coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.joint_uv, NULL, &loc, 2, scan_order, 0, COEFF_ORDER_CU); } } @@ -638,9 +640,10 @@ static double cu_rd_cost_tr_split_accurate( } int8_t luma_scan_mode = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth); if (pred_cu->type == CU_INTER || pred_cu->intra.isp_mode == ISP_MODE_NO_ISP) { - const coeff_t* coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)]; + //const coeff_t* coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)]; + const coeff_t* coeffs = lcu->coeff.y; - coeff_bits += uvg_get_coeff_cost(state, coeffs, tr_cu, &loc, 0, luma_scan_mode, pred_cu->tr_idx == MTS_SKIP); + coeff_bits += uvg_get_coeff_cost(state, coeffs, tr_cu, &loc, 0, luma_scan_mode, pred_cu->tr_idx == MTS_SKIP, COEFF_ORDER_CU); } else { int split_type = pred_cu->intra.isp_mode; @@ -653,9 +656,10 @@ static double cu_rd_cost_tr_split_accurate( const int part_y = split_loc.y; // TODO: maybe just pass the cu_loc_t to these functions - const coeff_t* coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, part_x, part_y)]; + //const coeff_t* coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, part_x, part_y)]; + const coeff_t* coeffs = lcu->coeff.y; - coeff_bits += uvg_get_coeff_cost(state, coeffs, tr_cu, &split_loc, 0, luma_scan_mode, pred_cu->tr_idx == MTS_SKIP); + coeff_bits += uvg_get_coeff_cost(state, coeffs, tr_cu, &split_loc, 0, luma_scan_mode, pred_cu->tr_idx == MTS_SKIP, COEFF_ORDER_CU); } } } @@ -687,7 +691,7 @@ static double cu_rd_cost_tr_split_accurate( const int chroma_width = MAX(4, LCU_WIDTH >> (depth + 1)); const int chroma_height = chroma_width; // TODO: height for non-square blocks int8_t scan_order = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth); - const unsigned index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y); + //const unsigned index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y); const bool chroma_can_use_tr_skip = state->encoder_control->cfg.trskip_enable && chroma_width <= (1 << state->encoder_control->cfg.trskip_max_size); if(pred_cu->joint_cb_cr == 0) { @@ -707,8 +711,8 @@ static double cu_rd_cost_tr_split_accurate( if(chroma_can_use_tr_skip && cb_flag_v) { CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 4, tr_tree_bits, "transform_skip_flag"); } - coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.u[index], tr_cu, &loc, COLOR_U, scan_order, tr_cu->tr_skip & 2); - coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.v[index], tr_cu, &loc, COLOR_V, scan_order, tr_cu->tr_skip & 4); + coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.u, tr_cu, &loc, COLOR_U, scan_order, tr_cu->tr_skip & 2, COEFF_ORDER_CU); + coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.v, tr_cu, &loc, COLOR_V, scan_order, tr_cu->tr_skip & 4, COEFF_ORDER_CU); } else { @@ -725,7 +729,7 @@ static double cu_rd_cost_tr_split_accurate( if (chroma_can_use_tr_skip) { CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 2, tr_tree_bits, "transform_skip_flag"); } - coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.joint_uv[index], tr_cu, &loc, COLOR_U, scan_order, 0); + coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.joint_uv, tr_cu, &loc, COLOR_U, scan_order, 0, COEFF_ORDER_CU); } } diff --git a/src/transform.c b/src/transform.c index e5a3dc82..30e3bd64 100644 --- a/src/transform.c +++ b/src/transform.c @@ -694,7 +694,8 @@ void uvg_chroma_transform_search( cu_loc, COLOR_U, scan_order, - transforms[i] == CHROMA_TS); + transforms[i] == CHROMA_TS, + COEFF_ORDER_LINEAR); u_bits += coeff_cost; } if (cbf_v && !IS_JCCR_MODE(transforms[i])) { @@ -710,7 +711,8 @@ void uvg_chroma_transform_search( cu_loc, COLOR_V, scan_order, - transforms[i] == CHROMA_TS); + transforms[i] == CHROMA_TS, + COEFF_ORDER_LINEAR); } if((depth == 4 || tree_type == UVG_CHROMA_T) && state->encoder_control->cfg.lfnst && 0) { if(uvg_is_lfnst_allowed(state, pred_cu, width, height, 0, 0 , UVG_CHROMA_T, COLOR_UV, lcu)) { diff --git a/src/transform.h b/src/transform.h index 69a9450f..78a2325a 100644 --- a/src/transform.h +++ b/src/transform.h @@ -47,6 +47,9 @@ extern const uint8_t uvg_g_chroma_scale[58]; extern const int16_t uvg_g_inv_quant_scales[6]; extern const int16_t uvg_g_quant_scales[6]; +#define COEFF_ORDER_LINEAR 0 +#define COEFF_ORDER_CU 1 + void uvg_transformskip(const encoder_control_t *encoder, int16_t *block,int16_t *coeff, int8_t width, int8_t height); void uvg_itransformskip(const encoder_control_t *encoder, int16_t *block,int16_t *coeff, int8_t width, int8_t height);