diff --git a/src/cu.c b/src/cu.c index 3a0f03fa..aedf341c 100644 --- a/src/cu.c +++ b/src/cu.c @@ -306,6 +306,8 @@ void uvg_cu_loc_ctor(cu_loc_t* loc, int x, int y, int width, int height) loc->x = x; loc->y = y; + loc->local_x = x % LCU_WIDTH; + loc->local_y = y % LCU_WIDTH; loc->width = width; loc->height = height; // TODO: when MTT is implemented, chroma dimensions can be minimum 2. diff --git a/src/cu.h b/src/cu.h index 1d49d347..dfad7861 100644 --- a/src/cu.h +++ b/src/cu.h @@ -119,7 +119,7 @@ typedef struct { uint8_t type : 3; //!< \brief block type, one of cu_type_t values uint8_t depth : 3; //!< \brief depth / size of this block - uint8_t tr_depth : 3; //!< \brief transform depth + uint8_t tr_depth ; //!< \brief transform depth uint8_t skipped : 1; //!< \brief flag to indicate this block is skipped uint8_t merged : 1; //!< \brief flag to indicate this block is merged uint8_t merge_idx : 3; //!< \brief merge index @@ -129,6 +129,8 @@ typedef struct uint16_t cbf; + uint32_t split_tree : 3 * 9; + /** * \brief QP used for the CU. * @@ -170,6 +172,8 @@ typedef struct typedef struct { int16_t x; int16_t y; + uint8_t local_x; + uint8_t local_y; int8_t width; int8_t height; int8_t chroma_width; diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index 6f6fc9d8..fcb6d308 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -660,7 +660,7 @@ static void encode_transform_coeff( bool last_split, bool can_skip_last_cbf, int *luma_cbf_ctx, // Always true except when writing sub partition coeffs (ISP) - cu_loc_t *original_loc) // Original dimensions before ISP split + const cu_loc_t * const original_loc) // Original dimensions before ISP split { cabac_data_t * const cabac = &state->cabac; int x = cu_loc->x; @@ -829,7 +829,6 @@ int uvg_encode_inter_prediction_unit( encoder_state_t * const state, cabac_data_t * const cabac, const cu_info_t * const cur_cu, - int depth, lcu_t* lcu, double* bits_out, const cu_loc_t* const cu_loc) @@ -867,7 +866,7 @@ int uvg_encode_inter_prediction_unit( // Code Inter Dir uint8_t inter_dir = cur_cu->inter.mv_dir; - if ((LCU_WIDTH >> depth) != 4) { // ToDo: limit on 4x8/8x4 + if (cu_loc->width + cu_loc->height > 12) { // ToDo: limit on 4x8/8x4 uint32_t inter_dir_ctx = (7 - ((uvg_math_floor_log2(cu_loc->width) + uvg_math_floor_log2(cu_loc->height) + 1) >> 1)); CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.inter_dir[inter_dir_ctx]), (inter_dir == 3), bits, "inter_pred_idc"); @@ -1038,10 +1037,13 @@ static void encode_chroma_intra_cu( else if (cabac->only_count && bits_out)*bits_out += bits; } -void uvg_encode_intra_luma_coding_unit(const encoder_state_t * const state, - cabac_data_t * const cabac, - const cu_info_t * const cur_cu, - int x, int y, int depth, const lcu_t* lcu, double* bits_out) +void uvg_encode_intra_luma_coding_unit( + const encoder_state_t * const state, + cabac_data_t * const cabac, + const cu_info_t * const cur_cu, + const cu_loc_t* const cu_loc, + const lcu_t* lcu, + double* bits_out) { const videoframe_t * const frame = state->tile->frame; uint8_t intra_pred_mode_actual; @@ -1053,6 +1055,9 @@ void uvg_encode_intra_luma_coding_unit(const encoder_state_t * const state, uint32_t flag; double bits = 0; + const int x = cu_loc->x; + const int y = cu_loc->y; + /* if ((cur_cu->type == CU_INTRA && (LCU_WIDTH >> cur_cu->depth <= 32))) { cabac->cur_ctx = &(cabac->ctx.bdpcm_mode[0]); @@ -1076,8 +1081,8 @@ void uvg_encode_intra_luma_coding_unit(const encoder_state_t * const state, } */ - uint32_t width = (LCU_WIDTH >> depth); - uint32_t height = (LCU_WIDTH >> depth); // TODO: height for non-square blocks + uint32_t width = cu_loc->width; + uint32_t height = cu_loc->height; // TODO: height for non-square blocks // Code MIP related bits bool enable_mip = state->encoder_control->cfg.mip; @@ -1102,9 +1107,7 @@ void uvg_encode_intra_luma_coding_unit(const encoder_state_t * const state, } if (cur_cu->type == CU_INTRA && !cur_cu->bdpcmMode && enable_mip) { - const int cu_width = LCU_WIDTH >> depth; - const int cu_height = cu_width; // TODO: height for non-square blocks - uint8_t ctx_id = uvg_get_mip_flag_context(x, y, cu_width, cu_height, lcu, lcu ? NULL : frame->cu_array); + uint8_t ctx_id = uvg_get_mip_flag_context(cu_loc, lcu, lcu ? NULL : frame->cu_array); // Write MIP flag CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.mip_flag[ctx_id]), mip_flag, bits, "mip_flag"); @@ -1149,8 +1152,7 @@ void uvg_encode_intra_luma_coding_unit(const encoder_state_t * const state, CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.intra_subpart_model[1]), isp_mode - 1, bits, "intra_subpartitions_split_type"); // Vertical or horizontal split } } - - const int cu_width = LCU_WIDTH >> depth; + // PREDINFO CODING // If intra prediction mode is found from the predictors, // it can be signaled with two EP's. Otherwise we can send @@ -1165,7 +1167,7 @@ void uvg_encode_intra_luma_coding_unit(const encoder_state_t * const state, if (x > 0) { assert(x >> 2 > 0); const int x_scu = SUB_SCU(x) - 1; - const int y_scu = SUB_SCU(y + cu_width - 1); + const int y_scu = SUB_SCU(y + height - 1); left_pu = lcu ? LCU_GET_CU_AT_PX( lcu, @@ -1174,7 +1176,7 @@ void uvg_encode_intra_luma_coding_unit(const encoder_state_t * const state, uvg_cu_array_at_const( frame->cu_array, x - 1, - y + cu_width - 1); + y + height - 1); } // Don't take the above PU across the LCU boundary. if (y % LCU_WIDTH > 0 && y > 0) { @@ -1182,11 +1184,11 @@ void uvg_encode_intra_luma_coding_unit(const encoder_state_t * const state, above_pu = lcu ? LCU_GET_CU_AT_PX( lcu, - SUB_SCU(x + cu_width - 1), + SUB_SCU(x + width - 1), SUB_SCU(y) - 1) : uvg_cu_array_at_const( frame->cu_array, - x + cu_width - 1, + x + width - 1, y - 1); } @@ -1405,28 +1407,25 @@ bool uvg_write_split_flag( void uvg_encode_coding_tree( encoder_state_t * const state, - uint16_t x, - uint16_t y, - uint8_t depth, lcu_coeff_t *coeff, - enum uvg_tree_type tree_type) + enum uvg_tree_type tree_type, + const cu_loc_t* const cu_loc, + const split_tree_t split_tree) { cabac_data_t * const cabac = &state->cabac; const encoder_control_t * const ctrl = state->encoder_control; const videoframe_t * const frame = state->tile->frame; const cu_array_t* used_array = tree_type != UVG_CHROMA_T ? frame->cu_array : frame->chroma_cu_array; - const cu_info_t *cur_cu = uvg_cu_array_at_const(used_array, x, y); - - const int width = LCU_WIDTH >> depth; - const int height = width; // TODO: height for non-square blocks - cu_loc_t cu_loc; - uvg_cu_loc_ctor(&cu_loc, x, y, width, height); - - const int cu_width = tree_type != UVG_CHROMA_T ? cu_loc.width : cu_loc.chroma_width; - const int cu_height = tree_type != UVG_CHROMA_T ? cu_loc.height : cu_loc.chroma_height; + const cu_info_t *cur_cu = uvg_cu_array_at_const(used_array, cu_loc->x, cu_loc->y); + + const int cu_width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width; + const int cu_height = tree_type != UVG_CHROMA_T ? cu_loc->height : cu_loc->chroma_height; const int half_cu = cu_width >> 1; - + const int x = cu_loc->x; + const int y = cu_loc->y; + + const int depth = split_tree.current_depth; const cu_info_t *left_cu = NULL; if (x > 0) { @@ -1458,33 +1457,33 @@ void uvg_encode_coding_tree( // When not in MAX_DEPTH, insert split flag and split the blocks if needed if (depth != MAX_DEPTH && !(tree_type == UVG_CHROMA_T && depth == MAX_DEPTH -1)) { - const int split_flag = uvg_write_split_flag(state, cabac, left_cu, above_cu, GET_SPLITDATA(cur_cu, depth), depth, cu_width, x, y, tree_type,NULL); + const int split_flag = uvg_write_split_flag(state, cabac, left_cu, above_cu, (cur_cu->split_tree >> (split_tree.current_depth * 3)) & 7, depth, cu_width, x, y, tree_type,NULL); if (split_flag || border) { + const int half_luma = cu_loc->width / 2; + split_tree_t new_split_tree = { cur_cu->split_tree, split_tree.current_depth + 1 }; + + cu_loc_t new_cu_loc; + uvg_cu_loc_ctor(&new_cu_loc, x, y, half_luma, half_luma); // Split blocks and remember to change x and y block positions - uvg_encode_coding_tree(state, x, y, depth + 1, coeff, tree_type); + uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc, new_split_tree); if (!border_x || border_split_x) { - uvg_encode_coding_tree(state, x + half_cu, y, depth + 1, coeff, tree_type); + uvg_cu_loc_ctor(&new_cu_loc, x + half_cu, y, half_luma, half_luma); + uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc, new_split_tree); } if (!border_y || border_split_y) { - uvg_encode_coding_tree(state, x, y + half_cu, depth + 1, coeff, tree_type); + uvg_cu_loc_ctor(&new_cu_loc, x, y + half_cu, half_luma, half_luma); + uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc, new_split_tree); } if (!border || (border_split_x && border_split_y)) { - uvg_encode_coding_tree(state, x + half_cu, y + half_cu, depth + 1, coeff, tree_type); + uvg_cu_loc_ctor(&new_cu_loc, x + half_cu, y + half_cu, half_luma, half_luma); + uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc, new_split_tree); } return; } } - - //ToDo: check if we can actually split - //ToDo: Implement MT split - if (depth < MAX_PU_DEPTH) - { - // cabac->cur_ctx = &(cabac->ctx.trans_subdiv_model[5 - ((uvg_g_convert_to_bit[LCU_WIDTH] + 2) - depth)]); - // CABAC_BIN(cabac, 0, "split_transform_flag"); - } - + DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_CU_TYPE, abs_x, abs_y, cu_width, cu_width, cur_cu->type-1); if (ctrl->cfg.lossless) { @@ -1519,8 +1518,8 @@ void uvg_encode_coding_tree( cabac->cur_ctx = &(cabac->ctx.ibc_flag[ctx_ibc]); CABAC_BIN(cabac, (cur_cu->type == CU_IBC), "IBCFlag"); } - DBG_PRINT_MV(state, x, y, (uint32_t)cu_width, (uint32_t)cu_width, cur_cu); - uvg_hmvp_add_mv(state, x, y, (uint32_t)cu_width, (uint32_t)cu_width, cur_cu); + DBG_PRINT_MV(state, x, y, (uint32_t)cu_width, (uint32_t)cu_height, cur_cu); + uvg_hmvp_add_mv(state, x, y, cu_width, cu_height, cur_cu); int16_t num_cand = state->encoder_control->cfg.max_merge; if (num_cand > 1) { for (int ui = 0; ui < num_cand - 1; ui++) { @@ -1555,7 +1554,7 @@ void uvg_encode_coding_tree( CABAC_BIN(cabac, (cur_cu->type == CU_IBC), "IBCFlag"); } - if (state->frame->slicetype != UVG_SLICE_I && cu_width != 4) { + if (state->frame->slicetype != UVG_SLICE_I && cu_width != 4 && cu_height != 4) { int8_t ctx_predmode = 0; @@ -1629,11 +1628,11 @@ void uvg_encode_coding_tree( bool non_zero_mvd = false; // TODO: height for non-square blocks - const cu_info_t *cur_pu = uvg_cu_array_at_const(used_array, cu_loc.x, cu_loc.y); + const cu_info_t *cur_pu = uvg_cu_array_at_const(used_array, cu_loc->x, cu_loc->y); - non_zero_mvd |= uvg_encode_inter_prediction_unit(state, cabac, cur_pu, depth, NULL, NULL, &cu_loc); - DBG_PRINT_MV(state, pu_x, pu_y, pu_w, pu_h, cur_pu); - uvg_hmvp_add_mv(state, x, y, width, height, cur_pu); + non_zero_mvd |= uvg_encode_inter_prediction_unit(state, cabac, cur_pu, NULL, NULL, cu_loc); + DBG_PRINT_MV(state, x, y, cu_width, cu_height, cur_pu); + uvg_hmvp_add_mv(state, x, y, cu_width, cu_height, cur_pu); // imv mode, select between fullpel, half-pel and quarter-pel resolutions @@ -1662,7 +1661,7 @@ void uvg_encode_coding_tree( // Code (possible) coeffs to bitstream if (cbf) { int luma_cbf_ctx = 0; - encode_transform_coeff(state, &cu_loc, depth, 0, 0, 0, 0, coeff, tree_type, true, false, &luma_cbf_ctx, &cu_loc); + encode_transform_coeff(state, &cu_loc, depth, 0, 0, 0, 0, coeff, tree_type, true, false, &luma_cbf_ctx, cu_loc); } encode_mts_idx(state, cabac, cur_cu); @@ -1670,7 +1669,7 @@ void uvg_encode_coding_tree( } } else if (cur_cu->type == CU_INTRA) { if(tree_type != UVG_CHROMA_T) { - uvg_encode_intra_luma_coding_unit(state, cabac, cur_cu, x, y, depth, NULL, NULL); + uvg_encode_intra_luma_coding_unit(state, cabac, cur_cu, cu_loc, NULL, NULL); } // Code chroma prediction mode. @@ -1694,7 +1693,7 @@ void uvg_encode_coding_tree( // Check if last split to write chroma bool last_split = (i + 1) == split_limit; - encode_transform_coeff(state, &split_loc, depth, 0, 0, 0, 0, coeff, tree_type, last_split, can_skip_last_cbf, &luma_cbf_ctx, &cu_loc); + encode_transform_coeff(state, &split_loc, depth, 0, 0, 0, 0, coeff, tree_type, last_split, can_skip_last_cbf, &luma_cbf_ctx, cu_loc); can_skip_last_cbf &= luma_cbf_ctx == 2; } } @@ -1714,7 +1713,7 @@ void uvg_encode_coding_tree( tmp->violates_lfnst_constrained_luma = false; tmp->violates_lfnst_constrained_chroma = false; tmp->lfnst_last_scan_pos = false; - encode_transform_coeff(state, &cu_loc, depth, 0, 0, 0, 1, coeff, tree_type, true, false, &luma_cbf_ctx, &cu_loc); + encode_transform_coeff(state, &cu_loc, depth, 0, 0, 0, 1, coeff, tree_type, true, false, &luma_cbf_ctx, cu_loc); // Write LFNST only once for single tree structure encode_lfnst_idx(state, cabac, tmp, x, y, depth, cu_width, cu_height, tree_type, COLOR_UV); } @@ -1843,7 +1842,7 @@ double uvg_mock_encode_coding_unit( if (cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) { const uint8_t imv_mode = UVG_IMV_OFF; - const int non_zero_mvd = uvg_encode_inter_prediction_unit(state, cabac, cur_cu, depth, lcu, &bits, cu_loc); + const int non_zero_mvd = uvg_encode_inter_prediction_unit(state, cabac, cur_cu, lcu, &bits, cu_loc); if (ctrl->cfg.amvr && non_zero_mvd) { CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.imv_flag[0]), imv_mode, bits, "imv_flag"); if (imv_mode > UVG_IMV_OFF) { @@ -1856,7 +1855,7 @@ double uvg_mock_encode_coding_unit( } else if (cur_cu->type == CU_INTRA) { if(tree_type != UVG_CHROMA_T) { - uvg_encode_intra_luma_coding_unit(state, cabac, cur_cu, x, y, depth, lcu, &bits); + uvg_encode_intra_luma_coding_unit(state, cabac, cur_cu, cu_loc, lcu, &bits); } if((depth != 4 || (x % 8 != 0 && y % 8 != 0)) && state->encoder_control->chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) { encode_chroma_intra_cu(cabac, cur_cu, state->encoder_control->cfg.cclm, &bits); diff --git a/src/encode_coding_tree.h b/src/encode_coding_tree.h index 231e22ff..6c0c2cd1 100644 --- a/src/encode_coding_tree.h +++ b/src/encode_coding_tree.h @@ -54,11 +54,10 @@ bool uvg_is_lfnst_allowed( void uvg_encode_coding_tree( encoder_state_t * const state, - uint16_t x_ctb, - uint16_t y_ctb, - uint8_t depth, lcu_coeff_t *coeff, - enum uvg_tree_type tree_type); + enum uvg_tree_type tree_type, + const cu_loc_t* const cu_loc, + const split_tree_t split_tree); void uvg_encode_ts_residual(encoder_state_t* const state, cabac_data_t* const cabac, @@ -87,15 +86,17 @@ int uvg_encode_inter_prediction_unit( encoder_state_t* const state, cabac_data_t* const cabac, const cu_info_t* const cur_cu, - int depth, lcu_t* lcu, double* bits_out, const cu_loc_t* const cu_loc); -void uvg_encode_intra_luma_coding_unit(const encoder_state_t* const state, +void uvg_encode_intra_luma_coding_unit( + const encoder_state_t* const state, cabac_data_t* const cabac, const cu_info_t* const cur_cu, - int x, int y, int depth, const lcu_t* lcu, double* bits_out); + const cu_loc_t* const cu_loc, + const lcu_t* lcu, + double* bits_out); bool uvg_write_split_flag( diff --git a/src/encoderstate.c b/src/encoderstate.c index cdadccf4..dc3416e3 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -870,10 +870,15 @@ static void encoder_state_worker_encode_lcu_bitstream(void * opaque) enum uvg_tree_type tree_type = state->frame->slicetype == UVG_SLICE_I && state->encoder_control->cfg.dual_tree ? UVG_LUMA_T : UVG_BOTH_T; //Encode coding tree - uvg_encode_coding_tree(state, lcu->position.x * LCU_WIDTH, lcu->position.y * LCU_WIDTH, 0, lcu->coeff, tree_type); + cu_loc_t start; + uvg_cu_loc_ctor(&start, lcu->position.x * LCU_WIDTH, lcu->position.y * LCU_WIDTH, LCU_WIDTH, LCU_WIDTH); + split_tree_t split_tree = { 0, 0 }; + + uvg_encode_coding_tree(state, lcu->coeff, tree_type, &start, split_tree); if(tree_type == UVG_LUMA_T && state->encoder_control->chroma_format != UVG_CSP_400) { - uvg_encode_coding_tree(state, lcu->position.x * LCU_WIDTH_C, lcu->position.y * LCU_WIDTH_C, 0, lcu->coeff, UVG_CHROMA_T); + uvg_cu_loc_ctor(&start, lcu->position.x * LCU_WIDTH_C, lcu->position.y * LCU_WIDTH_C, LCU_WIDTH, LCU_WIDTH); + uvg_encode_coding_tree(state, lcu->coeff, UVG_CHROMA_T, &start, split_tree); } if (!state->cabac.only_count) { diff --git a/src/intra.c b/src/intra.c index 75f0c3a4..764ac072 100644 --- a/src/intra.c +++ b/src/intra.c @@ -585,12 +585,18 @@ static void predict_cclm( } -int uvg_get_mip_flag_context(int x, int y, int width, int height, const lcu_t* lcu, cu_array_t* const cu_a) { +uint8_t uvg_get_mip_flag_context( + const cu_loc_t* const cu_loc, + const lcu_t* lcu, + cu_array_t* const cu_a) { assert(!(lcu && cu_a)); - if (width > 2 * height || height > 2 * width) { + if (cu_loc->width > 2 * cu_loc->height || cu_loc->height > 2 * cu_loc->width) { return 3; } - + + const int x = cu_loc->x; + const int y = cu_loc->y; + int context = 0; const cu_info_t* left = NULL; const cu_info_t* top = NULL; @@ -1761,26 +1767,26 @@ static void intra_recon_tb_leaf( */ void uvg_intra_recon_cu( encoder_state_t* const state, - int x, - int y, - int depth, intra_search_data_t* search_data, + const cu_loc_t* cu_loc, cu_info_t *cur_cu, lcu_t *lcu, enum uvg_tree_type tree_type, bool recon_luma, bool recon_chroma) { - const vector2d_t lcu_px = { SUB_SCU(x) >> (tree_type == UVG_CHROMA_T), SUB_SCU(y) >> (tree_type == UVG_CHROMA_T) }; - const int8_t width = LCU_WIDTH >> depth; - const int8_t height = width; // TODO: height for non-square blocks. + const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width]; + const vector2d_t lcu_px = { cu_loc->local_x >> (tree_type == UVG_CHROMA_T), cu_loc->local_y >> (tree_type == UVG_CHROMA_T) }; + const int8_t width = cu_loc->width; + const int8_t height = cu_loc->height; // TODO: height for non-square blocks. if (cur_cu == NULL) { cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); } + cu_loc_t chroma_cu_loc; if(!recon_luma && recon_chroma) { - x &= ~7; - y &= ~7; + uvg_cu_loc_ctor(&chroma_cu_loc, cu_loc->x & ~7, cu_loc->y & ~7, width, height); + cu_loc = &chroma_cu_loc; } // Reset CBFs because CBFs might have been set @@ -1793,22 +1799,25 @@ void uvg_intra_recon_cu( cbf_clear(&cur_cu->cbf, depth, COLOR_V); } - if (depth == 0 || cur_cu->tr_depth > depth) { + if (width > TR_MAX_WIDTH || height > TR_MAX_WIDTH) { + cu_loc_t split_cu_loc; - const int offset = width / 2; - const int32_t x2 = x + offset; - const int32_t y2 = y + offset; - - uvg_intra_recon_cu(state, x, y, depth + 1, search_data, NULL, lcu, tree_type, recon_luma, recon_chroma); - uvg_intra_recon_cu(state, x2, y, depth + 1, search_data, NULL, lcu, tree_type, recon_luma, recon_chroma); - uvg_intra_recon_cu(state, x, y2, depth + 1, search_data, NULL, lcu, tree_type, recon_luma, recon_chroma); - uvg_intra_recon_cu(state, x2, y2, depth + 1, search_data, NULL, lcu, tree_type, recon_luma, recon_chroma); + const int half_width = width / 2; + const int half_height = height / 2; + uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y, half_width, half_height); + uvg_intra_recon_cu(state, search_data, &split_cu_loc, NULL, lcu, tree_type, recon_luma, recon_chroma); + uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y, half_width, half_height); + uvg_intra_recon_cu(state, search_data, &split_cu_loc, NULL, lcu, tree_type, recon_luma, recon_chroma); + uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y + half_height, half_width, half_height); + uvg_intra_recon_cu(state, search_data, &split_cu_loc, NULL, lcu, tree_type, recon_luma, recon_chroma); + uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y + half_height, half_width, half_height); + uvg_intra_recon_cu(state, search_data, &split_cu_loc, NULL, lcu, tree_type, recon_luma, recon_chroma); // Propagate coded block flags from child CUs to parent CU. uint16_t child_cbfs[3] = { - LCU_GET_CU_AT_PX(lcu, (lcu_px.x + offset) >> (tree_type == UVG_CHROMA_T), lcu_px.y >> (tree_type == UVG_CHROMA_T))->cbf, - LCU_GET_CU_AT_PX(lcu, lcu_px.x >> (tree_type == UVG_CHROMA_T), (lcu_px.y + offset) >> (tree_type == UVG_CHROMA_T))->cbf, - LCU_GET_CU_AT_PX(lcu, (lcu_px.x + offset) >> (tree_type == UVG_CHROMA_T), (lcu_px.y + offset) >> (tree_type == UVG_CHROMA_T))->cbf, + LCU_GET_CU_AT_PX(lcu, (lcu_px.x + half_width) >> (tree_type == UVG_CHROMA_T), lcu_px.y >> (tree_type == UVG_CHROMA_T))->cbf, + LCU_GET_CU_AT_PX(lcu, lcu_px.x >> (tree_type == UVG_CHROMA_T), (lcu_px.y + half_height) >> (tree_type == UVG_CHROMA_T))->cbf, + LCU_GET_CU_AT_PX(lcu, (lcu_px.x + half_width) >> (tree_type == UVG_CHROMA_T), (lcu_px.y + half_height) >> (tree_type == UVG_CHROMA_T))->cbf, }; if (recon_luma && depth <= MAX_DEPTH) { @@ -1826,8 +1835,6 @@ void uvg_intra_recon_cu( // Small blocks are split only twice. int split_type = search_data->pred_cu.intra.isp_mode; int split_limit = uvg_get_isp_split_num(width, height, split_type, true); - cu_loc_t origin_cu; - uvg_cu_loc_ctor(&origin_cu, x, y, width, height); for (int i = 0; i < split_limit; ++i) { cu_loc_t tu_loc; @@ -1845,24 +1852,21 @@ void uvg_intra_recon_cu( } } const bool has_luma = recon_luma && search_data->pred_cu.intra.isp_mode == ISP_MODE_NO_ISP; - const bool has_chroma = recon_chroma && (x % 8 == 0 && y % 8 == 0); - - cu_loc_t loc; - uvg_cu_loc_ctor(&loc, x, y, width, height); - + const bool has_chroma = recon_chroma && (cu_loc->x % 8 == 0 && cu_loc->y % 8 == 0); + // Process a leaf TU. if (has_luma) { - intra_recon_tb_leaf(state, &loc, &loc, lcu, COLOR_Y, search_data, tree_type); + intra_recon_tb_leaf(state, cu_loc, cu_loc, lcu, COLOR_Y, search_data, tree_type); } if (has_chroma) { - intra_recon_tb_leaf(state, &loc, &loc, lcu, COLOR_U, search_data, tree_type); - intra_recon_tb_leaf(state, &loc, &loc, lcu, COLOR_V, search_data, tree_type); + intra_recon_tb_leaf(state, cu_loc, cu_loc, lcu, COLOR_U, search_data, tree_type); + intra_recon_tb_leaf(state, cu_loc, cu_loc, lcu, COLOR_V, search_data, tree_type); } // TODO: not necessary to call if only luma and ISP is on uvg_quantize_lcu_residual(state, has_luma, has_chroma && !(search_data->pred_cu.joint_cb_cr & 3), search_data->pred_cu.joint_cb_cr & 3 && state->encoder_control->cfg.jccr && has_chroma, - &loc, depth, cur_cu, lcu, + cu_loc, depth, cur_cu, lcu, false, tree_type); } diff --git a/src/intra.h b/src/intra.h index c4bdc87e..deeb173d 100644 --- a/src/intra.h +++ b/src/intra.h @@ -142,10 +142,8 @@ void uvg_intra_predict( void uvg_intra_recon_cu( encoder_state_t* const state, - int x, - int y, - int depth, intra_search_data_t* search_data, + const cu_loc_t* cu_loc, cu_info_t *cur_cu, lcu_t *lcu, enum uvg_tree_type tree_type, @@ -161,7 +159,10 @@ const cu_info_t* uvg_get_co_located_luma_cu( const cu_array_t* const cu_array, enum uvg_tree_type tree_type); -int uvg_get_mip_flag_context(int x, int y, int width, int height, const lcu_t* lcu, cu_array_t* const cu_a); +uint8_t uvg_get_mip_flag_context( + const cu_loc_t* const cu_loc, + const lcu_t* lcu, + cu_array_t* const cu_a); // ISP related defines #define NUM_ISP_MODES 3 diff --git a/src/search.c b/src/search.c index d61be039..1d992077 100644 --- a/src/search.c +++ b/src/search.c @@ -761,16 +761,17 @@ static double cu_rd_cost_tr_split_accurate( // Return estimate of bits used to code prediction mode of cur_cu. -static double calc_mode_bits(const encoder_state_t *state, - const lcu_t *lcu, - const cu_info_t * cur_cu, - int x, int y, int depth) +static double calc_mode_bits( + const encoder_state_t *state, + const lcu_t *lcu, + const cu_info_t * cur_cu, + const cu_loc_t* const cu_loc) { assert(cur_cu->type == CU_INTRA); - double mode_bits = uvg_luma_mode_bits(state, cur_cu, x, y, depth, lcu); + double mode_bits = uvg_luma_mode_bits(state, cur_cu, cu_loc, lcu); - if (((depth == 4 && x % 8 && y % 8) || (depth != 4)) && state->encoder_control->chroma_format != UVG_CSP_400) { + if (((cu_loc->width == 4 && cu_loc->x % 8 && cu_loc->y % 8) || (cu_loc->width != 4)) && state->encoder_control->chroma_format != UVG_CSP_400) { mode_bits += uvg_chroma_mode_bits(state, cur_cu->intra.mode_chroma, cur_cu->intra.mode); } @@ -945,6 +946,7 @@ static double search_cu( cur_cu->lfnst_last_scan_pos = 0; cur_cu->lfnst_idx = 0; cur_cu->joint_cb_cr = 0; + cur_cu->split_tree = split_tree.split_tree; // If the CU is completely inside the frame at this depth, search for // prediction modes at this depth. @@ -1001,9 +1003,7 @@ static double search_cu( intra_search.pred_cu = *cur_cu; if(tree_type != UVG_CHROMA_T) { intra_search.pred_cu.joint_cb_cr = 4; - uvg_search_cu_intra(state, x, y, depth, &intra_search, - lcu, - tree_type); + uvg_search_cu_intra(state, &intra_search, lcu, tree_type, cu_loc); } #ifdef COMPLETE_PRED_MODE_BITS // Technically counting these bits would be correct, however counting @@ -1017,10 +1017,11 @@ static double search_cu( #endif if (state->encoder_control->cfg.cclm && tree_type != UVG_CHROMA_T && state->encoder_control->chroma_format != UVG_CSP_400) { uvg_intra_recon_cu(state, - x, y, - depth, &intra_search, - &intra_search.pred_cu, - lcu, tree_type, true, false); + &intra_search, cu_loc, + &intra_search.pred_cu, lcu, + tree_type, + true, + false); downsample_cclm_rec( state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64] @@ -1058,14 +1059,13 @@ static double search_cu( else { intra_search.pred_cu.intra.mode_chroma = 0; } - - if(tree_type != UVG_CHROMA_T && ctrl->cfg.rdo >= 2) { - uvg_intra_recon_cu(state, - x, y, - depth, &intra_search, - &intra_search.pred_cu, - lcu, - tree_type, false, true); + uvg_intra_recon_cu(state, + &intra_search, cu_loc, + &intra_search.pred_cu, lcu, + tree_type, + false, + true); + if(tree_type != UVG_CHROMA_T) { intra_cost += uvg_cu_rd_cost_chroma(state, x_local, y_local, depth, &intra_search.pred_cu, lcu); } else { @@ -1128,20 +1128,20 @@ static double search_cu( } lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); uvg_intra_recon_cu(state, - x, y, - depth, &intra_search, - NULL, - lcu, tree_type,recon_luma,recon_chroma); + &intra_search, cu_loc, + NULL, lcu, + tree_type, + recon_luma, recon_chroma); if(split_tree.current_depth == 4 && x % 8 && y % 8 && tree_type != UVG_LUMA_T && state->encoder_control->chroma_format != UVG_CSP_400) { intra_search.pred_cu.intra.mode_chroma = cur_cu->intra.mode_chroma; uvg_intra_recon_cu(state, - x, y, - depth, &intra_search, - NULL, - lcu, - tree_type,false,true); + &intra_search, cu_loc, + NULL, lcu, + tree_type, + false, + true); } if (cur_cu->joint_cb_cr == 4) cur_cu->joint_cb_cr = 0; @@ -1334,7 +1334,7 @@ static double search_cu( // It is ok to interrupt the search as soon as it is known that // the split costs at least as much as not splitting. if (cur_cu->type == CU_NOTSET || cbf || state->encoder_control->cfg.cu_split_termination == UVG_CU_SPLIT_TERMINATION_OFF) { - const split_tree_t new_split = { split_tree.split_tree | QT_SPLIT << split_tree.current_depth, split_tree.current_depth + 1}; + const split_tree_t new_split = { split_tree.split_tree | QT_SPLIT << (split_tree.current_depth * 3), split_tree.current_depth + 1}; cu_loc_t new_cu_loc; if (split_cost < cost) { uvg_cu_loc_ctor(&new_cu_loc, x, y, half_cu, half_cu); @@ -1399,14 +1399,14 @@ static double search_cu( proxy.pred_cu = *cur_cu; uvg_intra_recon_cu(state, - x, y, - depth, - &proxy, + &proxy, cu_loc, NULL, lcu, - tree_type, true, state->encoder_control->chroma_format == UVG_CSP_400); + tree_type, + true, + state->encoder_control->chroma_format == UVG_CSP_400); - double mode_bits = calc_mode_bits(state, lcu, cur_cu, x, y, depth) + bits; + double mode_bits = calc_mode_bits(state, lcu, cur_cu, cu_loc) + bits; cost += mode_bits * state->lambda; cost += cu_rd_cost_tr_split_accurate(state, x_local, y_local, depth, cur_cu, lcu, tree_type, 0); diff --git a/src/search_intra.c b/src/search_intra.c index 67424bbf..10c6657d 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -265,23 +265,21 @@ static void derive_mts_constraints(cu_info_t *const pred_cu, */ static double search_intra_trdepth( encoder_state_t * const state, - int x_px, - int y_px, - int depth, + const cu_loc_t* const cu_loc, int max_depth, double cost_treshold, intra_search_data_t *const search_data, lcu_t *const lcu, enum uvg_tree_type tree_type) { - assert(depth >= 0 && depth <= MAX_PU_DEPTH); - const int width = LCU_WIDTH >> depth; - const int height = width; // TODO: height for non-square blocks - const int width_c = width > TR_MIN_WIDTH ? width / 2 : width; - - const int offset = width / 2; - const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) }; + const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width]; + const uint8_t width = cu_loc->width; + const uint8_t height = cu_loc->height; // TODO: height for non-square blocks + const uint8_t width_c = cu_loc->chroma_width; + const uint8_t height_c = cu_loc->chroma_height; + + const vector2d_t lcu_px = { cu_loc->local_x, cu_loc->local_y }; const bool reconstruct_chroma = false;// (depth != 4 || (depth == 4 && (x_px & 4 && y_px & 4))) && state->encoder_control->chroma_format != UVG_CSP_400; cu_info_t* pred_cu = &search_data->pred_cu; @@ -297,7 +295,7 @@ static double search_intra_trdepth( double split_cost = INT32_MAX; double nosplit_cost = INT32_MAX; - if (depth > 0) { + if (width <= TR_MAX_WIDTH && height <= TR_MAX_WIDTH) { tr_cu->tr_depth = depth; pred_cu->tr_depth = depth; @@ -389,15 +387,14 @@ static double search_intra_trdepth( uvg_intra_recon_cu( state, - x_px, - y_px, - depth, search_data, + cu_loc, pred_cu, lcu, UVG_LUMA_T, true, - false); + false + ); if (pred_cu->intra.isp_mode != ISP_MODE_NO_ISP && search_data->best_isp_cbfs == 0) continue; if (trafo != 0 && !cbf_is_set(pred_cu->cbf, depth, COLOR_Y)) continue; @@ -418,7 +415,6 @@ static double search_intra_trdepth( if (trafo != MTS_SKIP && end_idx != 0) { uvg_derive_lfnst_constraints( pred_cu, - depth, constraints, lcu->coeff.y, width, @@ -496,15 +492,14 @@ static double search_intra_trdepth( // TODO: Maybe check the jccr mode here also but holy shit is the interface of search_intra_rdo bad currently uvg_intra_recon_cu( state, - x_px, - y_px, - depth, search_data, + cu_loc, pred_cu, lcu, UVG_BOTH_T, false, - true); + true + ); best_rd_cost += uvg_cu_rd_cost_chroma( state, lcu_px.x, @@ -521,11 +516,10 @@ static double search_intra_trdepth( pred_cu->lfnst_last_scan_pos}; uvg_derive_lfnst_constraints( pred_cu, - depth, constraints, lcu->coeff.u, width_c, - width_c, + height_c, &lcu_px, COLOR_U); if (constraints[0] || !constraints[1]) { @@ -534,11 +528,10 @@ static double search_intra_trdepth( } uvg_derive_lfnst_constraints( pred_cu, - depth, constraints, lcu->coeff.u, width_c, - width_c, + height_c, &lcu_px, COLOR_U); if (constraints[0] || !constraints[1]) { @@ -554,11 +547,11 @@ static double search_intra_trdepth( pred_cu->intra.mode_chroma = chroma_mode; pred_cu->joint_cb_cr= 4; // TODO: Maybe check the jccr mode here also but holy shit is the interface of search_intra_rdo bad currently uvg_intra_recon_cu(state, - x_px, y_px, - depth, search_data, - pred_cu, - lcu, - UVG_BOTH_T,false,true); + search_data, cu_loc, + pred_cu, lcu, + UVG_BOTH_T, + false, + true); best_rd_cost += uvg_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu); pred_cu->intra.mode = luma_mode; } @@ -610,17 +603,25 @@ static double search_intra_trdepth( // max_depth. // - Min transform size hasn't been reached (MAX_PU_DEPTH). if (depth < max_depth && depth < MAX_PU_DEPTH) { + cu_loc_t split_cu_loc; + + const int half_width = width / 2; + const int half_height = height / 2; split_cost = 0; - split_cost += search_intra_trdepth(state, x_px, y_px, depth + 1, max_depth, nosplit_cost, search_data, lcu, tree_type); + uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y, half_width, half_height); + split_cost += search_intra_trdepth(state, &split_cu_loc, max_depth, nosplit_cost, search_data, lcu, tree_type); if (split_cost < nosplit_cost) { - split_cost += search_intra_trdepth(state, x_px + offset, y_px, depth + 1, max_depth, nosplit_cost, search_data, lcu, tree_type); + uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y, half_width, half_height); + split_cost += search_intra_trdepth(state, &split_cu_loc, max_depth, nosplit_cost, search_data, lcu, tree_type); } if (split_cost < nosplit_cost) { - split_cost += search_intra_trdepth(state, x_px, y_px + offset, depth + 1, max_depth, nosplit_cost, search_data, lcu, tree_type); + uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y + half_height, half_width, half_height); + split_cost += search_intra_trdepth(state, &split_cu_loc, max_depth, nosplit_cost, search_data, lcu, tree_type); } if (split_cost < nosplit_cost) { - split_cost += search_intra_trdepth(state, x_px + offset, y_px + offset, depth + 1, max_depth, nosplit_cost, search_data, lcu, tree_type); + uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y + half_height, half_width, half_height); + split_cost += search_intra_trdepth(state, &split_cu_loc, max_depth, nosplit_cost, search_data, lcu, tree_type); } double cbf_bits = 0.0; @@ -654,7 +655,7 @@ static double search_intra_trdepth( if (depth == 0 || split_cost < nosplit_cost) { return split_cost; } else { - uvg_lcu_fill_trdepth(lcu, x_px, y_px, depth, depth, tree_type); + uvg_lcu_fill_trdepth(lcu, cu_loc->x, cu_loc->y, depth, depth, tree_type); pred_cu->cbf = nosplit_cbf; @@ -1372,17 +1373,16 @@ static void get_rough_cost_for_2n_modes( */ static int8_t search_intra_rdo( encoder_state_t * const state, - int x_px, - int y_px, - int depth, int modes_to_check, intra_search_data_t *search_data, lcu_t *lcu, - enum uvg_tree_type tree_type) + enum uvg_tree_type tree_type, + const cu_loc_t* const cu_loc) { + const int8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width]; const int tr_depth = CLIP(1, MAX_PU_DEPTH, depth + state->encoder_control->cfg.tr_depth_intra); - const int width = LCU_WIDTH >> depth; - const int height = width; // TODO: height for non-square blocks + const int width = cu_loc->width; + const int height = cu_loc->height; // TODO: height for non-square blocks for (int mode = 0; mode < modes_to_check; mode++) { bool can_do_isp_search = search_data[mode].pred_cu.intra.mip_flag ? false : true; // Cannot use ISP with MIP @@ -1399,12 +1399,12 @@ static int8_t search_intra_rdo( search_data[mode].pred_cu.intra.isp_mode = isp_mode; - double rdo_bitcost = uvg_luma_mode_bits(state, &search_data[mode].pred_cu, x_px, y_px, depth, lcu); + double rdo_bitcost = uvg_luma_mode_bits(state, &search_data[mode].pred_cu, cu_loc, lcu); search_data[mode].pred_cu.tr_idx = MTS_TR_NUM; search_data[mode].bits = rdo_bitcost; search_data[mode].cost = rdo_bitcost * state->lambda; - double mode_cost = search_intra_trdepth(state, x_px, y_px, depth, tr_depth, MAX_INT, &search_data[mode], lcu, tree_type); + double mode_cost = search_intra_trdepth(state, cu_loc, tr_depth, MAX_INT, &search_data[mode], lcu, tree_type); best_mts_mode_for_isp[isp_mode] = search_data[mode].pred_cu.tr_idx; best_lfnst_mode_for_isp[isp_mode] = search_data[mode].pred_cu.lfnst_idx; search_data[mode].cost += mode_cost; @@ -1440,7 +1440,9 @@ static int8_t search_intra_rdo( } -double uvg_luma_mode_bits(const encoder_state_t *state, const cu_info_t* const cur_cu, int x, int y, int8_t depth, const lcu_t* lcu) +double uvg_luma_mode_bits(const encoder_state_t *state, const cu_info_t* const cur_cu, const cu_loc_t* + const cu_loc, + const lcu_t* lcu) { cabac_data_t* cabac = (cabac_data_t *)&state->search_cabac; double mode_bits = 0; @@ -1449,8 +1451,8 @@ double uvg_luma_mode_bits(const encoder_state_t *state, const cu_info_t* const c uvg_encode_intra_luma_coding_unit( state, &cabac_copy, cur_cu, - x, y, depth, lcu, &mode_bits - ); + cu_loc, lcu, &mode_bits + ); return mode_bits; } @@ -1651,11 +1653,11 @@ int8_t uvg_search_intra_chroma_rdo( state->search_cabac.update = 1; chroma_data[mode_i].cost = mode_bits * state->lambda; uvg_intra_recon_cu(state, - x_px, y_px, - depth, &chroma_data[mode_i], - pred_cu, - lcu, - tree_type, false, true); + &chroma_data[mode_i], &loc, + pred_cu, lcu, + tree_type, + false, + true); chroma_data[mode_i].cost += uvg_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu); memcpy(&state->search_cabac, &temp_cabac, sizeof(cabac_data_t)); } @@ -1829,19 +1831,15 @@ static int select_candidates_for_further_search(const encoder_state_t * const st */ void uvg_search_cu_intra( encoder_state_t * const state, - const int x_px, - const int y_px, - const int depth, intra_search_data_t* mode_out, lcu_t *lcu, - enum uvg_tree_type tree_type) + enum uvg_tree_type tree_type, + const cu_loc_t* const cu_loc) { - const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) }; - const int8_t cu_width = LCU_WIDTH >> depth; - const cu_loc_t cu_loc = { x_px, y_px, cu_width, cu_width, - MAX(cu_width >> 1, TR_MIN_WIDTH), MAX(cu_width >> 1, TR_MIN_WIDTH) }; - const int_fast8_t log2_width = LOG2_LCU_WIDTH - depth; - const vector2d_t luma_px = { x_px, y_px }; + const vector2d_t lcu_px = { cu_loc->local_x, cu_loc->local_y }; + const int8_t log2_width = uvg_g_convert_to_log2[cu_loc->width]; + const int8_t log2_height = uvg_g_convert_to_log2[cu_loc->width]; + const vector2d_t luma_px = { cu_loc->x, cu_loc->y}; const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height }; cu_info_t *cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); @@ -1857,25 +1855,22 @@ void uvg_search_cu_intra( // Select left and top CUs if they are available. // Top CU is not available across LCU boundary. - if (x_px >= SCU_WIDTH) { - left_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x - 1, lcu_px.y+ cu_width-1); + if (cu_loc->x >= SCU_WIDTH) { + left_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x - 1, lcu_px.y+ cu_loc->height-1); } - if (y_px >= SCU_WIDTH && lcu_px.y > 0) { - above_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x+ cu_width-1, lcu_px.y - 1); + if (cu_loc->y >= SCU_WIDTH && lcu_px.y > 0) { + above_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x+ cu_loc->width-1, lcu_px.y - 1); } - int8_t num_cand = uvg_intra_get_dir_luma_predictor(x_px, y_px, candidate_modes, cur_cu, left_cu, above_cu); + int8_t num_cand = uvg_intra_get_dir_luma_predictor(cu_loc->x, cu_loc->y, candidate_modes, cur_cu, left_cu, above_cu); - if (depth > 0) { - uvg_intra_build_reference(&cu_loc, &cu_loc, COLOR_Y, &luma_px, &pic_px, lcu, refs, state->encoder_control->cfg.wpp, NULL, 0, 0); + bool is_large = cu_loc->width > TR_MAX_WIDTH || cu_loc->height > TR_MAX_WIDTH; + if (!is_large) { + uvg_intra_build_reference(cu_loc, cu_loc, COLOR_Y, &luma_px, &pic_px, lcu, refs, state->encoder_control->cfg.wpp, NULL, 0, 0); } - - // The maximum number of possible MIP modes depend on block size & shape - int width = LCU_WIDTH >> depth; - int height = width; // TODO: proper height for non-square blocks. - + // This is needed for bit cost calculation and requires too many parameters to be // calculated inside the rough search functions - uint8_t mip_ctx = uvg_get_mip_flag_context(x_px, y_px, cu_width, cu_width, lcu, NULL); + uint8_t mip_ctx = uvg_get_mip_flag_context(cu_loc, lcu, NULL); // Find best intra mode for 2Nx2N. uvg_pixel *ref_pixels = &lcu->ref.y[lcu_px.x + lcu_px.y * LCU_WIDTH]; @@ -1886,15 +1881,15 @@ void uvg_search_cu_intra( temp_pred_cu.type = CU_INTRA; FILL(temp_pred_cu.intra, 0); // Find modes with multiple reference lines if in use. Do not use if CU in first row. - uint8_t lines = state->encoder_control->cfg.mrl && (y_px % LCU_WIDTH) != 0 ? MAX_REF_LINE_IDX : 1; + uint8_t lines = state->encoder_control->cfg.mrl && lcu_px.y != 0 ? MAX_REF_LINE_IDX : 1; uint8_t number_of_modes; uint8_t num_regular_modes; - bool skip_rough_search = (depth == 0 || state->encoder_control->cfg.rdo >= 4); + bool skip_rough_search = (is_large || state->encoder_control->cfg.rdo >= 4); if (!skip_rough_search) { num_regular_modes = number_of_modes = search_intra_rough( state, - &cu_loc, + cu_loc, ref_pixels, LCU_WIDTH, refs, @@ -1903,7 +1898,7 @@ void uvg_search_cu_intra( search_data, &temp_pred_cu, mip_ctx); - // if(lines == 1) sort_modes(search_data, number_of_modes); + // if(lines == 1) sort_modes(search_data, number_of_modes); } else { for (int8_t i = 0; i < UVG_NUM_INTRA_MODES; i++) { @@ -1925,7 +1920,7 @@ void uvg_search_cu_intra( // Copy extra ref lines, including ref line 1 and top left corner. for (int i = 0; i < MAX_REF_LINE_IDX; ++i) { - int height = (LCU_WIDTH >> depth) * 2 + MAX_REF_LINE_IDX; + int height = (cu_loc->height) * 2 + MAX_REF_LINE_IDX; height = MIN(height, (LCU_WIDTH - lcu_px.y + MAX_REF_LINE_IDX)); // Cut short if on bottom LCU edge. Cannot take references from below since they don't exist. height = MIN(height, pic_px.y - luma_px.y + MAX_REF_LINE_IDX); uvg_pixels_blit(&frame->rec->y[(luma_px.y - MAX_REF_LINE_IDX) * frame->rec->stride + luma_px.x - (1 + i)], @@ -1934,7 +1929,7 @@ void uvg_search_cu_intra( frame->rec->stride, 1); } } - uvg_intra_build_reference(&cu_loc, &cu_loc, COLOR_Y, &luma_px, &pic_px, lcu, &refs[line], state->encoder_control->cfg.wpp, extra_refs, line, 0); + uvg_intra_build_reference(cu_loc, cu_loc, COLOR_Y, &luma_px, &pic_px, lcu, &refs[line], state->encoder_control->cfg.wpp, extra_refs, line, 0); for(int i = 1; i < INTRA_MPM_COUNT; i++) { num_mrl_modes++; const int index = (i - 1) + (INTRA_MPM_COUNT -1)*(line-1) + number_of_modes; @@ -1946,7 +1941,7 @@ void uvg_search_cu_intra( } } if (!skip_rough_search && lines != 1) { - get_rough_cost_for_2n_modes(state, refs, &cu_loc, + get_rough_cost_for_2n_modes(state, refs, cu_loc, ref_pixels, LCU_WIDTH, search_data + number_of_modes, num_mrl_modes, mip_ctx); @@ -1959,11 +1954,11 @@ void uvg_search_cu_intra( int num_mip_modes = 0; if (state->encoder_control->cfg.mip) { // MIP is not allowed for 64 x 4 or 4 x 64 blocks - if (!((width == 64 && height == 4) || (width == 4 && height == 64))) { - num_mip_modes = NUM_MIP_MODES_FULL(width, height); + if (!((cu_loc->height == 64 && cu_loc->width== 4) || (cu_loc->height== 4 && cu_loc->width == 64))) { + num_mip_modes = NUM_MIP_MODES_FULL(cu_loc->width, cu_loc->height); for (int transpose = 0; transpose < 2; transpose++) { - const int half_mip_modes = NUM_MIP_MODES_HALF(width, height); + const int half_mip_modes = num_mip_modes / 2; for (int i = 0; i < half_mip_modes; ++i) { const int index = i + number_of_modes + transpose * half_mip_modes; search_data[index].pred_cu = temp_pred_cu; @@ -1975,7 +1970,7 @@ void uvg_search_cu_intra( } } if (!skip_rough_search) { - get_rough_cost_for_2n_modes(state, refs, &cu_loc, + get_rough_cost_for_2n_modes(state, refs, cu_loc, ref_pixels, LCU_WIDTH, search_data + number_of_modes, num_mip_modes, mip_ctx); @@ -1986,7 +1981,10 @@ void uvg_search_cu_intra( // Set transform depth to current depth, meaning no transform splits. - uvg_lcu_fill_trdepth(lcu, x_px, y_px, depth, depth, tree_type); + { + const int8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width]; + uvg_lcu_fill_trdepth(lcu, cu_loc->x, cu_loc->y, depth, depth, tree_type); + } // Refine results with slower search or get some results if rough search was skipped. const int32_t rdo_level = state->encoder_control->cfg.rdo; if (rdo_level >= 2 || skip_rough_search) { @@ -2003,7 +2001,7 @@ void uvg_search_cu_intra( {2, 3, 3, 3, 3, 2}, // 64x4, 64x8, 64x16, 64x32, 64x64, 64x128, {2, 2, 2, 2, 2, 3}, // 128x4, 128x8, 128x16, 128x32, 128x64, 128x128, }; - number_of_modes_to_search = g_aucIntraModeNumFast_UseMPM_2D[7- depth - 3][7 - depth - 3]; + number_of_modes_to_search = g_aucIntraModeNumFast_UseMPM_2D[log2_width - 2][log2_height - 2]; } else { // Check only the predicted modes. number_of_modes_to_search = 0; @@ -2015,8 +2013,8 @@ void uvg_search_cu_intra( search_data, num_regular_modes, num_mip_modes, - width, - height + cu_loc->width, + cu_loc->height ); } } @@ -2041,13 +2039,11 @@ void uvg_search_cu_intra( search_intra_rdo( state, - x_px, - y_px, - depth, number_of_modes_to_search, search_data, lcu, - tree_type); + tree_type, + cu_loc); search_data[0].pred_cu.mts_last_scan_pos = false; search_data[0].pred_cu.violates_mts_coeff_constraint = false; } diff --git a/src/search_intra.h b/src/search_intra.h index 36470e63..e9264275 100644 --- a/src/search_intra.h +++ b/src/search_intra.h @@ -43,7 +43,9 @@ #include "global.h" // IWYU pragma: keep #include "intra.h" -double uvg_luma_mode_bits(const encoder_state_t *state, const cu_info_t* const cur_cu, int x, int y, int8_t depth, const lcu_t* lcu); +double uvg_luma_mode_bits(const encoder_state_t *state, const cu_info_t* const cur_cu, const cu_loc_t* + const cu_loc, + const lcu_t* lcu); double uvg_chroma_mode_bits(const encoder_state_t *state, int8_t chroma_mode, int8_t luma_mode); @@ -59,11 +61,9 @@ int8_t uvg_search_cu_intra_chroma( void uvg_search_cu_intra( encoder_state_t * const state, - const int x_px, - const int y_px, - const int depth, intra_search_data_t* search_data, lcu_t *lcu, - enum uvg_tree_type tree_type); + enum uvg_tree_type tree_type, + const cu_loc_t* const cu_loc); #endif // SEARCH_INTRA_H_ diff --git a/src/strategies/generic/dct-generic.c b/src/strategies/generic/dct-generic.c index 72cd1fb1..c352b395 100644 --- a/src/strategies/generic/dct-generic.c +++ b/src/strategies/generic/dct-generic.c @@ -2641,8 +2641,8 @@ static void mts_idct_generic( if (tu->lfnst_idx || tu->cr_lfnst_idx) { if ((width == 4 && height > 4) || (width > 4 && height == 4)) { - skip_width == width - 4; - skip_height == height - 4; + skip_width = width - 4; + skip_height = height - 4; } else if ((width >= 8 && height >= 8)) { skip_width = width - 8; diff --git a/src/transform.c b/src/transform.c index a497003b..84eb3558 100644 --- a/src/transform.c +++ b/src/transform.c @@ -174,7 +174,6 @@ int32_t uvg_get_scaled_qp(color_t color, int8_t qp, int8_t qp_offset, int8_t con */ void uvg_derive_lfnst_constraints( cu_info_t* const pred_cu, - const int depth, bool* constraints, const coeff_t* coeff, const int width, @@ -182,7 +181,7 @@ void uvg_derive_lfnst_constraints( const vector2d_t * const lcu_px, color_t color) { - coeff_scan_order_t scan_idx = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth); + coeff_scan_order_t scan_idx = SCAN_DIAG; // ToDo: large block support in VVC? const uint32_t log2_block_size = uvg_g_convert_to_log2[width]; @@ -584,9 +583,9 @@ void uvg_chroma_transform_search( if(pred_cu->type == CU_INTRA && transforms[i] != CHROMA_TS && (depth == 4 || tree_type == UVG_CHROMA_T)) { bool constraints[2] = { false, false }; - uvg_derive_lfnst_constraints(pred_cu, depth, constraints, u_quant_coeff, width, height, NULL, COLOR_U); + uvg_derive_lfnst_constraints(pred_cu, constraints, u_quant_coeff, width, height, NULL, COLOR_U); if(!IS_JCCR_MODE(transforms[i])) { - uvg_derive_lfnst_constraints(pred_cu, depth, constraints, v_quant_coeff, width, height, NULL, COLOR_V); + uvg_derive_lfnst_constraints(pred_cu, constraints, v_quant_coeff, width, height, NULL, COLOR_V); } if (!constraints[1] && (u_has_coeffs || v_has_coeffs) && pred_cu->cr_lfnst_idx != 0) continue; } diff --git a/src/transform.h b/src/transform.h index a7427ea0..4d5e8ba8 100644 --- a/src/transform.h +++ b/src/transform.h @@ -74,7 +74,6 @@ int32_t uvg_get_scaled_qp(color_t color, int8_t qp, int8_t qp_offset, int8_t con void uvg_derive_lfnst_constraints( cu_info_t* const pred_cu, - const int depth, bool* constraints, const coeff_t* coeff, const int width,