[mtt] remove all remaining usages of deriving width and height from depth

This commit is contained in:
Joose Sainio 2022-09-07 16:11:36 +03:00 committed by Marko Viitanen
parent 26dcadc149
commit dcf879e5ed
13 changed files with 257 additions and 247 deletions

View file

@ -306,6 +306,8 @@ void uvg_cu_loc_ctor(cu_loc_t* loc, int x, int y, int width, int height)
loc->x = x;
loc->y = y;
loc->local_x = x % LCU_WIDTH;
loc->local_y = y % LCU_WIDTH;
loc->width = width;
loc->height = height;
// TODO: when MTT is implemented, chroma dimensions can be minimum 2.

View file

@ -119,7 +119,7 @@ typedef struct
{
uint8_t type : 3; //!< \brief block type, one of cu_type_t values
uint8_t depth : 3; //!< \brief depth / size of this block
uint8_t tr_depth : 3; //!< \brief transform depth
uint8_t tr_depth ; //!< \brief transform depth
uint8_t skipped : 1; //!< \brief flag to indicate this block is skipped
uint8_t merged : 1; //!< \brief flag to indicate this block is merged
uint8_t merge_idx : 3; //!< \brief merge index
@ -129,6 +129,8 @@ typedef struct
uint16_t cbf;
uint32_t split_tree : 3 * 9;
/**
* \brief QP used for the CU.
*
@ -170,6 +172,8 @@ typedef struct
typedef struct {
int16_t x;
int16_t y;
uint8_t local_x;
uint8_t local_y;
int8_t width;
int8_t height;
int8_t chroma_width;

View file

@ -660,7 +660,7 @@ static void encode_transform_coeff(
bool last_split,
bool can_skip_last_cbf,
int *luma_cbf_ctx, // Always true except when writing sub partition coeffs (ISP)
cu_loc_t *original_loc) // Original dimensions before ISP split
const cu_loc_t * const original_loc) // Original dimensions before ISP split
{
cabac_data_t * const cabac = &state->cabac;
int x = cu_loc->x;
@ -829,7 +829,6 @@ int uvg_encode_inter_prediction_unit(
encoder_state_t * const state,
cabac_data_t * const cabac,
const cu_info_t * const cur_cu,
int depth,
lcu_t* lcu,
double* bits_out,
const cu_loc_t* const cu_loc)
@ -867,7 +866,7 @@ int uvg_encode_inter_prediction_unit(
// Code Inter Dir
uint8_t inter_dir = cur_cu->inter.mv_dir;
if ((LCU_WIDTH >> depth) != 4) { // ToDo: limit on 4x8/8x4
if (cu_loc->width + cu_loc->height > 12) { // ToDo: limit on 4x8/8x4
uint32_t inter_dir_ctx = (7 - ((uvg_math_floor_log2(cu_loc->width) + uvg_math_floor_log2(cu_loc->height) + 1) >> 1));
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.inter_dir[inter_dir_ctx]), (inter_dir == 3), bits, "inter_pred_idc");
@ -1038,10 +1037,13 @@ static void encode_chroma_intra_cu(
else if (cabac->only_count && bits_out)*bits_out += bits;
}
void uvg_encode_intra_luma_coding_unit(const encoder_state_t * const state,
cabac_data_t * const cabac,
const cu_info_t * const cur_cu,
int x, int y, int depth, const lcu_t* lcu, double* bits_out)
void uvg_encode_intra_luma_coding_unit(
const encoder_state_t * const state,
cabac_data_t * const cabac,
const cu_info_t * const cur_cu,
const cu_loc_t* const cu_loc,
const lcu_t* lcu,
double* bits_out)
{
const videoframe_t * const frame = state->tile->frame;
uint8_t intra_pred_mode_actual;
@ -1053,6 +1055,9 @@ void uvg_encode_intra_luma_coding_unit(const encoder_state_t * const state,
uint32_t flag;
double bits = 0;
const int x = cu_loc->x;
const int y = cu_loc->y;
/*
if ((cur_cu->type == CU_INTRA && (LCU_WIDTH >> cur_cu->depth <= 32))) {
cabac->cur_ctx = &(cabac->ctx.bdpcm_mode[0]);
@ -1076,8 +1081,8 @@ void uvg_encode_intra_luma_coding_unit(const encoder_state_t * const state,
}
*/
uint32_t width = (LCU_WIDTH >> depth);
uint32_t height = (LCU_WIDTH >> depth); // TODO: height for non-square blocks
uint32_t width = cu_loc->width;
uint32_t height = cu_loc->height; // TODO: height for non-square blocks
// Code MIP related bits
bool enable_mip = state->encoder_control->cfg.mip;
@ -1102,9 +1107,7 @@ void uvg_encode_intra_luma_coding_unit(const encoder_state_t * const state,
}
if (cur_cu->type == CU_INTRA && !cur_cu->bdpcmMode && enable_mip) {
const int cu_width = LCU_WIDTH >> depth;
const int cu_height = cu_width; // TODO: height for non-square blocks
uint8_t ctx_id = uvg_get_mip_flag_context(x, y, cu_width, cu_height, lcu, lcu ? NULL : frame->cu_array);
uint8_t ctx_id = uvg_get_mip_flag_context(cu_loc, lcu, lcu ? NULL : frame->cu_array);
// Write MIP flag
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.mip_flag[ctx_id]), mip_flag, bits, "mip_flag");
@ -1149,8 +1152,7 @@ void uvg_encode_intra_luma_coding_unit(const encoder_state_t * const state,
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.intra_subpart_model[1]), isp_mode - 1, bits, "intra_subpartitions_split_type"); // Vertical or horizontal split
}
}
const int cu_width = LCU_WIDTH >> depth;
// PREDINFO CODING
// If intra prediction mode is found from the predictors,
// it can be signaled with two EP's. Otherwise we can send
@ -1165,7 +1167,7 @@ void uvg_encode_intra_luma_coding_unit(const encoder_state_t * const state,
if (x > 0) {
assert(x >> 2 > 0);
const int x_scu = SUB_SCU(x) - 1;
const int y_scu = SUB_SCU(y + cu_width - 1);
const int y_scu = SUB_SCU(y + height - 1);
left_pu = lcu ?
LCU_GET_CU_AT_PX(
lcu,
@ -1174,7 +1176,7 @@ void uvg_encode_intra_luma_coding_unit(const encoder_state_t * const state,
uvg_cu_array_at_const(
frame->cu_array,
x - 1,
y + cu_width - 1);
y + height - 1);
}
// Don't take the above PU across the LCU boundary.
if (y % LCU_WIDTH > 0 && y > 0) {
@ -1182,11 +1184,11 @@ void uvg_encode_intra_luma_coding_unit(const encoder_state_t * const state,
above_pu = lcu ?
LCU_GET_CU_AT_PX(
lcu,
SUB_SCU(x + cu_width - 1),
SUB_SCU(x + width - 1),
SUB_SCU(y) - 1) :
uvg_cu_array_at_const(
frame->cu_array,
x + cu_width - 1,
x + width - 1,
y - 1);
}
@ -1405,28 +1407,25 @@ bool uvg_write_split_flag(
void uvg_encode_coding_tree(
encoder_state_t * const state,
uint16_t x,
uint16_t y,
uint8_t depth,
lcu_coeff_t *coeff,
enum uvg_tree_type tree_type)
enum uvg_tree_type tree_type,
const cu_loc_t* const cu_loc,
const split_tree_t split_tree)
{
cabac_data_t * const cabac = &state->cabac;
const encoder_control_t * const ctrl = state->encoder_control;
const videoframe_t * const frame = state->tile->frame;
const cu_array_t* used_array = tree_type != UVG_CHROMA_T ? frame->cu_array : frame->chroma_cu_array;
const cu_info_t *cur_cu = uvg_cu_array_at_const(used_array, x, y);
const int width = LCU_WIDTH >> depth;
const int height = width; // TODO: height for non-square blocks
cu_loc_t cu_loc;
uvg_cu_loc_ctor(&cu_loc, x, y, width, height);
const int cu_width = tree_type != UVG_CHROMA_T ? cu_loc.width : cu_loc.chroma_width;
const int cu_height = tree_type != UVG_CHROMA_T ? cu_loc.height : cu_loc.chroma_height;
const cu_info_t *cur_cu = uvg_cu_array_at_const(used_array, cu_loc->x, cu_loc->y);
const int cu_width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width;
const int cu_height = tree_type != UVG_CHROMA_T ? cu_loc->height : cu_loc->chroma_height;
const int half_cu = cu_width >> 1;
const int x = cu_loc->x;
const int y = cu_loc->y;
const int depth = split_tree.current_depth;
const cu_info_t *left_cu = NULL;
if (x > 0) {
@ -1458,33 +1457,33 @@ void uvg_encode_coding_tree(
// When not in MAX_DEPTH, insert split flag and split the blocks if needed
if (depth != MAX_DEPTH && !(tree_type == UVG_CHROMA_T && depth == MAX_DEPTH -1)) {
const int split_flag = uvg_write_split_flag(state, cabac, left_cu, above_cu, GET_SPLITDATA(cur_cu, depth), depth, cu_width, x, y, tree_type,NULL);
const int split_flag = uvg_write_split_flag(state, cabac, left_cu, above_cu, (cur_cu->split_tree >> (split_tree.current_depth * 3)) & 7, depth, cu_width, x, y, tree_type,NULL);
if (split_flag || border) {
const int half_luma = cu_loc->width / 2;
split_tree_t new_split_tree = { cur_cu->split_tree, split_tree.current_depth + 1 };
cu_loc_t new_cu_loc;
uvg_cu_loc_ctor(&new_cu_loc, x, y, half_luma, half_luma);
// Split blocks and remember to change x and y block positions
uvg_encode_coding_tree(state, x, y, depth + 1, coeff, tree_type);
uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc, new_split_tree);
if (!border_x || border_split_x) {
uvg_encode_coding_tree(state, x + half_cu, y, depth + 1, coeff, tree_type);
uvg_cu_loc_ctor(&new_cu_loc, x + half_cu, y, half_luma, half_luma);
uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc, new_split_tree);
}
if (!border_y || border_split_y) {
uvg_encode_coding_tree(state, x, y + half_cu, depth + 1, coeff, tree_type);
uvg_cu_loc_ctor(&new_cu_loc, x, y + half_cu, half_luma, half_luma);
uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc, new_split_tree);
}
if (!border || (border_split_x && border_split_y)) {
uvg_encode_coding_tree(state, x + half_cu, y + half_cu, depth + 1, coeff, tree_type);
uvg_cu_loc_ctor(&new_cu_loc, x + half_cu, y + half_cu, half_luma, half_luma);
uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc, new_split_tree);
}
return;
}
}
//ToDo: check if we can actually split
//ToDo: Implement MT split
if (depth < MAX_PU_DEPTH)
{
// cabac->cur_ctx = &(cabac->ctx.trans_subdiv_model[5 - ((uvg_g_convert_to_bit[LCU_WIDTH] + 2) - depth)]);
// CABAC_BIN(cabac, 0, "split_transform_flag");
}
DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_CU_TYPE, abs_x, abs_y, cu_width, cu_width, cur_cu->type-1);
if (ctrl->cfg.lossless) {
@ -1519,8 +1518,8 @@ void uvg_encode_coding_tree(
cabac->cur_ctx = &(cabac->ctx.ibc_flag[ctx_ibc]);
CABAC_BIN(cabac, (cur_cu->type == CU_IBC), "IBCFlag");
}
DBG_PRINT_MV(state, x, y, (uint32_t)cu_width, (uint32_t)cu_width, cur_cu);
uvg_hmvp_add_mv(state, x, y, (uint32_t)cu_width, (uint32_t)cu_width, cur_cu);
DBG_PRINT_MV(state, x, y, (uint32_t)cu_width, (uint32_t)cu_height, cur_cu);
uvg_hmvp_add_mv(state, x, y, cu_width, cu_height, cur_cu);
int16_t num_cand = state->encoder_control->cfg.max_merge;
if (num_cand > 1) {
for (int ui = 0; ui < num_cand - 1; ui++) {
@ -1555,7 +1554,7 @@ void uvg_encode_coding_tree(
CABAC_BIN(cabac, (cur_cu->type == CU_IBC), "IBCFlag");
}
if (state->frame->slicetype != UVG_SLICE_I && cu_width != 4) {
if (state->frame->slicetype != UVG_SLICE_I && cu_width != 4 && cu_height != 4) {
int8_t ctx_predmode = 0;
@ -1629,11 +1628,11 @@ void uvg_encode_coding_tree(
bool non_zero_mvd = false;
// TODO: height for non-square blocks
const cu_info_t *cur_pu = uvg_cu_array_at_const(used_array, cu_loc.x, cu_loc.y);
const cu_info_t *cur_pu = uvg_cu_array_at_const(used_array, cu_loc->x, cu_loc->y);
non_zero_mvd |= uvg_encode_inter_prediction_unit(state, cabac, cur_pu, depth, NULL, NULL, &cu_loc);
DBG_PRINT_MV(state, pu_x, pu_y, pu_w, pu_h, cur_pu);
uvg_hmvp_add_mv(state, x, y, width, height, cur_pu);
non_zero_mvd |= uvg_encode_inter_prediction_unit(state, cabac, cur_pu, NULL, NULL, cu_loc);
DBG_PRINT_MV(state, x, y, cu_width, cu_height, cur_pu);
uvg_hmvp_add_mv(state, x, y, cu_width, cu_height, cur_pu);
// imv mode, select between fullpel, half-pel and quarter-pel resolutions
@ -1662,7 +1661,7 @@ void uvg_encode_coding_tree(
// Code (possible) coeffs to bitstream
if (cbf) {
int luma_cbf_ctx = 0;
encode_transform_coeff(state, &cu_loc, depth, 0, 0, 0, 0, coeff, tree_type, true, false, &luma_cbf_ctx, &cu_loc);
encode_transform_coeff(state, &cu_loc, depth, 0, 0, 0, 0, coeff, tree_type, true, false, &luma_cbf_ctx, cu_loc);
}
encode_mts_idx(state, cabac, cur_cu);
@ -1670,7 +1669,7 @@ void uvg_encode_coding_tree(
}
} else if (cur_cu->type == CU_INTRA) {
if(tree_type != UVG_CHROMA_T) {
uvg_encode_intra_luma_coding_unit(state, cabac, cur_cu, x, y, depth, NULL, NULL);
uvg_encode_intra_luma_coding_unit(state, cabac, cur_cu, cu_loc, NULL, NULL);
}
// Code chroma prediction mode.
@ -1694,7 +1693,7 @@ void uvg_encode_coding_tree(
// Check if last split to write chroma
bool last_split = (i + 1) == split_limit;
encode_transform_coeff(state, &split_loc, depth, 0, 0, 0, 0, coeff, tree_type, last_split, can_skip_last_cbf, &luma_cbf_ctx, &cu_loc);
encode_transform_coeff(state, &split_loc, depth, 0, 0, 0, 0, coeff, tree_type, last_split, can_skip_last_cbf, &luma_cbf_ctx, cu_loc);
can_skip_last_cbf &= luma_cbf_ctx == 2;
}
}
@ -1714,7 +1713,7 @@ void uvg_encode_coding_tree(
tmp->violates_lfnst_constrained_luma = false;
tmp->violates_lfnst_constrained_chroma = false;
tmp->lfnst_last_scan_pos = false;
encode_transform_coeff(state, &cu_loc, depth, 0, 0, 0, 1, coeff, tree_type, true, false, &luma_cbf_ctx, &cu_loc);
encode_transform_coeff(state, &cu_loc, depth, 0, 0, 0, 1, coeff, tree_type, true, false, &luma_cbf_ctx, cu_loc);
// Write LFNST only once for single tree structure
encode_lfnst_idx(state, cabac, tmp, x, y, depth, cu_width, cu_height, tree_type, COLOR_UV);
}
@ -1843,7 +1842,7 @@ double uvg_mock_encode_coding_unit(
if (cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) {
const uint8_t imv_mode = UVG_IMV_OFF;
const int non_zero_mvd = uvg_encode_inter_prediction_unit(state, cabac, cur_cu, depth, lcu, &bits, cu_loc);
const int non_zero_mvd = uvg_encode_inter_prediction_unit(state, cabac, cur_cu, lcu, &bits, cu_loc);
if (ctrl->cfg.amvr && non_zero_mvd) {
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.imv_flag[0]), imv_mode, bits, "imv_flag");
if (imv_mode > UVG_IMV_OFF) {
@ -1856,7 +1855,7 @@ double uvg_mock_encode_coding_unit(
}
else if (cur_cu->type == CU_INTRA) {
if(tree_type != UVG_CHROMA_T) {
uvg_encode_intra_luma_coding_unit(state, cabac, cur_cu, x, y, depth, lcu, &bits);
uvg_encode_intra_luma_coding_unit(state, cabac, cur_cu, cu_loc, lcu, &bits);
}
if((depth != 4 || (x % 8 != 0 && y % 8 != 0)) && state->encoder_control->chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) {
encode_chroma_intra_cu(cabac, cur_cu, state->encoder_control->cfg.cclm, &bits);

View file

@ -54,11 +54,10 @@ bool uvg_is_lfnst_allowed(
void uvg_encode_coding_tree(
encoder_state_t * const state,
uint16_t x_ctb,
uint16_t y_ctb,
uint8_t depth,
lcu_coeff_t *coeff,
enum uvg_tree_type tree_type);
enum uvg_tree_type tree_type,
const cu_loc_t* const cu_loc,
const split_tree_t split_tree);
void uvg_encode_ts_residual(encoder_state_t* const state,
cabac_data_t* const cabac,
@ -87,15 +86,17 @@ int uvg_encode_inter_prediction_unit(
encoder_state_t* const state,
cabac_data_t* const cabac,
const cu_info_t* const cur_cu,
int depth,
lcu_t* lcu,
double* bits_out,
const cu_loc_t* const cu_loc);
void uvg_encode_intra_luma_coding_unit(const encoder_state_t* const state,
void uvg_encode_intra_luma_coding_unit(
const encoder_state_t* const state,
cabac_data_t* const cabac,
const cu_info_t* const cur_cu,
int x, int y, int depth, const lcu_t* lcu, double* bits_out);
const cu_loc_t* const cu_loc,
const lcu_t* lcu,
double* bits_out);
bool uvg_write_split_flag(

View file

@ -870,10 +870,15 @@ static void encoder_state_worker_encode_lcu_bitstream(void * opaque)
enum uvg_tree_type tree_type = state->frame->slicetype == UVG_SLICE_I && state->encoder_control->cfg.dual_tree ? UVG_LUMA_T : UVG_BOTH_T;
//Encode coding tree
uvg_encode_coding_tree(state, lcu->position.x * LCU_WIDTH, lcu->position.y * LCU_WIDTH, 0, lcu->coeff, tree_type);
cu_loc_t start;
uvg_cu_loc_ctor(&start, lcu->position.x * LCU_WIDTH, lcu->position.y * LCU_WIDTH, LCU_WIDTH, LCU_WIDTH);
split_tree_t split_tree = { 0, 0 };
uvg_encode_coding_tree(state, lcu->coeff, tree_type, &start, split_tree);
if(tree_type == UVG_LUMA_T && state->encoder_control->chroma_format != UVG_CSP_400) {
uvg_encode_coding_tree(state, lcu->position.x * LCU_WIDTH_C, lcu->position.y * LCU_WIDTH_C, 0, lcu->coeff, UVG_CHROMA_T);
uvg_cu_loc_ctor(&start, lcu->position.x * LCU_WIDTH_C, lcu->position.y * LCU_WIDTH_C, LCU_WIDTH, LCU_WIDTH);
uvg_encode_coding_tree(state, lcu->coeff, UVG_CHROMA_T, &start, split_tree);
}
if (!state->cabac.only_count) {

View file

@ -585,12 +585,18 @@ static void predict_cclm(
}
int uvg_get_mip_flag_context(int x, int y, int width, int height, const lcu_t* lcu, cu_array_t* const cu_a) {
uint8_t uvg_get_mip_flag_context(
const cu_loc_t* const cu_loc,
const lcu_t* lcu,
cu_array_t* const cu_a) {
assert(!(lcu && cu_a));
if (width > 2 * height || height > 2 * width) {
if (cu_loc->width > 2 * cu_loc->height || cu_loc->height > 2 * cu_loc->width) {
return 3;
}
const int x = cu_loc->x;
const int y = cu_loc->y;
int context = 0;
const cu_info_t* left = NULL;
const cu_info_t* top = NULL;
@ -1761,26 +1767,26 @@ static void intra_recon_tb_leaf(
*/
void uvg_intra_recon_cu(
encoder_state_t* const state,
int x,
int y,
int depth,
intra_search_data_t* search_data,
const cu_loc_t* cu_loc,
cu_info_t *cur_cu,
lcu_t *lcu,
enum uvg_tree_type tree_type,
bool recon_luma,
bool recon_chroma)
{
const vector2d_t lcu_px = { SUB_SCU(x) >> (tree_type == UVG_CHROMA_T), SUB_SCU(y) >> (tree_type == UVG_CHROMA_T) };
const int8_t width = LCU_WIDTH >> depth;
const int8_t height = width; // TODO: height for non-square blocks.
const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
const vector2d_t lcu_px = { cu_loc->local_x >> (tree_type == UVG_CHROMA_T), cu_loc->local_y >> (tree_type == UVG_CHROMA_T) };
const int8_t width = cu_loc->width;
const int8_t height = cu_loc->height; // TODO: height for non-square blocks.
if (cur_cu == NULL) {
cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
}
cu_loc_t chroma_cu_loc;
if(!recon_luma && recon_chroma) {
x &= ~7;
y &= ~7;
uvg_cu_loc_ctor(&chroma_cu_loc, cu_loc->x & ~7, cu_loc->y & ~7, width, height);
cu_loc = &chroma_cu_loc;
}
// Reset CBFs because CBFs might have been set
@ -1793,22 +1799,25 @@ void uvg_intra_recon_cu(
cbf_clear(&cur_cu->cbf, depth, COLOR_V);
}
if (depth == 0 || cur_cu->tr_depth > depth) {
if (width > TR_MAX_WIDTH || height > TR_MAX_WIDTH) {
cu_loc_t split_cu_loc;
const int offset = width / 2;
const int32_t x2 = x + offset;
const int32_t y2 = y + offset;
uvg_intra_recon_cu(state, x, y, depth + 1, search_data, NULL, lcu, tree_type, recon_luma, recon_chroma);
uvg_intra_recon_cu(state, x2, y, depth + 1, search_data, NULL, lcu, tree_type, recon_luma, recon_chroma);
uvg_intra_recon_cu(state, x, y2, depth + 1, search_data, NULL, lcu, tree_type, recon_luma, recon_chroma);
uvg_intra_recon_cu(state, x2, y2, depth + 1, search_data, NULL, lcu, tree_type, recon_luma, recon_chroma);
const int half_width = width / 2;
const int half_height = height / 2;
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y, half_width, half_height);
uvg_intra_recon_cu(state, search_data, &split_cu_loc, NULL, lcu, tree_type, recon_luma, recon_chroma);
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y, half_width, half_height);
uvg_intra_recon_cu(state, search_data, &split_cu_loc, NULL, lcu, tree_type, recon_luma, recon_chroma);
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y + half_height, half_width, half_height);
uvg_intra_recon_cu(state, search_data, &split_cu_loc, NULL, lcu, tree_type, recon_luma, recon_chroma);
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y + half_height, half_width, half_height);
uvg_intra_recon_cu(state, search_data, &split_cu_loc, NULL, lcu, tree_type, recon_luma, recon_chroma);
// Propagate coded block flags from child CUs to parent CU.
uint16_t child_cbfs[3] = {
LCU_GET_CU_AT_PX(lcu, (lcu_px.x + offset) >> (tree_type == UVG_CHROMA_T), lcu_px.y >> (tree_type == UVG_CHROMA_T))->cbf,
LCU_GET_CU_AT_PX(lcu, lcu_px.x >> (tree_type == UVG_CHROMA_T), (lcu_px.y + offset) >> (tree_type == UVG_CHROMA_T))->cbf,
LCU_GET_CU_AT_PX(lcu, (lcu_px.x + offset) >> (tree_type == UVG_CHROMA_T), (lcu_px.y + offset) >> (tree_type == UVG_CHROMA_T))->cbf,
LCU_GET_CU_AT_PX(lcu, (lcu_px.x + half_width) >> (tree_type == UVG_CHROMA_T), lcu_px.y >> (tree_type == UVG_CHROMA_T))->cbf,
LCU_GET_CU_AT_PX(lcu, lcu_px.x >> (tree_type == UVG_CHROMA_T), (lcu_px.y + half_height) >> (tree_type == UVG_CHROMA_T))->cbf,
LCU_GET_CU_AT_PX(lcu, (lcu_px.x + half_width) >> (tree_type == UVG_CHROMA_T), (lcu_px.y + half_height) >> (tree_type == UVG_CHROMA_T))->cbf,
};
if (recon_luma && depth <= MAX_DEPTH) {
@ -1826,8 +1835,6 @@ void uvg_intra_recon_cu(
// Small blocks are split only twice.
int split_type = search_data->pred_cu.intra.isp_mode;
int split_limit = uvg_get_isp_split_num(width, height, split_type, true);
cu_loc_t origin_cu;
uvg_cu_loc_ctor(&origin_cu, x, y, width, height);
for (int i = 0; i < split_limit; ++i) {
cu_loc_t tu_loc;
@ -1845,24 +1852,21 @@ void uvg_intra_recon_cu(
}
}
const bool has_luma = recon_luma && search_data->pred_cu.intra.isp_mode == ISP_MODE_NO_ISP;
const bool has_chroma = recon_chroma && (x % 8 == 0 && y % 8 == 0);
cu_loc_t loc;
uvg_cu_loc_ctor(&loc, x, y, width, height);
const bool has_chroma = recon_chroma && (cu_loc->x % 8 == 0 && cu_loc->y % 8 == 0);
// Process a leaf TU.
if (has_luma) {
intra_recon_tb_leaf(state, &loc, &loc, lcu, COLOR_Y, search_data, tree_type);
intra_recon_tb_leaf(state, cu_loc, cu_loc, lcu, COLOR_Y, search_data, tree_type);
}
if (has_chroma) {
intra_recon_tb_leaf(state, &loc, &loc, lcu, COLOR_U, search_data, tree_type);
intra_recon_tb_leaf(state, &loc, &loc, lcu, COLOR_V, search_data, tree_type);
intra_recon_tb_leaf(state, cu_loc, cu_loc, lcu, COLOR_U, search_data, tree_type);
intra_recon_tb_leaf(state, cu_loc, cu_loc, lcu, COLOR_V, search_data, tree_type);
}
// TODO: not necessary to call if only luma and ISP is on
uvg_quantize_lcu_residual(state, has_luma, has_chroma && !(search_data->pred_cu.joint_cb_cr & 3),
search_data->pred_cu.joint_cb_cr & 3 && state->encoder_control->cfg.jccr && has_chroma,
&loc, depth, cur_cu, lcu,
cu_loc, depth, cur_cu, lcu,
false, tree_type);
}

View file

@ -142,10 +142,8 @@ void uvg_intra_predict(
void uvg_intra_recon_cu(
encoder_state_t* const state,
int x,
int y,
int depth,
intra_search_data_t* search_data,
const cu_loc_t* cu_loc,
cu_info_t *cur_cu,
lcu_t *lcu,
enum uvg_tree_type tree_type,
@ -161,7 +159,10 @@ const cu_info_t* uvg_get_co_located_luma_cu(
const cu_array_t* const cu_array,
enum uvg_tree_type tree_type);
int uvg_get_mip_flag_context(int x, int y, int width, int height, const lcu_t* lcu, cu_array_t* const cu_a);
uint8_t uvg_get_mip_flag_context(
const cu_loc_t* const cu_loc,
const lcu_t* lcu,
cu_array_t* const cu_a);
// ISP related defines
#define NUM_ISP_MODES 3

View file

@ -761,16 +761,17 @@ static double cu_rd_cost_tr_split_accurate(
// Return estimate of bits used to code prediction mode of cur_cu.
static double calc_mode_bits(const encoder_state_t *state,
const lcu_t *lcu,
const cu_info_t * cur_cu,
int x, int y, int depth)
static double calc_mode_bits(
const encoder_state_t *state,
const lcu_t *lcu,
const cu_info_t * cur_cu,
const cu_loc_t* const cu_loc)
{
assert(cur_cu->type == CU_INTRA);
double mode_bits = uvg_luma_mode_bits(state, cur_cu, x, y, depth, lcu);
double mode_bits = uvg_luma_mode_bits(state, cur_cu, cu_loc, lcu);
if (((depth == 4 && x % 8 && y % 8) || (depth != 4)) && state->encoder_control->chroma_format != UVG_CSP_400) {
if (((cu_loc->width == 4 && cu_loc->x % 8 && cu_loc->y % 8) || (cu_loc->width != 4)) && state->encoder_control->chroma_format != UVG_CSP_400) {
mode_bits += uvg_chroma_mode_bits(state, cur_cu->intra.mode_chroma, cur_cu->intra.mode);
}
@ -945,6 +946,7 @@ static double search_cu(
cur_cu->lfnst_last_scan_pos = 0;
cur_cu->lfnst_idx = 0;
cur_cu->joint_cb_cr = 0;
cur_cu->split_tree = split_tree.split_tree;
// If the CU is completely inside the frame at this depth, search for
// prediction modes at this depth.
@ -1001,9 +1003,7 @@ static double search_cu(
intra_search.pred_cu = *cur_cu;
if(tree_type != UVG_CHROMA_T) {
intra_search.pred_cu.joint_cb_cr = 4;
uvg_search_cu_intra(state, x, y, depth, &intra_search,
lcu,
tree_type);
uvg_search_cu_intra(state, &intra_search, lcu, tree_type, cu_loc);
}
#ifdef COMPLETE_PRED_MODE_BITS
// Technically counting these bits would be correct, however counting
@ -1017,10 +1017,11 @@ static double search_cu(
#endif
if (state->encoder_control->cfg.cclm && tree_type != UVG_CHROMA_T && state->encoder_control->chroma_format != UVG_CSP_400) {
uvg_intra_recon_cu(state,
x, y,
depth, &intra_search,
&intra_search.pred_cu,
lcu, tree_type, true, false);
&intra_search, cu_loc,
&intra_search.pred_cu, lcu,
tree_type,
true,
false);
downsample_cclm_rec(
state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64]
@ -1058,14 +1059,13 @@ static double search_cu(
else {
intra_search.pred_cu.intra.mode_chroma = 0;
}
if(tree_type != UVG_CHROMA_T && ctrl->cfg.rdo >= 2) {
uvg_intra_recon_cu(state,
x, y,
depth, &intra_search,
&intra_search.pred_cu,
lcu,
tree_type, false, true);
uvg_intra_recon_cu(state,
&intra_search, cu_loc,
&intra_search.pred_cu, lcu,
tree_type,
false,
true);
if(tree_type != UVG_CHROMA_T) {
intra_cost += uvg_cu_rd_cost_chroma(state, x_local, y_local, depth, &intra_search.pred_cu, lcu);
}
else {
@ -1128,20 +1128,20 @@ static double search_cu(
}
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
uvg_intra_recon_cu(state,
x, y,
depth, &intra_search,
NULL,
lcu, tree_type,recon_luma,recon_chroma);
&intra_search, cu_loc,
NULL, lcu,
tree_type,
recon_luma, recon_chroma);
if(split_tree.current_depth == 4 && x % 8 && y % 8 && tree_type != UVG_LUMA_T && state->encoder_control->chroma_format != UVG_CSP_400) {
intra_search.pred_cu.intra.mode_chroma = cur_cu->intra.mode_chroma;
uvg_intra_recon_cu(state,
x, y,
depth, &intra_search,
NULL,
lcu,
tree_type,false,true);
&intra_search, cu_loc,
NULL, lcu,
tree_type,
false,
true);
}
if (cur_cu->joint_cb_cr == 4) cur_cu->joint_cb_cr = 0;
@ -1334,7 +1334,7 @@ static double search_cu(
// It is ok to interrupt the search as soon as it is known that
// the split costs at least as much as not splitting.
if (cur_cu->type == CU_NOTSET || cbf || state->encoder_control->cfg.cu_split_termination == UVG_CU_SPLIT_TERMINATION_OFF) {
const split_tree_t new_split = { split_tree.split_tree | QT_SPLIT << split_tree.current_depth, split_tree.current_depth + 1};
const split_tree_t new_split = { split_tree.split_tree | QT_SPLIT << (split_tree.current_depth * 3), split_tree.current_depth + 1};
cu_loc_t new_cu_loc;
if (split_cost < cost) {
uvg_cu_loc_ctor(&new_cu_loc, x, y, half_cu, half_cu);
@ -1399,14 +1399,14 @@ static double search_cu(
proxy.pred_cu = *cur_cu;
uvg_intra_recon_cu(state,
x, y,
depth,
&proxy,
&proxy, cu_loc,
NULL,
lcu,
tree_type, true, state->encoder_control->chroma_format == UVG_CSP_400);
tree_type,
true,
state->encoder_control->chroma_format == UVG_CSP_400);
double mode_bits = calc_mode_bits(state, lcu, cur_cu, x, y, depth) + bits;
double mode_bits = calc_mode_bits(state, lcu, cur_cu, cu_loc) + bits;
cost += mode_bits * state->lambda;
cost += cu_rd_cost_tr_split_accurate(state, x_local, y_local, depth, cur_cu, lcu, tree_type, 0);

View file

@ -265,23 +265,21 @@ static void derive_mts_constraints(cu_info_t *const pred_cu,
*/
static double search_intra_trdepth(
encoder_state_t * const state,
int x_px,
int y_px,
int depth,
const cu_loc_t* const cu_loc,
int max_depth,
double cost_treshold,
intra_search_data_t *const search_data,
lcu_t *const lcu,
enum uvg_tree_type tree_type)
{
assert(depth >= 0 && depth <= MAX_PU_DEPTH);
const int width = LCU_WIDTH >> depth;
const int height = width; // TODO: height for non-square blocks
const int width_c = width > TR_MIN_WIDTH ? width / 2 : width;
const int offset = width / 2;
const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) };
const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
const uint8_t width = cu_loc->width;
const uint8_t height = cu_loc->height; // TODO: height for non-square blocks
const uint8_t width_c = cu_loc->chroma_width;
const uint8_t height_c = cu_loc->chroma_height;
const vector2d_t lcu_px = { cu_loc->local_x, cu_loc->local_y };
const bool reconstruct_chroma = false;// (depth != 4 || (depth == 4 && (x_px & 4 && y_px & 4))) && state->encoder_control->chroma_format != UVG_CSP_400;
cu_info_t* pred_cu = &search_data->pred_cu;
@ -297,7 +295,7 @@ static double search_intra_trdepth(
double split_cost = INT32_MAX;
double nosplit_cost = INT32_MAX;
if (depth > 0) {
if (width <= TR_MAX_WIDTH && height <= TR_MAX_WIDTH) {
tr_cu->tr_depth = depth;
pred_cu->tr_depth = depth;
@ -389,15 +387,14 @@ static double search_intra_trdepth(
uvg_intra_recon_cu(
state,
x_px,
y_px,
depth,
search_data,
cu_loc,
pred_cu,
lcu,
UVG_LUMA_T,
true,
false);
false
);
if (pred_cu->intra.isp_mode != ISP_MODE_NO_ISP && search_data->best_isp_cbfs == 0) continue;
if (trafo != 0 && !cbf_is_set(pred_cu->cbf, depth, COLOR_Y)) continue;
@ -418,7 +415,6 @@ static double search_intra_trdepth(
if (trafo != MTS_SKIP && end_idx != 0) {
uvg_derive_lfnst_constraints(
pred_cu,
depth,
constraints,
lcu->coeff.y,
width,
@ -496,15 +492,14 @@ static double search_intra_trdepth(
// TODO: Maybe check the jccr mode here also but holy shit is the interface of search_intra_rdo bad currently
uvg_intra_recon_cu(
state,
x_px,
y_px,
depth,
search_data,
cu_loc,
pred_cu,
lcu,
UVG_BOTH_T,
false,
true);
true
);
best_rd_cost += uvg_cu_rd_cost_chroma(
state,
lcu_px.x,
@ -521,11 +516,10 @@ static double search_intra_trdepth(
pred_cu->lfnst_last_scan_pos};
uvg_derive_lfnst_constraints(
pred_cu,
depth,
constraints,
lcu->coeff.u,
width_c,
width_c,
height_c,
&lcu_px,
COLOR_U);
if (constraints[0] || !constraints[1]) {
@ -534,11 +528,10 @@ static double search_intra_trdepth(
}
uvg_derive_lfnst_constraints(
pred_cu,
depth,
constraints,
lcu->coeff.u,
width_c,
width_c,
height_c,
&lcu_px,
COLOR_U);
if (constraints[0] || !constraints[1]) {
@ -554,11 +547,11 @@ static double search_intra_trdepth(
pred_cu->intra.mode_chroma = chroma_mode;
pred_cu->joint_cb_cr= 4; // TODO: Maybe check the jccr mode here also but holy shit is the interface of search_intra_rdo bad currently
uvg_intra_recon_cu(state,
x_px, y_px,
depth, search_data,
pred_cu,
lcu,
UVG_BOTH_T,false,true);
search_data, cu_loc,
pred_cu, lcu,
UVG_BOTH_T,
false,
true);
best_rd_cost += uvg_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu);
pred_cu->intra.mode = luma_mode;
}
@ -610,17 +603,25 @@ static double search_intra_trdepth(
// max_depth.
// - Min transform size hasn't been reached (MAX_PU_DEPTH).
if (depth < max_depth && depth < MAX_PU_DEPTH) {
cu_loc_t split_cu_loc;
const int half_width = width / 2;
const int half_height = height / 2;
split_cost = 0;
split_cost += search_intra_trdepth(state, x_px, y_px, depth + 1, max_depth, nosplit_cost, search_data, lcu, tree_type);
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y, half_width, half_height);
split_cost += search_intra_trdepth(state, &split_cu_loc, max_depth, nosplit_cost, search_data, lcu, tree_type);
if (split_cost < nosplit_cost) {
split_cost += search_intra_trdepth(state, x_px + offset, y_px, depth + 1, max_depth, nosplit_cost, search_data, lcu, tree_type);
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y, half_width, half_height);
split_cost += search_intra_trdepth(state, &split_cu_loc, max_depth, nosplit_cost, search_data, lcu, tree_type);
}
if (split_cost < nosplit_cost) {
split_cost += search_intra_trdepth(state, x_px, y_px + offset, depth + 1, max_depth, nosplit_cost, search_data, lcu, tree_type);
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y + half_height, half_width, half_height);
split_cost += search_intra_trdepth(state, &split_cu_loc, max_depth, nosplit_cost, search_data, lcu, tree_type);
}
if (split_cost < nosplit_cost) {
split_cost += search_intra_trdepth(state, x_px + offset, y_px + offset, depth + 1, max_depth, nosplit_cost, search_data, lcu, tree_type);
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y + half_height, half_width, half_height);
split_cost += search_intra_trdepth(state, &split_cu_loc, max_depth, nosplit_cost, search_data, lcu, tree_type);
}
double cbf_bits = 0.0;
@ -654,7 +655,7 @@ static double search_intra_trdepth(
if (depth == 0 || split_cost < nosplit_cost) {
return split_cost;
} else {
uvg_lcu_fill_trdepth(lcu, x_px, y_px, depth, depth, tree_type);
uvg_lcu_fill_trdepth(lcu, cu_loc->x, cu_loc->y, depth, depth, tree_type);
pred_cu->cbf = nosplit_cbf;
@ -1372,17 +1373,16 @@ static void get_rough_cost_for_2n_modes(
*/
static int8_t search_intra_rdo(
encoder_state_t * const state,
int x_px,
int y_px,
int depth,
int modes_to_check,
intra_search_data_t *search_data,
lcu_t *lcu,
enum uvg_tree_type tree_type)
enum uvg_tree_type tree_type,
const cu_loc_t* const cu_loc)
{
const int8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
const int tr_depth = CLIP(1, MAX_PU_DEPTH, depth + state->encoder_control->cfg.tr_depth_intra);
const int width = LCU_WIDTH >> depth;
const int height = width; // TODO: height for non-square blocks
const int width = cu_loc->width;
const int height = cu_loc->height; // Read from cu_loc, not derived from width. NOTE(review): depth is still computed from width only - confirm this is intended for non-square blocks.
for (int mode = 0; mode < modes_to_check; mode++) {
bool can_do_isp_search = search_data[mode].pred_cu.intra.mip_flag ? false : true; // Cannot use ISP with MIP
@ -1399,12 +1399,12 @@ static int8_t search_intra_rdo(
search_data[mode].pred_cu.intra.isp_mode = isp_mode;
double rdo_bitcost = uvg_luma_mode_bits(state, &search_data[mode].pred_cu, x_px, y_px, depth, lcu);
double rdo_bitcost = uvg_luma_mode_bits(state, &search_data[mode].pred_cu, cu_loc, lcu);
search_data[mode].pred_cu.tr_idx = MTS_TR_NUM;
search_data[mode].bits = rdo_bitcost;
search_data[mode].cost = rdo_bitcost * state->lambda;
double mode_cost = search_intra_trdepth(state, x_px, y_px, depth, tr_depth, MAX_INT, &search_data[mode], lcu, tree_type);
double mode_cost = search_intra_trdepth(state, cu_loc, tr_depth, MAX_INT, &search_data[mode], lcu, tree_type);
best_mts_mode_for_isp[isp_mode] = search_data[mode].pred_cu.tr_idx;
best_lfnst_mode_for_isp[isp_mode] = search_data[mode].pred_cu.lfnst_idx;
search_data[mode].cost += mode_cost;
@ -1440,7 +1440,9 @@ static int8_t search_intra_rdo(
}
double uvg_luma_mode_bits(const encoder_state_t *state, const cu_info_t* const cur_cu, int x, int y, int8_t depth, const lcu_t* lcu)
double uvg_luma_mode_bits(const encoder_state_t *state, const cu_info_t* const cur_cu, const cu_loc_t*
const cu_loc,
const lcu_t* lcu)
{
cabac_data_t* cabac = (cabac_data_t *)&state->search_cabac;
double mode_bits = 0;
@ -1449,8 +1451,8 @@ double uvg_luma_mode_bits(const encoder_state_t *state, const cu_info_t* const c
uvg_encode_intra_luma_coding_unit(
state,
&cabac_copy, cur_cu,
x, y, depth, lcu, &mode_bits
);
cu_loc, lcu, &mode_bits
);
return mode_bits;
}
@ -1651,11 +1653,11 @@ int8_t uvg_search_intra_chroma_rdo(
state->search_cabac.update = 1;
chroma_data[mode_i].cost = mode_bits * state->lambda;
uvg_intra_recon_cu(state,
x_px, y_px,
depth, &chroma_data[mode_i],
pred_cu,
lcu,
tree_type, false, true);
&chroma_data[mode_i], &loc,
pred_cu, lcu,
tree_type,
false,
true);
chroma_data[mode_i].cost += uvg_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu);
memcpy(&state->search_cabac, &temp_cabac, sizeof(cabac_data_t));
}
@ -1829,19 +1831,15 @@ static int select_candidates_for_further_search(const encoder_state_t * const st
*/
void uvg_search_cu_intra(
encoder_state_t * const state,
const int x_px,
const int y_px,
const int depth,
intra_search_data_t* mode_out,
lcu_t *lcu,
enum uvg_tree_type tree_type)
enum uvg_tree_type tree_type,
const cu_loc_t* const cu_loc)
{
const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) };
const int8_t cu_width = LCU_WIDTH >> depth;
const cu_loc_t cu_loc = { x_px, y_px, cu_width, cu_width,
MAX(cu_width >> 1, TR_MIN_WIDTH), MAX(cu_width >> 1, TR_MIN_WIDTH) };
const int_fast8_t log2_width = LOG2_LCU_WIDTH - depth;
const vector2d_t luma_px = { x_px, y_px };
const vector2d_t lcu_px = { cu_loc->local_x, cu_loc->local_y };
// Log2 of each block dimension, looked up separately so that a non-square
// block is described by its real shape (height must come from cu_loc->height,
// not from the width).
const int8_t log2_width = uvg_g_convert_to_log2[cu_loc->width];
const int8_t log2_height = uvg_g_convert_to_log2[cu_loc->height];
const vector2d_t luma_px = { cu_loc->x, cu_loc->y};
const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height };
cu_info_t *cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
@ -1857,25 +1855,22 @@ void uvg_search_cu_intra(
// Select left and top CUs if they are available.
// Top CU is not available across LCU boundary.
if (x_px >= SCU_WIDTH) {
left_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x - 1, lcu_px.y+ cu_width-1);
if (cu_loc->x >= SCU_WIDTH) {
left_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x - 1, lcu_px.y+ cu_loc->height-1);
}
if (y_px >= SCU_WIDTH && lcu_px.y > 0) {
above_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x+ cu_width-1, lcu_px.y - 1);
if (cu_loc->y >= SCU_WIDTH && lcu_px.y > 0) {
above_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x+ cu_loc->width-1, lcu_px.y - 1);
}
int8_t num_cand = uvg_intra_get_dir_luma_predictor(x_px, y_px, candidate_modes, cur_cu, left_cu, above_cu);
int8_t num_cand = uvg_intra_get_dir_luma_predictor(cu_loc->x, cu_loc->y, candidate_modes, cur_cu, left_cu, above_cu);
if (depth > 0) {
uvg_intra_build_reference(&cu_loc, &cu_loc, COLOR_Y, &luma_px, &pic_px, lcu, refs, state->encoder_control->cfg.wpp, NULL, 0, 0);
bool is_large = cu_loc->width > TR_MAX_WIDTH || cu_loc->height > TR_MAX_WIDTH;
if (!is_large) {
uvg_intra_build_reference(cu_loc, cu_loc, COLOR_Y, &luma_px, &pic_px, lcu, refs, state->encoder_control->cfg.wpp, NULL, 0, 0);
}
// The maximum number of possible MIP modes depend on block size & shape
int width = LCU_WIDTH >> depth;
int height = width; // TODO: proper height for non-square blocks.
// This is needed for bit cost calculation and requires too many parameters to be
// calculated inside the rough search functions
uint8_t mip_ctx = uvg_get_mip_flag_context(x_px, y_px, cu_width, cu_width, lcu, NULL);
uint8_t mip_ctx = uvg_get_mip_flag_context(cu_loc, lcu, NULL);
// Find best intra mode for 2Nx2N.
uvg_pixel *ref_pixels = &lcu->ref.y[lcu_px.x + lcu_px.y * LCU_WIDTH];
@ -1886,15 +1881,15 @@ void uvg_search_cu_intra(
temp_pred_cu.type = CU_INTRA;
FILL(temp_pred_cu.intra, 0);
// Find modes with multiple reference lines if in use. Do not use if CU in first row.
uint8_t lines = state->encoder_control->cfg.mrl && (y_px % LCU_WIDTH) != 0 ? MAX_REF_LINE_IDX : 1;
uint8_t lines = state->encoder_control->cfg.mrl && lcu_px.y != 0 ? MAX_REF_LINE_IDX : 1;
uint8_t number_of_modes;
uint8_t num_regular_modes;
bool skip_rough_search = (depth == 0 || state->encoder_control->cfg.rdo >= 4);
bool skip_rough_search = (is_large || state->encoder_control->cfg.rdo >= 4);
if (!skip_rough_search) {
num_regular_modes = number_of_modes = search_intra_rough(
state,
&cu_loc,
cu_loc,
ref_pixels,
LCU_WIDTH,
refs,
@ -1903,7 +1898,7 @@ void uvg_search_cu_intra(
search_data,
&temp_pred_cu,
mip_ctx);
// if(lines == 1) sort_modes(search_data, number_of_modes);
// if(lines == 1) sort_modes(search_data, number_of_modes);
} else {
for (int8_t i = 0; i < UVG_NUM_INTRA_MODES; i++) {
@ -1925,7 +1920,7 @@ void uvg_search_cu_intra(
// Copy extra ref lines, including ref line 1 and top left corner.
for (int i = 0; i < MAX_REF_LINE_IDX; ++i) {
int height = (LCU_WIDTH >> depth) * 2 + MAX_REF_LINE_IDX;
int height = (cu_loc->height) * 2 + MAX_REF_LINE_IDX;
height = MIN(height, (LCU_WIDTH - lcu_px.y + MAX_REF_LINE_IDX)); // Cut short if on bottom LCU edge. Cannot take references from below since they don't exist.
height = MIN(height, pic_px.y - luma_px.y + MAX_REF_LINE_IDX);
uvg_pixels_blit(&frame->rec->y[(luma_px.y - MAX_REF_LINE_IDX) * frame->rec->stride + luma_px.x - (1 + i)],
@ -1934,7 +1929,7 @@ void uvg_search_cu_intra(
frame->rec->stride, 1);
}
}
uvg_intra_build_reference(&cu_loc, &cu_loc, COLOR_Y, &luma_px, &pic_px, lcu, &refs[line], state->encoder_control->cfg.wpp, extra_refs, line, 0);
uvg_intra_build_reference(cu_loc, cu_loc, COLOR_Y, &luma_px, &pic_px, lcu, &refs[line], state->encoder_control->cfg.wpp, extra_refs, line, 0);
for(int i = 1; i < INTRA_MPM_COUNT; i++) {
num_mrl_modes++;
const int index = (i - 1) + (INTRA_MPM_COUNT -1)*(line-1) + number_of_modes;
@ -1946,7 +1941,7 @@ void uvg_search_cu_intra(
}
}
if (!skip_rough_search && lines != 1) {
get_rough_cost_for_2n_modes(state, refs, &cu_loc,
get_rough_cost_for_2n_modes(state, refs, cu_loc,
ref_pixels,
LCU_WIDTH, search_data + number_of_modes, num_mrl_modes,
mip_ctx);
@ -1959,11 +1954,11 @@ void uvg_search_cu_intra(
int num_mip_modes = 0;
if (state->encoder_control->cfg.mip) {
// MIP is not allowed for 64 x 4 or 4 x 64 blocks
if (!((width == 64 && height == 4) || (width == 4 && height == 64))) {
num_mip_modes = NUM_MIP_MODES_FULL(width, height);
if (!((cu_loc->height == 64 && cu_loc->width== 4) || (cu_loc->height== 4 && cu_loc->width == 64))) {
num_mip_modes = NUM_MIP_MODES_FULL(cu_loc->width, cu_loc->height);
for (int transpose = 0; transpose < 2; transpose++) {
const int half_mip_modes = NUM_MIP_MODES_HALF(width, height);
const int half_mip_modes = num_mip_modes / 2;
for (int i = 0; i < half_mip_modes; ++i) {
const int index = i + number_of_modes + transpose * half_mip_modes;
search_data[index].pred_cu = temp_pred_cu;
@ -1975,7 +1970,7 @@ void uvg_search_cu_intra(
}
}
if (!skip_rough_search) {
get_rough_cost_for_2n_modes(state, refs, &cu_loc,
get_rough_cost_for_2n_modes(state, refs, cu_loc,
ref_pixels,
LCU_WIDTH, search_data + number_of_modes, num_mip_modes,
mip_ctx);
@ -1986,7 +1981,10 @@ void uvg_search_cu_intra(
// Set transform depth to current depth, meaning no transform splits.
uvg_lcu_fill_trdepth(lcu, x_px, y_px, depth, depth, tree_type);
{
const int8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
uvg_lcu_fill_trdepth(lcu, cu_loc->x, cu_loc->y, depth, depth, tree_type);
}
// Refine results with slower search or get some results if rough search was skipped.
const int32_t rdo_level = state->encoder_control->cfg.rdo;
if (rdo_level >= 2 || skip_rough_search) {
@ -2003,7 +2001,7 @@ void uvg_search_cu_intra(
{2, 3, 3, 3, 3, 2}, // 64x4, 64x8, 64x16, 64x32, 64x64, 64x128,
{2, 2, 2, 2, 2, 3}, // 128x4, 128x8, 128x16, 128x32, 128x64, 128x128,
};
number_of_modes_to_search = g_aucIntraModeNumFast_UseMPM_2D[7- depth - 3][7 - depth - 3];
number_of_modes_to_search = g_aucIntraModeNumFast_UseMPM_2D[log2_width - 2][log2_height - 2];
} else {
// Check only the predicted modes.
number_of_modes_to_search = 0;
@ -2015,8 +2013,8 @@ void uvg_search_cu_intra(
search_data,
num_regular_modes,
num_mip_modes,
width,
height
cu_loc->width,
cu_loc->height
);
}
}
@ -2041,13 +2039,11 @@ void uvg_search_cu_intra(
search_intra_rdo(
state,
x_px,
y_px,
depth,
number_of_modes_to_search,
search_data,
lcu,
tree_type);
tree_type,
cu_loc);
search_data[0].pred_cu.mts_last_scan_pos = false;
search_data[0].pred_cu.violates_mts_coeff_constraint = false;
}

View file

@ -43,7 +43,9 @@
#include "global.h" // IWYU pragma: keep
#include "intra.h"
double uvg_luma_mode_bits(const encoder_state_t *state, const cu_info_t* const cur_cu, int x, int y, int8_t depth, const lcu_t* lcu);
double uvg_luma_mode_bits(const encoder_state_t *state, const cu_info_t* const cur_cu, const cu_loc_t*
const cu_loc,
const lcu_t* lcu);
double uvg_chroma_mode_bits(const encoder_state_t *state,
int8_t chroma_mode, int8_t luma_mode);
@ -59,11 +61,9 @@ int8_t uvg_search_cu_intra_chroma(
void uvg_search_cu_intra(
encoder_state_t * const state,
const int x_px,
const int y_px,
const int depth,
intra_search_data_t* search_data,
lcu_t *lcu,
enum uvg_tree_type tree_type);
enum uvg_tree_type tree_type,
const cu_loc_t* const cu_loc);
#endif // SEARCH_INTRA_H_

View file

@ -2641,8 +2641,8 @@ static void mts_idct_generic(
if (tu->lfnst_idx || tu->cr_lfnst_idx) {
if ((width == 4 && height > 4) || (width > 4 && height == 4)) {
skip_width == width - 4;
skip_height == height - 4;
skip_width = width - 4;
skip_height = height - 4;
}
else if ((width >= 8 && height >= 8)) {
skip_width = width - 8;

View file

@ -174,7 +174,6 @@ int32_t uvg_get_scaled_qp(color_t color, int8_t qp, int8_t qp_offset, int8_t con
*/
void uvg_derive_lfnst_constraints(
cu_info_t* const pred_cu,
const int depth,
bool* constraints,
const coeff_t* coeff,
const int width,
@ -182,7 +181,7 @@ void uvg_derive_lfnst_constraints(
const vector2d_t * const lcu_px,
color_t color)
{
coeff_scan_order_t scan_idx = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth);
coeff_scan_order_t scan_idx = SCAN_DIAG;
// ToDo: large block support in VVC?
const uint32_t log2_block_size = uvg_g_convert_to_log2[width];
@ -584,9 +583,9 @@ void uvg_chroma_transform_search(
if(pred_cu->type == CU_INTRA && transforms[i] != CHROMA_TS && (depth == 4 || tree_type == UVG_CHROMA_T)) {
bool constraints[2] = { false, false };
uvg_derive_lfnst_constraints(pred_cu, depth, constraints, u_quant_coeff, width, height, NULL, COLOR_U);
uvg_derive_lfnst_constraints(pred_cu, constraints, u_quant_coeff, width, height, NULL, COLOR_U);
if(!IS_JCCR_MODE(transforms[i])) {
uvg_derive_lfnst_constraints(pred_cu, depth, constraints, v_quant_coeff, width, height, NULL, COLOR_V);
uvg_derive_lfnst_constraints(pred_cu, constraints, v_quant_coeff, width, height, NULL, COLOR_V);
}
if (!constraints[1] && (u_has_coeffs || v_has_coeffs) && pred_cu->cr_lfnst_idx != 0) continue;
}

View file

@ -74,7 +74,6 @@ int32_t uvg_get_scaled_qp(color_t color, int8_t qp, int8_t qp_offset, int8_t con
void uvg_derive_lfnst_constraints(
cu_info_t* const pred_cu,
const int depth,
bool* constraints,
const coeff_t* coeff,
const int width,