[mtt] Single mtt split works for everything else, except 16x16 with TT

This commit is contained in:
Joose Sainio 2022-11-24 09:04:42 +02:00 committed by Marko Viitanen
parent 26ee443d2f
commit d257376ca0
15 changed files with 202 additions and 178 deletions

View file

@ -312,15 +312,16 @@ void uvg_cu_loc_ctor(cu_loc_t* loc, int x, int y, int width, int height)
loc->height = height;
// TODO: when MTT is implemented, chroma dimensions can be minimum 2.
// Chroma width is half of luma width, when not at maximum depth.
loc->chroma_width = MAX(width >> 1, 4);
loc->chroma_height = MAX(height >> 1, 4);
loc->chroma_width = width >> 1;
loc->chroma_height = height >> 1;
}
int uvg_get_split_locs(
const cu_loc_t* const origin,
enum split_type split,
cu_loc_t out[4])
cu_loc_t out[4],
uint8_t* separate_chroma)
{
const int half_width = origin->width >> 1;
const int half_height = origin->height >> 1;
@ -336,24 +337,29 @@ int uvg_get_split_locs(
uvg_cu_loc_ctor(&out[1], origin->x + half_width, origin->y, half_width, half_height);
uvg_cu_loc_ctor(&out[2], origin->x, origin->y + half_height, half_width, half_height);
uvg_cu_loc_ctor(&out[3], origin->x + half_width, origin->y + half_height, half_width, half_height);
if (half_height == 4 && separate_chroma) *separate_chroma = 1;
return 4;
case BT_HOR_SPLIT:
uvg_cu_loc_ctor(&out[0], origin->x, origin->y, origin->width, half_height);
uvg_cu_loc_ctor(&out[1], origin->x, origin->y + half_height, origin->width, half_height);
if (half_height * origin->width < 64 && separate_chroma) *separate_chroma = 1;
return 2;
case BT_VER_SPLIT:
uvg_cu_loc_ctor(&out[0], origin->x, origin->y, half_width, origin->height);
uvg_cu_loc_ctor(&out[1], origin->x + half_width, origin->y, half_width, origin->height);
if (half_width == 4 && separate_chroma) *separate_chroma = 1;
return 2;
case TT_HOR_SPLIT:
uvg_cu_loc_ctor(&out[0], origin->x, origin->y, origin->width, quarter_height);
uvg_cu_loc_ctor(&out[1], origin->x, origin->y + quarter_height, origin->width, half_height);
uvg_cu_loc_ctor(&out[2], origin->x, origin->y + quarter_height + half_height, origin->width, quarter_height);
if (quarter_height * origin->width < 64 && separate_chroma) *separate_chroma = 1;
return 3;
case TT_VER_SPLIT:
uvg_cu_loc_ctor(&out[0], origin->x, origin->y, quarter_width, origin->height);
uvg_cu_loc_ctor(&out[1], origin->x + quarter_width, origin->y, half_width, origin->height);
uvg_cu_loc_ctor(&out[2], origin->x + quarter_width + half_width, origin->y, quarter_width, origin->height);
if (quarter_width == 4 && separate_chroma) *separate_chroma = 1;
return 3;
}
return 0;

View file

@ -189,7 +189,8 @@ void uvg_cu_loc_ctor(cu_loc_t *loc, int x, int y, int width, int height);
int uvg_get_split_locs(
const cu_loc_t* const origin,
enum split_type split,
cu_loc_t out[4]);
cu_loc_t out[4],
uint8_t* separate_chroma);
#define CU_GET_MV_CAND(cu_info_ptr, reflist) \

View file

@ -466,7 +466,6 @@ static void encode_chroma_tu(
uvg_get_sub_coeff(coeff_v, coeff->v, x_local, y_local, cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C);
if (cbf_is_set(cur_pu->cbf, COLOR_U)) {
// TODO: height for this check and the others below
if(state->encoder_control->cfg.trskip_enable
&& width_c <= (1 << state->encoder_control->cfg.trskip_max_size)
&& height_c <= (1 << state->encoder_control->cfg.trskip_max_size)){
@ -505,8 +504,9 @@ static void encode_chroma_tu(
static void encode_transform_unit(
encoder_state_t * const state,
const cu_loc_t *cu_loc,
bool only_chroma,
const cu_info_t* cur_pu,
lcu_coeff_t* coeff,
bool only_chroma,
enum uvg_tree_type tree_type,
bool last_split,
const cu_loc_t *original_loc) // Original cu dimensions, before CU split
@ -524,7 +524,9 @@ static void encode_transform_unit(
int isp_x = x;
int isp_y = y;
uvg_get_isp_cu_arr_coords(&isp_x, &isp_y);
const cu_info_t *cur_pu = uvg_cu_array_at_const(used_cu_array, isp_x, isp_y);
if(cur_pu == NULL) {
cur_pu = uvg_cu_array_at_const(used_cu_array, isp_x, isp_y);
}
int8_t scan_idx = SCAN_DIAG;
@ -540,7 +542,10 @@ static void encode_transform_unit(
// CoeffNxN
// Residual Coding
if(state->encoder_control->cfg.trskip_enable && width <= (1 << state->encoder_control->cfg.trskip_max_size) && !(cur_pu->type == CU_INTRA && cur_pu->intra.isp_mode != ISP_MODE_NO_ISP)) {
if(state->encoder_control->cfg.trskip_enable
&& width <= (1 << state->encoder_control->cfg.trskip_max_size)
&& height <= (1 << state->encoder_control->cfg.trskip_max_size)
&& !(cur_pu->type == CU_INTRA && cur_pu->intra.isp_mode != ISP_MODE_NO_ISP)) {
cabac->cur_ctx = &cabac->ctx.transform_skip_model_luma;
CABAC_BIN(cabac, cur_pu->tr_idx == MTS_SKIP, "transform_skip_flag");
DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_TR_SKIP, x, y, width, height, (cur_pu->tr_idx == MTS_SKIP) ? 1 : 0);
@ -561,7 +566,7 @@ static void encode_transform_unit(
}
bool joint_chroma = cur_pu->joint_cb_cr != 0;
if (cur_pu->log2_height + cur_pu->log2_width < 6 && tree_type != UVG_CHROMA_T) {
if (cur_pu->log2_height + cur_pu->log2_width < 6 && tree_type != UVG_CHROMA_T && !only_chroma) {
// For size 4x4 luma transform the corresponding chroma transforms are
// also of size 4x4 covering 8x8 luma pixels. The residual is coded in
// the last transform unit.
@ -597,6 +602,7 @@ static void encode_transform_coeff(
const cu_loc_t * cu_loc,
bool only_chroma,
lcu_coeff_t* coeff,
const cu_info_t* cur_tu,
enum uvg_tree_type tree_type,
bool last_split,
bool can_skip_last_cbf,
@ -604,10 +610,6 @@ static void encode_transform_coeff(
const cu_loc_t * const original_loc) // Original dimensions before ISP split
{
cabac_data_t * const cabac = &state->cabac;
int x = cu_loc->x;
int y = cu_loc->y;
const int width = cu_loc->width;
const int height = cu_loc->height;
bool isp_split = cu_loc->x != original_loc->x || cu_loc->y != original_loc->y;
@ -618,20 +620,16 @@ static void encode_transform_coeff(
//const encoder_control_t *const ctrl = state->encoder_control;
const videoframe_t * const frame = state->tile->frame;
const cu_array_t* used_array = tree_type != UVG_CHROMA_T ? frame->cu_array : frame->chroma_cu_array;
const cu_info_t *cur_pu = uvg_cu_array_at_const(used_array, x, y);
// Round coordinates down to a multiple of 8 to get the location of the
// containing CU.
const int x_cu = 8 * (x / 8);
const int y_cu = 8 * (y / 8);
const cu_info_t *cur_cu = uvg_cu_array_at_const(used_array, x, y); // TODO: very suspect, chroma cbfs stored in upper left corner, everything else in bottom right for depth 4
if(cur_tu == NULL) {
cur_tu = uvg_cu_array_at_const(used_array, cu_loc->x, cu_loc->y);
}
const bool ver_split = cu_loc->height > TR_MAX_WIDTH;
const bool hor_split = cu_loc->width > TR_MAX_WIDTH;
const int cb_flag_y = tree_type != UVG_CHROMA_T ? cbf_is_set(cur_pu->cbf, COLOR_Y) : 0;
const int cb_flag_u = tree_type != UVG_LUMA_T ?( cur_pu->joint_cb_cr ? (cur_pu->joint_cb_cr >> 1) & 1 : cbf_is_set(cur_cu->cbf, COLOR_U)) : 0;
const int cb_flag_v = tree_type != UVG_LUMA_T ? (cur_pu->joint_cb_cr ? cur_pu->joint_cb_cr & 1 : cbf_is_set(cur_cu->cbf, COLOR_V)) : 0;
const int cb_flag_y = tree_type != UVG_CHROMA_T ? cbf_is_set(cur_tu->cbf, COLOR_Y) : 0;
const int cb_flag_u = tree_type != UVG_LUMA_T ?(cur_tu->joint_cb_cr ? (cur_tu->joint_cb_cr >> 1) & 1 : cbf_is_set(cur_tu->cbf, COLOR_U)) : 0;
const int cb_flag_v = tree_type != UVG_LUMA_T ? (cur_tu->joint_cb_cr ? cur_tu->joint_cb_cr & 1 : cbf_is_set(cur_tu->cbf, COLOR_V)) : 0;
if (hor_split || ver_split) {
@ -647,9 +645,9 @@ static void encode_transform_coeff(
}
cu_loc_t split_cu_loc[4];
const int split_count = uvg_get_split_locs(cu_loc, split, split_cu_loc);
const int split_count = uvg_get_split_locs(cu_loc, split, split_cu_loc,NULL);
for (int i = 0; i < split_count; ++i) {
encode_transform_coeff(state, &split_cu_loc[i], only_chroma, coeff, tree_type, true, luma_cbf_ctx, &split_cu_loc[i]);
encode_transform_coeff(state, &split_cu_loc[i], only_chroma, coeff, NULL, tree_type, true, luma_cbf_ctx, &split_cu_loc[i]);
}
return;
}
@ -658,7 +656,7 @@ static void encode_transform_coeff(
// Not the last CU for area of 64 pixels covered by more than one luma CU.
// Not the last ISP Split
if (state->encoder_control->chroma_format != UVG_CSP_400
&& (cur_pu->log2_height + cur_pu->log2_width >= 6 || only_chroma)
&& (cur_tu->log2_height + cur_tu->log2_width >= 6 || only_chroma)
&& tree_type != UVG_LUMA_T
&& last_split) {
cabac->cur_ctx = &(cabac->ctx.qt_cbf_model_cb[0]);
@ -684,22 +682,22 @@ static void encode_transform_coeff(
// - transform depth > 0
// - we have chroma coefficients at this level
// When it is not present, it is inferred to be 1.
if ((cur_cu->type == CU_INTRA || !PU_IS_TU(cur_cu) || cb_flag_u || cb_flag_v) && !only_chroma && tree_type != UVG_CHROMA_T) {
if ((cur_tu->type == CU_INTRA || !PU_IS_TU(cur_tu) || cb_flag_u || cb_flag_v) && !only_chroma && tree_type != UVG_CHROMA_T) {
if (can_skip_last_cbf && isp_split && last_split) {
// Do not write luma cbf if first three isp splits have luma cbf 0
} else {
cabac->cur_ctx = &(cabac->ctx.qt_cbf_model_luma[*luma_cbf_ctx]);
CABAC_BIN(cabac, cb_flag_y, "cbf_luma");
if (PU_IS_TU(cur_cu)) {
if (PU_IS_TU(cur_tu)) {
*luma_cbf_ctx = 2 + cb_flag_y;
}
}
}
if (cb_flag_y | cb_flag_u | cb_flag_v) {
if (state->must_code_qp_delta && (only_chroma || cb_flag_y || cur_pu->log2_height + cur_pu->log2_width >= 6) ) {
const int qp_pred = uvg_get_cu_ref_qp(state, x_cu, y_cu, state->last_qp);
const int qp_delta = cur_cu->qp - qp_pred;
if (state->must_code_qp_delta && (only_chroma || cb_flag_y || cur_tu->log2_height + cur_tu->log2_width >= 6) ) {
const int qp_pred = uvg_get_cu_ref_qp(state, cu_loc->x, cu_loc->y, state->last_qp);
const int qp_delta = cur_tu->qp - qp_pred;
// Possible deltaQP range depends on bit depth as stated in HEVC specification.
assert(qp_delta >= UVG_QP_DELTA_MIN && qp_delta <= UVG_QP_DELTA_MAX && "QP delta not in valid range.");
@ -722,18 +720,18 @@ static void encode_transform_coeff(
}
if((
((cb_flag_u || cb_flag_v )
&& cur_cu->type == CU_INTRA)
&& cur_tu->type == CU_INTRA)
|| (cb_flag_u && cb_flag_v))
&& (cur_pu->log2_height + cur_pu->log2_width >= 6 || only_chroma || tree_type == UVG_CHROMA_T)
&& (cur_tu->log2_height + cur_tu->log2_width >= 6 || only_chroma || tree_type == UVG_CHROMA_T)
&& state->encoder_control->cfg.jccr
&& last_split
) {
assert(cur_pu->joint_cb_cr < 4 && "JointCbCr is in search state.");
assert(cur_tu->joint_cb_cr < 4 && "JointCbCr is in search state.");
cabac->cur_ctx = &cabac->ctx.joint_cb_cr[cb_flag_u * 2 + cb_flag_v - 1];
CABAC_BIN(cabac, cur_pu->joint_cb_cr != 0, "tu_joint_cbcr_residual_flag");
CABAC_BIN(cabac, cur_tu->joint_cb_cr != 0, "tu_joint_cbcr_residual_flag");
}
encode_transform_unit(state, cu_loc, only_chroma, coeff, tree_type, last_split, original_loc);
encode_transform_unit(state, cu_loc, only_chroma ? cur_tu : NULL, coeff, only_chroma, tree_type, last_split, original_loc);
}
}
@ -1261,10 +1259,10 @@ uint8_t uvg_write_split_flag(
bh_split = bv_split = th_split = tv_split = false;
}
else {
bv_split &= cu_width <= state->encoder_control->cfg.max_bt_size[slice_type];
tv_split &= cu_width <= state->encoder_control->cfg.max_tt_size[slice_type];
bh_split &= cu_height <= state->encoder_control->cfg.max_bt_size[slice_type];
th_split &= cu_height <= state->encoder_control->cfg.max_tt_size[slice_type];
bv_split &= cu_width <= state->encoder_control->cfg.max_bt_size[slice_type] && cu_width > state->encoder_control->cfg.min_qt_size[slice_type];
tv_split &= cu_width <= state->encoder_control->cfg.max_tt_size[slice_type] && cu_width > 2 * state->encoder_control->cfg.min_qt_size[slice_type];
bh_split &= cu_height <= state->encoder_control->cfg.max_bt_size[slice_type] && cu_height > state->encoder_control->cfg.min_qt_size[slice_type];
th_split &= cu_height <= state->encoder_control->cfg.max_tt_size[slice_type] && cu_height > 2 * state->encoder_control->cfg.min_qt_size[slice_type];
}
bool allow_split = allow_qt | bh_split | bv_split | th_split | tv_split;
@ -1307,12 +1305,12 @@ uint8_t uvg_write_split_flag(
unsigned left_qt_depth = 0;
unsigned top_qt_depth = 0;
if(left_cu) {
while (((left_cu->split_tree >> left_qt_depth) & 7u) == QT_SPLIT) {
while (((left_cu->split_tree >> (left_qt_depth * 3)) & 7u) == QT_SPLIT) {
left_qt_depth++;
}
}
if(above_cu) {
while (((above_cu->split_tree >> top_qt_depth) & 7u) == QT_SPLIT) {
while (((above_cu->split_tree >> (top_qt_depth * 3)) & 7u) == QT_SPLIT) {
top_qt_depth++;
}
}
@ -1353,7 +1351,9 @@ void uvg_encode_coding_tree(
lcu_coeff_t *coeff,
enum uvg_tree_type tree_type,
const cu_loc_t* const cu_loc,
split_tree_t split_tree)
const cu_loc_t* const chroma_loc,
split_tree_t split_tree,
bool has_chroma)
{
cabac_data_t * const cabac = &state->cabac;
const encoder_control_t * const ctrl = state->encoder_control;
@ -1410,13 +1410,15 @@ void uvg_encode_coding_tree(
NULL);
if (split_flag || border) {
const int half_luma = cu_loc->width / 2;
const split_tree_t new_split_tree = { cur_cu->split_tree, split_tree.current_depth + 1, split_tree.mtt_depth + (split_flag != QT_SPLIT)};
cu_loc_t new_cu_loc[4];
const int splits = uvg_get_split_locs(cu_loc, split_flag, new_cu_loc);
uint8_t separate_chroma = 0;
const int splits = uvg_get_split_locs(cu_loc, split_flag, new_cu_loc, &separate_chroma);
for (int split = 0; split <splits; ++split) {
uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc[split], new_split_tree);
uvg_encode_coding_tree(state, coeff, tree_type,
&new_cu_loc[split], separate_chroma ? cu_loc : &new_cu_loc[split],
new_split_tree, !separate_chroma || split == splits - 1);
}
return;
}
@ -1595,7 +1597,7 @@ void uvg_encode_coding_tree(
// Code (possible) coeffs to bitstream
if (has_coeffs) {
int luma_cbf_ctx = 0;
encode_transform_coeff(state, cu_loc, 0, coeff, tree_type, true, false, &luma_cbf_ctx, cu_loc);
encode_transform_coeff(state, cu_loc, 0, coeff, cur_cu, tree_type, true, false, &luma_cbf_ctx, cu_loc);
}
encode_mts_idx(state, cabac, cur_cu, cu_loc);
@ -1629,29 +1631,28 @@ void uvg_encode_coding_tree(
// Check if last split to write chroma
bool last_split = (i + 1) == split_limit;
encode_transform_coeff(state, &split_loc, 0, coeff, tree_type, last_split, can_skip_last_cbf, &luma_cbf_ctx, cu_loc);
can_skip_last_cbf &= luma_cbf_ctx == 2;
encode_transform_coeff(state, &split_loc, 0, coeff, NULL, tree_type, last_split, can_skip_last_cbf, &luma_cbf_ctx, cu_loc);
}
}
if (tree_type != UVG_CHROMA_T) {
bool lfnst_written = encode_lfnst_idx(state, cabac, cur_cu, is_local_dual_tree ? UVG_LUMA_T : tree_type, COLOR_Y, cu_loc);
encode_lfnst_idx(state, cabac, cur_cu, is_local_dual_tree && state->encoder_control->chroma_format != UVG_CSP_400 ? UVG_LUMA_T : tree_type, COLOR_Y, cu_loc);
}
encode_mts_idx(state, cabac, cur_cu, cu_loc);
// For 4x4 the chroma PU/TU is coded after the last
if (state->encoder_control->chroma_format != UVG_CSP_400 &&
((depth == 4 && x % 8 && y % 8) || tree_type == UVG_CHROMA_T) &&
(has_chroma || tree_type == UVG_CHROMA_T) &&
tree_type != UVG_LUMA_T) {
encode_chroma_intra_cu(cabac, cur_cu, state->encoder_control->cfg.cclm, NULL);
// LFNST constraints must be reset here. Otherwise the left over values will interfere when calculating new constraints
cu_info_t* tmp = uvg_cu_array_at((cu_array_t*)used_array, x, y);
cu_info_t* tmp = (cu_info_t*)cur_cu;
tmp->violates_lfnst_constrained_luma = false;
tmp->violates_lfnst_constrained_chroma = false;
tmp->lfnst_last_scan_pos = false;
encode_transform_coeff(state, cu_loc, 1, coeff, tree_type, true, false, &luma_cbf_ctx, cu_loc);
encode_transform_coeff(state, chroma_loc, 1, coeff, cur_cu, tree_type, true, false, &luma_cbf_ctx, chroma_loc);
// Write LFNST only once for single tree structure
encode_lfnst_idx(state, cabac, tmp, is_local_dual_tree ? UVG_CHROMA_T : tree_type, COLOR_UV, cu_loc);
encode_lfnst_idx(state, cabac, tmp, is_local_dual_tree ? UVG_CHROMA_T : tree_type, COLOR_UV, chroma_loc);
}
}

View file

@ -54,7 +54,9 @@ void uvg_encode_coding_tree(
lcu_coeff_t *coeff,
enum uvg_tree_type tree_type,
const cu_loc_t* const cu_loc,
split_tree_t split_tree);
const cu_loc_t* const chroma_loc,
split_tree_t split_tree,
bool has_chroma);
void uvg_encode_ts_residual(encoder_state_t* const state,
cabac_data_t* const cabac,

View file

@ -1125,7 +1125,7 @@ static void uvg_encoder_state_write_bitstream_picture_header(
WRITE_U(stream, 0, 1, "ph_mvd_l1_zero_flag");
}
if (encoder->cfg.jccr) {
if (encoder->cfg.jccr && encoder->chroma_format != UVG_CSP_400) {
WRITE_U(stream, state->frame->jccr_sign, 1, "ph_joint_cbcr_sign_flag");
}
// END PICTURE HEADER

View file

@ -885,11 +885,11 @@ static void encoder_state_worker_encode_lcu_bitstream(void * opaque)
uvg_cu_loc_ctor(&start, lcu->position.x * LCU_WIDTH, lcu->position.y * LCU_WIDTH, LCU_WIDTH, LCU_WIDTH);
split_tree_t split_tree = { 0, 0, 0 };
uvg_encode_coding_tree(state, lcu->coeff, tree_type, &start, split_tree);
uvg_encode_coding_tree(state, lcu->coeff, tree_type, &start, &start, split_tree, true);
if(tree_type == UVG_LUMA_T && state->encoder_control->chroma_format != UVG_CSP_400) {
uvg_cu_loc_ctor(&start, lcu->position.x * LCU_WIDTH_C, lcu->position.y * LCU_WIDTH_C, LCU_WIDTH, LCU_WIDTH);
uvg_encode_coding_tree(state, lcu->coeff, UVG_CHROMA_T, &start, split_tree);
uvg_encode_coding_tree(state, lcu->coeff, UVG_CHROMA_T, &start, &start, split_tree, true);
}
if (!state->cabac.only_count) {

View file

@ -128,9 +128,9 @@ typedef int16_t coeff_t;
typedef int32_t mv_t;
//#define VERBOSE 1
#define VERBOSE 1
#define UVG_DEBUG_PRINT_CABAC 1
//#define UVG_DEBUG 1
#define UVG_DEBUG 1
//#define UVG_DEBUG_PRINT_YUVIEW_CSV 1
//#define UVG_DEBUG_PRINT_MV_INFO 1

View file

@ -916,7 +916,8 @@ static void mip_predict(
}
int8_t uvg_wide_angle_correction(int_fast8_t mode, const bool is_isp, const int log2_width, const int log2_height)
int8_t uvg_wide_angle_correction(int_fast8_t mode, const bool is_isp, const int log2_width, const int log2_height, const
bool account_for_dc_planar)
{
int8_t pred_mode = mode;
if (!is_isp && log2_width != log2_height) {
@ -927,7 +928,7 @@ int8_t uvg_wide_angle_correction(int_fast8_t mode, const bool is_isp, const int
pred_mode += (66 - 1);
}
else if (log2_height > log2_width && mode > 66 - modeShift[deltaSize]) {
pred_mode -= (66 - 1);
pred_mode -= (66 - 1) + (account_for_dc_planar ? 2 : 0);
}
}
}
@ -958,7 +959,8 @@ static void intra_predict_regular(
int8_t pred_mode = uvg_wide_angle_correction(mode,
is_isp,
log2_width,
log2_height);
log2_height,
false);
const uvg_intra_ref *used_ref = &refs->ref;
if (cfg->intra_smoothing_disabled || color != COLOR_Y || mode == 1 || (width == 4 && height == 4) || multi_ref_index || isp_mode /*ISP_TODO: replace this fake ISP check*/) {
@ -1817,12 +1819,7 @@ void uvg_intra_recon_cu(
cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
}
cu_loc_t chroma_cu_loc;
if(!recon_luma && recon_chroma) {
uvg_cu_loc_ctor(&chroma_cu_loc, cu_loc->x & ~7, cu_loc->y & ~7, width, height);
cu_loc = &chroma_cu_loc;
}
// Reset CBFs because CBFs might have been set
// for depth earlier
if (recon_luma) {
@ -1846,7 +1843,7 @@ void uvg_intra_recon_cu(
}
cu_loc_t split_cu_loc[4];
const int split_count = uvg_get_split_locs(cu_loc, split, split_cu_loc);
const int split_count = uvg_get_split_locs(cu_loc, split, split_cu_loc,NULL);
for (int i = 0; i < split_count; ++i) {
uvg_intra_recon_cu(state, search_data, &split_cu_loc[i], NULL, lcu, tree_type, recon_luma, recon_chroma);
}
@ -1876,7 +1873,7 @@ void uvg_intra_recon_cu(
}
}
const bool has_luma = recon_luma && search_data->pred_cu.intra.isp_mode == ISP_MODE_NO_ISP;
const bool has_chroma = recon_chroma && (cu_loc->x % 8 == 0 && cu_loc->y % 8 == 0);
const bool has_chroma = recon_chroma;
// Process a leaf TU.
if (has_luma) {

View file

@ -169,7 +169,8 @@ int8_t uvg_wide_angle_correction(
int_fast8_t mode,
const bool is_isp,
const int log2_width,
const int log2_height);
const int log2_height,
const bool account_for_dc_planar);
// ISP related defines
#define NUM_ISP_MODES 3

View file

@ -380,18 +380,23 @@ double uvg_cu_rd_cost_luma(
if (cu_loc->width > TR_MAX_WIDTH || cu_loc->height > TR_MAX_WIDTH) {
double sum = 0;
const int half_width = cu_loc->width >> 1;
const int half_height = cu_loc->height >> 1;
cu_loc_t split_cu_loc;
// Recursively process sub-CUs.
enum split_type split;
if (cu_loc->width > TR_MAX_WIDTH && cu_loc->height > TR_MAX_WIDTH) {
split = QT_SPLIT;
}
else if (cu_loc->width > TR_MAX_WIDTH) {
split = BT_VER_SPLIT;
}
else {
split = BT_HOR_SPLIT;
}
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y, half_width, half_height);
sum += uvg_cu_rd_cost_luma(state, &split_cu_loc, pred_cu, lcu, isp_cbf);
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y, half_width, half_height);
sum += uvg_cu_rd_cost_luma(state, &split_cu_loc, pred_cu, lcu, isp_cbf);
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y+ half_height, half_width, half_height);
sum += uvg_cu_rd_cost_luma(state, &split_cu_loc, pred_cu, lcu, isp_cbf);
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y + half_height, half_width, half_height);
sum += uvg_cu_rd_cost_luma(state, &split_cu_loc, pred_cu, lcu, isp_cbf);
cu_loc_t split_cu_loc[4];
const int split_count = uvg_get_split_locs(cu_loc, split, split_cu_loc,NULL);
for (int i = 0; i < split_count; ++i) {
sum += uvg_cu_rd_cost_luma(state, &split_cu_loc[i], pred_cu, lcu, isp_cbf);
}
return sum + tr_tree_bits * state->lambda;
}
@ -478,20 +483,12 @@ double uvg_cu_rd_cost_chroma(
const cu_loc_t * const cu_loc)
{
const vector2d_t lcu_px = { (cu_loc->local_x & ~7) / 2, (cu_loc->local_y & ~7) / 2 };
const int width = cu_loc->chroma_width;
const int height = cu_loc->chroma_height;
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0);
double tr_tree_bits = 0;
double coeff_bits = 0;
if (cu_loc->width == 4 && cu_loc->height == 4 && (cu_loc->x % 8 == 0 || cu_loc->y % 8 == 0)) {
// For MAX_PU_DEPTH calculate chroma for previous depth for the first
// block and return 0 cost for all others.
return 0;
}
const int depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
int u_is_set = pred_cu->joint_cb_cr ? (pred_cu->joint_cb_cr & 2) >> 1 : cbf_is_set(pred_cu->cbf, COLOR_U);
int v_is_set = pred_cu->joint_cb_cr ? (pred_cu->joint_cb_cr & 1) : cbf_is_set(pred_cu->cbf, COLOR_V);
@ -499,18 +496,22 @@ double uvg_cu_rd_cost_chroma(
if (cu_loc->width > TR_MAX_WIDTH || cu_loc->height > TR_MAX_WIDTH) {
double sum = 0;
// Recursively process sub-CUs.
const int half_width = cu_loc->width >> 1;
const int half_height = cu_loc->height >> 1;
cu_loc_t split_cu_loc;
enum split_type split;
if (cu_loc->width > TR_MAX_WIDTH && cu_loc->height > TR_MAX_WIDTH) {
split = QT_SPLIT;
}
else if (cu_loc->width > TR_MAX_WIDTH) {
split = BT_VER_SPLIT;
}
else {
split = BT_HOR_SPLIT;
}
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y, half_width, half_height);
sum += uvg_cu_rd_cost_chroma(state, pred_cu, lcu, &split_cu_loc);
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y, half_width, half_height);
sum += uvg_cu_rd_cost_chroma(state, pred_cu, lcu, &split_cu_loc);
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y + half_height, half_width, half_height);
sum += uvg_cu_rd_cost_chroma(state, pred_cu, lcu, &split_cu_loc);
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y + half_height, half_width, half_height);
sum += uvg_cu_rd_cost_chroma(state, pred_cu, lcu, &split_cu_loc);
cu_loc_t split_cu_loc[4];
const int split_count = uvg_get_split_locs(cu_loc, split, split_cu_loc,NULL);
for (int i = 0; i < split_count; ++i) {
sum += uvg_cu_rd_cost_chroma(state, pred_cu, lcu, &split_cu_loc[i]);
}
return sum + tr_tree_bits * state->lambda;
}
@ -544,10 +545,10 @@ double uvg_cu_rd_cost_chroma(
int index = lcu_px.y * LCU_WIDTH_C + lcu_px.x;
int ssd_u = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index],
LCU_WIDTH_C, LCU_WIDTH_C,
width);
cu_loc->chroma_width);
int ssd_v = uvg_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.v[index],
LCU_WIDTH_C, LCU_WIDTH_C,
width);
cu_loc->chroma_width);
ssd = ssd_u + ssd_v;
}
@ -580,7 +581,9 @@ static double cu_rd_cost_tr_split_accurate(
lcu_t* const lcu,
enum uvg_tree_type tree_type,
uint8_t isp_cbf,
const cu_loc_t* const cu_loc) {
const cu_loc_t* const cu_loc,
const cu_loc_t* const chroma_loc,
bool has_chroma) {
const int width = cu_loc->width;
const int height = cu_loc->height; // TODO: height for non-square blocks
@ -590,8 +593,6 @@ static double cu_rd_cost_tr_split_accurate(
double coeff_bits = 0;
double tr_tree_bits = 0;
const int depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
const int cb_flag_u = tr_cu->joint_cb_cr ? tr_cu->joint_cb_cr >> 1 : cbf_is_set(tr_cu->cbf, COLOR_U);
const int cb_flag_v = tr_cu->joint_cb_cr ? tr_cu->joint_cb_cr & 1 : cbf_is_set(tr_cu->cbf, COLOR_V);
@ -610,22 +611,24 @@ static double cu_rd_cost_tr_split_accurate(
if (cu_loc->width > TR_MAX_WIDTH || cu_loc->height > TR_MAX_WIDTH) {
double sum = 0;
const int half_width = cu_loc->width >> 1;
const int half_height = cu_loc->height >> 1;
cu_loc_t split_cu_loc;
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y, half_width, half_height);
sum += cu_rd_cost_tr_split_accurate(state, pred_cu, lcu, tree_type, isp_cbf, &split_cu_loc);
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y, half_width, half_height);
sum += cu_rd_cost_tr_split_accurate(state, pred_cu, lcu, tree_type, isp_cbf, &split_cu_loc);
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y + half_height, half_width, half_height);
sum += cu_rd_cost_tr_split_accurate(state, pred_cu, lcu, tree_type, isp_cbf, &split_cu_loc);
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y + half_height, half_width, half_height);
sum += cu_rd_cost_tr_split_accurate(state, pred_cu, lcu, tree_type, isp_cbf, &split_cu_loc);
enum split_type split;
if(cu_loc->width > TR_MAX_WIDTH && cu_loc->height > TR_MAX_WIDTH) {
split = QT_SPLIT;
} else if(cu_loc->width > TR_MAX_WIDTH) {
split = BT_VER_SPLIT;
} else {
split = BT_HOR_SPLIT;
}
cu_loc_t split_cu_loc[4];
const int split_count= uvg_get_split_locs(cu_loc, split, split_cu_loc,NULL);
for (int i = 0; i < split_count; ++i) {
sum += cu_rd_cost_tr_split_accurate(state, pred_cu, lcu, tree_type, isp_cbf, &split_cu_loc[i], &split_cu_loc[i], has_chroma);
}
return sum + tr_tree_bits * state->lambda;
}
bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400 && (depth != 4 || (cu_loc->x % 8 && cu_loc->y % 8)) && tree_type != UVG_LUMA_T;
has_chroma = state->encoder_control->chroma_format != UVG_CSP_400 && has_chroma && tree_type != UVG_LUMA_T;
if (!skip_residual_coding && has_chroma) {
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cb[0]), cb_flag_u, tr_tree_bits, "cbf_cb");
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cr[cb_flag_u]), cb_flag_v, tr_tree_bits, "cbf_cr");
@ -712,7 +715,7 @@ static double cu_rd_cost_tr_split_accurate(
}
}
const bool is_local_sep_tree = pred_cu->log2_width + pred_cu->log2_height < 6 && tree_type == UVG_BOTH_T;
const bool is_local_sep_tree = (cu_loc->width != chroma_loc->width || cu_loc->height != chroma_loc->height) && state->encoder_control->chroma_format != UVG_CSP_400;
if(is_local_sep_tree || tree_type == UVG_LUMA_T) {
@ -738,11 +741,11 @@ static double cu_rd_cost_tr_split_accurate(
unsigned chroma_ssd = 0;
if(has_chroma) {
cu_loc_t chroma_loc;
const vector2d_t lcu_px = { (cu_loc->local_x >> 1) & ~3, (cu_loc->local_y >> 1) &~3 };
uvg_cu_loc_ctor(&chroma_loc, lcu_px.x, lcu_px.y, width, height);
const int chroma_width = cu_loc->chroma_width;
const int chroma_height = cu_loc->chroma_height; // TODO: height for non-square blocks
cu_loc_t temp_chroma_loc;
const vector2d_t lcu_px = { chroma_loc->local_x >> 1, chroma_loc->local_y >> 1};
uvg_cu_loc_ctor(&temp_chroma_loc, lcu_px.x, lcu_px.y, chroma_loc->width, chroma_loc->height);
const int chroma_width = chroma_loc->chroma_width;
const int chroma_height = chroma_loc->chroma_height;
int8_t scan_order = SCAN_DIAG;
//const unsigned index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y);
@ -766,8 +769,8 @@ static double cu_rd_cost_tr_split_accurate(
if(chroma_can_use_tr_skip && cb_flag_v) {
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 4, tr_tree_bits, "transform_skip_flag");
}
coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.u, tr_cu, &chroma_loc, COLOR_U, scan_order, tr_cu->tr_skip & 2, COEFF_ORDER_CU);
coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.v, tr_cu, &chroma_loc, COLOR_V, scan_order, tr_cu->tr_skip & 4, COEFF_ORDER_CU);
coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.u, tr_cu, &temp_chroma_loc, COLOR_U, scan_order, tr_cu->tr_skip & 2, COEFF_ORDER_CU);
coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.v, tr_cu, &temp_chroma_loc, COLOR_V, scan_order, tr_cu->tr_skip & 4, COEFF_ORDER_CU);
}
else {
@ -789,7 +792,7 @@ static double cu_rd_cost_tr_split_accurate(
}
const bool is_chroma_tree = is_local_sep_tree || tree_type == UVG_CHROMA_T;
if (uvg_is_lfnst_allowed(state, tr_cu, is_local_sep_tree ? UVG_CHROMA_T : tree_type, is_chroma_tree ? COLOR_UV : COLOR_Y, cu_loc)) {
if (uvg_is_lfnst_allowed(state, tr_cu, is_local_sep_tree ? UVG_CHROMA_T : tree_type, is_chroma_tree ? COLOR_UV : COLOR_Y, is_chroma_tree ? cu_loc : chroma_loc)) {
const int lfnst_idx = is_chroma_tree ? tr_cu->cr_lfnst_idx : tr_cu->lfnst_idx;
CABAC_FBITS_UPDATE(
cabac,
@ -931,10 +934,11 @@ void uvg_sort_keys_by_cost(unit_stats_map_t *__restrict map)
static double search_cu(
encoder_state_t* const state,
const cu_loc_t* const cu_loc,
const cu_loc_t* const chroma_loc,
lcu_t* lcu,
enum uvg_tree_type
tree_type,
const split_tree_t split_tree)
enum uvg_tree_type tree_type,
const split_tree_t split_tree,
bool has_chroma)
{
const int depth = split_tree.current_depth;
const encoder_control_t* ctrl = state->encoder_control;
@ -1091,9 +1095,8 @@ static double search_cu(
double intra_cost = intra_search.cost;
if (intra_cost < cost && tree_type != UVG_LUMA_T) {
int8_t intra_mode = intra_search.pred_cu.intra.mode;
// TODO: This heavily relies to square CUs
if ((cur_cu->log2_height + cur_cu->log2_width >= 6 || (x % 8 && y % 8) || tree_type == UVG_CHROMA_T)
if ((has_chroma || tree_type == UVG_CHROMA_T)
&& state->encoder_control->chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) {
intra_search.pred_cu.joint_cb_cr = 0;
@ -1104,7 +1107,7 @@ static double search_cu(
}
intra_search.pred_cu.intra.mode_chroma = intra_search.pred_cu.intra.mode;
if (ctrl->cfg.rdo >= 2 || ctrl->cfg.jccr || ctrl->cfg.lfnst) {
uvg_search_cu_intra_chroma(state, cu_loc, lcu, &intra_search, tree_type);
uvg_search_cu_intra_chroma(state, chroma_loc, lcu, &intra_search, tree_type, cu_loc->x != chroma_loc->x || cu_loc->y != chroma_loc->y);
if (intra_search.pred_cu.joint_cb_cr == 0) {
intra_search.pred_cu.joint_cb_cr = 4;
@ -1118,13 +1121,13 @@ static double search_cu(
intra_search.pred_cu.intra.mode_chroma = 0;
}
uvg_intra_recon_cu(state,
&intra_search, cu_loc,
&intra_search, chroma_loc,
&intra_search.pred_cu, lcu,
tree_type,
false,
true);
if(tree_type != UVG_CHROMA_T) {
intra_cost += uvg_cu_rd_cost_chroma(state, &intra_search.pred_cu, lcu, cu_loc);
intra_cost += uvg_cu_rd_cost_chroma(state, &intra_search.pred_cu, lcu, chroma_loc);
}
else {
intra_cost = intra_search.cost;
@ -1178,7 +1181,7 @@ static double search_cu(
bool recon_chroma = true;
bool recon_luma = tree_type != UVG_CHROMA_T;
if ((cur_cu->log2_height + cur_cu->log2_width < 6) || state->encoder_control->chroma_format == UVG_CSP_400 || tree_type == UVG_LUMA_T) {
if ((cur_cu->log2_height + cur_cu->log2_width < 6) || !has_chroma || state->encoder_control->chroma_format == UVG_CSP_400 || tree_type == UVG_LUMA_T) {
recon_chroma = false;
}
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
@ -1189,12 +1192,12 @@ static double search_cu(
recon_luma, recon_chroma);
if((cur_cu->log2_height + cur_cu->log2_width < 6 && x % 8 && y % 8 && tree_type != UVG_LUMA_T && state->encoder_control->chroma_format != UVG_CSP_400 )
if((cur_cu->log2_height + cur_cu->log2_width < 6 && has_chroma && tree_type != UVG_LUMA_T && state->encoder_control->chroma_format != UVG_CSP_400 )
|| tree_type == UVG_CHROMA_T) {
intra_search.pred_cu.intra.mode_chroma = cur_cu->intra.mode_chroma;
uvg_intra_recon_cu(state,
&intra_search, cu_loc,
NULL, lcu,
&intra_search, chroma_loc,
cur_cu, lcu,
tree_type,
false,
true);
@ -1279,9 +1282,9 @@ static double search_cu(
// The cabac functions assume chroma locations whereas the search uses luma locations
// for the chroma tree, therefore we need to shift the chroma coordinates here for
// passing to the bit cost calculating functions.
cu_loc_t chroma_loc = *cu_loc;
chroma_loc.y >>= 1;
chroma_loc.x >>= 1;
cu_loc_t separate_tree_chroma_loc = *cu_loc;
separate_tree_chroma_loc.y >>= 1;
separate_tree_chroma_loc.x >>= 1;
if (cur_cu->type == CU_INTRA || cur_cu->type == CU_INTER) {
double bits = 0;
@ -1291,7 +1294,7 @@ static double search_cu(
bits += uvg_mock_encode_coding_unit(
state,
cabac,
tree_type != UVG_CHROMA_T ? cu_loc : &chroma_loc,
tree_type != UVG_CHROMA_T ? cu_loc : &separate_tree_chroma_loc,
lcu,
cur_cu,
tree_type,
@ -1300,7 +1303,7 @@ static double search_cu(
cost = bits * state->lambda;
cost += cu_rd_cost_tr_split_accurate(state, cur_cu, lcu, tree_type, 0, cu_loc);
cost += cu_rd_cost_tr_split_accurate(state, cur_cu, lcu, tree_type, 0, cu_loc, chroma_loc, has_chroma);
//if (ctrl->cfg.zero_coeff_rdo && inter_zero_coeff_cost <= cost) {
// cost = inter_zero_coeff_cost;
@ -1335,7 +1338,7 @@ static double search_cu(
// Recursively split all the way to max search depth.
if (can_split_cu) {
const int split_type = depth == 0 ? QT_SPLIT : TT_HOR_SPLIT;
const int split_type = depth == 2 ? TT_HOR_SPLIT : QT_SPLIT;
const split_tree_t new_split = {
split_tree.split_tree | split_type << (split_tree.current_depth * 3),
split_tree.current_depth + 1,
@ -1378,7 +1381,7 @@ static double search_cu(
&state->search_cabac,
left_cu,
above_cu,
tree_type != UVG_CHROMA_T ? cu_loc : &chroma_loc,
tree_type != UVG_CHROMA_T ? cu_loc : &separate_tree_chroma_loc,
split_tree,
tree_type,
&split_bits);
@ -1393,11 +1396,17 @@ static double search_cu(
// It is ok to interrupt the search as soon as it is known that
// the split costs at least as much as not splitting.
if (cur_cu->type == CU_NOTSET || cbf || state->encoder_control->cfg.cu_split_termination == UVG_CU_SPLIT_TERMINATION_OFF) {
initialize_partial_work_tree(lcu, &split_lcu, cu_loc, tree_type);
cu_loc_t new_cu_loc[4];
const int splits = uvg_get_split_locs(cu_loc, split_type, new_cu_loc);
uint8_t separate_chroma = 0;
const int splits = uvg_get_split_locs(cu_loc, split_type, new_cu_loc, &separate_chroma);
initialize_partial_work_tree(lcu, &split_lcu, cu_loc, tree_type);
for (int split = 0; split < splits; ++split) {
split_cost += search_cu(state, &new_cu_loc[split], &split_lcu, tree_type, new_split);
split_cost += search_cu(state,
&new_cu_loc[split], separate_chroma ? cu_loc : &new_cu_loc[split],
&split_lcu,
tree_type, new_split,
!separate_chroma || split == splits - 1);
// If there is no separate chroma, the block will always have chroma; otherwise only the last block of the split has the chroma
if (split_cost > cost) {
break;
}
@ -1460,7 +1469,7 @@ static double search_cu(
double mode_bits = calc_mode_bits(state, lcu, cur_cu, cu_loc) + bits;
cost += mode_bits * state->lambda;
cost += cu_rd_cost_tr_split_accurate(state, cur_cu, lcu, tree_type, 0, cu_loc);
cost += cu_rd_cost_tr_split_accurate(state, cur_cu, lcu, tree_type, 0, cu_loc, chroma_loc, has_chroma);
memcpy(&post_seach_cabac, &state->search_cabac, sizeof(post_seach_cabac));
memcpy(&state->search_cabac, &temp_cabac, sizeof(temp_cabac));
@ -1724,9 +1733,11 @@ void uvg_search_lcu(encoder_state_t * const state, const int x, const int y, con
double cost = search_cu(
state,
&start,
NULL,
&work_tree,
tree_type,
split_tree);
split_tree,
false);
// Save squared cost for rate control.
if(state->encoder_control->cfg.rc_algorithm == UVG_LAMBDA) {
@ -1743,8 +1754,10 @@ void uvg_search_lcu(encoder_state_t * const state, const int x, const int y, con
if(state->frame->slicetype == UVG_SLICE_I && state->encoder_control->cfg.dual_tree) {
cost = search_cu(
state, &start,
&work_tree,
UVG_CHROMA_T, split_tree);
NULL,
&work_tree, UVG_CHROMA_T,
split_tree,
false);
if (state->encoder_control->cfg.rc_algorithm == UVG_LAMBDA) {
uvg_get_lcu_stats(state, x / LCU_WIDTH, y / LCU_WIDTH)->weight += cost * cost;

View file

@ -590,7 +590,7 @@ static double search_intra_trdepth(
}
cu_loc_t split_cu_loc[4];
const int split_count = uvg_get_split_locs(cu_loc, split, split_cu_loc);
const int split_count = uvg_get_split_locs(cu_loc, split, split_cu_loc,NULL);
for (int i = 0; i < split_count; ++i) {
split_cost += search_intra_trdepth(state, &split_cu_loc[i], nosplit_cost, search_data, lcu, tree_type);
}
@ -1418,10 +1418,11 @@ int8_t uvg_search_intra_chroma_rdo(
encoder_state_t * const state,
int8_t num_modes,
lcu_t *const lcu,
const cu_loc_t* const cu_loc,
intra_search_data_t* chroma_data,
int8_t luma_mode,
enum uvg_tree_type tree_type,
const cu_loc_t* const cu_loc)
bool is_separate)
{
const bool reconstruct_chroma = true;
@ -1446,7 +1447,7 @@ int8_t uvg_search_intra_chroma_rdo(
const int offset = ((cu_loc->local_x & ~7) >> 1) + ((cu_loc->local_y & ~7) >> 1)* LCU_WIDTH_C;
int lfnst_modes_to_check[3];
if((cu_loc->width == 4 || tree_type == UVG_CHROMA_T) && state->encoder_control->cfg.lfnst) {
if((is_separate || tree_type == UVG_CHROMA_T) && state->encoder_control->cfg.lfnst) {
for (int i = 0; i < 3; ++i) {
lfnst_modes_to_check[i] = i;
}
@ -1528,7 +1529,7 @@ int8_t uvg_search_intra_chroma_rdo(
u_resi,
v_resi,
&chorma_ts_out,
tree_type);
is_separate ? UVG_CHROMA_T : tree_type);
// LFNST constraint failed
if(chorma_ts_out.best_u_index == -1 && chorma_ts_out.best_combined_index == -1) {
@ -1590,7 +1591,8 @@ int8_t uvg_search_cu_intra_chroma(
const cu_loc_t* const cu_loc,
lcu_t *lcu,
intra_search_data_t *search_data,
enum uvg_tree_type tree_type)
enum uvg_tree_type tree_type,
bool is_separate)
{
const cu_info_t *cur_pu = &search_data->pred_cu;
@ -1604,9 +1606,7 @@ int8_t uvg_search_cu_intra_chroma(
break;
}
}
cu_loc_t chroma_loc;
uvg_cu_loc_ctor(&chroma_loc, cu_loc->x & ~7, cu_loc->y & ~7, cu_loc->width, cu_loc->height);
// The number of modes to select for slower chroma search. Luma mode
// is always one of the modes, so 2 means the final decision is made
@ -1638,11 +1638,11 @@ int8_t uvg_search_cu_intra_chroma(
num_modes = search_intra_chroma_rough(state, chroma_data, lcu, intra_mode,
tree_type,
&chroma_loc);
cu_loc);
}
if (num_modes > 1 || state->encoder_control->cfg.jccr) {
uvg_search_intra_chroma_rdo(state, num_modes, lcu, chroma_data, intra_mode, tree_type, &chroma_loc);
uvg_search_intra_chroma_rdo(state, num_modes, lcu, cu_loc, chroma_data, intra_mode, tree_type, is_separate);
}
else if(cur_pu->lfnst_idx) {
chroma_data[0].pred_cu.cr_lfnst_idx = cur_pu->lfnst_idx;

View file

@ -55,7 +55,8 @@ int8_t uvg_search_cu_intra_chroma(
const cu_loc_t* const cu_loc,
lcu_t *lcu,
intra_search_data_t* best_cclm,
enum uvg_tree_type tree_type);
enum uvg_tree_type tree_type,
bool is_separate);
void uvg_search_cu_intra(
encoder_state_t * const state,

View file

@ -2586,7 +2586,7 @@ static void mts_dct_generic(
//const int log2_width_minus2 = uvg_g_convert_to_bit[width];
//const int log2_height_minus2 = uvg_g_convert_to_bit[height];
if(tu->lfnst_idx || tu->cr_lfnst_idx) {
if((tu->lfnst_idx && color == COLOR_Y) || (tu->cr_lfnst_idx && color != COLOR_Y)) {
if ((width == 4 && height > 4) || (width > 4 && height == 4))
{
skip_width = width - 4;
@ -2639,7 +2639,7 @@ static void mts_idct_generic(
const int log2_width_minus1 = uvg_g_convert_to_log2[width] - 1;
const int log2_height_minus1 = uvg_g_convert_to_log2[height] - 1;
if (tu->lfnst_idx || tu->cr_lfnst_idx) {
if ((tu->lfnst_idx && color == COLOR_Y) || (tu->cr_lfnst_idx && color != COLOR_Y)) {
if ((width == 4 && height > 4) || (width > 4 && height == 4)) {
skip_width = width - 4;
skip_height = height - 4;

View file

@ -66,8 +66,7 @@ static void uvg_angular_pred_generic(
const int log2_width = uvg_g_convert_to_log2[width];
const int log2_height = uvg_g_convert_to_log2[height];
// Log2_dim 1 is possible with ISP blocks
assert((log2_width >= 1 && log2_width <= 5) && (log2_height >= 1 && log2_height <= 5));
assert((log2_width >= 2 && log2_width <= 5) && log2_height <= 5);
// assert(intra_mode >= 2 && intra_mode <= 66);
static const int16_t modedisp2sampledisp[32] = { 0, 1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 23, 26, 29, 32, 35, 39, 45, 51, 57, 64, 73, 86, 102, 128, 171, 256, 341, 512, 1024 };
@ -249,7 +248,7 @@ static void uvg_angular_pred_generic(
// PDPC
bool PDPC_filter = ((tmp_width >= TR_MIN_WIDTH && tmp_height >= TR_MIN_WIDTH) || channel_type != 0);
bool PDPC_filter = ((tmp_width >= TR_MIN_WIDTH && tmp_height >= TR_MIN_WIDTH) || channel_type != 0) && multi_ref_index == 0;
if (pred_mode > 1 && pred_mode < 67) {
if (mode_disp < 0 || multi_ref_index) { // Cannot be used with MRL.
PDPC_filter = false;

View file

@ -574,7 +574,7 @@ void uvg_chroma_transform_search(
pred_cu->cr_lfnst_idx);
if(pred_cu->cr_lfnst_idx !=0 && !u_has_coeffs && !v_has_coeffs) continue;
if(pred_cu->type == CU_INTRA && transforms[i] != CHROMA_TS && (cu_loc->width == 4 || tree_type == UVG_CHROMA_T)) {
if(pred_cu->type == CU_INTRA && transforms[i] != CHROMA_TS && tree_type == UVG_CHROMA_T) {
bool constraints[2] = { false, false };
uvg_derive_lfnst_constraints(pred_cu, constraints, u_quant_coeff, width, height, NULL, COLOR_U);
if(!is_jccr) {
@ -863,6 +863,8 @@ void uvg_fwd_lfnst(
const uint32_t log2_height = uvg_g_convert_to_log2[height];
int8_t intra_mode = (color == COLOR_Y) ? cur_cu->intra.mode : cur_cu->intra.mode_chroma;
bool mts_skip = cur_cu->tr_idx == MTS_SKIP;
// This check is safe for 8x16 CUs split with TT, since it checks the dimensions of the
// last luma CU, which will be 8x4, i.e., 3 + 2 < 6
bool is_separate_tree = cur_cu->log2_height + cur_cu->log2_width < 6 || tree_type != UVG_BOTH_T;
bool is_cclm_mode = (intra_mode >= 81 && intra_mode <= 83); // CCLM modes are in [81, 83]
@ -879,12 +881,12 @@ void uvg_fwd_lfnst(
if (is_cclm_mode) {
intra_mode = cur_cu->intra.mode;
}
if (is_mip) {
if (is_mip && color == COLOR_Y) {
intra_mode = 0; // Set to planar mode
}
assert(intra_mode < NUM_INTRA_MODE && "LFNST: Invalid intra mode.");
assert(lfnst_index < 3 && "LFNST: Invalid LFNST index. Must be in [0, 2]");
int32_t wide_adjusted_mode = uvg_wide_angle_correction(intra_mode, cur_cu->intra.isp_mode != 0, log2_width, log2_height);
int32_t wide_adjusted_mode = uvg_wide_angle_correction(intra_mode, cur_cu->intra.isp_mode != 0, log2_width, log2_height, true);
// Transform wide angle mode to intra mode
intra_mode = get_lfnst_intra_mode(wide_adjusted_mode);
@ -1010,12 +1012,12 @@ void uvg_inv_lfnst(
if (is_cclm_mode) {
intra_mode = cur_cu->intra.mip_flag ? 0 : cur_cu->intra.mode;
}
if (is_mip) {
if (is_mip && color == COLOR_Y) {
intra_mode = 0; // Set to planar mode
}
assert(intra_mode < NUM_INTRA_MODE && "LFNST: Invalid intra mode.");
assert(lfnst_index < 3 && "LFNST: Invalid LFNST index. Must be in [0, 2]");
int32_t wide_adjusted_mode = uvg_wide_angle_correction(intra_mode, cur_cu->intra.isp_mode != 0, log2_width, log2_height);
int32_t wide_adjusted_mode = uvg_wide_angle_correction(intra_mode, cur_cu->intra.isp_mode != 0, log2_width, log2_height, true);
intra_mode = get_lfnst_intra_mode(wide_adjusted_mode);
@ -1175,6 +1177,7 @@ static void quantize_tr_residual(
cur_pu->log2_width + cur_pu-> log2_height < 6&&
(x % 4 != 0 || y % 4 != 0);
if (handled_elsewhere) {
assert(0);
return;
}
@ -1413,7 +1416,7 @@ void uvg_quantize_lcu_residual(
cu_loc_t split_cu_loc[4];
uint16_t child_cbfs[3];
const int split_count = uvg_get_split_locs(cu_loc, split, split_cu_loc);
const int split_count = uvg_get_split_locs(cu_loc, split, split_cu_loc,NULL);
for (int i = 0; i < split_count; ++i) {
uvg_quantize_lcu_residual(state, luma, chroma, 0, &split_cu_loc[i], NULL, lcu, early_skip, tree_type);
if(i != 0) {