mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 11:24:05 +00:00
[mtt] Fill chroma data for the whole area covered by the local separate tree chroma cu
This commit is contained in:
parent
412dd20f09
commit
ba0d43d846
2
src/cu.h
2
src/cu.h
|
@ -150,7 +150,7 @@ typedef struct
|
|||
uint8_t mts_last_scan_pos : 1;
|
||||
|
||||
uint8_t violates_lfnst_constrained_luma : 1;
|
||||
uint8_t violates_lfnst_constrained_chroma : 1;
|
||||
uint8_t violates_lfnst_constrained_chroma;
|
||||
uint8_t lfnst_last_scan_pos : 1;
|
||||
uint8_t lfnst_idx : 2;
|
||||
uint8_t cr_lfnst_idx : 2;
|
||||
|
|
|
@ -1438,7 +1438,7 @@ void uvg_encode_coding_tree(
|
|||
|
||||
DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_CU_TYPE, abs_x, abs_y, cu_width, cu_height, cur_cu->type-1);
|
||||
|
||||
//fprintf(stderr, "%4d %4d %2d %2d %d\n", x, y, cu_width, cu_height, has_chroma);
|
||||
//fprintf(stderr, "%4d %4d %2d %2d %d %d\n", x, y, cu_width, cu_height, has_chroma, cur_cu->split_tree);
|
||||
|
||||
if (ctrl->cfg.lossless) {
|
||||
cabac->cur_ctx = &cabac->ctx.cu_transquant_bypass;
|
||||
|
@ -1668,11 +1668,11 @@ void uvg_encode_coding_tree(
|
|||
int8_t luma_dir = uvg_get_co_located_luma_mode(tree_type != UVG_CHROMA_T ? chroma_loc : cu_loc, cu_loc, cur_cu, NULL, frame->cu_array, UVG_CHROMA_T);
|
||||
encode_chroma_intra_cu(cabac, cur_cu, state->encoder_control->cfg.cclm && uvg_cclm_is_allowed(state, cu_loc, cur_cu, tree_type), luma_dir,NULL);
|
||||
// LFNST constraints must be reset here. Otherwise the left over values will interfere when calculating new constraints
|
||||
cu_info_t* tmp = (cu_info_t*)cur_cu;
|
||||
cu_info_t* tmp = uvg_cu_array_at((cu_array_t *)used_array, chroma_loc->x, chroma_loc->y);
|
||||
tmp->violates_lfnst_constrained_luma = false;
|
||||
tmp->violates_lfnst_constrained_chroma = false;
|
||||
tmp->lfnst_last_scan_pos = false;
|
||||
encode_transform_coeff(state, chroma_loc, 1, coeff, cur_cu, tree_type, true, false, &luma_cbf_ctx, chroma_loc, chroma_loc);
|
||||
encode_transform_coeff(state, chroma_loc, 1, coeff, NULL, tree_type, true, false, &luma_cbf_ctx, chroma_loc, chroma_loc);
|
||||
// Write LFNST only once for single tree structure
|
||||
encode_lfnst_idx(state, cabac, tmp, is_local_dual_tree ? UVG_CHROMA_T : tree_type, COLOR_UV, chroma_loc);
|
||||
}
|
||||
|
|
15
src/intra.c
15
src/intra.c
|
@ -1884,7 +1884,14 @@ void uvg_intra_recon_cu(
|
|||
bool recon_chroma)
|
||||
{
|
||||
const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
|
||||
const vector2d_t lcu_px = { cu_loc->local_x >> (tree_type == UVG_CHROMA_T), cu_loc->local_y >> (tree_type == UVG_CHROMA_T) };
|
||||
const vector2d_t lcu_px = {
|
||||
cu_loc->local_x >>
|
||||
(tree_type == UVG_CHROMA_T && state->encoder_control->cfg.dual_tree &&
|
||||
state->frame->slicetype == UVG_SLICE_I),
|
||||
cu_loc->local_y >>
|
||||
(tree_type == UVG_CHROMA_T && state->encoder_control->cfg.dual_tree &&
|
||||
state->frame->slicetype == UVG_SLICE_I),
|
||||
};
|
||||
const int8_t width = cu_loc->width;
|
||||
const int8_t height = cu_loc->height;
|
||||
if (cur_cu == NULL) {
|
||||
|
@ -1917,7 +1924,11 @@ void uvg_intra_recon_cu(
|
|||
cu_loc_t split_cu_loc[4];
|
||||
const int split_count = uvg_get_split_locs(cu_loc, split, split_cu_loc,NULL);
|
||||
for (int i = 0; i < split_count; ++i) {
|
||||
uvg_intra_recon_cu(state, search_data, &split_cu_loc[i], NULL, lcu, tree_type, recon_luma, recon_chroma);
|
||||
uvg_intra_recon_cu(
|
||||
state, search_data, &split_cu_loc[i],
|
||||
NULL, lcu,
|
||||
state->encoder_control->cfg.dual_tree && state->frame->slicetype == UVG_SLICE_I ? tree_type : UVG_BOTH_T,
|
||||
recon_luma, recon_chroma);
|
||||
}
|
||||
|
||||
return;
|
||||
|
|
113
src/search.c
113
src/search.c
|
@ -135,6 +135,14 @@ static INLINE void initialize_partial_work_tree(lcu_t* from, lcu_t *to, const cu
|
|||
uvg_pixels_blit(&from->ref.u[offset], &to->ref.u[offset], chroma_loc->chroma_width, chroma_loc->chroma_height, LCU_WIDTH_C, LCU_WIDTH_C);
|
||||
uvg_pixels_blit(&from->ref.v[offset], &to->ref.v[offset], chroma_loc->chroma_width, chroma_loc->chroma_height, LCU_WIDTH_C, LCU_WIDTH_C);
|
||||
}
|
||||
if(chroma_loc->local_y != cu_loc->local_y || chroma_loc->local_x != cu_loc->local_x && tree_type == UVG_BOTH_T) {
|
||||
for (int y = chroma_loc->local_y; y < chroma_loc->local_y + chroma_loc->height; y += SCU_WIDTH) {
|
||||
for (int x = chroma_loc->local_x; x < chroma_loc->local_x + chroma_loc->width; x += SCU_WIDTH) {
|
||||
memset(LCU_GET_CU_AT_PX(to, x, y), 0, sizeof(cu_info_t));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
const int y_start = (cu_loc->local_y >> (tree_type == UVG_CHROMA_T)) - 4;
|
||||
const int x_start = (cu_loc->local_x >> (tree_type == UVG_CHROMA_T)) - 4;
|
||||
|
@ -217,6 +225,8 @@ static INLINE void copy_cu_coeffs(const cu_loc_t *cu_loc, lcu_t *from, lcu_t *to
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
static void lcu_fill_chroma_cu_info(lcu_t* lcu, const cu_loc_t* const cu_loc);
|
||||
/**
|
||||
* Copy all non-reference CU data from next level to current level.
|
||||
*/
|
||||
|
@ -235,7 +245,20 @@ static void work_tree_copy_up(
|
|||
if (chroma_loc && tree_type != UVG_LUMA_T) {
|
||||
copy_cu_pixels(from, to, chroma_loc, UVG_CHROMA_T);
|
||||
copy_cu_coeffs(chroma_loc, from, to, joint, UVG_CHROMA_T);
|
||||
|
||||
for (int y = chroma_loc->local_y; y < chroma_loc->local_y + chroma_loc->height; y += 4) {
|
||||
for (int x = chroma_loc->local_x; x < chroma_loc->local_x + chroma_loc->width; x += 4) {
|
||||
cu_info_t* to_cu = LCU_GET_CU_AT_PX(to, x, y);
|
||||
cu_info_t* from_cu = LCU_GET_CU_AT_PX(from, x, y);
|
||||
to_cu->intra.mode_chroma = from_cu->intra.mode_chroma;
|
||||
to_cu->joint_cb_cr = from_cu->joint_cb_cr;
|
||||
to_cu->cr_lfnst_idx = from_cu->cr_lfnst_idx;
|
||||
cbf_copy(&to_cu->cbf, from_cu->cbf, COLOR_U);
|
||||
cbf_copy(&to_cu->cbf, from_cu->cbf, COLOR_V);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -250,6 +273,8 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in
|
|||
to->split_tree = cu->split_tree;
|
||||
//to->tr_idx = cu->tr_idx;
|
||||
to->lfnst_idx = cu->lfnst_idx;
|
||||
to->cr_lfnst_idx = cu->cr_lfnst_idx;
|
||||
to->joint_cb_cr = cu->joint_cb_cr;
|
||||
to->lfnst_last_scan_pos = cu->lfnst_last_scan_pos;
|
||||
to->violates_lfnst_constrained_luma = cu->violates_lfnst_constrained_luma;
|
||||
to->violates_lfnst_constrained_chroma = cu->violates_lfnst_constrained_chroma;
|
||||
|
@ -274,23 +299,42 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
static void lcu_fill_cbf(lcu_t *lcu, int x_local, unsigned y_local, unsigned width, unsigned height, const cu_info_t *cur_cu)
|
||||
static void lcu_fill_chroma_cu_info(lcu_t *lcu, const cu_loc_t * const cu_loc)
|
||||
{
|
||||
const uint32_t x_mask = ~((MIN(width, TR_MAX_WIDTH))-1);
|
||||
const uint32_t y_mask = ~((MIN(height, TR_MAX_WIDTH))-1);
|
||||
// The bottom right cu will always have the chroma info
|
||||
cu_info_t *bottom_right = LCU_GET_CU_AT_PX(
|
||||
lcu,
|
||||
cu_loc->local_x + cu_loc->width - 1,
|
||||
cu_loc->local_y + cu_loc->height - 1);
|
||||
if(bottom_right->type != CU_INTRA) return;
|
||||
|
||||
|
||||
for(int y = cu_loc->local_y; y < cu_loc->local_y + cu_loc->height; y += 4 ) {
|
||||
for (int x = cu_loc->local_x; x < cu_loc->local_x + cu_loc->width; x += 4) {
|
||||
cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, x, y);
|
||||
cu->intra.mode_chroma = bottom_right->intra.mode_chroma;
|
||||
cu->joint_cb_cr = bottom_right->joint_cb_cr;
|
||||
cu->cr_lfnst_idx = bottom_right->cr_lfnst_idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void lcu_fill_cbf(lcu_t *lcu, int x_local, unsigned y_local, unsigned width, unsigned height, const cu_info_t *cur_cu, enum
|
||||
uvg_tree_type tree_type)
|
||||
{
|
||||
// Set coeff flags in every CU covered by part_mode in this depth.
|
||||
for (uint32_t y = y_local; y < y_local + height; y += SCU_WIDTH) {
|
||||
for (uint32_t x = x_local; x < x_local + width; x += SCU_WIDTH) {
|
||||
for (uint32_t y = 0; y < height; y += SCU_WIDTH) {
|
||||
for (uint32_t x = 0; x < width; x += SCU_WIDTH) {
|
||||
// Use TU top-left CU to propagate coeff flags
|
||||
cu_info_t *cu_from = LCU_GET_CU_AT_PX(lcu, x & x_mask, y & y_mask);
|
||||
cu_info_t *cu_to = LCU_GET_CU_AT_PX(lcu, x, y);
|
||||
cu_info_t *cu_from = LCU_GET_CU_AT_PX(lcu, x_local + (x & ~(TR_MAX_WIDTH - 1)), y_local + (y & ~(TR_MAX_WIDTH - 1)));
|
||||
cu_info_t *cu_to = LCU_GET_CU_AT_PX(lcu, x_local + x, y_local + y);
|
||||
if (cu_from != cu_to) {
|
||||
// Chroma and luma coeff data is needed for deblocking
|
||||
cbf_copy(&cu_to->cbf, cu_from->cbf, COLOR_Y);
|
||||
cbf_copy(&cu_to->cbf, cu_from->cbf, COLOR_U);
|
||||
cbf_copy(&cu_to->cbf, cu_from->cbf, COLOR_V);
|
||||
if(tree_type != UVG_CHROMA_T) cbf_copy(&cu_to->cbf, cu_from->cbf, COLOR_Y);
|
||||
if(tree_type != UVG_LUMA_T) cbf_copy(&cu_to->cbf, cu_from->cbf, COLOR_U);
|
||||
if (tree_type != UVG_LUMA_T)cbf_copy(&cu_to->cbf, cu_from->cbf, COLOR_V);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1090,7 +1134,6 @@ static double search_cu(
|
|||
if (can_use_intra && !skip_intra) {
|
||||
intra_search.pred_cu = *cur_cu;
|
||||
if(tree_type != UVG_CHROMA_T) {
|
||||
intra_search.pred_cu.joint_cb_cr = 4;
|
||||
uvg_search_cu_intra(state, &intra_search, lcu, tree_type, cu_loc);
|
||||
}
|
||||
#ifdef COMPLETE_PRED_MODE_BITS
|
||||
|
@ -1136,11 +1179,6 @@ static double search_cu(
|
|||
intra_search.pred_cu.intra.mode_chroma = intra_mode;
|
||||
if (ctrl->cfg.rdo >= 2 || ctrl->cfg.jccr || ctrl->cfg.lfnst) {
|
||||
uvg_search_cu_intra_chroma(state, chroma_loc, lcu, &intra_search, intra_mode, tree_type, is_separate_tree);
|
||||
|
||||
if (intra_search.pred_cu.joint_cb_cr == 0) {
|
||||
intra_search.pred_cu.joint_cb_cr = 4;
|
||||
}
|
||||
|
||||
}
|
||||
else if (!intra_search.pred_cu.intra.mip_flag) {
|
||||
intra_search.pred_cu.intra.mode_chroma = intra_mode;
|
||||
|
@ -1221,16 +1259,26 @@ static double search_cu(
|
|||
if((!recon_chroma && state->encoder_control->chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T)
|
||||
|| tree_type == UVG_CHROMA_T) {
|
||||
intra_search.pred_cu.intra.mode_chroma = cur_cu->intra.mode_chroma;
|
||||
lcu_fill_chroma_cu_info(
|
||||
lcu,
|
||||
chroma_loc);
|
||||
uvg_intra_recon_cu(state,
|
||||
&intra_search, chroma_loc,
|
||||
cur_cu, lcu,
|
||||
NULL, lcu,
|
||||
UVG_CHROMA_T,
|
||||
false,
|
||||
true);
|
||||
lcu_fill_cbf(
|
||||
lcu,
|
||||
chroma_loc->local_x,
|
||||
chroma_loc->local_y,
|
||||
chroma_loc->width,
|
||||
chroma_loc->height,
|
||||
cur_cu,
|
||||
UVG_CHROMA_T);
|
||||
} else {
|
||||
assert(cur_cu->cr_lfnst_idx == 0 && "If we don't have separate tree chroma lfnst index must be 0");
|
||||
}
|
||||
if (cur_cu->joint_cb_cr == 4) cur_cu->joint_cb_cr = 0;
|
||||
|
||||
// Set isp split cbfs here
|
||||
const int split_type = intra_search.pred_cu.intra.isp_mode;
|
||||
|
@ -1302,7 +1350,7 @@ static double search_cu(
|
|||
}
|
||||
}
|
||||
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
|
||||
lcu_fill_cbf(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
|
||||
lcu_fill_cbf(lcu, x_local, y_local, cu_width, cu_height, cur_cu, UVG_BOTH_T);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1369,21 +1417,27 @@ static double search_cu(
|
|||
uvg_get_possible_splits(state, cu_loc, split_tree, tree_type, can_split);
|
||||
can_split_cu &= can_split[1] || can_split[2] || can_split[3] || can_split[4] || can_split[5];
|
||||
|
||||
// Recursively split all the way to max search depth.
|
||||
if (can_split_cu) {
|
||||
|
||||
// If skip mode was selected for the block, skip further search.
|
||||
// Skip mode means there's no coefficients in the block, so splitting
|
||||
// might not give any better results but takes more time to do.
|
||||
// It is ok to interrupt the search as soon as it is known that
|
||||
// the split costs at least as much as not splitting.
|
||||
int cbf = cbf_is_set_any(cur_cu->cbf);
|
||||
if (can_split_cu && (cur_cu->type == CU_NOTSET || cbf || state->encoder_control->cfg.cu_split_termination == UVG_CU_SPLIT_TERMINATION_OFF)) {
|
||||
lcu_t * split_lcu = MALLOC(lcu_t, 5);
|
||||
enum split_type best_split = 0;
|
||||
double best_split_cost = MAX_DOUBLE;
|
||||
cabac_data_t post_seach_cabac;
|
||||
cabac_data_t best_split_cabac;
|
||||
memcpy(&post_seach_cabac, &state->search_cabac, sizeof(post_seach_cabac));
|
||||
// Recursively split all the way to max search depth.
|
||||
for (int split_type = QT_SPLIT; split_type <= TT_VER_SPLIT; ++split_type) {
|
||||
if (!can_split[split_type]
|
||||
|| (tree_type == UVG_CHROMA_T && split_type == TT_HOR_SPLIT && cu_loc->chroma_height == 8)
|
||||
|| (tree_type == UVG_CHROMA_T && split_type == BT_HOR_SPLIT && cu_loc->chroma_height == 4))
|
||||
continue;
|
||||
double split_cost = 0.0;
|
||||
int cbf = cbf_is_set_any(cur_cu->cbf);
|
||||
memcpy(&state->search_cabac, &pre_search_cabac, sizeof(post_seach_cabac));
|
||||
|
||||
|
||||
|
@ -1438,12 +1492,6 @@ static double search_cu(
|
|||
state->search_cabac.update = 0;
|
||||
split_cost += split_bits * state->lambda;
|
||||
|
||||
// If skip mode was selected for the block, skip further search.
|
||||
// Skip mode means there's no coefficients in the block, so splitting
|
||||
// might not give any better results but takes more time to do.
|
||||
// It is ok to interrupt the search as soon as it is known that
|
||||
// the split costs at least as much as not splitting.
|
||||
if (cur_cu->type == CU_NOTSET || cbf || state->encoder_control->cfg.cu_split_termination == UVG_CU_SPLIT_TERMINATION_OFF) {
|
||||
cu_loc_t new_cu_loc[4];
|
||||
uint8_t separate_chroma = 0;
|
||||
const int splits = uvg_get_split_locs(cu_loc, split_type, new_cu_loc, &separate_chroma);
|
||||
|
@ -1461,9 +1509,7 @@ static double search_cu(
|
|||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
split_cost = INT_MAX;
|
||||
}
|
||||
|
||||
if (split_cost < best_split_cost) {
|
||||
best_split_cost = split_cost;
|
||||
best_split = split_type;
|
||||
|
@ -1492,9 +1538,10 @@ static double search_cu(
|
|||
memcpy(&state->search_cabac, &pre_search_cabac, sizeof(pre_search_cabac));
|
||||
cost = 0;
|
||||
double bits = 0;
|
||||
bool is_implicit = false;
|
||||
uvg_write_split_flag(state, &state->search_cabac,
|
||||
x > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x) - 1, SUB_SCU(y)) : NULL,
|
||||
y > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y) - 1) : NULL, cu_loc, split_tree, tree_type, NULL,
|
||||
y > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y) - 1) : NULL, cu_loc, split_tree, tree_type, &is_implicit,
|
||||
&bits);
|
||||
|
||||
cur_cu->intra = cu_d1->intra;
|
||||
|
|
|
@ -338,7 +338,6 @@ static double search_intra_trdepth(
|
|||
num_transforms = MAX(num_transforms, 2);
|
||||
}
|
||||
pred_cu->intra.mode_chroma = -1;
|
||||
pred_cu->joint_cb_cr = 4;
|
||||
|
||||
const int max_tb_size = TR_MAX_WIDTH;
|
||||
// LFNST search params
|
||||
|
@ -489,7 +488,6 @@ static double search_intra_trdepth(
|
|||
if (reconstruct_chroma) {
|
||||
int8_t luma_mode = pred_cu->intra.mode;
|
||||
pred_cu->intra.mode_chroma = chroma_mode;
|
||||
pred_cu->joint_cb_cr = 4;
|
||||
// TODO: Maybe check the jccr mode here also but holy shit is the interface of search_intra_rdo bad currently
|
||||
uvg_intra_recon_cu(
|
||||
state,
|
||||
|
@ -544,7 +542,6 @@ static double search_intra_trdepth(
|
|||
if(reconstruct_chroma) {
|
||||
int8_t luma_mode = pred_cu->intra.mode;
|
||||
pred_cu->intra.mode_chroma = chroma_mode;
|
||||
pred_cu->joint_cb_cr= 4; // TODO: Maybe check the jccr mode here also but holy shit is the interface of search_intra_rdo bad currently
|
||||
uvg_intra_recon_cu(state,
|
||||
search_data, cu_loc,
|
||||
pred_cu, lcu,
|
||||
|
@ -1623,7 +1620,7 @@ int8_t uvg_search_cu_intra_chroma(
|
|||
chroma_data[i].pred_cu = *cur_pu;
|
||||
chroma_data[i].pred_cu.intra.mode_chroma = num_modes == 1 ? luma_mode : modes[i];
|
||||
chroma_data[i].cost = 0;
|
||||
if(cu_loc->width != 4 && tree_type == UVG_BOTH_T) {
|
||||
if(!is_separate && tree_type == UVG_BOTH_T) {
|
||||
memcpy(chroma_data[i].lfnst_costs, search_data->lfnst_costs, sizeof(double) * 3);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -863,7 +863,7 @@ void uvg_fwd_lfnst(
|
|||
const uint32_t log2_width = uvg_g_convert_to_log2[width];
|
||||
const uint32_t log2_height = uvg_g_convert_to_log2[height];
|
||||
int8_t intra_mode = (color == COLOR_Y) ? cur_cu->intra.mode : cur_cu->intra.mode_chroma;
|
||||
bool mts_skip = cur_cu->tr_idx == MTS_SKIP;
|
||||
bool mts_skip = cur_cu->tr_idx == MTS_SKIP && color == COLOR_Y;
|
||||
// This check is safe for 8x16 cus split with TT, since it is checking the dimensions of the
|
||||
// last luma CU which will be 8x4, i.e., 3 + 2 < 6
|
||||
bool is_separate_tree = cur_cu->log2_height + cur_cu->log2_width < 6 || tree_type != UVG_BOTH_T;
|
||||
|
@ -1005,7 +1005,7 @@ void uvg_inv_lfnst(
|
|||
const uint32_t log2_width = uvg_g_convert_to_log2[width];
|
||||
const uint32_t log2_height = uvg_g_convert_to_log2[height];
|
||||
int8_t intra_mode = (color == COLOR_Y) ? cur_cu->intra.mode : cur_cu->intra.mode_chroma;
|
||||
bool mts_skip = cur_cu->tr_idx == MTS_SKIP;
|
||||
bool mts_skip = cur_cu->tr_idx == MTS_SKIP && color == COLOR_Y;
|
||||
bool is_separate_tree = cur_cu->log2_height + cur_cu->log2_width < 6 || tree_type != UVG_BOTH_T;
|
||||
bool is_cclm_mode = (intra_mode >= 81 && intra_mode <= 83); // CCLM modes are in [81, 83]
|
||||
|
||||
|
|
Loading…
Reference in a new issue