[dual-tree] WIP simplification

This commit is contained in:
Joose Sainio 2023-02-22 14:48:00 +02:00 committed by Marko Viitanen
parent 0f50caa2d0
commit 146e1cb85e
9 changed files with 129 additions and 168 deletions

View file

@ -276,10 +276,10 @@ cu_array_t * uvg_cu_array_copy_ref(cu_array_t* cua)
* \param dst_y y-coordinate of the top edge of the copied area in dst
* \param src source lcu
*/
void uvg_cu_array_copy_from_lcu(cu_array_t* dst, int dst_x, int dst_y, const lcu_t *src, enum uvg_tree_type tree_type)
void uvg_cu_array_copy_from_lcu(cu_array_t* dst, int dst_x, int dst_y, const lcu_t *src)
{
const int dst_stride = dst->stride >> 2;
const int width = tree_type != UVG_CHROMA_T ? LCU_WIDTH : LCU_WIDTH_C;
const int width = LCU_WIDTH;
for (int y = 0; y < width; y += SCU_WIDTH) {
for (int x = 0; x < width; x += SCU_WIDTH) {
const cu_info_t *from_cu = LCU_GET_CU_AT_PX(src, x, y);
@ -373,11 +373,10 @@ int uvg_get_split_locs(
int uvg_get_implicit_split(
const encoder_state_t* const state,
const cu_loc_t* const cu_loc,
uint8_t max_mtt_depth,
bool uses_chroma_coordinates)
uint8_t max_mtt_depth)
{
bool right_ok = (state->tile->frame->width >> uses_chroma_coordinates) >= cu_loc->x + cu_loc->width;
bool bottom_ok = (state->tile->frame->height >> uses_chroma_coordinates) >= cu_loc->y + cu_loc->height;
bool right_ok = (state->tile->frame->width) >= cu_loc->x + cu_loc->width;
bool bottom_ok = (state->tile->frame->height) >= cu_loc->y + cu_loc->height;
if (right_ok && bottom_ok) return NO_SPLIT;
if (right_ok && max_mtt_depth != 0) return BT_HOR_SPLIT;
@ -387,22 +386,21 @@ int uvg_get_implicit_split(
int uvg_get_possible_splits(const encoder_state_t * const state,
const cu_loc_t * const cu_loc, split_tree_t split_tree, enum uvg_tree_type tree_type, bool splits[6], bool
use_chroma_coordinates)
const cu_loc_t * const cu_loc, split_tree_t split_tree, enum uvg_tree_type tree_type, bool splits[6])
{
const unsigned width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width;
const unsigned height = tree_type != UVG_CHROMA_T ? cu_loc->height : cu_loc->chroma_height;
const unsigned width = cu_loc->width;
const unsigned height = cu_loc->height;
const int slice_type = state->frame->is_irap ? (tree_type == UVG_CHROMA_T ? 2 : 0) : 1;
const unsigned max_btd =
state->encoder_control->cfg.max_btt_depth[slice_type] + split_tree.implicit_mtt_depth;
const unsigned max_bt_size = state->encoder_control->cfg.max_bt_size[slice_type] >> (tree_type == UVG_CHROMA_T);
const unsigned min_bt_size = 1 << MIN_SIZE >> (tree_type == UVG_CHROMA_T);
const unsigned max_tt_size = state->encoder_control->cfg.max_tt_size[slice_type] >> (tree_type == UVG_CHROMA_T);
const unsigned min_tt_size = 1 << MIN_SIZE >> (tree_type == UVG_CHROMA_T);
const unsigned max_bt_size = state->encoder_control->cfg.max_bt_size[slice_type];
const unsigned min_bt_size = 1 << MIN_SIZE;
const unsigned max_tt_size = state->encoder_control->cfg.max_tt_size[slice_type];
const unsigned min_tt_size = 1 << MIN_SIZE;
const unsigned min_qt_size = state->encoder_control->cfg.min_qt_size[slice_type];
const enum split_type implicitSplit = uvg_get_implicit_split(state, cu_loc, max_btd, use_chroma_coordinates);
const enum split_type implicitSplit = uvg_get_implicit_split(state, cu_loc, max_btd);
splits[NO_SPLIT] = splits[QT_SPLIT] = splits[BT_HOR_SPLIT] = splits[TT_HOR_SPLIT] = splits[BT_VER_SPLIT] = splits[TT_VER_SPLIT] = true;
bool can_btt = split_tree.mtt_depth < max_btd;
@ -414,7 +412,7 @@ int uvg_get_possible_splits(const encoder_state_t * const state,
if (split_tree.current_depth != 0 && last_split != QT_SPLIT /* && !(width > 64 || height > 64)*/) splits[QT_SPLIT] = false;
if (width <= min_qt_size) splits[QT_SPLIT] = false;
if (tree_type == UVG_CHROMA_T && width <= 4) splits[QT_SPLIT] = false;
if (tree_type == UVG_CHROMA_T && width <= 8) splits[QT_SPLIT] = false;
if (implicitSplit != NO_SPLIT)
{
@ -422,7 +420,7 @@ int uvg_get_possible_splits(const encoder_state_t * const state,
splits[BT_HOR_SPLIT] = implicitSplit == BT_HOR_SPLIT && height <= max_bt_size;
splits[BT_VER_SPLIT] = implicitSplit == BT_VER_SPLIT && width <= max_bt_size;
if (tree_type == UVG_CHROMA_T && width == 4) splits[BT_VER_SPLIT] = false;
if (tree_type == UVG_CHROMA_T && width <= 8) splits[BT_VER_SPLIT] = false;
if (!splits[BT_HOR_SPLIT] && !splits[BT_VER_SPLIT] && !splits[QT_SPLIT]) splits[QT_SPLIT] = true;
return 1;
}
@ -459,23 +457,23 @@ int uvg_get_possible_splits(const encoder_state_t * const state,
// specific check for BT splits
if (height <= min_bt_size) splits[BT_HOR_SPLIT] = false;
if (width > 64 && height <= 64) splits[BT_HOR_SPLIT] = false;
if (tree_type == UVG_CHROMA_T && width * height <= 16) splits[BT_HOR_SPLIT] = false;
if (tree_type == UVG_CHROMA_T && width * height <= 64) splits[BT_HOR_SPLIT] = false;
if (width <= min_bt_size) splits[BT_VER_SPLIT] = false;
if (width <= 64 && height > 64) splits[BT_VER_SPLIT] = false;
if (tree_type == UVG_CHROMA_T && (width * height <= 16 || width == 4)) splits[BT_VER_SPLIT] = false;
if (tree_type == UVG_CHROMA_T && (width * height <= 64 || width <= 8)) splits[BT_VER_SPLIT] = false;
//if (modeType == MODE_TYPE_INTER && width * height == 32) splits[BT_VER_SPLIT] = splits[BT_HOR_SPLIT] = false;
if (height <= 2 * min_tt_size || height > max_tt_size || width > max_tt_size)
splits[TT_HOR_SPLIT] = false;
if (width > 64 || height > 64) splits[TT_HOR_SPLIT] = false;
if (tree_type == UVG_CHROMA_T && width * height <= 16 * 2) splits[TT_HOR_SPLIT] = false;
if (tree_type == UVG_CHROMA_T && width * height <= 64 * 2) splits[TT_HOR_SPLIT] = false;
if (width <= 2 * min_tt_size || width > max_tt_size || height > max_tt_size)
splits[TT_VER_SPLIT] = false;
if (width > 64 || height > 64) splits[TT_VER_SPLIT] = false;
if (tree_type == UVG_CHROMA_T && (width * height <= 16 * 2 || width == 8)) splits[TT_VER_SPLIT] = false;
if (tree_type == UVG_CHROMA_T && (width * height <= 64 * 2 || width <= 16)) splits[TT_VER_SPLIT] = false;
//if (modeType == MODE_TYPE_INTER && width * height == 64) splits[TT_VER_SPLIT] = splits[TT_HOR_SPLIT] = false;
return 0;

View file

@ -203,8 +203,7 @@ int uvg_get_split_locs(
cu_loc_t out[4],
uint8_t* separate_chroma);
int uvg_get_possible_splits(const encoder_state_t* const state,
const cu_loc_t* const cu_loc, split_tree_t split_tree, enum uvg_tree_type tree_type, bool splits[6], bool
use_chroma_coordinates);
const cu_loc_t* const cu_loc, split_tree_t split_tree, enum uvg_tree_type tree_type, bool splits[6]);
#define CU_GET_MV_CAND(cu_info_ptr, reflist) \
@ -383,8 +382,7 @@ typedef struct {
cu_info_t cu[LCU_T_CU_WIDTH * LCU_T_CU_WIDTH + 1];
} lcu_t;
void uvg_cu_array_copy_from_lcu(cu_array_t* dst, int dst_x, int dst_y, const lcu_t *src, enum uvg_tree_type
tree_type);
void uvg_cu_array_copy_from_lcu(cu_array_t* dst, int dst_x, int dst_y, const lcu_t *src);
int uvg_count_available_edge_cus(const cu_loc_t* const cu_loc, const lcu_t* const lcu, bool left);
int uvg_count_chroma_tree_available_edge_cus(int x, int y, int width, int height, const lcu_t* const lcu, bool left);

View file

@ -475,14 +475,12 @@ static void encode_chroma_tu(
cu_info_t* cur_pu,
int8_t* scan_idx,
lcu_coeff_t* coeff,
uint8_t joint_chroma,
enum
uvg_tree_type tree_type)
uint8_t joint_chroma)
{
int width_c = cu_loc->chroma_width;
int height_c = cu_loc->chroma_height;
int x_local = (cu_loc->x >> (tree_type != UVG_CHROMA_T)) % LCU_WIDTH_C;
int y_local = (cu_loc->y >> (tree_type != UVG_CHROMA_T)) % LCU_WIDTH_C;
int x_local = (cu_loc->x >> 1) % LCU_WIDTH_C;
int y_local = (cu_loc->y >> 1) % LCU_WIDTH_C;
cabac_data_t* const cabac = &state->cabac;
*scan_idx = SCAN_DIAG;
if(!joint_chroma){
@ -615,7 +613,7 @@ static void encode_transform_unit(
if ((chroma_cbf_set || joint_chroma) && last_split && chroma_loc) {
//Need to drop const to get lfnst constraints
// Use original dimensions instead of ISP split dimensions
encode_chroma_tu(state, chroma_loc, (cu_info_t*)cur_pu, &scan_idx, coeff, joint_chroma, tree_type);
encode_chroma_tu(state, chroma_loc, (cu_info_t*)cur_pu, &scan_idx, coeff, joint_chroma);
}
}
@ -657,7 +655,7 @@ static void encode_transform_coeff(
cur_tu = uvg_cu_array_at_const(used_array, x, y);
}
const int tr_limit = (TR_MAX_WIDTH >> (tree_type == UVG_CHROMA_T));
const int tr_limit = TR_MAX_WIDTH;
const bool ver_split = cu_loc->height > tr_limit;
const bool hor_split = cu_loc->width > tr_limit;
@ -681,10 +679,6 @@ static void encode_transform_coeff(
cu_loc_t split_cu_loc[4];
const int split_count = uvg_get_split_locs(cu_loc, split, split_cu_loc,NULL);
for (int i = 0; i < split_count; ++i) {
if(tree_type == UVG_CHROMA_T) {
split_cu_loc[i].chroma_width = split_cu_loc[i].width;
split_cu_loc[i].chroma_height = split_cu_loc[i].height;
}
encode_transform_coeff(state, &split_cu_loc[i], only_chroma,
coeff, NULL, tree_type, true, false, luma_cbf_ctx, &split_cu_loc[i], chroma_loc ? &split_cu_loc[i] : NULL);
}
@ -1246,12 +1240,12 @@ uint8_t uvg_write_split_flag(
// Implicit split flag when on the border.
// Exception made in VVC: the flag is not implicit if BT can be used for a
// horizontal or vertical split; in that case this flag tells whether QT or BT is used.
const int cu_width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width;
const int cu_height = tree_type != UVG_CHROMA_T ? cu_loc->height : cu_loc->chroma_height;
const int cu_width = cu_loc->width;
const int cu_height = cu_loc->height;
bool can_split[6];
const bool is_implicit = uvg_get_possible_splits(state, cu_loc, split_tree, tree_type, can_split, tree_type == UVG_CHROMA_T);
const bool is_implicit = uvg_get_possible_splits(state, cu_loc, split_tree, tree_type, can_split);
bool allow_split = can_split[1] || can_split[2] || can_split[3] || can_split[4] || can_split[5];
@ -1354,11 +1348,11 @@ void uvg_encode_coding_tree(
const videoframe_t * const frame = state->tile->frame;
const cu_array_t* used_array = tree_type != UVG_CHROMA_T ? frame->cu_array : frame->chroma_cu_array;
const int cu_width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width;
const int cu_height = tree_type != UVG_CHROMA_T ? cu_loc->height : cu_loc->chroma_height;
const int cu_width = cu_loc->width;
const int cu_height = cu_loc->height;
const int x = tree_type != UVG_CHROMA_T ? cu_loc->x : chroma_loc->x;
const int y = tree_type != UVG_CHROMA_T ? cu_loc->y : chroma_loc->y;
const int x = cu_loc->x;
const int y = cu_loc->y;
const cu_info_t* cur_cu = uvg_cu_array_at_const(used_array, x, y);
@ -1375,11 +1369,11 @@ void uvg_encode_coding_tree(
// Absolute coordinates
uint16_t abs_x = x + (state->tile->offset_x >> (tree_type == UVG_CHROMA_T));
uint16_t abs_y = y + (state->tile->offset_y >> (tree_type == UVG_CHROMA_T));
uint16_t abs_x = x + state->tile->offset_x;
uint16_t abs_y = y + state->tile->offset_y ;
int32_t frame_width = tree_type != UVG_CHROMA_T ? ctrl->in.width : ctrl->in.width / 2;
int32_t frame_height = tree_type != UVG_CHROMA_T ? ctrl->in.height : ctrl->in.height / 2;
int32_t frame_width = ctrl->in.width;
int32_t frame_height = ctrl->in.height;
// Stop if we are outside of the frame
if (abs_x >= frame_width || abs_y >= frame_height) return;
@ -1412,25 +1406,14 @@ void uvg_encode_coding_tree(
0};
cu_loc_t new_cu_loc[4];
cu_loc_t chroma_tree_loc;
uint8_t separate_chroma = 0;
const int splits = uvg_get_split_locs(cu_loc, split_flag, new_cu_loc, &separate_chroma);
separate_chroma |= !has_chroma;
for (int split = 0; split <splits; ++split) {
new_split_tree.part_index = split;
if (tree_type == UVG_CHROMA_T) {
chroma_tree_loc = new_cu_loc[split];
chroma_tree_loc.x >>= 1;
chroma_tree_loc.y >>= 1;
chroma_tree_loc.local_x = chroma_tree_loc.x & LCU_WIDTH_C;
chroma_tree_loc.local_y = chroma_tree_loc.y & LCU_WIDTH_C;
chroma_tree_loc.width >>= 1;
chroma_tree_loc.height >>= 1;
assert(!separate_chroma);
}
uvg_encode_coding_tree(state, coeff, tree_type,
&new_cu_loc[split],
separate_chroma ? chroma_loc :(tree_type == UVG_CHROMA_T ? &chroma_tree_loc : &new_cu_loc[split]),
separate_chroma ? chroma_loc : &new_cu_loc[split],
new_split_tree, !separate_chroma || (split == splits - 1 && has_chroma));
}
return;
@ -1714,8 +1697,8 @@ double uvg_mock_encode_coding_unit(
const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
int x_local = cu_loc->local_x >> (tree_type == UVG_CHROMA_T);
int y_local = cu_loc->local_y >> (tree_type == UVG_CHROMA_T);
int x_local = cu_loc->local_x;
int y_local = cu_loc->local_y;
const bool is_separate_tree = chroma_loc == NULL || cu_loc->height != chroma_loc->height || cu_loc->width != chroma_loc->width;
const cu_info_t* left_cu = NULL, *above_cu = NULL;

View file

@ -890,12 +890,6 @@ static void encoder_state_worker_encode_lcu_bitstream(void * opaque)
if(tree_type == UVG_LUMA_T && state->encoder_control->chroma_format != UVG_CSP_400) {
uvg_cu_loc_ctor(&start, lcu->position.x * LCU_WIDTH, lcu->position.y * LCU_WIDTH, LCU_WIDTH, LCU_WIDTH);
cu_loc_t chroma_tree_loc = start;
chroma_tree_loc.x >>= 1;
chroma_tree_loc.y >>= 1;
chroma_tree_loc.local_x = chroma_tree_loc.x & LCU_WIDTH_C;
chroma_tree_loc.local_y = chroma_tree_loc.y & LCU_WIDTH_C;
chroma_tree_loc.width >>= 1;
chroma_tree_loc.height >>= 1;
uvg_encode_coding_tree(state, lcu->coeff, UVG_CHROMA_T, &start, &chroma_tree_loc, split_tree, true);
}
@ -1175,6 +1169,12 @@ static void encoder_state_encode_leaf(encoder_state_t * const state)
uvg_threadqueue_submit(state->encoder_control->threadqueue, job[0]);
uvg_threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], state->tile->wf_recon_jobs[lcu->id]);
#ifdef UVG_DEBUG_PRINT_CABAC
// Ensures that the CTUs are encoded in raster scan order
if(i >= state->tile->frame->width_in_lcu) {
uvg_threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], state->tile->wf_recon_jobs[(lcu->id / state->tile->frame->width_in_lcu - 1) * state->tile->frame->width_in_lcu]);
}
#endif
}
uvg_threadqueue_submit(state->encoder_control->threadqueue, state->tile->wf_jobs[lcu->id]);
@ -1307,10 +1307,10 @@ static void encoder_state_encode(encoder_state_t * const main_state) {
if(main_state->encoder_control->cfg.dual_tree){
sub_state->tile->frame->chroma_cu_array = uvg_cu_subarray(
main_state->tile->frame->chroma_cu_array,
offset_x / 2,
offset_y / 2,
sub_state->tile->frame->width_in_lcu * LCU_WIDTH_C,
sub_state->tile->frame->height_in_lcu * LCU_WIDTH_C
offset_x,
offset_y,
sub_state->tile->frame->width_in_lcu * LCU_WIDTH,
sub_state->tile->frame->height_in_lcu * LCU_WIDTH
);
}
}
@ -1949,10 +1949,9 @@ static void encoder_state_init_new_frame(encoder_state_t * const state, uvg_pict
if (cfg->dual_tree && state->encoder_control->chroma_format != UVG_CSP_400 && state->frame->is_irap) {
assert(state->tile->frame->chroma_cu_array == NULL);
state->tile->frame->chroma_cu_array = uvg_cu_array_chroma_alloc(
state->tile->frame->width / 2,
state->tile->frame->height / 2,
state->encoder_control->chroma_format
state->tile->frame->chroma_cu_array = uvg_cu_array_alloc(
state->tile->frame->width,
state->tile->frame->height
);
}
// Set pictype.

View file

@ -273,8 +273,6 @@ static bool is_tu_boundary(
color_t color,
enum uvg_tree_type tree_type)
{
x >>= tree_type == UVG_CHROMA_T;
y >>= tree_type == UVG_CHROMA_T;
// if (x & 3 || y & 3) return false;
const cu_info_t *const scu =
uvg_cu_array_at_const(tree_type != UVG_CHROMA_T ? state->tile->frame->cu_array : state->tile->frame->chroma_cu_array, x, y);
@ -1081,8 +1079,8 @@ static void filter_deblock_edge_chroma(encoder_state_t * const state,
// CUs on both sides of the edge
cu_info_t *cu_p;
cu_info_t *cu_q;
int32_t x_coord = x << (tree_type != UVG_CHROMA_T);
int32_t y_coord = y << (tree_type != UVG_CHROMA_T);
int32_t x_coord = x << 1;
int32_t y_coord = y << 1;
cu_array_t* cua = tree_type != UVG_CHROMA_T ? frame->cu_array : frame->chroma_cu_array;
if (dir == EDGE_VER) {
y_coord = (y + min_chroma_length * blk_idx) << (tree_type != UVG_CHROMA_T);

View file

@ -532,9 +532,8 @@ static void predict_cclm(
const lcu_t* const lcu,
uvg_intra_references* chroma_ref,
uvg_pixel* dst,
cclm_parameters_t* cclm_params,
enum uvg_tree_type tree_type
)
cclm_parameters_t* cclm_params
)
{
assert(mode == LM_CHROMA_IDX || mode == LM_CHROMA_L_IDX || mode == LM_CHROMA_T_IDX);
assert(state->encoder_control->cfg.cclm);
@ -552,17 +551,14 @@ static void predict_cclm(
const uvg_pixel *y_rec = lcu->rec.y + x_scu + y_scu * LCU_WIDTH;
const int stride2 = (((state->tile->frame->width + 7) & ~7) + FRAME_PADDING_LUMA);
tree_type = state->encoder_control->cfg.dual_tree && state->frame->slicetype == UVG_SLICE_I ? tree_type : UVG_BOTH_T;
const int ctu_size = tree_type == UVG_CHROMA_T ? LCU_WIDTH_C : LCU_WIDTH;
const int ctu_size = LCU_WIDTH;
if (y0) {
if (y_scu == 0) available_above_right = MIN(MIN(width / 2, (64-x_scu - width * 2) / 4), (state->tile->frame->width - x0 - width* 2) / 4);
for (; available_above_right < width / 2; available_above_right++) {
int x_extension = x_scu + width * 2 + 4 * available_above_right;
x_extension >>= tree_type == UVG_CHROMA_T;
const cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, x_extension, (y_scu >> (tree_type==UVG_CHROMA_T)) - 4);
const cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, x_extension, (y_scu) - 4);
if (x_extension >= ctu_size || pu->type == CU_NOTSET || (pu->type == CU_INTRA && pu->intra.mode_chroma == -1)) break;
}
if(y_scu == 0) {
@ -588,8 +584,7 @@ static void predict_cclm(
if (x_scu == 0) available_left_below = MIN(MIN(height / 2, (64 - y_scu - height * 2) / 4), (state->tile->frame->height - y0 - height * 2) / 4);
for (; available_left_below < height / 2; available_left_below++) {
int y_extension = y_scu + height * 2 + 4 * available_left_below;
y_extension >>= tree_type == UVG_CHROMA_T;
const cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, (x_scu >> (tree_type == UVG_CHROMA_T)) - 4, y_extension);
const cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, (x_scu) - 4, y_extension);
if (y_extension >= ctu_size || pu->type == CU_NOTSET || (pu->type == CU_INTRA && pu->intra.mode_chroma == -1)) break;
if(x_scu == 32 && y_scu == 0 && pu->log2_height == 6 && pu->log2_width == 6 ) break;
}
@ -1617,9 +1612,8 @@ void uvg_intra_predict(
const color_t color,
uvg_pixel* dst,
const intra_search_data_t* data,
const lcu_t* lcu,
enum uvg_tree_type tree_type
)
const lcu_t* lcu
)
{
const int stride = (((state->tile->frame->width + 7) & ~7) + FRAME_PADDING_LUMA);
// TODO: what is this used for?
@ -1652,8 +1646,7 @@ void uvg_intra_predict(
if (width != 1 << data->pred_cu.log2_chroma_width || height != 1 << data->pred_cu.log2_chroma_height || data->cclm_parameters[color == COLOR_U ? 0 : 1].b <= 0) {
predict_cclm(
state, color, width, height, x, y, stride, intra_mode, lcu, refs, dst,
(cclm_parameters_t*)&data->cclm_parameters[color == COLOR_U ? 0 : 1],
tree_type);
(cclm_parameters_t*)&data->cclm_parameters[color == COLOR_U ? 0 : 1]);
}
else {
linear_transform_cclm(&data->cclm_parameters[color == COLOR_U ? 0 : 1], dst, dst, width, height);
@ -1781,8 +1774,7 @@ static void intra_recon_tb_leaf(
const cu_loc_t* cu_loc,
lcu_t *lcu,
color_t color,
const intra_search_data_t* search_data,
enum uvg_tree_type tree_type)
const intra_search_data_t* search_data)
{
const uvg_config *cfg = &state->encoder_control->cfg;
const int shift = color == COLOR_Y ? 0 : 1;
@ -1829,7 +1821,7 @@ static void intra_recon_tb_leaf(
uvg_intra_build_reference(state, pu_loc, cu_loc, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp, extra_refs, multi_ref_index, isp_mode);
uvg_pixel pred[32 * 32];
uvg_intra_predict(state, &refs, cu_loc, pu_loc, color, pred, search_data, lcu, tree_type);
uvg_intra_predict(state, &refs, cu_loc, pu_loc, color, pred, search_data, lcu);
const int index = lcu_px.x + lcu_px.y * lcu_width;
uvg_pixel *block = NULL;
@ -1883,12 +1875,8 @@ void uvg_intra_recon_cu(
{
const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
const vector2d_t lcu_px = {
cu_loc->local_x >>
(tree_type == UVG_CHROMA_T && state->encoder_control->cfg.dual_tree &&
state->frame->slicetype == UVG_SLICE_I),
cu_loc->local_y >>
(tree_type == UVG_CHROMA_T && state->encoder_control->cfg.dual_tree &&
state->frame->slicetype == UVG_SLICE_I),
cu_loc->local_x,
cu_loc->local_y,
};
const int8_t width = cu_loc->width;
const int8_t height = cu_loc->height;
@ -1945,7 +1933,7 @@ void uvg_intra_recon_cu(
uvg_get_isp_split_loc(&pu_loc, cu_loc->x, cu_loc->y, width, height, i, split_type, false);
cur_cu->intra.isp_index = 0;
if(tu_loc.x % 4 == 0) {
intra_recon_tb_leaf(state, &pu_loc, cu_loc, lcu, COLOR_Y, search_data, tree_type);
intra_recon_tb_leaf(state, &pu_loc, cu_loc, lcu, COLOR_Y, search_data);
}
uvg_quantize_lcu_residual(state, true, false, false,
&tu_loc, cur_cu, lcu,
@ -1959,11 +1947,11 @@ void uvg_intra_recon_cu(
// Process a leaf TU.
if (has_luma) {
intra_recon_tb_leaf(state, cu_loc, cu_loc, lcu, COLOR_Y, search_data, tree_type);
intra_recon_tb_leaf(state, cu_loc, cu_loc, lcu, COLOR_Y, search_data);
}
if (has_chroma) {
intra_recon_tb_leaf(state, cu_loc, cu_loc, lcu, COLOR_U, search_data, tree_type);
intra_recon_tb_leaf(state, cu_loc, cu_loc, lcu, COLOR_V, search_data, tree_type);
intra_recon_tb_leaf(state, cu_loc, cu_loc, lcu, COLOR_U, search_data);
intra_recon_tb_leaf(state, cu_loc, cu_loc, lcu, COLOR_V, search_data);
}
// TODO: not necessary to call if only luma and ISP is on
@ -2056,7 +2044,7 @@ double uvg_recon_and_estimate_cost_isp(encoder_state_t* const state,
uvg_get_isp_split_loc(&pu_loc, cu_loc->x, cu_loc->y, width, height, i, split_type, false);
search_data->pred_cu.intra.isp_index = 0;
if (tu_loc.x % 4 == 0) {
intra_recon_tb_leaf(state, &pu_loc, cu_loc, lcu, COLOR_Y, search_data, UVG_LUMA_T);
intra_recon_tb_leaf(state, &pu_loc, cu_loc, lcu, COLOR_Y, search_data);
}
uvg_quantize_lcu_residual(state, true, false, false,
&tu_loc, &search_data->pred_cu, lcu,

View file

@ -138,9 +138,8 @@ void uvg_intra_predict(
const color_t color,
uvg_pixel* dst,
const intra_search_data_t* data,
const lcu_t* lcu,
enum uvg_tree_type tree_type
);
const lcu_t* lcu
);
void uvg_intra_recon_cu(
encoder_state_t* const state,

View file

@ -67,10 +67,10 @@ static const int INTRA_THRESHOLD = 8;
static INLINE void copy_cu_info(lcu_t *from, lcu_t *to, const cu_loc_t* const cu_loc, enum uvg_tree_type
tree_type)
{
const int y_limit = (cu_loc->local_y + cu_loc->height) >> (tree_type == UVG_CHROMA_T);
const int x_limit = (cu_loc->local_x + cu_loc->width) >> (tree_type == UVG_CHROMA_T);
for (int y = cu_loc->local_y >> (tree_type == UVG_CHROMA_T); y < y_limit; y += SCU_WIDTH) {
for (int x = cu_loc->local_x >> (tree_type == UVG_CHROMA_T); x < x_limit; x += SCU_WIDTH) {
const int y_limit = (cu_loc->local_y + cu_loc->height);
const int x_limit = (cu_loc->local_x + cu_loc->width);
for (int y = cu_loc->local_y ; y < y_limit; y += SCU_WIDTH) {
for (int x = cu_loc->local_x ; x < x_limit; x += SCU_WIDTH) {
*LCU_GET_CU_AT_PX(to, x, y) = *LCU_GET_CU_AT_PX(from, x, y);
}
}
@ -86,8 +86,8 @@ static INLINE void initialize_partial_work_tree(
chroma_loc,
const enum uvg_tree_type tree_type) {
const int y_limit = MIN(LCU_WIDTH, state->tile->frame->height - cu_loc->y / 64 * 64) >> (tree_type == UVG_CHROMA_T);
const int x_limit = MIN(LCU_WIDTH, state->tile->frame->width - cu_loc->x / 64 * 64) >> (tree_type == UVG_CHROMA_T);
const int y_limit = MIN(LCU_WIDTH, state->tile->frame->height - cu_loc->y / 64 * 64);
const int x_limit = MIN(LCU_WIDTH, state->tile->frame->width - cu_loc->x / 64 * 64);
if (cu_loc->local_x == 0) {
to->left_ref = from->left_ref;
@ -150,8 +150,8 @@ static INLINE void initialize_partial_work_tree(
}
const int y_start = (cu_loc->local_y >> (tree_type == UVG_CHROMA_T)) - 4;
const int x_start = (cu_loc->local_x >> (tree_type == UVG_CHROMA_T)) - 4;
const int y_start = (cu_loc->local_y) - 4;
const int x_start = (cu_loc->local_x) - 4;
for (int y = y_start; y < y_limit; y += SCU_WIDTH) {
*LCU_GET_CU_AT_PX(to, x_start, y) = *LCU_GET_CU_AT_PX(from, x_start, y);
}
@ -159,15 +159,15 @@ static INLINE void initialize_partial_work_tree(
*LCU_GET_CU_AT_PX(to, x, y_start) = *LCU_GET_CU_AT_PX(from, x, y_start);
}
for (int y = cu_loc->local_y >> (tree_type == UVG_CHROMA_T); y < y_limit; y += SCU_WIDTH) {
for (int x = cu_loc->local_x >> (tree_type == UVG_CHROMA_T); x < x_limit; x += SCU_WIDTH) {
for (int y = cu_loc->local_y; y < y_limit; y += SCU_WIDTH) {
for (int x = cu_loc->local_x ; x < x_limit; x += SCU_WIDTH) {
memset(LCU_GET_CU_AT_PX(to, x, y), 0, sizeof(cu_info_t));
}
}
if(chroma_loc->local_y != cu_loc->local_y || chroma_loc->local_x != cu_loc->local_x && tree_type == UVG_BOTH_T) {
const int y_start = (chroma_loc->local_y >> (tree_type == UVG_CHROMA_T)) - 4;
const int x_start = (chroma_loc->local_x >> (tree_type == UVG_CHROMA_T)) - 4;
const int y_start = (chroma_loc->local_y) - 4;
const int x_start = (chroma_loc->local_x) - 4;
for (int y = y_start; y < y_limit; y += SCU_WIDTH) {
*LCU_GET_CU_AT_PX(to, x_start, y) = *LCU_GET_CU_AT_PX(from, x_start, y);
}
@ -190,24 +190,24 @@ static INLINE void initialize_partial_work_tree(
to->top_ref = from->top_ref;
*LCU_GET_TOP_RIGHT_CU(to) = *LCU_GET_TOP_RIGHT_CU(from);
}
if (x_limit != LCU_WIDTH >> (tree_type == UVG_CHROMA_T)) {
if (x_limit != LCU_WIDTH) {
for (int y = y_start; y < y_limit; y += SCU_WIDTH) {
memset(LCU_GET_CU_AT_PX(to, x_limit, y), 0, sizeof(cu_info_t));
}
}
if (y_limit != LCU_WIDTH >> (tree_type == UVG_CHROMA_T)) {
if (y_limit != LCU_WIDTH) {
for (int x = x_start; x < x_limit; x += SCU_WIDTH) {
memset(LCU_GET_CU_AT_PX(to, x, y_limit), 0, sizeof(cu_info_t));
}
}
}
else {
if (x_limit != LCU_WIDTH >> (tree_type == UVG_CHROMA_T)) {
if (x_limit != LCU_WIDTH) {
for (int y = y_start; y < y_limit; y += SCU_WIDTH) {
memset(LCU_GET_CU_AT_PX(to, x_limit, y), 0, sizeof(cu_info_t));
}
}
if (y_limit != LCU_WIDTH >> (tree_type == UVG_CHROMA_T)) {
if (y_limit != LCU_WIDTH) {
for (int x = x_start; x < x_limit; x += SCU_WIDTH) {
memset(LCU_GET_CU_AT_PX(to, x, y_limit), 0, sizeof(cu_info_t));
}
@ -222,10 +222,10 @@ static INLINE void copy_cu_pixels(
enum uvg_tree_type
tree_type)
{
const int x_local = cu_loc->local_x >> (tree_type == UVG_CHROMA_T);
const int y_local = cu_loc->local_y >> (tree_type == UVG_CHROMA_T);
const int x_local = cu_loc->local_x;
const int y_local = cu_loc->local_y;
const int luma_index = x_local + y_local * LCU_WIDTH;
const int chroma_index = tree_type == UVG_CHROMA_T ? x_local + y_local * LCU_WIDTH_C : (x_local / 2) + (y_local / 2) * LCU_WIDTH_C;
const int chroma_index = (x_local / 2) + (y_local / 2) * LCU_WIDTH_C;
if(tree_type != UVG_CHROMA_T) {
uvg_pixels_blit(&from->rec.y[luma_index], &to->rec.y[luma_index],
@ -372,11 +372,11 @@ static void lcu_fill_chroma_cu_info(lcu_t *lcu, const cu_loc_t * const cu_loc)
static void lcu_fill_chroma_cbfs(lcu_t *lcu, const cu_loc_t * const chroma_loc, enum uvg_tree_type tree_type)
{
int8_t height = tree_type == UVG_CHROMA_T ? chroma_loc->chroma_height : chroma_loc->height;
int8_t width = tree_type == UVG_CHROMA_T ? chroma_loc->chroma_width : chroma_loc->width;
int8_t height = chroma_loc->height;
int8_t width = chroma_loc->width;
uint32_t x_local = chroma_loc->local_x;
uint32_t y_local = chroma_loc->local_y;
const int offset = ~((TR_MAX_WIDTH >> (tree_type == UVG_CHROMA_T)) - 1);
const int offset = ~((TR_MAX_WIDTH) - 1);
// Set coeff flags in every CU covered by part_mode in this depth.
for (uint32_t y = 0; y < height; y += SCU_WIDTH) {
for (uint32_t x = 0; x < width; x += SCU_WIDTH) {
@ -728,7 +728,7 @@ static double cu_rd_cost_tr_split_accurate(
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0);
// tr_cu is used for TU parameters.
cu_info_t* const tr_cu = LCU_GET_CU_AT_PX(lcu, cu_loc->local_x >> (tree_type == UVG_CHROMA_T), cu_loc->local_y >> (tree_type == UVG_CHROMA_T));
cu_info_t* const tr_cu = LCU_GET_CU_AT_PX(lcu, cu_loc->local_x, cu_loc->local_y);
double coeff_bits = 0;
double tr_tree_bits = 0;
@ -1132,28 +1132,28 @@ static void mark_deblocking(const cu_loc_t* const cu_loc, const cu_loc_t* const
else {
if (chroma_loc->x) {
for (int x = x_local; x < x_local + chroma_loc->chroma_width; x += TR_MAX_WIDTH / 2) {
for (int y = y_local; y < y_local + chroma_loc->chroma_height; y += SCU_WIDTH) {
for (int x = x_local; x < x_local + chroma_loc->width; x += TR_MAX_WIDTH) {
for (int y = y_local; y < y_local + chroma_loc->height; y += SCU_WIDTH) {
LCU_GET_CU_AT_PX(lcu, x, y)->chroma_deblocking |= EDGE_VER;
}
}
}
else if(chroma_loc->width == 64) {
for (int y = y_local; y < y_local + chroma_loc->chroma_height; y += SCU_WIDTH) {
LCU_GET_CU_AT_PX(lcu, TR_MAX_WIDTH / 2, y)->chroma_deblocking |= EDGE_VER;
for (int y = y_local; y < y_local + chroma_loc->height; y += SCU_WIDTH) {
LCU_GET_CU_AT_PX(lcu, TR_MAX_WIDTH, y)->chroma_deblocking |= EDGE_VER;
}
}
if(chroma_loc->y) {
for (int y = y_local; y < y_local + chroma_loc->chroma_height; y += TR_MAX_WIDTH / 2) {
for (int x = x_local; x < x_local + chroma_loc->chroma_width; x += SCU_WIDTH) {
for (int y = y_local; y < y_local + chroma_loc->height; y += TR_MAX_WIDTH) {
for (int x = x_local; x < x_local + chroma_loc->width; x += SCU_WIDTH) {
LCU_GET_CU_AT_PX(lcu, x, y)->chroma_deblocking |= EDGE_HOR;
}
}
}
else if (chroma_loc->height == 64) {
for (int x = x_local; x < x_local + chroma_loc->chroma_width; x += SCU_WIDTH) {
LCU_GET_CU_AT_PX(lcu, x, TR_MAX_WIDTH / 2)->chroma_deblocking |= EDGE_HOR;
for (int x = x_local; x < x_local + chroma_loc->width; x += SCU_WIDTH) {
LCU_GET_CU_AT_PX(lcu, x, TR_MAX_WIDTH)->chroma_deblocking |= EDGE_HOR;
}
}
}
@ -1218,8 +1218,8 @@ static double search_cu(
const int depth = split_tree.current_depth;
const encoder_control_t* ctrl = state->encoder_control;
const videoframe_t * const frame = state->tile->frame;
const int cu_width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width;
const int cu_height = tree_type != UVG_CHROMA_T ? cu_loc->height : cu_loc->chroma_height;
const int cu_width = cu_loc->width;
const int cu_height = cu_loc->height;
const int x = cu_loc->x;
const int y = cu_loc->y;
const int luma_width = cu_loc->width;
@ -1251,8 +1251,8 @@ static double search_cu(
int32_t max;
} pu_depth_inter, pu_depth_intra;
int x_local = SUB_SCU(x) >> (tree_type == UVG_CHROMA_T);
int y_local = SUB_SCU(y) >> (tree_type == UVG_CHROMA_T);
int x_local = SUB_SCU(x);
int y_local = SUB_SCU(y);
int32_t frame_width = frame->width;
int32_t frame_height = frame->height;
@ -1611,7 +1611,7 @@ static double search_cu(
bits += uvg_mock_encode_coding_unit(
state,
cabac,
tree_type != UVG_CHROMA_T ? cu_loc : &separate_tree_chroma_loc,
cu_loc,
is_separate_tree && !has_chroma ? NULL : chroma_loc,
lcu,
cur_cu,
@ -1691,7 +1691,7 @@ static double search_cu(
}
bool can_split[6];
bool is_implicit = uvg_get_possible_splits(state, cu_loc, split_tree, tree_type, can_split, false);
bool is_implicit = uvg_get_possible_splits(state, cu_loc, split_tree, tree_type, can_split);
const int slice_type = state->frame->is_irap ? (tree_type == UVG_CHROMA_T ? 2 : 0) : 1;
const int max_btd = state->encoder_control->cfg.max_btt_depth[slice_type];
@ -1736,7 +1736,8 @@ static double search_cu(
for (int split_type = QT_SPLIT; split_type <= TT_VER_SPLIT; ++split_type) {
if (!can_split[split_type]
|| (tree_type == UVG_CHROMA_T && split_type == TT_HOR_SPLIT && cu_loc->chroma_height == 8)
|| (tree_type == UVG_CHROMA_T && split_type == BT_HOR_SPLIT && cu_loc->chroma_height == 4))
|| (tree_type == UVG_CHROMA_T && split_type == BT_HOR_SPLIT && cu_loc->chroma_height == 4)
)
continue;
if (completely_inside && check_for_early_termission(
@ -1788,7 +1789,7 @@ static double search_cu(
&state->search_cabac,
left_cu,
above_cu,
tree_type != UVG_CHROMA_T ? cu_loc : &separate_tree_chroma_loc,
cu_loc,
count_tree,
tree_type,
&is_implicit,
@ -1834,8 +1835,8 @@ static double search_cu(
if (split_type == QT_SPLIT && completely_inside) {
const cu_info_t * const t = LCU_GET_CU_AT_PX(
&split_lcu[0],
new_cu_loc[split].local_x >> (tree_type == UVG_CHROMA_T),
new_cu_loc[split].local_y >> (tree_type == UVG_CHROMA_T));
new_cu_loc[split].local_x,
new_cu_loc[split].local_y);
stop_to_qt |= GET_SPLITDATA(t, depth + 1) == QT_SPLIT;
}
@ -2113,10 +2114,9 @@ static void copy_lcu_to_cu_data(const encoder_state_t * const state, int x_px, i
// Copy non-reference CUs to picture.
uvg_cu_array_copy_from_lcu(
tree_type != UVG_CHROMA_T ? state->tile->frame->cu_array : state->tile->frame->chroma_cu_array,
tree_type != UVG_CHROMA_T ? x_px : x_px / 2,
tree_type != UVG_CHROMA_T ? y_px : y_px / 2,
lcu,
tree_type);
x_px,
y_px,
lcu);
// Copy pixels to picture.
{

View file

@ -678,7 +678,7 @@ static int search_intra_chroma_rough(
for (int i = 0; i < modes_count; ++i) {
const int8_t mode_chroma = chroma_data[i].pred_cu.intra.mode_chroma;
if (mode_chroma == luma_mode || mode_chroma == 0 || mode_chroma >= 81) continue;
uvg_intra_predict(state, &refs_u, cu_loc, &loc, COLOR_U, pred, &chroma_data[i], lcu, tree_type);
uvg_intra_predict(state, &refs_u, cu_loc, &loc, COLOR_U, pred, &chroma_data[i], lcu);
//costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width);
switch (width) {
case 4: chroma_data[i].cost += uvg_satd_4x4(pred, orig_block);
@ -697,7 +697,7 @@ static int search_intra_chroma_rough(
for (int i = 0; i < modes_count; ++i) {
const int8_t mode_chroma = chroma_data[i].pred_cu.intra.mode_chroma;
if (mode_chroma == luma_mode || mode_chroma == 0 || mode_chroma >= 81) continue;
uvg_intra_predict(state, &refs_v, cu_loc, &loc, COLOR_V, pred, &chroma_data[i], lcu, tree_type);
uvg_intra_predict(state, &refs_v, cu_loc, &loc, COLOR_V, pred, &chroma_data[i], lcu);
//costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width);
switch (width) {
case 4: chroma_data[i].cost += uvg_satd_4x4(pred, orig_block);
@ -1044,9 +1044,9 @@ static uint8_t search_intra_rough(
int offset = 1 << state->encoder_control->cfg.intra_rough_search_levels;
search_proxy.pred_cu.intra.mode = 0;
uvg_intra_predict(state, refs, cu_loc, cu_loc, COLOR_Y, preds[0], &search_proxy, NULL, UVG_LUMA_T);
uvg_intra_predict(state, refs, cu_loc, cu_loc, COLOR_Y, preds[0], &search_proxy, NULL);
search_proxy.pred_cu.intra.mode = 1;
uvg_intra_predict(state, refs, cu_loc, cu_loc, COLOR_Y, preds[1], &search_proxy, NULL, UVG_LUMA_T);
uvg_intra_predict(state, refs, cu_loc, cu_loc, COLOR_Y, preds[1], &search_proxy, NULL);
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, height, costs);
mode_checked[0] = true;
mode_checked[1] = true;
@ -1096,7 +1096,7 @@ static uint8_t search_intra_rough(
for (int i = 0; i < PARALLEL_BLKS; ++i) {
if (mode + i * offset <= 66) {
search_proxy.pred_cu.intra.mode = mode + i*offset;
uvg_intra_predict(state, refs, cu_loc, cu_loc, COLOR_Y, preds[i], &search_proxy, NULL, UVG_LUMA_T);
uvg_intra_predict(state, refs, cu_loc, cu_loc, COLOR_Y, preds[i], &search_proxy, NULL);
}
}
@ -1168,7 +1168,7 @@ static uint8_t search_intra_rough(
for (int block = 0; block < PARALLEL_BLKS; ++block) {
search_proxy.pred_cu.intra.mode = modes_to_check[block + i];
uvg_intra_predict(state, refs, cu_loc, cu_loc, COLOR_Y, preds[block], &search_proxy, NULL, UVG_LUMA_T);
uvg_intra_predict(state, refs, cu_loc, cu_loc, COLOR_Y, preds[block], &search_proxy, NULL);
}
@ -1259,7 +1259,7 @@ static void get_rough_cost_for_2n_modes(
double bits[PARALLEL_BLKS] = { 0 };
for(int mode = 0; mode < num_modes; mode += PARALLEL_BLKS) {
for (int i = 0; i < PARALLEL_BLKS; ++i) {
uvg_intra_predict(state, &refs[search_data[mode + i].pred_cu.intra.multi_ref_idx], cu_loc, cu_loc, COLOR_Y, preds[i], &search_data[mode + i], NULL, UVG_LUMA_T);
uvg_intra_predict(state, &refs[search_data[mode + i].pred_cu.intra.multi_ref_idx], cu_loc, cu_loc, COLOR_Y, preds[i], &search_data[mode + i], NULL);
}
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, height, costs_out);
@ -1505,8 +1505,7 @@ int8_t uvg_search_intra_chroma_rdo(
COLOR_U,
u_pred,
&chroma_data[mode_i],
lcu,
tree_type);
lcu);
uvg_intra_predict(
state,
&refs[COLOR_V - 1],
@ -1515,8 +1514,7 @@ int8_t uvg_search_intra_chroma_rdo(
COLOR_V,
v_pred,
&chroma_data[mode_i],
lcu,
tree_type);
lcu);
uvg_generate_residual(
&lcu->ref.u[offset],
u_pred,