[mtt] Fix isp for MTT

This commit is contained in:
Joose Sainio 2022-12-06 11:23:30 +02:00 committed by Marko Viitanen
parent b988c60dd1
commit 2da1a34ff3
13 changed files with 160 additions and 90 deletions

View file

@ -111,7 +111,7 @@ bool uvg_is_lfnst_allowed(
const cu_info_t* const pred_cu,
enum uvg_tree_type tree_type,
const color_t color,
const cu_loc_t* const cu_loc)
const cu_loc_t* const cu_loc, const lcu_t* const lcu)
{
if (state->encoder_control->cfg.lfnst && pred_cu->type == CU_INTRA && PU_IS_TU(pred_cu)) {
const int isp_mode = pred_cu->intra.isp_mode;
@ -121,22 +121,51 @@ bool uvg_is_lfnst_allowed(
bool is_sep_tree = tree_type != UVG_BOTH_T;
bool mip_flag = pred_cu->type == CU_INTRA && color == COLOR_Y ? pred_cu->intra.mip_flag : false;
if ((isp_mode && !uvg_can_use_isp_with_lfnst(cu_width, cu_height, isp_mode, tree_type)) ||
(pred_cu->type == CU_INTRA && mip_flag && !can_use_lfnst_with_mip) ||
if ((isp_mode && !uvg_can_use_isp_with_lfnst(cu_width, cu_height, isp_mode, tree_type) && color == COLOR_Y) ||
(pred_cu->type == CU_INTRA && mip_flag && !can_use_lfnst_with_mip && color == COLOR_Y) ||
(is_sep_tree && MIN(cu_width, cu_height) < 4) ||
(cu_width > (TR_MAX_WIDTH >> (tree_type == UVG_CHROMA_T)) || cu_height > (TR_MAX_WIDTH >> (tree_type == UVG_CHROMA_T)))) {
return false;
}
bool luma_flag = tree_type != UVG_CHROMA_T;
bool chroma_flag = tree_type != UVG_LUMA_T;
bool non_zero_coeff_non_ts_corner_8x8 = (luma_flag && pred_cu->violates_lfnst_constrained_luma) || (chroma_flag && pred_cu->violates_lfnst_constrained_chroma);
bool non_zero_coeff_non_ts_corner_8x8 = false;
bool last_scan_pos = false;
bool is_tr_skip = false;
int split_num = color == COLOR_Y && isp_mode ? uvg_get_isp_split_num(cu_width, cu_height, isp_mode, false) : 0;
const videoframe_t* const frame = state->tile->frame;
if (split_num) {
// Constraints for ISP split blocks
for (int i = 0; i < split_num; ++i) {
cu_loc_t split_loc;
uvg_get_isp_split_loc(&split_loc, cu_loc->x, cu_loc->y, cu_width, cu_height, i, isp_mode, false);
int local_split_x = split_loc.x;
int local_split_y = split_loc.y;
uvg_get_isp_cu_arr_coords(&local_split_x, &local_split_y);
const cu_info_t* split_cu = lcu ? LCU_GET_CU_AT_PX(lcu, local_split_x, local_split_y) :
uvg_cu_array_at_const(frame->cu_array, local_split_x, local_split_y);
//if (cbf_is_set(split_cu->cbf, depth, COLOR_Y)) {
// ISP_TODO: remove this if clause altogether if it seems it is not needed
if (true) {
non_zero_coeff_non_ts_corner_8x8 |= (luma_flag && split_cu->violates_lfnst_constrained_luma) || (chroma_flag && split_cu->violates_lfnst_constrained_chroma);
//last_scan_pos |= split_cu->lfnst_last_scan_pos;
last_scan_pos |= true;
}
}
}
else {
non_zero_coeff_non_ts_corner_8x8 |= (luma_flag && pred_cu->violates_lfnst_constrained_luma) || (chroma_flag && pred_cu->violates_lfnst_constrained_chroma);
last_scan_pos |= pred_cu->lfnst_last_scan_pos;
}
if (color == COLOR_Y && pred_cu->tr_idx == MTS_SKIP) {
is_tr_skip = true;
}
if ((!pred_cu->lfnst_last_scan_pos && !isp_mode) || non_zero_coeff_non_ts_corner_8x8 || is_tr_skip) {
if ((!last_scan_pos) || non_zero_coeff_non_ts_corner_8x8 || is_tr_skip) {
return false;
}
return true;
@ -155,7 +184,7 @@ static bool encode_lfnst_idx(
const cu_loc_t* const cu_loc)
{
if (uvg_is_lfnst_allowed(state, pred_cu, tree_type, color, cu_loc)) {
if (uvg_is_lfnst_allowed(state, pred_cu, tree_type, color, cu_loc, NULL)) {
// Getting separate tree bool from block size is a temporary fix until a proper dual tree check is possible (there is no dual tree structure at time of writing this).
// VTM seems to force explicit dual tree structure for small 4x4 blocks
bool is_separate_tree = tree_type != UVG_BOTH_T;
@ -1399,7 +1428,7 @@ void uvg_encode_coding_tree(
DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_CU_TYPE, abs_x, abs_y, cu_width, cu_height, cur_cu->type-1);
// fprintf(stderr, "%4d %4d %2d %2d %d\n", x, y, cu_width, cu_height, has_chroma);
//fprintf(stderr, "%4d %4d %2d %2d %d\n", x, y, cu_width, cu_height, has_chroma);
if (ctrl->cfg.lossless) {
cabac->cur_ctx = &cabac->ctx.cu_transquant_bypass;
@ -1611,6 +1640,7 @@ void uvg_encode_coding_tree(
encode_transform_coeff(state, &split_loc,
0, coeff, NULL, tree_type, last_split, can_skip_last_cbf, &luma_cbf_ctx,
cu_loc, is_local_dual_tree ? NULL : chroma_loc);
can_skip_last_cbf &= luma_cbf_ctx == 2;
}
}

View file

@ -47,7 +47,7 @@ bool uvg_is_lfnst_allowed(
const cu_info_t* const pred_cu,
enum uvg_tree_type tree_type,
const color_t color,
const cu_loc_t* const cu_loc);
const cu_loc_t* const cu_loc, const lcu_t* const lcu);
void uvg_encode_coding_tree(
encoder_state_t * const state,

View file

@ -128,9 +128,9 @@ typedef int16_t coeff_t;
typedef int32_t mv_t;
#define VERBOSE 1
//#define VERBOSE 1
#define UVG_DEBUG_PRINT_CABAC 1
#define UVG_DEBUG 1
//#define UVG_DEBUG 1
//#define UVG_DEBUG_PRINT_YUVIEW_CSV 1
//#define UVG_DEBUG_PRINT_MV_INFO 1

View file

@ -300,13 +300,13 @@ bool uvg_cclm_is_allowed(const encoder_state_t* const state, const cu_loc_t * co
}
const cu_info_t* const luma_cu = uvg_cu_array_at_const(state->tile->frame->cu_array, luma_loc->x, luma_loc->y);
uint32_t split = GET_SPLITDATA(luma_cu, 0);
if (split != QT_SPLIT && split != NO_SPLIT) {
return false;
if (split != NO_SPLIT) {
allow = split == QT_SPLIT;
}
if (split != NO_SPLIT && luma_cu->intra.isp_mode != ISP_MODE_NO_ISP) {
return false;
else if (split != NO_SPLIT && luma_cu->intra.isp_mode != ISP_MODE_NO_ISP) {
allow = false;
}
return true;
return allow;
}
@ -943,11 +943,15 @@ static void mip_predict(
}
int8_t uvg_wide_angle_correction(int_fast8_t mode, const bool is_isp, const int log2_width, const int log2_height, const
bool account_for_dc_planar)
int8_t uvg_wide_angle_correction(
int_fast8_t mode,
const int log2_width,
const int log2_height,
const
bool account_for_dc_planar)
{
int8_t pred_mode = mode;
if (!is_isp && log2_width != log2_height) {
if (log2_width != log2_height) {
if (mode > 1 && mode <= 66) {
const int modeShift[] = { 0, 6, 10, 12, 14, 15 };
const int deltaSize = abs(log2_width - log2_height);
@ -965,15 +969,17 @@ int8_t uvg_wide_angle_correction(int_fast8_t mode, const bool is_isp, const int
static void intra_predict_regular(
const encoder_state_t* const state,
uvg_intra_references *refs,
const cu_info_t* const cur_cu,
const cu_loc_t* const cu_loc,
const cu_loc_t* const pu_loc,
int_fast8_t mode,
color_t color,
uvg_pixel *dst,
const uint8_t multi_ref_idx,
const uint8_t isp_mode)
{
const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
const int height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
const int width = color == COLOR_Y ? pu_loc->width : pu_loc->chroma_width;
const int height = color == COLOR_Y ? pu_loc->height : pu_loc->chroma_height;
const int log2_width = uvg_g_convert_to_log2[width];
const int log2_height = uvg_g_convert_to_log2[height];
const uvg_config *cfg = &state->encoder_control->cfg;
@ -983,11 +989,12 @@ static void intra_predict_regular(
uint8_t isp = color == COLOR_Y ? isp_mode : 0;
// Wide angle correction
int8_t pred_mode = uvg_wide_angle_correction(mode,
isp_mode,
log2_width,
log2_height,
false);
int8_t pred_mode = uvg_wide_angle_correction(
mode,
color == COLOR_Y ? cur_cu->log2_width : log2_width,
color == COLOR_Y ? cur_cu->log2_height : log2_height,
false
);
const uvg_intra_ref *used_ref = &refs->ref;
if (cfg->intra_smoothing_disabled || color != COLOR_Y || mode == 1 || (width == 4 && height == 4) || multi_ref_index || isp_mode /*ISP_TODO: replace this fake ISP check*/) {
@ -1019,11 +1026,20 @@ static void intra_predict_regular(
}
if (mode == 0) {
uvg_intra_pred_planar(cu_loc, color, used_ref->top, used_ref->left, dst);
uvg_intra_pred_planar(pu_loc, color, used_ref->top, used_ref->left, dst);
} else if (mode == 1) {
intra_pred_dc(cu_loc, color, used_ref->top, used_ref->left, dst, multi_ref_index);
intra_pred_dc(pu_loc, color, used_ref->top, used_ref->left, dst, multi_ref_index);
} else {
uvg_angular_pred(cu_loc, pred_mode, color, used_ref->top, used_ref->left, dst, multi_ref_index, isp);
uvg_angular_pred(
pu_loc,
pred_mode,
color,
used_ref->top,
used_ref->left,
dst,
multi_ref_index,
isp,
isp_mode == ISP_MODE_HOR ? cu_loc->height : cu_loc->width);
}
// pdpc
@ -1032,7 +1048,7 @@ static void intra_predict_regular(
pdpcCondition &= width >= TR_MIN_WIDTH && height >= TR_MIN_WIDTH;
if (pdpcCondition && multi_ref_index == 0) // Cannot be used with MRL.
{
uvg_pdpc_planar_dc(mode, cu_loc, color, used_ref, dst);
uvg_pdpc_planar_dc(mode, pu_loc, color, used_ref, dst);
}
}
@ -1065,7 +1081,7 @@ void uvg_intra_build_reference_any(
bool is_first_isp_block = isp_mode ? pu_x == cu_x && pu_y == cu_y : false;
assert((log2_width >= 1 && log2_width <= 5) && (log2_height >= 1 && log2_height <= 5));
assert((log2_width >= 2 && log2_width <= 5) && log2_height <= 5);
refs->filtered_initialized = false;
uvg_pixel *out_left_ref = &refs->ref.left[0];
@ -1138,11 +1154,8 @@ void uvg_intra_build_reference_any(
px_available_left = height;
}
else {
px_available_left = num_ref_pixels_left[lcu_px.y / 4][lcu_px.x / 4];
// This table does not have values for dimensions less than 4
if (lcu_px.y % 4 != 0) {
px_available_left -= 2;
}
px_available_left = uvg_count_available_edge_cus(cu_loc, lcu, true) * 4;
px_available_left -= pu_loc->y - cu_loc->y;
}
}
else {
@ -1270,7 +1283,8 @@ void uvg_intra_build_reference_any(
px_available_top = width;
}
else {
px_available_top = num_ref_pixels_top[lcu_px.y / 4][lcu_px.x / 4];
px_available_top = uvg_count_available_edge_cus(cu_loc, lcu, false) * 4;
px_available_top -= pu_loc->x - cu_loc->x;
}
}
else {
@ -1343,7 +1357,7 @@ void uvg_intra_build_reference_inner(
bool is_first_isp_block = isp_mode ? pu_x == cu_x && pu_y == cu_y : false;
// Log2_dim 1 is possible with ISP blocks
assert((log2_width >= 1 && log2_width <= 5) && (log2_height >= 1 && log2_height <= 5));
assert((log2_width >= 2 && log2_width <= 5) && log2_height <= 5);
refs->filtered_initialized = false;
uvg_pixel * __restrict out_left_ref = &refs->ref.left[0];
@ -1457,11 +1471,8 @@ void uvg_intra_build_reference_inner(
px_available_left = height;
}
else {
px_available_left = num_ref_pixels_left[lcu_px.y / 4][lcu_px.x / 4];
// This table does not have values for dimensions less than 4
if (lcu_px.y % 4 != 0) {
px_available_left -= 2;
}
px_available_left = uvg_count_available_edge_cus(cu_loc, lcu, true) * 4;
px_available_left -= pu_loc->y - cu_loc->y;
}
}
@ -1477,7 +1488,7 @@ void uvg_intra_build_reference_inner(
// Limit the number of available pixels based on block size and dimensions
// of the picture.
px_available_left = MIN(px_available_left, height * 2);
px_available_left = MIN(px_available_left, cu_height * 2);
px_available_left = MIN(px_available_left, (pic_px->y - luma_px->y) >> is_chroma);
// Copy pixels from coded CUs.
@ -1529,7 +1540,8 @@ void uvg_intra_build_reference_inner(
px_available_top = width;
}
else {
px_available_top = num_ref_pixels_top[lcu_px.y / 4][lcu_px.x / 4];
px_available_top = uvg_count_available_edge_cus(cu_loc, lcu, false) * 4;
px_available_top -= pu_loc->x - cu_loc->x;
}
}
else {
@ -1603,6 +1615,7 @@ void uvg_intra_predict(
const encoder_state_t* const state,
uvg_intra_references* const refs,
const cu_loc_t* const cu_loc,
const cu_loc_t* const pu_loc,
const color_t color,
uvg_pixel* dst,
const intra_search_data_t* data,
@ -1614,10 +1627,10 @@ void uvg_intra_predict(
// TODO: what is this used for?
// const bool filter_boundary = color == COLOR_Y && !(cfg->lossless && cfg->implicit_rdpcm);
bool use_mip = false;
const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
const int height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
const int x = cu_loc->x;
const int y = cu_loc->y;
const int width = color == COLOR_Y ? pu_loc->width : pu_loc->chroma_width;
const int height = color == COLOR_Y ? pu_loc->height : pu_loc->chroma_height;
const int x = pu_loc->x;
const int y = pu_loc->y;
int8_t intra_mode = color == COLOR_Y ? data->pred_cu.intra.mode : data->pred_cu.intra.mode_chroma;
if (data->pred_cu.intra.mip_flag) {
if (color == COLOR_Y) {
@ -1633,7 +1646,7 @@ void uvg_intra_predict(
mip_predict(state, refs, width, height, dst, intra_mode, data->pred_cu.intra.mip_is_transposed);
}
else {
intra_predict_regular(state, refs, cu_loc, intra_mode, color, dst, data->pred_cu.intra.multi_ref_idx, data->pred_cu.intra.isp_mode);
intra_predict_regular(state, refs, &data->pred_cu, cu_loc, pu_loc, intra_mode, color, dst, data->pred_cu.intra.multi_ref_idx, data->pred_cu.intra.isp_mode);
}
}
else {
@ -1748,7 +1761,7 @@ void uvg_get_isp_split_loc(cu_loc_t *loc, const int x, const int y, const int bl
if (split_type != ISP_MODE_NO_ISP) {
part_dim = uvg_get_isp_split_dim(block_w, block_h, split_type, is_transform_split);
}
if(split_type == ISP_MODE_VER && block_w < 16 && !is_transform_split) {
if(split_type == ISP_MODE_VER && block_w < 16 && block_h != 4 && !is_transform_split) {
split_idx /= 2;
}
const int offset = part_dim * split_idx;
@ -1818,7 +1831,7 @@ static void intra_recon_tb_leaf(
uvg_intra_build_reference(state, pu_loc, cu_loc, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp, extra_refs, multi_ref_index, isp_mode);
uvg_pixel pred[32 * 32];
uvg_intra_predict(state, &refs, pu_loc, color, pred, search_data, lcu, tree_type);
uvg_intra_predict(state, &refs, cu_loc, pu_loc, color, pred, search_data, lcu, tree_type);
const int index = lcu_px.x + lcu_px.y * lcu_width;
uvg_pixel *block = NULL;

View file

@ -134,6 +134,7 @@ void uvg_intra_predict(
const encoder_state_t* const state,
uvg_intra_references* const refs,
const cu_loc_t* const cu_loc,
const cu_loc_t* const pu_loc,
const color_t color,
uvg_pixel* dst,
const intra_search_data_t* data,
@ -168,7 +169,6 @@ uint8_t uvg_get_mip_flag_context(
int8_t uvg_wide_angle_correction(
int_fast8_t mode,
const bool is_isp,
const int log2_width,
const int log2_height,
const bool account_for_dc_planar);

View file

@ -741,7 +741,7 @@ static double cu_rd_cost_tr_split_accurate(
if(is_local_sep_tree || tree_type == UVG_LUMA_T) {
if (uvg_is_lfnst_allowed(state, tr_cu, is_local_sep_tree ? UVG_LUMA_T : tree_type, COLOR_Y, cu_loc)) {
if (uvg_is_lfnst_allowed(state, tr_cu, is_local_sep_tree ? UVG_LUMA_T : tree_type, COLOR_Y, cu_loc, lcu)) {
const int lfnst_idx = tr_cu->lfnst_idx;
CABAC_FBITS_UPDATE(
cabac,
@ -814,7 +814,7 @@ static double cu_rd_cost_tr_split_accurate(
}
const bool is_chroma_tree = is_local_sep_tree || tree_type == UVG_CHROMA_T;
if (uvg_is_lfnst_allowed(state, tr_cu, is_local_sep_tree ? UVG_CHROMA_T : tree_type, is_chroma_tree ? COLOR_UV : COLOR_Y, is_chroma_tree ? cu_loc : chroma_loc)) {
if (uvg_is_lfnst_allowed(state, tr_cu, is_local_sep_tree ? UVG_CHROMA_T : tree_type, is_chroma_tree ? COLOR_UV : COLOR_Y, is_chroma_tree ? cu_loc : chroma_loc, lcu)) {
const int lfnst_idx = is_chroma_tree ? tr_cu->cr_lfnst_idx : tr_cu->lfnst_idx;
CABAC_FBITS_UPDATE(
cabac,
@ -1151,7 +1151,7 @@ static double search_cu(
uvg_intra_recon_cu(state,
&intra_search, chroma_loc,
&intra_search.pred_cu, lcu,
tree_type,
is_separate_tree ? UVG_CHROMA_T : tree_type,
false,
true);
if(tree_type != UVG_CHROMA_T) {
@ -1224,7 +1224,7 @@ static double search_cu(
uvg_intra_recon_cu(state,
&intra_search, chroma_loc,
cur_cu, lcu,
tree_type,
UVG_CHROMA_T,
false,
true);
} else {

View file

@ -660,7 +660,7 @@ static int search_intra_chroma_rough(
for (int i = 0; i < modes_count; ++i) {
const int8_t mode_chroma = chroma_data[i].pred_cu.intra.mode_chroma;
if (mode_chroma == luma_mode || mode_chroma == 0 || mode_chroma >= 81) continue;
uvg_intra_predict(state, &refs_u, &loc, COLOR_U, pred, &chroma_data[i], lcu, tree_type);
uvg_intra_predict(state, &refs_u, cu_loc, &loc, COLOR_U, pred, &chroma_data[i], lcu, tree_type);
//costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width);
switch (width) {
case 4: chroma_data[i].cost += uvg_satd_4x4(pred, orig_block);
@ -679,7 +679,7 @@ static int search_intra_chroma_rough(
for (int i = 0; i < modes_count; ++i) {
const int8_t mode_chroma = chroma_data[i].pred_cu.intra.mode_chroma;
if (mode_chroma == luma_mode || mode_chroma == 0 || mode_chroma >= 81) continue;
uvg_intra_predict(state, &refs_v, &loc, COLOR_V, pred, &chroma_data[i], lcu, tree_type);
uvg_intra_predict(state, &refs_v, cu_loc, &loc, COLOR_V, pred, &chroma_data[i], lcu, tree_type);
//costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width);
switch (width) {
case 4: chroma_data[i].cost += uvg_satd_4x4(pred, orig_block);
@ -1026,9 +1026,9 @@ static uint8_t search_intra_rough(
int offset = 1 << state->encoder_control->cfg.intra_rough_search_levels;
search_proxy.pred_cu.intra.mode = 0;
uvg_intra_predict(state, refs, cu_loc, COLOR_Y, preds[0], &search_proxy, NULL, UVG_LUMA_T);
uvg_intra_predict(state, refs, cu_loc, cu_loc, COLOR_Y, preds[0], &search_proxy, NULL, UVG_LUMA_T);
search_proxy.pred_cu.intra.mode = 1;
uvg_intra_predict(state, refs, cu_loc, COLOR_Y, preds[1], &search_proxy, NULL, UVG_LUMA_T);
uvg_intra_predict(state, refs, cu_loc, cu_loc, COLOR_Y, preds[1], &search_proxy, NULL, UVG_LUMA_T);
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, height, costs);
mode_checked[0] = true;
mode_checked[1] = true;
@ -1078,7 +1078,7 @@ static uint8_t search_intra_rough(
for (int i = 0; i < PARALLEL_BLKS; ++i) {
if (mode + i * offset <= 66) {
search_proxy.pred_cu.intra.mode = mode + i*offset;
uvg_intra_predict(state, refs, cu_loc, COLOR_Y, preds[i], &search_proxy, NULL, UVG_LUMA_T);
uvg_intra_predict(state, refs, cu_loc, cu_loc, COLOR_Y, preds[i], &search_proxy, NULL, UVG_LUMA_T);
}
}
@ -1150,7 +1150,7 @@ static uint8_t search_intra_rough(
for (int block = 0; block < PARALLEL_BLKS; ++block) {
search_proxy.pred_cu.intra.mode = modes_to_check[block + i];
uvg_intra_predict(state, refs, cu_loc, COLOR_Y, preds[block], &search_proxy, NULL, UVG_LUMA_T);
uvg_intra_predict(state, refs, cu_loc, cu_loc, COLOR_Y, preds[block], &search_proxy, NULL, UVG_LUMA_T);
}
@ -1241,7 +1241,7 @@ static void get_rough_cost_for_2n_modes(
double bits[PARALLEL_BLKS] = { 0 };
for(int mode = 0; mode < num_modes; mode += PARALLEL_BLKS) {
for (int i = 0; i < PARALLEL_BLKS; ++i) {
uvg_intra_predict(state, &refs[search_data[mode + i].pred_cu.intra.multi_ref_idx], cu_loc, COLOR_Y, preds[i], &search_data[mode + i], NULL, UVG_LUMA_T);
uvg_intra_predict(state, &refs[search_data[mode + i].pred_cu.intra.multi_ref_idx], cu_loc, cu_loc, COLOR_Y, preds[i], &search_data[mode + i], NULL, UVG_LUMA_T);
}
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, height, costs_out);
@ -1482,6 +1482,7 @@ int8_t uvg_search_intra_chroma_rdo(
state,
&refs[COLOR_U - 1],
cu_loc,
cu_loc,
COLOR_U,
u_pred,
&chroma_data[mode_i],
@ -1491,6 +1492,7 @@ int8_t uvg_search_intra_chroma_rdo(
state,
&refs[COLOR_V - 1],
cu_loc,
cu_loc,
COLOR_V,
v_pred,
&chroma_data[mode_i],

View file

@ -60,7 +60,8 @@ static void uvg_angular_pred_avx2(
const uvg_pixel *const in_ref_left,
uvg_pixel *const dst,
const uint8_t multi_ref_idx,
const uint8_t isp_mode)
const uint8_t isp_mode,
const int cu_dim)
{
// ISP_TODO: non-square block implementation, height is passed but not used
const int width = channel_type == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;

View file

@ -2605,9 +2605,14 @@ static void mts_dct_generic(
int16_t tmp[32 * 32];
const int32_t shift_1st = log2_width_minus1 + bitdepth - 8;
const int32_t shift_2nd = log2_height_minus1 + 7;
dct_hor(input, tmp, shift_1st, height, 0, skip_width);
dct_ver(tmp, output, shift_2nd, width, skip_width, skip_height);
if (height == 1) {
dct_hor(input, output, shift_1st, height, 0, skip_width);
} else if (width == 1) {
dct_ver(input, output, shift_2nd, width, 0, skip_height);
} else {
dct_hor(input, tmp, shift_1st, height, 0, skip_width);
dct_ver(tmp, output, shift_2nd, width, skip_width, skip_height);
}
}
}
@ -2660,8 +2665,14 @@ static void mts_idct_generic(
const int32_t shift_1st = transform_matrix_shift + 1;
const int32_t shift_2nd = (transform_matrix_shift + max_log2_tr_dynamic_range - 1) - bitdepth;
idct_ver(input, tmp, shift_1st, width, skip_width, skip_height);
idct_hor(tmp, output, shift_2nd, height, 0, skip_width);
if (height == 1) {
idct_hor(input, output, shift_1st, height, 0, skip_width);
} else if (width == 1) {
idct_ver(input, output, shift_2nd, width, 0, skip_height);
} else {
idct_ver(input, tmp, shift_1st, width, skip_width, skip_height);
idct_hor(tmp, output, shift_2nd, height, 0, skip_width);
}
}
}

View file

@ -59,7 +59,8 @@ static void uvg_angular_pred_generic(
const uvg_pixel *const in_ref_left,
uvg_pixel *const dst,
const uint8_t multi_ref_idx,
const uint8_t isp_mode)
const uint8_t isp_mode,
const int cu_dim)
{
int width = channel_type == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
int height = channel_type == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
@ -141,10 +142,9 @@ static void uvg_angular_pred_generic(
// Pointer for the other reference.
const uvg_pixel *ref_side;
uvg_pixel* work = width == height || vertical_mode ? dst : temp_dst;
const int cu_dim = MAX(width, height);
const int top_ref_length = isp_mode ? width + cu_dim : width << 1;
const int left_ref_length = isp_mode ? height + cu_dim : height << 1;
const int top_ref_length = isp_mode == ISP_MODE_VER ? width + cu_dim : width << 1;
const int left_ref_length = isp_mode == ISP_MODE_HOR ? height + cu_dim : height << 1;
// Set ref_main and ref_side such that, when indexed with 0, they point to
// index 0 in block coordinates.
@ -338,7 +338,7 @@ static void uvg_intra_pred_planar_generic(
const int final_shift = 1 + log2_width + log2_height;
// If ISP is enabled log_dim 1 is possible (limit was previously 2)
assert((log2_width >= 1 && log2_width <= 5) && (log2_height >= 1 && log2_height <= 5));
assert((log2_width >= 2 && log2_width <= 5) && log2_height <= 5);
const uvg_pixel top_right = ref_top[width + 1];
const uvg_pixel bottom_left = ref_left[height + 1];

View file

@ -313,15 +313,16 @@ int uvg_quant_cbcr_residual_generic(
uvg_transform2d(state->encoder_control, combined_residual, coeff, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, cur_cu);
if(cur_cu->cr_lfnst_idx) {
uvg_fwd_lfnst(cur_cu, width, height, COLOR_UV, cur_cu->cr_lfnst_idx, coeff, tree_type, state->collocated_luma_mode);
uint8_t lfnst_idx = tree_type == UVG_CHROMA_T ? cur_cu->cr_lfnst_idx : cur_cu->lfnst_idx;
if(lfnst_idx) {
uvg_fwd_lfnst(cur_cu, width, height, COLOR_UV, lfnst_idx, coeff, tree_type, state->collocated_luma_mode);
}
if (state->encoder_control->cfg.rdoq_enable &&
(width > 4 || !state->encoder_control->cfg.rdoq_skip))
{
uvg_rdoq(state, coeff, coeff_out, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
scan_order, cur_cu->type, cur_cu->cbf, cur_cu->cr_lfnst_idx);
scan_order, cur_cu->type, cur_cu->cbf, lfnst_idx);
}
else if (state->encoder_control->cfg.rdoq_enable && false) {
uvg_ts_rdoq(state, coeff, coeff_out, width, height, cur_cu->joint_cb_cr == 2 ? COLOR_V : COLOR_U,
@ -329,7 +330,7 @@ int uvg_quant_cbcr_residual_generic(
}
else {
uvg_quant(state, coeff, coeff_out, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false, cur_cu->cr_lfnst_idx);
scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false, lfnst_idx);
}
int8_t has_coeffs = 0;
@ -348,8 +349,8 @@ int uvg_quant_cbcr_residual_generic(
// Get quantized residual. (coeff_out -> coeff -> residual)
uvg_dequant(state, coeff_out, coeff, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false);
if (cur_cu->cr_lfnst_idx) {
uvg_inv_lfnst(cur_cu, width, height, COLOR_UV, cur_cu->cr_lfnst_idx, coeff, tree_type, state->collocated_luma_mode);
if (lfnst_idx) {
uvg_inv_lfnst(cur_cu, width, height, COLOR_UV, lfnst_idx, coeff, tree_type, state->collocated_luma_mode);
}
uvg_itransform2d(state->encoder_control, combined_residual, coeff, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, cur_cu);
@ -487,7 +488,7 @@ int uvg_quantize_residual_generic(encoder_state_t *const state,
uvg_transform2d(state->encoder_control, residual, coeff, width, height, color, cur_cu);
}
const uint8_t lfnst_index = color == COLOR_Y ? cur_cu->lfnst_idx : cur_cu->cr_lfnst_idx;
const uint8_t lfnst_index = tree_type != UVG_CHROMA_T || color == COLOR_Y ? cur_cu->lfnst_idx : cur_cu->cr_lfnst_idx;
if (state->encoder_control->cfg.lfnst && cur_cu->type == CU_INTRA) {
// Forward low frequency non-separable transform

View file

@ -52,7 +52,8 @@ typedef void (angular_pred_func)(
const uvg_pixel *const in_ref_left,
uvg_pixel *const dst,
const uint8_t multi_ref_idx,
const uint8_t isp_mode);
const uint8_t isp_mode,
const int cu_dim);
typedef void (intra_pred_planar_func)(
const cu_loc_t* const cu_loc,

View file

@ -571,7 +571,7 @@ void uvg_chroma_transform_search(
SCAN_DIAG,
&u_has_coeffs,
&v_has_coeffs,
pred_cu->cr_lfnst_idx);
tree_type == UVG_CHROMA_T ? pred_cu->cr_lfnst_idx : pred_cu->lfnst_idx);
if(pred_cu->cr_lfnst_idx !=0 && !u_has_coeffs && !v_has_coeffs) continue;
if(pred_cu->type == CU_INTRA && transforms[i] != CHROMA_TS && tree_type == UVG_CHROMA_T) {
@ -720,7 +720,7 @@ void uvg_chroma_transform_search(
COEFF_ORDER_LINEAR);
}
if((depth == 4 || tree_type == UVG_CHROMA_T) && state->encoder_control->cfg.lfnst && 0) {
if(uvg_is_lfnst_allowed(state, pred_cu, UVG_CHROMA_T, COLOR_UV, cu_loc)) {
if(uvg_is_lfnst_allowed(state, pred_cu, UVG_CHROMA_T, COLOR_UV, cu_loc, lcu)) {
const int lfnst_idx = pred_cu->cr_lfnst_idx;
CABAC_FBITS_UPDATE(
&state->search_cabac,
@ -873,7 +873,7 @@ void uvg_fwd_lfnst(
const int scan_order = SCAN_DIAG;
if (lfnst_index && !mts_skip)
if (lfnst_index && !mts_skip && (color == COLOR_Y || is_separate_tree))
{
assert(log2_width != -1 && "LFNST: invalid block width.");
const bool whge3 = width >= 8 && height >= 8;
@ -887,7 +887,12 @@ void uvg_fwd_lfnst(
}
assert(intra_mode < NUM_INTRA_MODE && "LFNST: Invalid intra mode.");
assert(lfnst_index < 3 && "LFNST: Invalid LFNST index. Must be in [0, 2]");
int32_t wide_adjusted_mode = uvg_wide_angle_correction(intra_mode, cur_cu->intra.isp_mode != 0, log2_width, log2_height, true);
int32_t wide_adjusted_mode = uvg_wide_angle_correction(
intra_mode,
color == COLOR_Y ? cur_cu->log2_width : log2_width,
color == COLOR_Y ? cur_cu->log2_height : log2_height,
true
);
// Transform wide angle mode to intra mode
intra_mode = get_lfnst_intra_mode(wide_adjusted_mode);
@ -1007,7 +1012,7 @@ void uvg_inv_lfnst(
bool is_mip = block_is_mip(cur_cu, color, is_separate_tree);
const int scan_order = SCAN_DIAG;
if (lfnst_index && !mts_skip) {
if (lfnst_index && !mts_skip && (color == COLOR_Y || is_separate_tree)) {
const bool whge3 = width >= 8 && height >= 8;
const uint32_t* scan = whge3 ? uvg_coef_top_left_diag_scan_8x8[log2_width] : uvg_g_sig_last_scan[scan_order][log2_width - 1];
@ -1019,7 +1024,12 @@ void uvg_inv_lfnst(
}
assert(intra_mode < NUM_INTRA_MODE && "LFNST: Invalid intra mode.");
assert(lfnst_index < 3 && "LFNST: Invalid LFNST index. Must be in [0, 2]");
int32_t wide_adjusted_mode = uvg_wide_angle_correction(intra_mode, cur_cu->intra.isp_mode != 0, log2_width, log2_height, true);
int32_t wide_adjusted_mode = uvg_wide_angle_correction(
intra_mode,
color == COLOR_Y ? cur_cu->log2_width : log2_width,
color == COLOR_Y ? cur_cu->log2_height : log2_height,
true
);
intra_mode = get_lfnst_intra_mode(wide_adjusted_mode);
@ -1386,7 +1396,8 @@ void uvg_quantize_lcu_residual(
// Tell clang-analyzer what is up. For some reason it can't figure out from
// asserting just depth.
// Width 2 is possible with ISP blocks // ISP_TODO: no, they actually are not
assert(width == 2 ||
assert(width == 1 ||
width == 2 ||
width == 4 ||
width == 8 ||
width == 16 ||