[isp] Resolve TODOs. Make scan order tables const.

This commit is contained in:
siivonek 2022-08-19 15:05:22 +03:00 committed by Marko Viitanen
parent 93317cafa4
commit 7062697beb
13 changed files with 32 additions and 90 deletions

View file

@ -58,7 +58,7 @@ bool uvg_is_mts_allowed(const encoder_state_t * const state, cu_info_t *const pr
uint8_t mts_type = state->encoder_control->cfg.mts;
bool mts_allowed = mts_type == UVG_MTS_BOTH || (pred_cu->type == CU_INTRA ? mts_type == UVG_MTS_INTRA : pred_cu->type == CU_INTER && mts_type == UVG_MTS_INTER);
mts_allowed &= cu_width <= max_size && cu_height <= max_size;
//mts_allowed &= !cu.ispMode; // ISP_TODO: Uncomment this when ISP is implemented.
mts_allowed &= pred_cu->type == CU_INTRA ? !pred_cu->intra.isp_mode : true;
//mts_allowed &= !cu.sbtInfo;
mts_allowed &= !(pred_cu->bdpcmMode && cu_width <= ts_max_size && cu_height <= ts_max_size);
mts_allowed &= pred_cu->tr_idx != MTS_SKIP && !pred_cu->violates_mts_coeff_constraint && pred_cu->mts_last_scan_pos ;
@ -233,8 +233,8 @@ void uvg_encode_ts_residual(encoder_state_t* const state,
// TODO: log2_cg_size is wrong if width != height
const uint32_t log2_cg_size = uvg_g_log2_sbb_size[log2_block_width][log2_block_width][0] + uvg_g_log2_sbb_size[log2_block_width][log2_block_width][1];
const uint32_t* scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_mode, log2_block_width, log2_block_height);
const uint32_t* scan_cg = uvg_get_scan_order_table(SCAN_GROUP_UNGROUPED, scan_mode, log2_block_width, log2_block_height);
const uint32_t* const scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_mode, log2_block_width, log2_block_height);
const uint32_t* const scan_cg = uvg_get_scan_order_table(SCAN_GROUP_UNGROUPED, scan_mode, log2_block_width, log2_block_height);
double bits = 0;

View file

@ -1497,7 +1497,6 @@ int uvg_get_isp_split_dim(const int width, const int height, const int split_typ
non_split_dim_size = height;
}
// ISP_TODO: make a define for this. Depends on minimum transform block log2 side length
const int min_num_samples = 16; // Minimum allowed number of samples for split block
const int factor_to_min_samples = non_split_dim_size < min_num_samples ? min_num_samples >> uvg_math_floor_log2(non_split_dim_size) : 1;
partition_size = (split_dim_size >> div_shift) < factor_to_min_samples ? factor_to_min_samples : (split_dim_size >> div_shift);
@ -1654,7 +1653,6 @@ void uvg_intra_recon_cu(
LCU_GET_CU_AT_PX(lcu, (lcu_px.x + offset) >> (tree_type == UVG_CHROMA_T), (lcu_px.y + offset) >> (tree_type == UVG_CHROMA_T))->cbf,
};
// ISP_TODO: does not work with ISP yet, ask Joose when this is relevant.
if (recon_luma && depth <= MAX_DEPTH) {
cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_Y);
}

View file

@ -1187,8 +1187,8 @@ int uvg_ts_rdoq(encoder_state_t* const state, coeff_t* src_coeff, coeff_t* dest_
const coeff_t entropy_coding_maximum = (1 << max_log2_tr_dynamic_range) - 1;
const uint32_t* scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_mode, log2_block_width, log2_block_height);
const uint32_t* scan_cg = uvg_get_scan_order_table(SCAN_GROUP_UNGROUPED, scan_mode, log2_block_width, log2_block_height);
const uint32_t* const scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_mode, log2_block_width, log2_block_height);
const uint32_t* const scan_cg = uvg_get_scan_order_table(SCAN_GROUP_UNGROUPED, scan_mode, log2_block_width, log2_block_height);
uint32_t coeff_levels[3];
double coeff_level_error[4];
@ -1391,14 +1391,13 @@ void uvg_rdoq(
{
const encoder_control_t * const encoder = state->encoder_control;
cabac_data_t * const cabac = &state->cabac;
// ISP_TODO: these dimensions can be removed, they are same as log2_block_dimensions
uint32_t log2_tr_width = uvg_math_floor_log2(width);
uint32_t log2_tr_height = uvg_math_floor_log2(height);
int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_height + log2_tr_width) >> 1); // Represents scaling through forward transform
uint16_t go_rice_param = 0;
uint32_t reg_bins = (width * height * 28) >> 4;
const uint32_t log2_block_width = uvg_g_convert_to_log2[width];
const uint32_t log2_block_height = uvg_g_convert_to_log2[height];
int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_block_width + log2_block_height) >> 1); // Represents scaling through forward transform
uint16_t go_rice_param = 0;
uint32_t reg_bins = (width * height * 28) >> 4;
int32_t scalinglist_type= (block_type == CU_INTRA ? 0 : 3) + color;
int32_t qp_scaled = uvg_get_scaled_qp(color, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]);
@ -1407,8 +1406,8 @@ void uvg_rdoq(
const double lambda = color ? state->c_lambda : state->lambda;
const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled%6];
const double *err_scale = encoder->scaling_list.error_scale[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled%6];
const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_block_width][log2_block_height][scalinglist_type][qp_scaled%6];
const double *err_scale = encoder->scaling_list.error_scale[log2_block_width][log2_block_height][scalinglist_type][qp_scaled%6];
double block_uncoded_cost = 0;
@ -1422,7 +1421,6 @@ void uvg_rdoq(
memset(dest_coeff, 0, sizeof(coeff_t) * width * height);
// ISP_TODO: height
const uint32_t log2_cg_size = uvg_g_log2_sbb_size[log2_block_width][log2_block_height][0] + uvg_g_log2_sbb_size[log2_block_width][log2_block_height][1];
const uint32_t log2_cg_width = uvg_g_log2_sbb_size[log2_block_width][log2_block_height][0];
const uint32_t log2_cg_height = uvg_g_log2_sbb_size[log2_block_width][log2_block_height][1];
@ -1430,8 +1428,8 @@ void uvg_rdoq(
const uint32_t cg_width = (MIN((uint8_t)TR_MAX_WIDTH, width) >> log2_cg_width);
const uint32_t cg_height = (MIN((uint8_t)TR_MAX_WIDTH, height) >> log2_cg_height);
const uint32_t *scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_mode, log2_block_width, log2_block_height);
const uint32_t *scan_cg = uvg_get_scan_order_table(SCAN_GROUP_UNGROUPED, scan_mode, log2_block_width, log2_block_height);
const uint32_t * const scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_mode, log2_block_width, log2_block_height);
const uint32_t * const scan_cg = uvg_get_scan_order_table(SCAN_GROUP_UNGROUPED, scan_mode, log2_block_width, log2_block_height);
const uint32_t cg_size = 16;
const int32_t shift = 4 >> 1;

View file

@ -190,30 +190,27 @@ static void get_cost_dual(encoder_state_t * const state,
* \param lcu_px Position of the top left pixel of current CU within current LCU.
*/
static void derive_mts_constraints(cu_info_t *const pred_cu,
lcu_t *const lcu, const int depth,
lcu_t *const lcu, const int width, const int height,
const vector2d_t lcu_px)
{
const int width = LCU_WIDTH >> depth;
const int height = width; // ISP_TODO: height
int8_t scan_idx = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth);
int8_t scan_idx = SCAN_DIAG;
int32_t i;
// ToDo: large block support in VVC?
uint32_t sig_coeffgroup_flag[32 * 32] = { 0 };
const uint32_t log2_block_width = uvg_g_convert_to_log2[width];
const uint32_t log2_block_height = uvg_g_convert_to_log2[height];
const uint32_t log2_cg_size = uvg_g_log2_sbb_size[log2_block_width][log2_block_width][0]
+ uvg_g_log2_sbb_size[log2_block_width][log2_block_width][1]; // ISP_TODO: height
const uint32_t *scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_idx, log2_block_width, log2_block_height);
const uint32_t *scan_cg = uvg_get_scan_order_table(SCAN_GROUP_UNGROUPED, scan_idx, log2_block_width, log2_block_height);
const uint32_t log2_cg_size = uvg_g_log2_sbb_size[log2_block_width][log2_block_height][0]
+ uvg_g_log2_sbb_size[log2_block_width][log2_block_height][1];
const uint32_t * const scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_idx, log2_block_width, log2_block_height);
const uint32_t * const scan_cg = uvg_get_scan_order_table(SCAN_GROUP_UNGROUPED, scan_idx, log2_block_width, log2_block_height);
const coeff_t* coeff = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, lcu_px.x, lcu_px.y)];
signed scan_cg_last = -1;
signed scan_pos_last = -1;
// ISP_TODO: height
for (int i = 0; i < width * width; i++) {
for (int i = 0; i < width * height; i++) {
if (coeff[scan[i]]) {
scan_pos_last = i;
sig_coeffgroup_flag[scan_cg[i >> log2_cg_size]] = 1;
@ -405,7 +402,7 @@ static double search_intra_trdepth(
if (trafo != 0 && !cbf_is_set(pred_cu->cbf, depth, COLOR_Y)) continue;
derive_mts_constraints(pred_cu, lcu, depth, lcu_px);
derive_mts_constraints(pred_cu, lcu, width, height, lcu_px);
if (pred_cu->tr_idx > 1) {
if (pred_cu->violates_mts_coeff_constraint || !pred_cu->
mts_last_scan_pos) {

View file

@ -1598,7 +1598,6 @@ static void mts_dct_avx2(
{
tr_type_t type_hor;
tr_type_t type_ver;
// ISP_TODO: height passed but not used
uvg_get_tr_type(width, height, color, tu, &type_hor, &type_ver, mts_idx);

View file

@ -501,7 +501,6 @@ void uvg_quant_avx2(const encoder_state_t * const state, const coeff_t * __restr
__m256i v_coef, q_coefs;
__m256i v_quant_coeff_lo, v_quant_coeff_hi;
// ISP_TODO: do these avx common functions need height?
scanord_read_vector(coeffs, scan, scan_idx, subpos, width, result_coeffs, 2);
v_coef = result_coeffs[0];

View file

@ -739,12 +739,6 @@ static void idct_ ## n ## x ## n ## _generic(int8_t bitdepth, const int16_t *inp
partial_butterfly_inverse_ ## n ## _generic(tmp, output, shift_2nd); \
}
//static void dct_non_square_generic(int8_t bitdepth, const int16_t* input, int16_t* output)
//{
// // ISP_TODO: non-square transform here
//}
DCT_NXN_GENERIC(4);
DCT_NXN_GENERIC(8);
DCT_NXN_GENERIC(16);

View file

@ -80,8 +80,8 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
const uint32_t log2_block_height = uvg_g_convert_to_log2[height];
const uint32_t log2_cg_size = uvg_g_log2_sbb_size[log2_block_width][log2_block_height][0] + uvg_g_log2_sbb_size[log2_block_width][log2_block_height][1];
const uint32_t* scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_mode, log2_block_width, log2_block_height);
const uint32_t* scan_cg = uvg_get_scan_order_table(SCAN_GROUP_UNGROUPED, scan_mode, log2_block_width, log2_block_height);
const uint32_t* const scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_mode, log2_block_width, log2_block_height);
const uint32_t* const scan_cg = uvg_get_scan_order_table(SCAN_GROUP_UNGROUPED, scan_mode, log2_block_width, log2_block_height);
// Init base contexts according to block type

View file

@ -124,7 +124,6 @@ static void uvg_angular_pred_generic(
const bool vertical_mode = intra_mode >= 34;
// Modes distance to horizontal or vertical mode.
const int_fast8_t mode_disp = vertical_mode ? pred_mode - 50 : -((int32_t)pred_mode - 18);
//const int_fast8_t mode_disp = vertical_mode ? intra_mode - 26 : 10 - intra_mode;
// Sample displacement per column in fractions of 32.
const int_fast8_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)];
@ -140,23 +139,6 @@ static void uvg_angular_pred_generic(
// Set ref_main and ref_side such that, when indexed with 0, they point to
// index 0 in block coordinates.
if (sample_disp < 0) {
// ISP_TODO: might be able to use memcpy instead of loops here, should be a bit faster.
/*if (vertical_mode) {
for (int i = 0; i <= width + 1 + multi_ref_index; i++) {
temp_main[width + i] = in_ref_above[i];
}
for (int j = 0; j <= height + 1 + multi_ref_index; j++) {
temp_side[height + j] = in_ref_left[j];
}
} else {
for (int i = 0; i <= width + 1 + multi_ref_index; i++) {
temp_side[width + i] = in_ref_above[i];
}
for (int j = 0; j <= height + 1 + multi_ref_index; j++) {
temp_main[height + j] = in_ref_left[j];
}
}*/
memcpy(&temp_above[height], &in_ref_above[0], (width + 2 + multi_ref_index) * sizeof(uvg_pixel));
memcpy(&temp_left[width], &in_ref_left[0], (height + 2 + multi_ref_index) * sizeof(uvg_pixel));
@ -259,10 +241,6 @@ static void uvg_angular_pred_generic(
int filter_threshold = uvg_intra_hor_ver_dist_thres[log2_width];
int dist_from_vert_or_hor = MIN(abs((int32_t)pred_mode - 50), abs((int32_t)pred_mode - 18));
if (dist_from_vert_or_hor > filter_threshold) {
// ISP_TODO: these are introduced in the beginning of this function or am I missing something?
static const int16_t modedisp2sampledisp[32] = { 0, 1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 23, 26, 29, 32, 35, 39, 45, 51, 57, 64, 73, 86, 102, 128, 171, 256, 341, 512, 1024 };
const int_fast8_t mode_disp = (pred_mode >= 34) ? pred_mode - 50 : 18 - pred_mode;
const int_fast8_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)];
if ((abs(sample_disp) & 0x1F) != 0)
{
use_cubic = false;
@ -361,8 +339,8 @@ static void uvg_angular_pred_generic(
// Mode is horizontal or vertical, just copy the pixels.
// Do not apply PDPC if multi ref line index is other than 0
// ISP_TODO: do not do PDPC if block is in BDPCM mode
bool do_pdpc = (((width >= 4 && height >= 4) || channel_type != 0) && sample_disp >= 0 && multi_ref_index == 0);
// TODO: do not do PDPC if block is in BDPCM mode
bool do_pdpc = (((width >= 4 && height >= 4) || channel_type != 0) && sample_disp >= 0 && multi_ref_index == 0 /*&& !bdpcm*/);
if (do_pdpc) {
int scale = (log2_width + log2_height - 2) >> 2;
@ -381,25 +359,6 @@ static void uvg_angular_pred_generic(
memcpy(&dst[y * width], &ref_main[1], width * sizeof(uvg_pixel));
}
}
// ISP_TODO: there is no reason to run these loops AND then check if PDPC is applied. Do the check first and then run either the normal or PDPC loops
//for (int_fast32_t y = 0; y < height; ++y) {
// for (int_fast32_t x = 0; x < width; ++x) {
// dst[y * width + x] = ref_main[x + 1];
// }
// // Do not apply PDPC if multi ref line index is other than 0
// // ISP_TODO: do not do PDPC if block is in BDPCM mode
// if (((width >= 4 && height >= 4) || channel_type != 0) && sample_disp >= 0 && multi_ref_index == 0) {
// int scale = (log2_width + log2_height - 2) >> 2;
// const uvg_pixel top_left = ref_main[0];
// const uvg_pixel left = ref_side[1 + y];
// for (int i = 0; i < MIN(3 << scale, width); i++) { // ISP_TODO: is one loop enough for PDPC?
// const int wL = 32 >> (2 * i >> scale);
// const uvg_pixel val = dst[y * width + i];
// dst[y * width + i] = CLIP_TO_PIXEL(val + ((wL * (left - top_left) + 32) >> 6));
// }
// }
//}
}
// Flip the block if this was a horizontal mode.

View file

@ -255,7 +255,7 @@ int uvg_quant_cbcr_residual_generic(
ALIGNED(64) int16_t v_residual[TR_MAX_WIDTH * TR_MAX_WIDTH];
ALIGNED(64) int16_t combined_residual[TR_MAX_WIDTH * TR_MAX_WIDTH];
ALIGNED(64) coeff_t coeff[TR_MAX_WIDTH * TR_MAX_WIDTH];
// ISP_TODO: this function is not fully converted to handle non-square blocks
// TODO: this function is not fully converted to handle non-square blocks
{
int y, x;
for (y = 0; y < height; ++y) {
@ -494,7 +494,7 @@ int uvg_quantize_residual_generic(encoder_state_t *const state,
// Quantize coeffs. (coeff -> coeff_out)
if (state->encoder_control->cfg.rdoq_enable &&
(width > 4 || !state->encoder_control->cfg.rdoq_skip) && !use_trskip) // ISP_TODO: width check here might not be necessary, therefore also height check unnecessary. Investigate.
(width > 4 || !state->encoder_control->cfg.rdoq_skip) && !use_trskip)
{
int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth;
tr_depth += (cur_cu->part_size == SIZE_NxN ? 1 : 0);

View file

@ -2573,7 +2573,7 @@ static const uint32_t const g_scan_order_buffer[32258] = {
// Get scan order table based on scan group type (grouped or ungrouped)
// and log2 block width and height index
static const uint32_t* g_scan_order[SCAN_GROUP_TYPES][MAX_LOG2_INDEX][MAX_LOG2_INDEX] =
static const uint32_t* const g_scan_order[SCAN_GROUP_TYPES][MAX_LOG2_INDEX][MAX_LOG2_INDEX] =
{
{
{ g_scan_order_buffer + 0, g_scan_order_buffer + 1, g_scan_order_buffer + 3, g_scan_order_buffer + 7, g_scan_order_buffer + 15, g_scan_order_buffer + 31, g_scan_order_buffer + 63, },
@ -2606,16 +2606,15 @@ static const uint32_t* g_scan_order[SCAN_GROUP_TYPES][MAX_LOG2_INDEX][MAX_LOG2_I
*
* \return Returns pointer to scan order table based on given dimensions.
*/
uint32_t* uvg_get_scan_order_table(int scan_group, int scan_type, int log2_w, int log2_h)
const uint32_t* const uvg_get_scan_order_table(int scan_group, int scan_type, int log2_w, int log2_h)
{
// ISP_TODO: horizontal and vertical scan types
// TODO: horizontal and vertical scan types
assert(scan_type == SCAN_DIAG && "Horizontal and vertical scan not implemented.");
if (scan_group == SCAN_GROUP_4X4) {
return g_scan_order[scan_group][log2_w][log2_h];
}
else {
// ISP_TODO: returning coef group type does not work yet. It will break for non-square blocks
return g_scan_order[scan_group][log2_w - 2][log2_h - 2];
}
}

View file

@ -143,6 +143,6 @@ extern const uint32_t uvg_g_log2_sbb_size[7 + 1][7 + 1][2];
#define SCAN_GROUP_UNGROUPED 0
#define SCAN_GROUP_4X4 1
uint32_t* uvg_get_scan_order_table(int scan_group, int scan_type, int log2_w, int log2_h);
const uint32_t* const uvg_get_scan_order_table(int scan_group, int scan_type, int log2_w, int log2_h);
#endif //TABLES_H_

View file

@ -1194,7 +1194,6 @@ static void quantize_tr_residual(
}
if (cfg->lossless) {
// ISP_TODO: is there any sensible case where in and out strides would be different?
has_coeffs = bypass_transquant(tr_width,
tr_height,
lcu_width, // in stride