From 7062697beb366249e79a0ce20207a39e0161ad70 Mon Sep 17 00:00:00 2001 From: siivonek Date: Fri, 19 Aug 2022 15:05:22 +0300 Subject: [PATCH] [isp] Resolve TODOs. Make scan order tables const. --- src/encode_coding_tree.c | 6 +-- src/intra.c | 2 - src/rdo.c | 24 +++++----- src/search_intra.c | 19 ++++---- src/strategies/avx2/dct-avx2.c | 1 - src/strategies/avx2/quant-avx2.c | 1 - src/strategies/generic/dct-generic.c | 6 --- .../generic/encode_coding_tree-generic.c | 4 +- src/strategies/generic/intra-generic.c | 45 +------------------ src/strategies/generic/quant-generic.c | 4 +- src/tables.c | 7 ++- src/tables.h | 2 +- src/transform.c | 1 - 13 files changed, 32 insertions(+), 90 deletions(-) diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index dcc0edeb..1b360926 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -58,7 +58,7 @@ bool uvg_is_mts_allowed(const encoder_state_t * const state, cu_info_t *const pr uint8_t mts_type = state->encoder_control->cfg.mts; bool mts_allowed = mts_type == UVG_MTS_BOTH || (pred_cu->type == CU_INTRA ? mts_type == UVG_MTS_INTRA : pred_cu->type == CU_INTER && mts_type == UVG_MTS_INTER); mts_allowed &= cu_width <= max_size && cu_height <= max_size; - //mts_allowed &= !cu.ispMode; // ISP_TODO: Uncomment this when ISP is implemented. + mts_allowed &= pred_cu->type == CU_INTRA ? !pred_cu->intra.isp_mode : true; //mts_allowed &= !cu.sbtInfo; mts_allowed &= !(pred_cu->bdpcmMode && cu_width <= ts_max_size && cu_height <= ts_max_size); mts_allowed &= pred_cu->tr_idx != MTS_SKIP && !pred_cu->violates_mts_coeff_constraint && pred_cu->mts_last_scan_pos ; @@ -233,8 +233,8 @@ void uvg_encode_ts_residual(encoder_state_t* const state, // TODO: log2_cg_size is wrong if width != height const uint32_t log2_cg_size = uvg_g_log2_sbb_size[log2_block_width][log2_block_width][0] + uvg_g_log2_sbb_size[log2_block_width][log2_block_width][1]; - const uint32_t* scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_mode, log2_block_width, log2_block_height); - const uint32_t* scan_cg = uvg_get_scan_order_table(SCAN_GROUP_UNGROUPED, scan_mode, log2_block_width, log2_block_height); + const uint32_t* const scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_mode, log2_block_width, log2_block_height); + const uint32_t* const scan_cg = uvg_get_scan_order_table(SCAN_GROUP_UNGROUPED, scan_mode, log2_block_width, log2_block_height); double bits = 0; diff --git a/src/intra.c b/src/intra.c index a638661c..f0b79d2e 100644 --- a/src/intra.c +++ b/src/intra.c @@ -1497,7 +1497,6 @@ int uvg_get_isp_split_dim(const int width, const int height, const int split_typ non_split_dim_size = height; } - // ISP_TODO: make a define for this. Depends on minimum transform block log2 side length const int min_num_samples = 16; // Minimum allowed number of samples for split block const int factor_to_min_samples = non_split_dim_size < min_num_samples ? min_num_samples >> uvg_math_floor_log2(non_split_dim_size) : 1; partition_size = (split_dim_size >> div_shift) < factor_to_min_samples ? factor_to_min_samples : (split_dim_size >> div_shift); @@ -1654,7 +1653,6 @@ void uvg_intra_recon_cu( LCU_GET_CU_AT_PX(lcu, (lcu_px.x + offset) >> (tree_type == UVG_CHROMA_T), (lcu_px.y + offset) >> (tree_type == UVG_CHROMA_T))->cbf, }; - // ISP_TODO: does not work with ISP yet, ask Joose when this is relevant. if (recon_luma && depth <= MAX_DEPTH) { cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_Y); } diff --git a/src/rdo.c b/src/rdo.c index 4f5422bd..160cc0bc 100644 --- a/src/rdo.c +++ b/src/rdo.c @@ -1187,8 +1187,8 @@ int uvg_ts_rdoq(encoder_state_t* const state, coeff_t* src_coeff, coeff_t* dest_ const coeff_t entropy_coding_maximum = (1 << max_log2_tr_dynamic_range) - 1; - const uint32_t* scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_mode, log2_block_width, log2_block_height); - const uint32_t* scan_cg = uvg_get_scan_order_table(SCAN_GROUP_UNGROUPED, scan_mode, log2_block_width, log2_block_height); + const uint32_t* const scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_mode, log2_block_width, log2_block_height); + const uint32_t* const scan_cg = uvg_get_scan_order_table(SCAN_GROUP_UNGROUPED, scan_mode, log2_block_width, log2_block_height); uint32_t coeff_levels[3]; double coeff_level_error[4]; @@ -1391,14 +1391,13 @@ void uvg_rdoq( { const encoder_control_t * const encoder = state->encoder_control; cabac_data_t * const cabac = &state->cabac; - // ISP_TODO: these dimensions can be removed, they are same as log2_block_dimensions - uint32_t log2_tr_width = uvg_math_floor_log2(width); - uint32_t log2_tr_height = uvg_math_floor_log2(height); - int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_height + log2_tr_width) >> 1); // Represents scaling through forward transform + const uint32_t log2_block_width = uvg_g_convert_to_log2[width]; + const uint32_t log2_block_height = uvg_g_convert_to_log2[height]; + + int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_block_width + log2_block_height) >> 1); // Represents scaling through forward transform uint16_t go_rice_param = 0; uint32_t reg_bins = (width * height * 28) >> 4; - const uint32_t log2_block_width = uvg_g_convert_to_log2[width]; - const uint32_t log2_block_height = uvg_g_convert_to_log2[height]; + int32_t scalinglist_type= (block_type == CU_INTRA ? 0 : 3) + color; int32_t qp_scaled = uvg_get_scaled_qp(color, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]); @@ -1407,8 +1406,8 @@ void uvg_rdoq( const double lambda = color ? state->c_lambda : state->lambda; - const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled%6]; - const double *err_scale = encoder->scaling_list.error_scale[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled%6]; + const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_block_width][log2_block_height][scalinglist_type][qp_scaled%6]; + const double *err_scale = encoder->scaling_list.error_scale[log2_block_width][log2_block_height][scalinglist_type][qp_scaled%6]; double block_uncoded_cost = 0; @@ -1422,7 +1421,6 @@ void uvg_rdoq( memset(dest_coeff, 0, sizeof(coeff_t) * width * height); - // ISP_TODO: height const uint32_t log2_cg_size = uvg_g_log2_sbb_size[log2_block_width][log2_block_height][0] + uvg_g_log2_sbb_size[log2_block_width][log2_block_height][1]; const uint32_t log2_cg_width = uvg_g_log2_sbb_size[log2_block_width][log2_block_height][0]; const uint32_t log2_cg_height = uvg_g_log2_sbb_size[log2_block_width][log2_block_height][1]; @@ -1430,8 +1428,8 @@ void uvg_rdoq( const uint32_t cg_width = (MIN((uint8_t)TR_MAX_WIDTH, width) >> log2_cg_width); const uint32_t cg_height = (MIN((uint8_t)TR_MAX_WIDTH, height) >> log2_cg_height); - const uint32_t *scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_mode, log2_block_width, log2_block_height); - const uint32_t *scan_cg = uvg_get_scan_order_table(SCAN_GROUP_UNGROUPED, scan_mode, log2_block_width, log2_block_height); + const uint32_t * const scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_mode, log2_block_width, log2_block_height); + const uint32_t * const scan_cg = uvg_get_scan_order_table(SCAN_GROUP_UNGROUPED, scan_mode, log2_block_width, log2_block_height); const uint32_t cg_size = 16; const int32_t shift = 4 >> 1; diff --git a/src/search_intra.c b/src/search_intra.c index ee06077b..cf25936d 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -190,30 +190,27 @@ static void get_cost_dual(encoder_state_t * const state, * \param lcu_px Position of the top left pixel of current CU within current LCU. */ static void derive_mts_constraints(cu_info_t *const pred_cu, - lcu_t *const lcu, const int depth, + lcu_t *const lcu, const int width, const int height, const vector2d_t lcu_px) { - const int width = LCU_WIDTH >> depth; - const int height = width; // ISP_TODO: height - int8_t scan_idx = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth); + int8_t scan_idx = SCAN_DIAG; int32_t i; // ToDo: large block support in VVC? uint32_t sig_coeffgroup_flag[32 * 32] = { 0 }; const uint32_t log2_block_width = uvg_g_convert_to_log2[width]; const uint32_t log2_block_height = uvg_g_convert_to_log2[height]; - const uint32_t log2_cg_size = uvg_g_log2_sbb_size[log2_block_width][log2_block_width][0] - + uvg_g_log2_sbb_size[log2_block_width][log2_block_width][1]; // ISP_TODO: height - const uint32_t *scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_idx, log2_block_width, log2_block_height); - const uint32_t *scan_cg = uvg_get_scan_order_table(SCAN_GROUP_UNGROUPED, scan_idx, log2_block_width, log2_block_height); + const uint32_t log2_cg_size = uvg_g_log2_sbb_size[log2_block_width][log2_block_height][0] + + uvg_g_log2_sbb_size[log2_block_width][log2_block_height][1]; + const uint32_t * const scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_idx, log2_block_width, log2_block_height); + const uint32_t * const scan_cg = uvg_get_scan_order_table(SCAN_GROUP_UNGROUPED, scan_idx, log2_block_width, log2_block_height); const coeff_t* coeff = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, lcu_px.x, lcu_px.y)]; signed scan_cg_last = -1; signed scan_pos_last = -1; - // ISP_TODO: height - for (int i = 0; i < width * width; i++) { + for (int i = 0; i < width * height; i++) { if (coeff[scan[i]]) { scan_pos_last = i; sig_coeffgroup_flag[scan_cg[i >> log2_cg_size]] = 1; @@ -405,7 +402,7 @@ static double search_intra_trdepth( if (trafo != 0 && !cbf_is_set(pred_cu->cbf, depth, COLOR_Y)) continue; - derive_mts_constraints(pred_cu, lcu, depth, lcu_px); + derive_mts_constraints(pred_cu, lcu, width, height, lcu_px); if (pred_cu->tr_idx > 1) { if (pred_cu->violates_mts_coeff_constraint || !pred_cu-> mts_last_scan_pos) { diff --git a/src/strategies/avx2/dct-avx2.c b/src/strategies/avx2/dct-avx2.c index 35890e91..4197f17a 100644 --- a/src/strategies/avx2/dct-avx2.c +++ b/src/strategies/avx2/dct-avx2.c @@ -1598,7 +1598,6 @@ static void mts_dct_avx2( { tr_type_t type_hor; tr_type_t type_ver; - // ISP_TODO: height passed but not used uvg_get_tr_type(width, height, color, tu, &type_hor, &type_ver, mts_idx); diff --git a/src/strategies/avx2/quant-avx2.c b/src/strategies/avx2/quant-avx2.c index 75a3ff8f..b6d062b0 100644 --- a/src/strategies/avx2/quant-avx2.c +++ b/src/strategies/avx2/quant-avx2.c @@ -501,7 +501,6 @@ void uvg_quant_avx2(const encoder_state_t * const state, const coeff_t * __restr __m256i v_coef, q_coefs; __m256i v_quant_coeff_lo, v_quant_coeff_hi; - // ISP_TODO: do these avx common functions need height? scanord_read_vector(coeffs, scan, scan_idx, subpos, width, result_coeffs, 2); v_coef = result_coeffs[0]; diff --git a/src/strategies/generic/dct-generic.c b/src/strategies/generic/dct-generic.c index c790034f..8798da11 100644 --- a/src/strategies/generic/dct-generic.c +++ b/src/strategies/generic/dct-generic.c @@ -739,12 +739,6 @@ static void idct_ ## n ## x ## n ## _generic(int8_t bitdepth, const int16_t *inp partial_butterfly_inverse_ ## n ## _generic(tmp, output, shift_2nd); \ } - -//static void dct_non_square_generic(int8_t bitdepth, const int16_t* input, int16_t* output) -//{ -// // ISP_TODO: non-square transform here -//} - DCT_NXN_GENERIC(4); DCT_NXN_GENERIC(8); DCT_NXN_GENERIC(16); diff --git a/src/strategies/generic/encode_coding_tree-generic.c b/src/strategies/generic/encode_coding_tree-generic.c index 145bafc4..242e86bc 100644 --- a/src/strategies/generic/encode_coding_tree-generic.c +++ b/src/strategies/generic/encode_coding_tree-generic.c @@ -80,8 +80,8 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state, const uint32_t log2_block_height = uvg_g_convert_to_log2[height]; const uint32_t log2_cg_size = uvg_g_log2_sbb_size[log2_block_width][log2_block_height][0] + uvg_g_log2_sbb_size[log2_block_width][log2_block_height][1]; - const uint32_t* scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_mode, log2_block_width, log2_block_height); - const uint32_t* scan_cg = uvg_get_scan_order_table(SCAN_GROUP_UNGROUPED, scan_mode, log2_block_width, log2_block_height); + const uint32_t* const scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_mode, log2_block_width, log2_block_height); + const uint32_t* const scan_cg = uvg_get_scan_order_table(SCAN_GROUP_UNGROUPED, scan_mode, log2_block_width, log2_block_height); // Init base contexts according to block type diff --git a/src/strategies/generic/intra-generic.c b/src/strategies/generic/intra-generic.c index 14418f35..faf476e1 100644 --- a/src/strategies/generic/intra-generic.c +++ b/src/strategies/generic/intra-generic.c @@ -124,7 +124,6 @@ static void uvg_angular_pred_generic( const bool vertical_mode = intra_mode >= 34; // Modes distance to horizontal or vertical mode. const int_fast8_t mode_disp = vertical_mode ? pred_mode - 50 : -((int32_t)pred_mode - 18); - //const int_fast8_t mode_disp = vertical_mode ? intra_mode - 26 : 10 - intra_mode; // Sample displacement per column in fractions of 32. const int_fast8_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)]; @@ -140,23 +139,6 @@ static void uvg_angular_pred_generic( // Set ref_main and ref_side such that, when indexed with 0, they point to // index 0 in block coordinates. if (sample_disp < 0) { - - // ISP_TODO: might be able to use memcpy instead of loops here, should be a bit faster. - /*if (vertical_mode) { - for (int i = 0; i <= width + 1 + multi_ref_index; i++) { - temp_main[width + i] = in_ref_above[i]; - } - for (int j = 0; j <= height + 1 + multi_ref_index; j++) { - temp_side[height + j] = in_ref_left[j]; - } - } else { - for (int i = 0; i <= width + 1 + multi_ref_index; i++) { - temp_side[width + i] = in_ref_above[i]; - } - for (int j = 0; j <= height + 1 + multi_ref_index; j++) { - temp_main[height + j] = in_ref_left[j]; - } - }*/ memcpy(&temp_above[height], &in_ref_above[0], (width + 2 + multi_ref_index) * sizeof(uvg_pixel)); memcpy(&temp_left[width], &in_ref_left[0], (height + 2 + multi_ref_index) * sizeof(uvg_pixel)); @@ -259,10 +241,6 @@ static void uvg_angular_pred_generic( int filter_threshold = uvg_intra_hor_ver_dist_thres[log2_width]; int dist_from_vert_or_hor = MIN(abs((int32_t)pred_mode - 50), abs((int32_t)pred_mode - 18)); if (dist_from_vert_or_hor > filter_threshold) { - // ISP_TODO: these are introduced in the beginning of this function or am I missing something? - static const int16_t modedisp2sampledisp[32] = { 0, 1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 23, 26, 29, 32, 35, 39, 45, 51, 57, 64, 73, 86, 102, 128, 171, 256, 341, 512, 1024 }; - const int_fast8_t mode_disp = (pred_mode >= 34) ? pred_mode - 50 : 18 - pred_mode; - const int_fast8_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)]; if ((abs(sample_disp) & 0x1F) != 0) { use_cubic = false; @@ -361,8 +339,8 @@ static void uvg_angular_pred_generic( // Mode is horizontal or vertical, just copy the pixels. // Do not apply PDPC if multi ref line index is other than 0 - // ISP_TODO: do not do PDPC if block is in BDPCM mode - bool do_pdpc = (((width >= 4 && height >= 4) || channel_type != 0) && sample_disp >= 0 && multi_ref_index == 0); + // TODO: do not do PDPC if block is in BDPCM mode + bool do_pdpc = (((width >= 4 && height >= 4) || channel_type != 0) && sample_disp >= 0 && multi_ref_index == 0 /*&& !bdpcm*/); if (do_pdpc) { int scale = (log2_width + log2_height - 2) >> 2; @@ -381,25 +359,6 @@ static void uvg_angular_pred_generic( memcpy(&dst[y * width], &ref_main[1], width * sizeof(uvg_pixel)); } } - // ISP_TODO: there is no reason to run these loops AND then check if PDPC is applied. Do the check first and then run either the normal or PDPC loops - - //for (int_fast32_t y = 0; y < height; ++y) { - // for (int_fast32_t x = 0; x < width; ++x) { - // dst[y * width + x] = ref_main[x + 1]; - // } - // // Do not apply PDPC if multi ref line index is other than 0 - // // ISP_TODO: do not do PDPC if block is in BDPCM mode - // if (((width >= 4 && height >= 4) || channel_type != 0) && sample_disp >= 0 && multi_ref_index == 0) { - // int scale = (log2_width + log2_height - 2) >> 2; - // const uvg_pixel top_left = ref_main[0]; - // const uvg_pixel left = ref_side[1 + y]; - // for (int i = 0; i < MIN(3 << scale, width); i++) { // ISP_TODO: is one loop enough for PDPC? - // const int wL = 32 >> (2 * i >> scale); - // const uvg_pixel val = dst[y * width + i]; - // dst[y * width + i] = CLIP_TO_PIXEL(val + ((wL * (left - top_left) + 32) >> 6)); - // } - // } - //} } // Flip the block if this is was a horizontal mode. diff --git a/src/strategies/generic/quant-generic.c b/src/strategies/generic/quant-generic.c index e0c2744c..3de27958 100644 --- a/src/strategies/generic/quant-generic.c +++ b/src/strategies/generic/quant-generic.c @@ -255,7 +255,7 @@ int uvg_quant_cbcr_residual_generic( ALIGNED(64) int16_t v_residual[TR_MAX_WIDTH * TR_MAX_WIDTH]; ALIGNED(64) int16_t combined_residual[TR_MAX_WIDTH * TR_MAX_WIDTH]; ALIGNED(64) coeff_t coeff[TR_MAX_WIDTH * TR_MAX_WIDTH]; - // ISP_TODO: this function is not fully converted to handle non-square blocks + // TODO: this function is not fully converted to handle non-square blocks { int y, x; for (y = 0; y < height; ++y) { @@ -494,7 +494,7 @@ int uvg_quantize_residual_generic(encoder_state_t *const state, // Quantize coeffs. (coeff -> coeff_out) if (state->encoder_control->cfg.rdoq_enable && - (width > 4 || !state->encoder_control->cfg.rdoq_skip) && !use_trskip) // ISP_TODO: width check here might not be necessary, therefore also height check unnecessary. Investigate. + (width > 4 || !state->encoder_control->cfg.rdoq_skip) && !use_trskip) { int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth; tr_depth += (cur_cu->part_size == SIZE_NxN ? 1 : 0); diff --git a/src/tables.c b/src/tables.c index 0d51f2f4..dec8b467 100644 --- a/src/tables.c +++ b/src/tables.c @@ -2573,7 +2573,7 @@ static const uint32_t const g_scan_order_buffer[32258] = { // Get scan order table based on scan group type (grouped or ungrouped) // and log2 block width and height index -static const uint32_t* g_scan_order[SCAN_GROUP_TYPES][MAX_LOG2_INDEX][MAX_LOG2_INDEX] = +static const uint32_t* const g_scan_order[SCAN_GROUP_TYPES][MAX_LOG2_INDEX][MAX_LOG2_INDEX] = { { { g_scan_order_buffer + 0, g_scan_order_buffer + 1, g_scan_order_buffer + 3, g_scan_order_buffer + 7, g_scan_order_buffer + 15, g_scan_order_buffer + 31, g_scan_order_buffer + 63, }, @@ -2606,16 +2606,15 @@ static const uint32_t* g_scan_order[SCAN_GROUP_TYPES][MAX_LOG2_INDEX][MAX_LOG2_I * * \return Returns pointer to scan order table based on given dimensions. */ -uint32_t* uvg_get_scan_order_table(int scan_group, int scan_type, int log2_w, int log2_h) +const uint32_t* const uvg_get_scan_order_table(int scan_group, int scan_type, int log2_w, int log2_h) { - // ISP_TODO: horizontal and vertical scan types + // TODO: horizontal and vertical scan types assert(scan_type == SCAN_DIAG && "Horizontal and vertical scan not implemented."); if (scan_group == SCAN_GROUP_4X4) { return g_scan_order[scan_group][log2_w][log2_h]; } else { - // ISP_TODO: returning coef group type does not work yet. It will break for non-square blocks return g_scan_order[scan_group][log2_w - 2][log2_h - 2]; } } diff --git a/src/tables.h b/src/tables.h index 0d52ea87..44621251 100644 --- a/src/tables.h +++ b/src/tables.h @@ -143,6 +143,6 @@ extern const uint32_t uvg_g_log2_sbb_size[7 + 1][7 + 1][2]; #define SCAN_GROUP_UNGROUPED 0 #define SCAN_GROUP_4X4 1 -uint32_t* uvg_get_scan_order_table(int scan_group, int scan_type, int log2_w, int log2_h); +const uint32_t* const uvg_get_scan_order_table(int scan_group, int scan_type, int log2_w, int log2_h); #endif //TABLES_H_ diff --git a/src/transform.c b/src/transform.c index 4738f942..ffe3c05b 100644 --- a/src/transform.c +++ b/src/transform.c @@ -1194,7 +1194,6 @@ static void quantize_tr_residual( } if (cfg->lossless) { - // ISP_TODO: is there any sensible case where in and out strides would be different? has_coeffs = bypass_transquant(tr_width, tr_height, lcu_width, // in stride