/*
 * Review notes for uvg_get_isp_cu_arr_coords(int *x, int *y), the helper
 * this patch adds to src/cu.c and declares in src/cu.h.
 *
 * Purpose: adjusts *x and *y in place when at least one of them is not a
 * multiple of 4; if both are multiples of 4 the function returns without
 * touching them. Nothing is returned. Both pointers are dereferenced
 * unconditionally, so callers must pass valid addresses.
 *
 * Mapping, with ry = *y % 4 and rx = *x % 4:
 *   ry nonzero and even (labelled "8x2 block"):   *y -= 2;   *x += 4
 *   ry odd              (labelled "16x1 block"):  *y -= ry;  *x += 4 * ry
 *   ry zero, rx even    (labelled "2x8 block"):   *x -= 2;   *y += 4
 *   ry zero, rx odd     (labelled "1x16 block"):  *x -= rx;  *y += 4 * rx
 * The y remainder is tested first, so if both remainders are nonzero only
 * the first two rules can apply and *x keeps its remainder.
 *
 * Callers changed by this patch (encode_transform_unit,
 * encode_transform_coeff, search_cu) copy the block position into
 * temporaries, run them through this helper and use the result for the CU
 * array lookup; the search.c hunk states that fetching from the CU array
 * does not work for dimensions less than 4.
 *
 * NOTE(review): inputs are presumably non-negative pixel coordinates. With a
 * negative value C's % gives a non-positive remainder and the adjustments
 * change sign; confirm no caller passes negative positions.
 * NOTE(review): the block-size labels describe the ISP shapes the author had
 * in mind; the function itself only sees coordinates, so whether the mapping
 * also suits other sub-4 shapes cannot be told from this patch; confirm.
 */
diff --git a/src/cu.c b/src/cu.c index 10d99943..3a0f03fa 100644 --- a/src/cu.c +++ b/src/cu.c @@ -97,6 +97,42 @@ cu_info_t* uvg_cu_array_at(cu_array_t *cua, unsigned x_px, unsigned y_px) } +void uvg_get_isp_cu_arr_coords(int *x, int *y) +{ + // Do nothing if dimensions are divisible by 4 + if (*y % 4 == 0 && *x % 4 == 0) return; + const int remainder_y = *y % 4; + const int remainder_x = *x % 4; + + if (remainder_y != 0) { + // Horizontal ISP split + if (remainder_y % 2 == 0) { + // 8x2 block + *y -= 2; + *x += 4; + } + else { + // 16x1 block + *y -= remainder_y; + *x += remainder_y * 4; + } + } + else { + // Vertical ISP split + if (*x % 2 == 0) { + // 2x8 block + *y += 4; + *x -= 2; + } + else { + // 1x16 block + *y += remainder_x * 4; + *x -= remainder_x; + } + } +} + + const cu_info_t* uvg_cu_array_at_const(const cu_array_t *cua, unsigned x_px, unsigned y_px) { assert(x_px < cua->width); diff --git a/src/cu.h b/src/cu.h index dae446c4..de22dd89 100644 --- a/src/cu.h +++ b/src/cu.h @@ -249,6 +249,7 @@ typedef struct cu_array_t { } cu_array_t; cu_info_t* uvg_cu_array_at(cu_array_t *cua, unsigned x_px, unsigned y_px); +void uvg_get_isp_cu_arr_coords(int* x, int* y); const cu_info_t* uvg_cu_array_at_const(const cu_array_t *cua, unsigned x_px, unsigned y_px); cu_array_t * uvg_cu_array_alloc(const int width, const int height); diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index 096e4f5c..cd11ddc3 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -542,7 +542,10 @@ static void encode_transform_unit( const uint8_t height_c = cu_loc->chroma_height; cu_array_t* used_cu_array = tree_type != UVG_CHROMA_T ? 
frame->cu_array : frame->chroma_cu_array; - const cu_info_t *cur_pu = uvg_cu_array_at_const(used_cu_array, x, y); + int isp_x = x; + int isp_y = y; + uvg_get_isp_cu_arr_coords(&isp_x, &isp_y); + const cu_info_t *cur_pu = uvg_cu_array_at_const(used_cu_array, isp_x, isp_y); int8_t scan_idx = uvg_get_scan_order(cur_pu->type, cur_pu->intra.mode, depth); @@ -627,13 +630,17 @@ static void encode_transform_coeff( cu_loc_t *original_loc) // Original dimensions before ISP split { cabac_data_t * const cabac = &state->cabac; - const int x = cu_loc->x; - const int y = cu_loc->y; + int x = cu_loc->x; + int y = cu_loc->y; const int width = cu_loc->width; const int height = cu_loc->height; bool isp_split = cu_loc->x != original_loc->x || cu_loc->y != original_loc->y; + if (isp_split) { + uvg_get_isp_cu_arr_coords(&x, &y); + } + //const encoder_control_t *const ctrl = state->encoder_control; const videoframe_t * const frame = state->tile->frame; const cu_array_t* used_array = tree_type != UVG_CHROMA_T ? frame->cu_array : frame->chroma_cu_array; @@ -643,7 +650,7 @@ static void encode_transform_coeff( // containing CU. const int x_cu = 8 * (x / 8); const int y_cu = 8 * (y / 8); - const cu_info_t *cur_cu = uvg_cu_array_at_const(used_array, x, y); + const cu_info_t *cur_cu = uvg_cu_array_at_const(used_array, x, y); // TODO: very suspect, chroma cbfs stored in upper left corner, everything else in bottom right for depth 4 // NxN signifies implicit transform split at the first transform level. 
// There is a similar implicit split for inter, but it is only used when diff --git a/src/intra.c b/src/intra.c index b1ca6361..2239eeac 100644 --- a/src/intra.c +++ b/src/intra.c @@ -971,6 +971,7 @@ static void intra_predict_regular( // pdpc // bool pdpcCondition = (mode == 0 || mode == 1 || mode == 18 || mode == 50); bool pdpcCondition = (mode == 0 || mode == 1); // Planar and DC + pdpcCondition &= width >= TR_MIN_WIDTH && height >= TR_MIN_WIDTH; if (pdpcCondition && multi_ref_index == 0) // Cannot be used with MRL. { uvg_pdpc_planar_dc(mode, cu_loc, color, used_ref, dst); diff --git a/src/search.c b/src/search.c index ff426dda..5c2f3942 100644 --- a/src/search.c +++ b/src/search.c @@ -1172,8 +1172,12 @@ static double search_cu( for (int i = 0; i < split_num; ++i) { cu_loc_t isp_loc; uvg_get_isp_split_loc(&isp_loc, x, y, cu_width, cu_height, i, split_type); - //search_data->best_isp_cbfs |= cbf_is_set(cur_cu->cbf, depth, COLOR_Y) << (i++); - cu_info_t* split_cu = LCU_GET_CU_AT_PX(lcu, isp_loc.x % LCU_WIDTH, isp_loc.y % LCU_WIDTH); + // Fetching from CU array does not work for dimensions less than 4 + // Fetch proper x, y coords for isp blocks + int tmp_x = isp_loc.x; + int tmp_y = isp_loc.y; + uvg_get_isp_cu_arr_coords(&tmp_x, &tmp_y); + cu_info_t* split_cu = LCU_GET_CU_AT_PX(lcu, tmp_x % LCU_WIDTH, tmp_y % LCU_WIDTH); bool cur_cbf = (intra_search.best_isp_cbfs >> i) & 1; cbf_clear(&split_cu->cbf, depth, COLOR_Y); cbf_clear(&split_cu->cbf, depth, COLOR_U); diff --git a/src/strategies/avx2/intra-avx2.c b/src/strategies/avx2/intra-avx2.c index 57fee201..6e0f10c3 100644 --- a/src/strategies/avx2/intra-avx2.c +++ b/src/strategies/avx2/intra-avx2.c @@ -357,7 +357,7 @@ static void uvg_angular_pred_avx2( // PDPC - bool PDPC_filter = (width >= 4 || channel_type != 0); + bool PDPC_filter = ((width >= TR_MIN_WIDTH && height >= TR_MIN_WIDTH) || channel_type != 0); if (pred_mode > 1 && pred_mode < 67) { if (mode_disp < 0 || multi_ref_index) { // Cannot be used with MRL. 
PDPC_filter = false; diff --git a/src/strategies/generic/dct-generic.c b/src/strategies/generic/dct-generic.c index 507ed174..db725359 100644 --- a/src/strategies/generic/dct-generic.c +++ b/src/strategies/generic/dct-generic.c @@ -1027,7 +1027,7 @@ const int16_t uvg_g_DCT8P16[256] = DEFINE_DCT8_P16_MATRIX(88, 88, 87, 85, 81, 77 const int16_t uvg_g_DCT8P32[1024] = DEFINE_DCT8_P32_MATRIX(90, 90, 89, 88, 87, 86, 85, 84, 82, 80, 78, 77, 74, 72, 68, 66, 63, 60, 56, 53, 50, 46, 42, 38, 34, 30, 26, 21, 17, 13, 9, 4); // ********************************** DCT-2 ********************************** -void fastForwardDCT2_B2(const int16_t* src, int16_t* dst, int32_t shift, int line, int skip_line, int skip_line2) +static void fastForwardDCT2_B2(const int16_t* src, int16_t* dst, int32_t shift, int line, int skip_line, int skip_line2) { int32_t j; int32_t E, O; @@ -1061,7 +1061,7 @@ void fastForwardDCT2_B2(const int16_t* src, int16_t* dst, int32_t shift, int lin } } -void fastInverseDCT2_B2(const int16_t* src, int16_t* dst, int shift, int line, int skip_line, int skip_line2) +static void fastInverseDCT2_B2(const int16_t* src, int16_t* dst, int shift, int line, int skip_line, int skip_line2) { int32_t j; int32_t E, O; diff --git a/src/strategies/generic/intra-generic.c b/src/strategies/generic/intra-generic.c index 81de7c4b..eff47941 100644 --- a/src/strategies/generic/intra-generic.c +++ b/src/strategies/generic/intra-generic.c @@ -302,7 +302,7 @@ static void uvg_angular_pred_generic( // PDPC - bool PDPC_filter = (tmp_width >= 4 || channel_type != 0); + bool PDPC_filter = ((tmp_width >= TR_MIN_WIDTH && tmp_height >= TR_MIN_WIDTH) || channel_type != 0); if (pred_mode > 1 && pred_mode < 67) { if (mode_disp < 0 || multi_ref_index) { // Cannot be used with MRL. PDPC_filter = false;