diff --git a/src/strategies/generic/dct-generic.c b/src/strategies/generic/dct-generic.c index 03453b90..507ed174 100644 --- a/src/strategies/generic/dct-generic.c +++ b/src/strategies/generic/dct-generic.c @@ -771,6 +771,12 @@ static void fast_inverse_dst_4x4_generic(int8_t bitdepth, const int16_t* input, // DCT-2 +#define DEFINE_DCT2_P2_MATRIX(a) \ +{ \ + a, a, \ + a, -a \ +} + #define DEFINE_DCT2_P4_MATRIX(a,b,c) \ { \ a, a, a, a, \ @@ -1002,6 +1008,7 @@ static void fast_inverse_dst_4x4_generic(int8_t bitdepth, const int16_t* input, } // DCT-2 +const int16_t uvg_g_DCT2P2[4] = DEFINE_DCT2_P2_MATRIX(64); const int16_t uvg_g_DCT2P4[16] = DEFINE_DCT2_P4_MATRIX(64, 83, 36); const int16_t uvg_g_DCT2P8[64] = DEFINE_DCT2_P8_MATRIX(64, 83, 36, 89, 75, 50, 18); const int16_t uvg_g_DCT2P16[256] = DEFINE_DCT2_P16_MATRIX(64, 83, 36, 89, 75, 50, 18, 90, 87, 80, 70, 57, 43, 25, 9); @@ -1020,6 +1027,68 @@ const int16_t uvg_g_DCT8P16[256] = DEFINE_DCT8_P16_MATRIX(88, 88, 87, 85, 81, 77 const int16_t uvg_g_DCT8P32[1024] = DEFINE_DCT8_P32_MATRIX(90, 90, 89, 88, 87, 86, 85, 84, 82, 80, 78, 77, 74, 72, 68, 66, 63, 60, 56, 53, 50, 46, 42, 38, 34, 30, 26, 21, 17, 13, 9, 4); // ********************************** DCT-2 ********************************** +void fastForwardDCT2_B2(const int16_t* src, int16_t* dst, int32_t shift, int line, int skip_line, int skip_line2) +{ + int32_t j; + int32_t E, O; + int32_t add = (shift > 0) ? (1 << (shift - 1)) : 0; + + const int16_t* iT = uvg_g_DCT2P2; + + int16_t *p_coef = dst; + const int reduced_line = line - skip_line; + for (j = 0; j < reduced_line; j++) + { + /* E and O */ + E = src[0] + src[1]; + O = src[0] - src[1]; + + dst[0] = (iT[0] * E + add) >> shift; + dst[line] = (iT[2] * O + add) >> shift; + + + src += 2; + dst++; + } + if (skip_line) + { + dst = p_coef + reduced_line; + for (j = 0; j < 2; j++) + { + memset(dst, 0, sizeof(int16_t) * skip_line); + dst += line; + } + } +} + +void fastInverseDCT2_B2(const int16_t* src, int16_t* dst, int shift, int line, int skip_line, int skip_line2) +{ + int32_t j; + int32_t E, O; + int32_t add = 1 << (shift - 1); + + const int16_t* iT = uvg_g_DCT2P2; + + const int reduced_line = line - skip_line; + for (j = 0; j < reduced_line; j++) + { + /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ + E = iT[0] * (src[0] + src[line]); + O = iT[2] * (src[0] - src[line]); + + /* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */ + dst[0] = (short)CLIP(-32768, 32767, (E + add) >> shift); + dst[1] = (short)CLIP(-32768, 32767, (O + add) >> shift); + + src++; + dst += 2; + } + if (skip_line) + { + memset(dst, 0, (skip_line << 1) * sizeof(int16_t)); + } +} + static void fastForwardDCT2_B4(const int16_t* src, int16_t* dst, int32_t shift, int line, int skip_line, int skip_line2) { int32_t j; @@ -2417,16 +2486,16 @@ DCT_MTS_NXN_GENERIC(DST1, 32); typedef void partial_tr_func(const int16_t*, int16_t*, int32_t, int, int, int); // ToDo: Enable MTS 2x2 and 64x64 transforms -static partial_tr_func* dct_table[3][5] = { - { fastForwardDCT2_B4, fastForwardDCT2_B8, fastForwardDCT2_B16, fastForwardDCT2_B32, NULL }, - { fastForwardDCT8_B4, fastForwardDCT8_B8, fastForwardDCT8_B16, fastForwardDCT8_B32, NULL }, - { fastForwardDST7_B4, fastForwardDST7_B8, fastForwardDST7_B16, fastForwardDST7_B32, NULL }, +static partial_tr_func* dct_table[3][6] = { + { fastForwardDCT2_B2, fastForwardDCT2_B4, fastForwardDCT2_B8, fastForwardDCT2_B16, fastForwardDCT2_B32, NULL }, + { NULL, fastForwardDCT8_B4, fastForwardDCT8_B8, fastForwardDCT8_B16, fastForwardDCT8_B32, NULL }, + { NULL, fastForwardDST7_B4, fastForwardDST7_B8, fastForwardDST7_B16, fastForwardDST7_B32, NULL }, }; -static partial_tr_func* idct_table[3][5] = { - { fastInverseDCT2_B4, fastInverseDCT2_B8, fastInverseDCT2_B16, fastInverseDCT2_B32, NULL/*fastInverseDCT2_B64*/ }, - { fastInverseDCT8_B4, fastInverseDCT8_B8, fastInverseDCT8_B16, fastInverseDCT8_B32, NULL }, - { fastInverseDST7_B4, fastInverseDST7_B8, fastInverseDST7_B16, fastInverseDST7_B32, NULL }, +static partial_tr_func* idct_table[3][6] = { + { fastInverseDCT2_B2, fastInverseDCT2_B4, fastInverseDCT2_B8, fastInverseDCT2_B16, fastInverseDCT2_B32, NULL/*fastInverseDCT2_B64*/ }, + { NULL, fastInverseDCT8_B4, fastInverseDCT8_B8, fastInverseDCT8_B16, fastInverseDCT8_B32, NULL }, + { NULL, fastInverseDST7_B4, fastInverseDST7_B8, fastInverseDST7_B16, fastInverseDST7_B32, NULL }, }; @@ -2458,6 +2527,7 @@ void uvg_get_tr_type( if (implicit_mts) { + // ISP_TODO: do these apply for ISP blocks? bool width_ok = width >= 4 && width <= 16; bool height_ok = height >= 4 && height <= 16; @@ -2506,8 +2576,10 @@ static void mts_dct_generic( { int skip_width = (type_hor != DCT2 && width == 32) ? 16 : (width > 32 ? width - 32 : 0); int skip_height = (type_ver != DCT2 && height == 32) ? 16 : (height > 32 ? height - 32 : 0); - const int log2_width_minus2 = uvg_g_convert_to_bit[width]; - const int log2_height_minus2 = uvg_g_convert_to_bit[height]; + const int log2_width_minus1 = uvg_g_convert_to_log2[width] - 1; + const int log2_height_minus1 = uvg_g_convert_to_log2[height] - 1; + //const int log2_width_minus2 = uvg_g_convert_to_bit[width]; + //const int log2_height_minus2 = uvg_g_convert_to_bit[height]; if(tu->lfnst_idx || tu->cr_lfnst_idx) { if ((width == 4 && height > 4) || (width > 4 && height == 4)) @@ -2522,12 +2594,12 @@ static void mts_dct_generic( } } - partial_tr_func* dct_hor = dct_table[type_hor][log2_width_minus2]; - partial_tr_func* dct_ver = dct_table[type_ver][log2_height_minus2]; + partial_tr_func* dct_hor = dct_table[type_hor][log2_width_minus1]; + partial_tr_func* dct_ver = dct_table[type_ver][log2_height_minus1]; int16_t tmp[32 * 32]; - const int32_t shift_1st = log2_width_minus2 + bitdepth - 7; - const int32_t shift_2nd = log2_height_minus2 + 8; + const int32_t shift_1st = log2_width_minus1 + bitdepth - 8; + const int32_t shift_2nd = log2_height_minus1 + 7; dct_hor(input, tmp, shift_1st, height, 0, skip_width); dct_ver(tmp, output, shift_2nd, width, skip_width, skip_height); @@ -2559,8 +2631,8 @@ static void mts_idct_generic( { int skip_width = (type_hor != DCT2 && width == 32) ? 16 : width > 32 ? width - 32 : 0; int skip_height = (type_ver != DCT2 && height == 32) ? 16 : height > 32 ? height - 32 : 0; - const int log2_width_minus2 = uvg_g_convert_to_bit[width]; - const int log2_height_minus2 = uvg_g_convert_to_bit[height]; + const int log2_width_minus1 = uvg_g_convert_to_log2[width] - 1; + const int log2_height_minus1 = uvg_g_convert_to_log2[height] - 1; if (tu->lfnst_idx || tu->cr_lfnst_idx) { if ((width == 4 && height > 4) || (width > 4 && height == 4)) { @@ -2573,8 +2645,8 @@ static void mts_idct_generic( } } - partial_tr_func* idct_hor = idct_table[type_hor][log2_width_minus2]; - partial_tr_func* idct_ver = idct_table[type_ver][log2_height_minus2]; + partial_tr_func* idct_hor = idct_table[type_hor][log2_width_minus1]; + partial_tr_func* idct_ver = idct_table[type_ver][log2_height_minus1]; int16_t tmp[32 * 32]; const int max_log2_tr_dynamic_range = 15; diff --git a/src/strategies/generic/intra-generic.c b/src/strategies/generic/intra-generic.c index 1891ee5a..833c42c2 100644 --- a/src/strategies/generic/intra-generic.c +++ b/src/strategies/generic/intra-generic.c @@ -414,7 +414,8 @@ static void uvg_intra_pred_planar_generic( const int offset = 1 << (log2_width + log2_height); const int final_shift = 1 + log2_width + log2_height; - assert((log2_width >= 2 && log2_width <= 5) && (log2_height >= 2 && log2_height <= 5)); + // If ISP is enabled log_dim 1 is possible (limit was previously 2) + assert((log2_width >= 1 && log2_width <= 5) && (log2_height >= 1 && log2_height <= 5)); const uvg_pixel top_right = ref_top[width + 1]; const uvg_pixel bottom_left = ref_left[height + 1]; diff --git a/src/transform.c b/src/transform.c index 8b903579..526e71c2 100644 --- a/src/transform.c +++ b/src/transform.c @@ -1353,7 +1353,9 @@ void uvg_quantize_lcu_residual( // Tell clang-analyzer what is up. For some reason it can't figure out from // asserting just depth. - assert(width == 4 || + // Width 2 is possible with ISP blocks + assert(width == 2 || + width == 4 || width == 8 || width == 16 || width == 32 ||