From e06d980a9692c8ef0641bd09b7e14e35f332260f Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Thu, 8 Aug 2024 14:48:48 +0300 Subject: [PATCH 1/5] [sao] Use correct pixel type in sao_edge_ddistortion_generic --- src/strategies/generic/sao_shared_generics.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/strategies/generic/sao_shared_generics.h b/src/strategies/generic/sao_shared_generics.h index ca6ccfd6..c86a25b7 100644 --- a/src/strategies/generic/sao_shared_generics.h +++ b/src/strategies/generic/sao_shared_generics.h @@ -67,10 +67,10 @@ static int sao_edge_ddistortion_generic(const uvg_pixel *orig_data, uint32_t a_pos = (y + a_ofs.y) * block_width + x + a_ofs.x; uint32_t b_pos = (y + b_ofs.y) * block_width + x + b_ofs.x; - uint8_t a = rec_data[a_pos]; - uint8_t b = rec_data[b_pos]; - uint8_t c = rec_data[c_pos]; - uint8_t orig = orig_data[c_pos]; + uvg_pixel a = rec_data[a_pos]; + uvg_pixel b = rec_data[b_pos]; + uvg_pixel c = rec_data[c_pos]; + uvg_pixel orig = orig_data[c_pos]; int32_t eo_cat = sao_calc_eo_cat(a, b, c); int32_t offset = offsets[eo_cat]; From 67496d18749a27d8ca0ec28ecb52ca23c54f4705 Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Thu, 8 Aug 2024 14:49:33 +0300 Subject: [PATCH 2/5] [10bit] Fix reading 8bit data when using BIT_DEPTH 10+ and we need to fill the frame --- src/yuv_io.c | 53 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/src/yuv_io.c b/src/yuv_io.c index 1b72deb9..38da9ea6 100644 --- a/src/yuv_io.c +++ b/src/yuv_io.c @@ -53,28 +53,49 @@ static void fill_after_frame(unsigned height, unsigned array_width, } -static int read_and_fill_frame_data(FILE *file, - unsigned width, unsigned height, unsigned bytes_per_sample, - unsigned array_width, uvg_pixel *data) +static int read_and_fill_frame_data( + FILE* file, + unsigned width, + unsigned height, + unsigned bytes_per_sample, + unsigned array_width, + uvg_pixel* data) { - uvg_pixel* p 
= data; - uvg_pixel* end = data + array_width * height; - uvg_pixel fill_char; - unsigned i; - while (p < end) { - // Read the beginning of the line from input. - if (width != fread(p, bytes_per_sample, width, file)) - return 0; + unsigned i; + if (bytes_per_sample != sizeof(uvg_pixel)) { + uint8_t* p = (uint8_t*)data; + uint8_t* end = (uint8_t*)data + array_width * height; + uint8_t fill_char; + while (p < end) { + // Read the beginning of the line from input. + if (width != fread(p, bytes_per_sample, width, file)) return 0; + // Fill the rest with the last pixel value. + // Fill the rest with the last pixel value. + fill_char = p[width - 1]; - // Fill the rest with the last pixel value. - fill_char = p[width - 1]; + for (i = width; i < array_width; ++i) { + p[i] = fill_char; + } - for (i = width; i < array_width; ++i) { - p[i] = fill_char; + p += array_width; } + } else { + uvg_pixel* p = data; + uvg_pixel* end = data + array_width * height; + uvg_pixel fill_char; + while (p < end) { + // Read the beginning of the line from input. + if (width != fread(p, bytes_per_sample, width, file)) return 0; + // Fill the rest with the last pixel value. + fill_char = p[width - 1]; - p += array_width; + for (i = width; i < array_width; ++i) { + p[i] = fill_char; + } + + p += array_width; + } } return 1; } From cd65044000daab20a5239895662e1bdabd783c66 Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Wed, 14 Aug 2024 11:26:25 +0300 Subject: [PATCH 3/5] [10bit] Fix second instance of satd shifting based on bit depth Already shifted in the base function --- src/image.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/image.c b/src/image.c index 85f88c6b..68faa245 100644 --- a/src/image.c +++ b/src/image.c @@ -504,7 +504,7 @@ unsigned uvg_image_calc_satd(const uvg_picture *pic, pic_data, pic->stride, ref_data, - ref->stride) >> (UVG_BIT_DEPTH - 8); + ref->stride); } else { // Extrapolate pixels from outside the frame. 
@@ -550,7 +550,7 @@ unsigned uvg_image_calc_satd(const uvg_picture *pic, pic_data, pic->stride, ext_origin, - ext_s) >> (UVG_BIT_DEPTH - 8); + ext_s); return satd; } From 862cc92e9748af3ec3e781368f19623be9bf38ac Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Thu, 5 Sep 2024 14:42:50 +0300 Subject: [PATCH 4/5] [cli] Fix yuv_io_seek() buffer type --- src/yuv_io.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/yuv_io.c b/src/yuv_io.c index 38da9ea6..1b94fc78 100644 --- a/src/yuv_io.c +++ b/src/yuv_io.c @@ -53,16 +53,16 @@ static void fill_after_frame(unsigned height, unsigned array_width, } -static int read_and_fill_frame_data( - FILE* file, - unsigned width, - unsigned height, - unsigned bytes_per_sample, - unsigned array_width, - uvg_pixel* data) +static int read_and_fill_frame_data(FILE* file, + unsigned width, + unsigned height, + unsigned bytes_per_sample, + unsigned array_width, + uvg_pixel* data) { unsigned i; + // Handle separately the case where we use UVG_BIT_DEPTH 10+ but the input is 8-bit. if (bytes_per_sample != sizeof(uvg_pixel)) { uint8_t* p = (uint8_t*)data; uint8_t* end = (uint8_t*)data + array_width * height; @@ -71,7 +71,6 @@ static int read_and_fill_frame_data( // Read the beginning of the line from input. if (width != fread(p, bytes_per_sample, width, file)) return 0; // Fill the rest with the last pixel value. - // Fill the rest with the last pixel value. fill_char = p[width - 1]; for (i = width; i < array_width; ++i) { @@ -334,7 +333,7 @@ int yuv_io_seek(FILE* file, unsigned frames, // Seek failed. Skip data by reading. 
error = 0; - unsigned char* tmp[4096]; + unsigned char tmp[4096]; size_t bytes_left = skip_bytes; while (bytes_left > 0 && !error) { const size_t skip = MIN(4096, bytes_left); From f6ef70adfa28652c74caa919d08fced0d67786cc Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Thu, 5 Sep 2024 16:55:46 +0300 Subject: [PATCH 5/5] [10bit] Enable AVX2 optimized DCT functions for all bit depths --- src/strategies/avx2/dct-avx2.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/src/strategies/avx2/dct-avx2.c b/src/strategies/avx2/dct-avx2.c index 01b1b4bb..036eed98 100644 --- a/src/strategies/avx2/dct-avx2.c +++ b/src/strategies/avx2/dct-avx2.c @@ -54,7 +54,6 @@ extern const int16_t uvg_g_dct_32_t[32][32]; #if COMPILE_INTEL_AVX2 #include "uvg266.h" -#if UVG_BIT_DEPTH == 8 #include <immintrin.h> #include "strategies/avx2/dct_avx2_tables.h" #define MAX_LOG2_TR_DYNAMIC_RANGE 15 @@ -8039,34 +8038,28 @@ static void mts_idct_avx2( } } -#endif // UVG_BIT_DEPTH == 8 #endif //COMPILE_INTEL_AVX2 int uvg_strategy_register_dct_avx2(void* opaque, uint8_t bitdepth) { bool success = true; #if COMPILE_INTEL_AVX2 -#if UVG_BIT_DEPTH == 8 - if (bitdepth == 8){ - //success &= uvg_strategyselector_register(opaque, "fast_forward_dst_4x4", "avx2", 40, &matrix_dst_4x4_avx2); - success &= uvg_strategyselector_register(opaque, "dct_4x4", "avx2", 40, &matrix_dct_4x4_avx2); - success &= uvg_strategyselector_register(opaque, "dct_8x8", "avx2", 40, &matrix_dct_8x8_avx2); - success &= uvg_strategyselector_register(opaque, "dct_16x16", "avx2", 40, &matrix_dct_16x16_avx2); - success &= uvg_strategyselector_register(opaque, "dct_32x32", "avx2", 40, &matrix_dct_32x32_avx2); + success &= uvg_strategyselector_register(opaque, "dct_4x4", "avx2", 40, &matrix_dct_4x4_avx2); + success &= uvg_strategyselector_register(opaque, "dct_8x8", "avx2", 40, &matrix_dct_8x8_avx2); + success &= uvg_strategyselector_register(opaque, "dct_16x16", "avx2", 40, &matrix_dct_16x16_avx2); + success
&= uvg_strategyselector_register(opaque, "dct_32x32", "avx2", 40, &matrix_dct_32x32_avx2); - // success &= uvg_strategyselector_register(opaque, "fast_inverse_dst_4x4", "avx2", 40, &matrix_idst_4x4_avx2); - success &= uvg_strategyselector_register(opaque, "idct_4x4", "avx2", 40, &matrix_idct_4x4_avx2); - success &= uvg_strategyselector_register(opaque, "idct_8x8", "avx2", 40, &matrix_idct_8x8_avx2); - success &= uvg_strategyselector_register(opaque, "idct_16x16", "avx2", 40, &matrix_idct_16x16_avx2); - success &= uvg_strategyselector_register(opaque, "idct_32x32", "avx2", 40, &matrix_idct_32x32_avx2); + success &= uvg_strategyselector_register(opaque, "idct_4x4", "avx2", 40, &matrix_idct_4x4_avx2); + success &= uvg_strategyselector_register(opaque, "idct_8x8", "avx2", 40, &matrix_idct_8x8_avx2); + success &= uvg_strategyselector_register(opaque, "idct_16x16", "avx2", 40, &matrix_idct_16x16_avx2); + success &= uvg_strategyselector_register(opaque, "idct_32x32", "avx2", 40, &matrix_idct_32x32_avx2); + + success &= uvg_strategyselector_register(opaque, "mts_dct", "avx2", 40, &mts_dct_avx2); + success &= uvg_strategyselector_register(opaque, "mts_idct", "avx2", 40, &mts_idct_avx2); - success &= uvg_strategyselector_register(opaque, "mts_dct", "avx2", 40, &mts_dct_avx2); - success &= uvg_strategyselector_register(opaque, "mts_idct", "avx2", 40, &mts_idct_avx2); - } -#endif // UVG_BIT_DEPTH == 8 #endif //COMPILE_INTEL_AVX2 return success; }