From 3951e899237a394402dedce8d43f5a8369959eab Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Mon, 5 Aug 2024 08:53:03 +0300 Subject: [PATCH 1/2] [IBC] Add the 16bit 8x8 crc32 and implement (somehow) the empty check_mv_cost_satd() --- src/search_ibc.c | 23 +++++++++ src/strategies/generic/picture-generic.c | 64 +++++++++++++----------- src/strategies/sse42/picture-sse42.c | 23 ++++++--- 3 files changed, 74 insertions(+), 36 deletions(-) diff --git a/src/search_ibc.c b/src/search_ibc.c index 2d80ec28..39038aae 100644 --- a/src/search_ibc.c +++ b/src/search_ibc.c @@ -226,7 +226,30 @@ static bool check_mv_cost_satd(ibc_search_info_t *info, double* best_bits, vector2d_t *best_mv) { + if (!intmv_within_ibc_range(info, x, y)) return false; + double cost = calculate_ibc_cost_satd(info->state, info->lcu, &info->origin, x, y); + + if (cost >= *best_cost) return false; + + cost += info->mvd_cost_func( + info->state, + x, y, INTERNAL_MV_PREC, + info->mv_cand, + NULL, + 0, + 0, + best_bits + ); + + if (cost >= *best_cost) return false; + + // Set to motion vector in internal pixel precision. + best_mv->x = x * (1 << INTERNAL_MV_PREC); + best_mv->y = y * (1 << INTERNAL_MV_PREC); + *best_cost = cost; + + return true; } /** * \brief Calculate cost for an integer motion vector. diff --git a/src/strategies/generic/picture-generic.c b/src/strategies/generic/picture-generic.c index 5e06ebbe..320b2504 100644 --- a/src/strategies/generic/picture-generic.c +++ b/src/strategies/generic/picture-generic.c @@ -1374,17 +1374,29 @@ INLINE static uint32_t uvg_crc32c_4_generic(uint32_t crc, const uvg_pixel *buf) return crc; } +INLINE static uint32_t uvg_crc32c_4_16bit_generic(uint32_t crc, const uvg_pixel *buf) +{ + const uint8_t *buf8 = (const uint8_t *)buf; + for (int i = 0; i < 8; i++) { + crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf8[i]) & 0xFF]; + } + return crc; +} INLINE static uint32_t uvg_crc32c_8_generic(uint32_t crc, const uvg_pixel *buf) { - crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[0]) & 0xFF]; - crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[1]) & 0xFF]; - crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[2]) & 0xFF]; - crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[3]) & 0xFF]; - crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[4]) & 0xFF]; - crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[5]) & 0xFF]; - crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[6]) & 0xFF]; - crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[7]) & 0xFF]; + for (int i = 0; i < 8; i++) { + crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[i]) & 0xFF]; + } + return crc; +} + +INLINE static uint32_t uvg_crc32c_8_16bit_generic(uint32_t crc, const uvg_pixel *buf) +{ + const uint8_t* buf8 = (const uint8_t*)buf; + for (int i = 0; i < 16; i++) { + crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf8[i]) & 0xFF]; + } return crc; } @@ -1401,34 +1413,29 @@ static uint32_t uvg_crc32c_4x4_8bit_generic(const uvg_pixel *buf, uint32_t pic_s static uint32_t uvg_crc32c_4x4_16bit_generic(const uvg_pixel *buf, uint32_t pic_stride) { uint32_t crc = 0xFFFFFFFF; - crc = uvg_crc32c_4_generic(crc, &buf[0 * pic_stride]); - crc = uvg_crc32c_4_generic(crc, &buf[0 * pic_stride] + 4); + crc = uvg_crc32c_4_16bit_generic(crc, &buf[0 * pic_stride]); + crc = uvg_crc32c_4_16bit_generic(crc, &buf[1 * pic_stride]); + crc = uvg_crc32c_4_16bit_generic(crc, &buf[2 * pic_stride]); + crc = uvg_crc32c_4_16bit_generic(crc, &buf[3 * pic_stride]); - crc = uvg_crc32c_4_generic(crc, &buf[1 * pic_stride]); - crc = uvg_crc32c_4_generic(crc, &buf[1 * pic_stride] + 4); - - crc = uvg_crc32c_4_generic(crc, &buf[2 * pic_stride]); - crc = uvg_crc32c_4_generic(crc, &buf[2 * pic_stride] + 4); - - crc = uvg_crc32c_4_generic(crc, &buf[3 * pic_stride]); - crc = uvg_crc32c_4_generic(crc, &buf[3 * pic_stride] + 4); return crc ^ 0xFFFFFFFF; } static uint32_t uvg_crc32c_8x8_8bit_generic(const uvg_pixel *buf, uint32_t pic_stride) { uint32_t crc = 0xFFFFFFFF; - crc = uvg_crc32c_8_generic(crc, &buf[0 * pic_stride]); - crc = uvg_crc32c_8_generic(crc, &buf[1 * pic_stride]); + for (int i = 0; i < 8; i++) { + crc = uvg_crc32c_8_generic(crc, &buf[i * pic_stride]); + } + return crc ^ 0xFFFFFFFF; +} - crc = uvg_crc32c_8_generic(crc, &buf[2 * pic_stride]); - crc = uvg_crc32c_8_generic(crc, &buf[3 * pic_stride]); - - crc = uvg_crc32c_8_generic(crc, &buf[4 * pic_stride]); - crc = uvg_crc32c_8_generic(crc, &buf[5 * pic_stride]); - - crc = uvg_crc32c_8_generic(crc, &buf[6 * pic_stride]); - crc = uvg_crc32c_8_generic(crc, &buf[7 * pic_stride]); +static uint32_t uvg_crc32c_8x8_16bit_generic(const uvg_pixel *buf, uint32_t pic_stride) +{ + uint32_t crc = 0xFFFFFFFF; + for (int i = 0; i < 8; i++) { + crc = uvg_crc32c_8_16bit_generic(crc, &buf[i * pic_stride]); + } return crc ^ 0xFFFFFFFF; } @@ -1440,6 +1447,7 @@ int uvg_strategy_register_picture_generic(void* opaque, uint8_t bitdepth) success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "generic", 0, &uvg_crc32c_8x8_8bit_generic); } else { success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "generic", 0, &uvg_crc32c_4x4_16bit_generic); + success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "generic", 0, &uvg_crc32c_8x8_16bit_generic); } diff --git a/src/strategies/sse42/picture-sse42.c b/src/strategies/sse42/picture-sse42.c index 30549cb3..1bbbca9f 100644 --- a/src/strategies/sse42/picture-sse42.c +++ b/src/strategies/sse42/picture-sse42.c @@ -67,14 +67,20 @@ static uint32_t uvg_crc32c_4x4_16bit_sse42(const uvg_pixel *buf, uint32_t pic_st static uint32_t uvg_crc32c_8x8_8bit_sse42(const uvg_pixel *buf, uint32_t pic_stride) { uint64_t crc = 0xFFFFFFFF; - crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[0 * pic_stride])); - crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[1 * pic_stride])); - crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[2 * pic_stride])); - crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[3 * pic_stride])); - crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[4 * pic_stride])); - crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[5 * pic_stride])); - crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[6 * pic_stride])); - crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[7 * pic_stride])); + for (int i = 0; i < 8; i++) { + crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[i * pic_stride])); + } + return (uint32_t)(crc ^ 0xFFFFFFFF); +} + +static uint32_t uvg_crc32c_8x8_16bit_sse42(const uvg_pixel *buf, uint32_t pic_stride) +{ + uint64_t crc = 0xFFFFFFFF; + for (int i = 0; i < 8; i++) { + crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[i * pic_stride])); + crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[i * pic_stride + 4])); + } + return (uint32_t)(crc ^ 0xFFFFFFFF); } @@ -89,6 +95,7 @@ int uvg_strategy_register_picture_sse42(void* opaque, uint8_t bitdepth) { success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "sse42", 0, &uvg_crc32c_8x8_8bit_sse42); } else { success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "sse42", 0, &uvg_crc32c_4x4_16bit_sse42); + success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "sse42", 0, &uvg_crc32c_8x8_16bit_sse42); } #endif return success; From eee2e2c51ec7fb198a5f0d0a0cd7f7835d91da91 Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Mon, 5 Aug 2024 09:05:40 +0300 Subject: [PATCH 2/2] [IBC] Re-enable the crc32c strategies in 8+ bit depth --- src/strategies/strategies-picture.h | 38 ----------------------------- 1 file changed, 38 deletions(-) diff --git a/src/strategies/strategies-picture.h b/src/strategies/strategies-picture.h index da8f9be8..a7e6f4d2 100644 --- a/src/strategies/strategies-picture.h +++ b/src/strategies/strategies-picture.h @@ -207,7 +207,6 @@ int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth); cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned width, unsigned height); cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned width, unsigned height); -#if UVG_BIT_DEPTH == 8 #define STRATEGIES_PICTURE_EXPORTS \ {"crc32c_4x4", (void**) &uvg_crc32c_4x4}, \ {"crc32c_8x8", (void **)&uvg_crc32c_8x8}, \ @@ -243,42 +242,5 @@ cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned width, unsigne {"pixel_var", (void**) &uvg_pixel_var}, \ {"generate_residual", (void**) &uvg_generate_residual}, \ -#else -#define STRATEGIES_PICTURE_EXPORTS \ - {"reg_sad", (void**) &uvg_reg_sad}, \ - {"sad_4x4", (void**) &uvg_sad_4x4}, \ - {"sad_8x8", (void**) &uvg_sad_8x8}, \ - {"sad_16x16", (void**) &uvg_sad_16x16}, \ - {"sad_32x32", (void**) &uvg_sad_32x32}, \ - {"sad_64x64", (void**) &uvg_sad_64x64}, \ - {"satd_4x4", (void**) &uvg_satd_4x4}, \ - {"satd_8x8", (void**) &uvg_satd_8x8}, \ - {"satd_16x16", (void**) &uvg_satd_16x16}, \ - {"satd_32x32", (void**) &uvg_satd_32x32}, \ - {"satd_64x64", (void**) &uvg_satd_64x64}, \ - {"satd_any_size", (void**) &uvg_satd_any_size}, \ - {"satd_any_size_vtm", (void**) &uvg_satd_any_size_vtm}, \ - {"sad_4x4_dual", (void**) &uvg_sad_4x4_dual}, \ - {"sad_8x8_dual", (void**) &uvg_sad_8x8_dual}, \ - {"sad_16x16_dual", (void**) &uvg_sad_16x16_dual}, \ - {"sad_32x32_dual", (void**) &uvg_sad_32x32_dual}, \ - {"sad_64x64_dual", (void**) &uvg_sad_64x64_dual}, \ - {"satd_4x4_dual", (void**) &uvg_satd_4x4_dual}, \ - {"satd_8x8_dual", (void**) &uvg_satd_8x8_dual}, \ - {"satd_16x16_dual", (void**) &uvg_satd_16x16_dual}, \ - {"satd_32x32_dual", (void**) &uvg_satd_32x32_dual}, \ - {"satd_64x64_dual", (void**) &uvg_satd_64x64_dual}, \ - {"satd_any_size_quad", (void**) &uvg_satd_any_size_quad}, \ - {"pixels_calc_ssd", (void**) &uvg_pixels_calc_ssd}, \ - {"bipred_average", (void**) &uvg_bipred_average}, \ - {"get_optimized_sad", (void**) &uvg_get_optimized_sad}, \ - {"ver_sad", (void**) &uvg_ver_sad}, \ - {"hor_sad", (void**) &uvg_hor_sad}, \ - {"pixel_var", (void**) &uvg_pixel_var}, \ - {"generate_residual", (void**) &uvg_generate_residual}, \ - -#endif - - #endif //STRATEGIES_PICTURE_H_