From 76d66591c513cfe0e0dff0367edd2dadd19aa513 Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Mon, 26 Jun 2023 21:24:10 +0300 Subject: [PATCH] [ibc] Implement CRC for 8x8 block and generate a full hashmap at the frame load --- src/encoderstate.c | 33 ++++++++++++++++++++++++ src/hashmap.c | 23 ++++++++++------- src/hashmap.h | 8 +++--- src/strategies/generic/picture-generic.c | 32 +++++++++++++++++++++++ src/strategies/sse42/picture-sse42.c | 17 +++++++++++- src/strategies/strategies-picture.c | 1 + src/strategies/strategies-picture.h | 3 +++ 7 files changed, 103 insertions(+), 14 deletions(-) diff --git a/src/encoderstate.c b/src/encoderstate.c index 48839a84..bfd616f8 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -45,17 +45,20 @@ #include "encode_coding_tree.h" #include "encoder_state-bitstream.h" #include "filter.h" +#include "hashmap.h" #include "image.h" #include "rate_control.h" #include "sao.h" #include "search.h" #include "tables.h" +#include "threads.h" #include "threadqueue.h" #include "alf.h" #include "reshape.h" #include "strategies/strategies-picture.h" + /** * \brief Strength of QP adjustments when using adaptive QP for 360 video. 
* @@ -1936,6 +1939,36 @@ static void encoder_state_init_new_frame(encoder_state_t * const state, uvg_pict assert(0); } + if (state->encoder_control->cfg.ibc != 0) { + int items = 0; + UVG_CLOCK_T hashmap_start_real_time; + UVG_CLOCK_T hashmap_end_real_time; + UVG_GET_TIME(&hashmap_start_real_time); + // Create a new hashmap with UVG_HASHMAP_RATIO buckets per UVG_HASHMAP_BLOCKSIZE x UVG_HASHMAP_BLOCKSIZE block + state->tile->frame->ibc_hashmap = uvg_hashmap_create( + (int)(((float)(state->tile->frame->width * state->tile->frame->height) / + (float)(UVG_HASHMAP_BLOCKSIZE * UVG_HASHMAP_BLOCKSIZE)) * UVG_HASHMAP_RATIO)); + + // Hash the block whose top-left corner is (x, y); stop UVG_HASHMAP_BLOCKSIZE short of the right/bottom edge so the CRC only reads pixels inside the frame + for (int y = 0; y + UVG_HASHMAP_BLOCKSIZE <= state->tile->frame->height; y += 1) { + for (int x = 0; x + UVG_HASHMAP_BLOCKSIZE <= state->tile->frame->width; x += 1) { + uint32_t crc = uvg_crc32c_8x8(state->tile->frame->source->y + y * state->tile->frame->width + x, state->tile->frame->width); + + //uint32_t found = uvg_hashmap_search_return_first(state->tile->frame->ibc_hashmap, crc); + //uvg_hashmap_node_t* found = uvg_hashmap_search(state->tile->frame->ibc_hashmap, crc); + + //if (found != NULL) uvg_hashmap_node_free(found); + + uvg_hashmap_insert(state->tile->frame->ibc_hashmap, crc, ((x&0xffff)<<16) | (y&0xffff)); + items++; + } + } + UVG_GET_TIME(&hashmap_end_real_time); + double wall_time = UVG_CLOCK_T_AS_DOUBLE(hashmap_end_real_time) - + UVG_CLOCK_T_AS_DOUBLE(hashmap_start_real_time); + fprintf(stderr, "Hashmap creation time: %f, items: %d, size %d\n", wall_time, items, state->tile->frame->ibc_hashmap->bucket_size); + } + if (state->encoder_control->cfg.lmcs_enable) { uvg_init_lmcs_aps(state->tile->frame->lmcs_aps, state->encoder_control->cfg.width, state->encoder_control->cfg.height, LCU_CU_WIDTH, LCU_CU_WIDTH, state->encoder_control->bitdepth); diff --git a/src/hashmap.c b/src/hashmap.c index 840ab5e8..c8c1c0fb 100644 --- a/src/hashmap.c +++ b/src/hashmap.c @@ -72,9 +72,11 @@ uvg_hashmap_t* uvg_hashmap_create(uint32_t bucket_size) * \param bucket_size the size of 
the hashmap bucket * \return the hashed index for the given key and bucket size. */ -uint32_t uvg_hashmap_hash(uint32_t key, uint32_t bucket_size) { +static uint32_t uvg_hashmap_hash(uint32_t key, uint32_t bucket_size) +{ key ^= (key >> 20) ^ (key >> 12); return (key ^ (key >> 7) ^ (key >> 4) ^ 2654435769U) % bucket_size; + //return key % bucket_size; } /** @@ -100,20 +102,21 @@ void uvg_hashmap_insert(uvg_hashmap_t* map, uint32_t key, uint32_t value) { */ uvg_hashmap_node_t* uvg_hashmap_search(uvg_hashmap_t* map, uint32_t key) { uint32_t hashIndex = uvg_hashmap_hash(key, map->bucket_size); - uvg_hashmap_node_t* temp = map->table[hashIndex]; - uvg_hashmap_node_t* return_node = NULL; - // Search key in chain and return all of them + return map->table[hashIndex]; /* head of the bucket chain, not filtered by key */ +} + +uint32_t uvg_hashmap_search_return_first(uvg_hashmap_t* map, uint32_t key) +{ + uint32_t hashIndex = uvg_hashmap_hash(key, map->bucket_size); + uvg_hashmap_node_t* temp = map->table[hashIndex]; + // Search key in chain and return the first match while (temp) { if (temp->key == key) { - uvg_hashmap_node_t* new_node = uvg_hashmap_create_node(key, temp->value); - if (return_node != NULL) { - new_node->next = (void*)return_node; - } - return_node = new_node; + return temp->value; } temp = (uvg_hashmap_node_t*)temp->next; } - return return_node; + return -1; /* key not found; converts to UINT32_MAX in the uint32_t return type */ } /** diff --git a/src/hashmap.h b/src/hashmap.h index 5881f627..cb84c825 100644 --- a/src/hashmap.h +++ b/src/hashmap.h @@ -37,9 +37,9 @@ #include // The ratio of the hashmap bucket size to the maximum number of elements -#define UVG_HASHMAP_RATIO 0.35 +#define UVG_HASHMAP_RATIO 6.0 -// Use Hashmap for 4x4 blocks -#define UVG_HASHMAP_BLOCKSIZE 4 +// Use Hashmap for 8x8 blocks +#define UVG_HASHMAP_BLOCKSIZE 8 typedef struct uvg_hashmap_node { uint32_t key; @@ -56,12 +56,14 @@ uvg_hashmap_node_t* uvg_hashmap_create_node(uint32_t key, uint32_t value); uvg_hashmap_t* uvg_hashmap_create(uint32_t bucket_size); -uint32_t uvg_hashmap_hash(uint32_t key, uint32_t bucket_size); +//uint32_t 
uvg_hashmap_hash(uint32_t key, uint32_t bucket_size); void uvg_hashmap_insert(uvg_hashmap_t* map, uint32_t key, uint32_t value); uvg_hashmap_node_t* uvg_hashmap_search(uvg_hashmap_t* map, uint32_t key); +uint32_t uvg_hashmap_search_return_first(uvg_hashmap_t* map, uint32_t key); + void uvg_hashmap_node_free(uvg_hashmap_node_t* node); void uvg_hashmap_free(uvg_hashmap_t* map); diff --git a/src/strategies/generic/picture-generic.c b/src/strategies/generic/picture-generic.c index b827c8a9..817befed 100644 --- a/src/strategies/generic/picture-generic.c +++ b/src/strategies/generic/picture-generic.c @@ -802,6 +802,20 @@ INLINE static uint32_t uvg_crc32c_4_generic(uint32_t crc, const uvg_pixel *buf) return crc; } + +INLINE static uint32_t uvg_crc32c_8_generic(uint32_t crc, const uvg_pixel *buf) +{ /* Folds buf[0]..buf[7] into crc, one uvg_crc_table lookup per pixel, and returns the updated crc. */ + crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[0]) & 0xFF]; + crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[1]) & 0xFF]; + crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[2]) & 0xFF]; + crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[3]) & 0xFF]; + crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[4]) & 0xFF]; + crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[5]) & 0xFF]; + crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[6]) & 0xFF]; + crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[7]) & 0xFF]; + return crc; +} + static uint32_t uvg_crc32c_4x4_8bit_generic(const uvg_pixel *buf, uint32_t pic_stride) { uint32_t crc = 0xFFFFFFFF; @@ -829,11 +843,29 @@ static uint32_t uvg_crc32c_4x4_16bit_generic(const uvg_pixel *buf, uint32_t pic_ return crc ^ 0xFFFFFFFF; } +static uint32_t uvg_crc32c_8x8_8bit_generic(const uvg_pixel *buf, uint32_t pic_stride) +{ /* CRC of an 8x8 block: rows 0..7 start at buf + row * pic_stride and are folded in top to bottom. */ + uint32_t crc = 0xFFFFFFFF; /* all-ones start value; the result is inverted again on return */ + crc = uvg_crc32c_8_generic(crc, &buf[0 * pic_stride]); + crc = uvg_crc32c_8_generic(crc, &buf[1 * pic_stride]); + + crc = uvg_crc32c_8_generic(crc, &buf[2 * pic_stride]); + crc = uvg_crc32c_8_generic(crc, &buf[3 * pic_stride]); + + crc = uvg_crc32c_8_generic(crc, &buf[4 * pic_stride]); + crc = uvg_crc32c_8_generic(crc, &buf[5 * 
pic_stride]); + + crc = uvg_crc32c_8_generic(crc, &buf[6 * pic_stride]); + crc = uvg_crc32c_8_generic(crc, &buf[7 * pic_stride]); + return crc ^ 0xFFFFFFFF; +} + int uvg_strategy_register_picture_generic(void* opaque, uint8_t bitdepth) { bool success = true; if (bitdepth == 8) { success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "generic", 0, &uvg_crc32c_4x4_8bit_generic); + success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "generic", 0, &uvg_crc32c_8x8_8bit_generic); } else { success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "generic", 0, &uvg_crc32c_4x4_16bit_generic); } diff --git a/src/strategies/sse42/picture-sse42.c b/src/strategies/sse42/picture-sse42.c index bd00d90f..0022af91 100644 --- a/src/strategies/sse42/picture-sse42.c +++ b/src/strategies/sse42/picture-sse42.c @@ -64,6 +64,20 @@ static uint32_t uvg_crc32c_4x4_16bit_sse42(const uvg_pixel *buf, uint32_t pic_st return crc ^ 0xFFFFFFFF; } +static uint32_t uvg_crc32c_8x8_8bit_sse42(const uvg_pixel *buf, uint32_t pic_stride) +{ /* CRC of an 8x8 block of 8-bit pixels; rows start at buf + row * pic_stride. */ + uint32_t crc = 0xFFFFFFFF; /* A row is 8 bytes, so each 64-bit CRC step must load the whole row as uint64_t, not just its first 4 bytes. */ + crc = (uint32_t)_mm_crc32_u64(crc, *((const uint64_t *)&buf[0 * pic_stride])); + crc = (uint32_t)_mm_crc32_u64(crc, *((const uint64_t *)&buf[1 * pic_stride])); + crc = (uint32_t)_mm_crc32_u64(crc, *((const uint64_t *)&buf[2 * pic_stride])); + crc = (uint32_t)_mm_crc32_u64(crc, *((const uint64_t *)&buf[3 * pic_stride])); + crc = (uint32_t)_mm_crc32_u64(crc, *((const uint64_t *)&buf[4 * pic_stride])); + crc = (uint32_t)_mm_crc32_u64(crc, *((const uint64_t *)&buf[5 * pic_stride])); + crc = (uint32_t)_mm_crc32_u64(crc, *((const uint64_t *)&buf[6 * pic_stride])); + crc = (uint32_t)_mm_crc32_u64(crc, *((const uint64_t *)&buf[7 * pic_stride])); + return crc ^ 0xFFFFFFFF; +} + #endif //COMPILE_INTEL_SSE42 @@ -71,7 +85,8 @@ int uvg_strategy_register_picture_sse42(void* opaque, uint8_t bitdepth) { bool success = true; #if COMPILE_INTEL_SSE42 if (bitdepth == 8){ - success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "sse42", 0, &uvg_crc32c_4x4_8bit_sse42); + success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "sse42", 0, 
&uvg_crc32c_4x4_8bit_sse42); + success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "sse42", 0, &uvg_crc32c_8x8_8bit_sse42); } else { success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "sse42", 0, &uvg_crc32c_4x4_16bit_sse42); } diff --git a/src/strategies/strategies-picture.c b/src/strategies/strategies-picture.c index d68f3173..00ad9ccb 100644 --- a/src/strategies/strategies-picture.c +++ b/src/strategies/strategies-picture.c @@ -42,6 +42,7 @@ // Define function pointers. crc32c_4x4_func * uvg_crc32c_4x4 = 0; +crc32c_8x8_func * uvg_crc32c_8x8 = 0; reg_sad_func * uvg_reg_sad = 0; cost_pixel_nxn_func * uvg_sad_4x4 = 0; diff --git a/src/strategies/strategies-picture.h b/src/strategies/strategies-picture.h index ebb95b4f..88f52cfc 100644 --- a/src/strategies/strategies-picture.h +++ b/src/strategies/strategies-picture.h @@ -155,9 +155,11 @@ typedef void (generate_residual_func)(const uvg_pixel* ref_in, const uvg_pixel* extern const uint32_t uvg_crc_table[256]; typedef uint32_t(crc32c_4x4_func)(const uvg_pixel *buf, uint32_t pic_stride); +typedef uint32_t(crc32c_8x8_func)(const uvg_pixel *buf, uint32_t pic_stride); // Declare function pointers. extern crc32c_4x4_func * uvg_crc32c_4x4; +extern crc32c_8x8_func * uvg_crc32c_8x8; extern reg_sad_func * uvg_reg_sad; @@ -206,6 +208,7 @@ cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n); #define STRATEGIES_PICTURE_EXPORTS \ {"crc32c_4x4", (void**) &uvg_crc32c_4x4}, \ + {"crc32c_8x8", (void **)&uvg_crc32c_8x8}, \ {"reg_sad", (void**) &uvg_reg_sad}, \ {"sad_4x4", (void**) &uvg_sad_4x4}, \ {"sad_8x8", (void**) &uvg_sad_8x8}, \