From 8cec02280f411168588c48cb578990c8c4de2310 Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Wed, 28 Jun 2023 23:06:04 +0300 Subject: [PATCH] [ibc] Use IBC hashmap in LCU row basis --- src/encoder_state-ctors_dtors.c | 5 ++++ src/encoderstate.c | 47 ++++++++++++--------------------- src/hashmap.c | 2 ++ src/hashmap.h | 5 ++-- src/videoframe.c | 5 ---- src/videoframe.h | 5 ++-- 6 files changed, 29 insertions(+), 40 deletions(-) diff --git a/src/encoder_state-ctors_dtors.c b/src/encoder_state-ctors_dtors.c index e2b55ada..965b3d08 100644 --- a/src/encoder_state-ctors_dtors.c +++ b/src/encoder_state-ctors_dtors.c @@ -131,7 +131,10 @@ static int encoder_state_config_tile_init(encoder_state_t * const state, state->tile->frame->ibc_buffer_y = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu); state->tile->frame->ibc_buffer_u = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu); state->tile->frame->ibc_buffer_v = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu); + state->tile->frame->ibc_hashmap_row = malloc(sizeof(uvg_hashmap_t*) * state->tile->frame->height_in_lcu); /* array of hashmap pointers, one slot per LCU row; each slot is filled by uvg_hashmap_create() in the loop below */ + for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) { + state->tile->frame->ibc_hashmap_row[i] = uvg_hashmap_create((LCU_WIDTH * IBC_BUFFER_WIDTH)>>2); state->tile->frame->ibc_buffer_y[i] = (uvg_pixel*)malloc(IBC_BUFFER_SIZE * 3); // ToDo: we don't need this much, but it would also support 4:4:4 state->tile->frame->ibc_buffer_u[i] = &state->tile->frame->ibc_buffer_y[i][IBC_BUFFER_SIZE]; state->tile->frame->ibc_buffer_v[i] = &state->tile->frame->ibc_buffer_y[i][IBC_BUFFER_SIZE * 2]; @@ -219,7 +222,9 @@ static void encoder_state_config_tile_finalize(encoder_state_t * const state) { if (state->encoder_control->cfg.ibc) { for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) { FREE_POINTER(state->tile->frame->ibc_buffer_y[i]); + uvg_hashmap_free(state->tile->frame->ibc_hashmap_row[i]); } + FREE_POINTER(state->tile->frame->ibc_hashmap_row); 
FREE_POINTER(state->tile->frame->ibc_buffer_y); FREE_POINTER(state->tile->frame->ibc_buffer_u); FREE_POINTER(state->tile->frame->ibc_buffer_v); diff --git a/src/encoderstate.c b/src/encoderstate.c index bfd616f8..e5c0c4d8 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -288,6 +288,23 @@ static void encoder_state_recdata_to_bufs(encoder_state_t * const state, const uint32_t ibc_block_width = MIN(LCU_WIDTH, (state->tile->frame->width-lcu->position_px.x)); const uint32_t ibc_block_height = MIN(LCU_WIDTH, (state->tile->frame->height-lcu->position_px.y)); + int items = 0; + // Hash the current LCU to the IBC hashmap + for (int32_t xx = (lcu->position_px.x>8)?-6:0; xx < (int32_t)(ibc_block_width)-7; xx+=2) { + for (int32_t yy = 0; yy < (int32_t)(ibc_block_height)-7; yy++) { + int cur_x = lcu->position_px.x + xx; + int cur_y = lcu->position_px.y + yy; + uint32_t crc = uvg_crc32c_8x8(&frame->rec->y[cur_y * frame->rec->stride + cur_x],frame->rec->stride); + if (state->encoder_control->chroma_format != UVG_CSP_400) { + crc ^= uvg_crc32c_4x4(&frame->rec->u[(cur_y>>1) * (frame->rec->stride>>1) + (cur_x>>1)],frame->rec->stride>>1); + crc ^= uvg_crc32c_4x4(&frame->rec->v[(cur_y>>1) * (frame->rec->stride>>1) + (cur_x>>1)],frame->rec->stride>>1); + } + uvg_hashmap_insert(frame->ibc_hashmap_row[ibc_buffer_row], crc, ((cur_x&0xffff)<<16) | (cur_y&0xffff)); + items++; + } + } + //fprintf(stderr, "Inserted %d items to %dx%d at %dx%d\r\n", items, ibc_block_width, ibc_block_height, lcu->position_px.x, lcu->position_px.y); + uvg_pixels_blit(&frame->rec->y[lcu->position_px.y * frame->rec->stride + lcu->position_px.x], &frame->ibc_buffer_y[ibc_buffer_row][ibc_buffer_pos_x], ibc_block_width, ibc_block_height, @@ -1939,36 +1956,6 @@ static void encoder_state_init_new_frame(encoder_state_t * const state, uvg_pict assert(0); } - if (state->encoder_control->cfg.ibc != 0) { - int items = 0; - UVG_CLOCK_T hashmap_start_real_time; - UVG_CLOCK_T hashmap_end_real_time; - 
UVG_GET_TIME(&hashmap_start_real_time); - // Create a new hashmap with UVG_HASHMAP_RATIO buckets per 4x4 block - state->tile->frame->ibc_hashmap = uvg_hashmap_create( - (int)(((float)(state->tile->frame->width * state->tile->frame->height) / - (float)(UVG_HASHMAP_BLOCKSIZE * UVG_HASHMAP_BLOCKSIZE)) * UVG_HASHMAP_RATIO)); - - // Fill the hashmap with the current frame's block information - for (int y = 0; y < state->tile->frame->height; y += 1) { - for (int x = 0; x < state->tile->frame->width; x += 1) { - uint32_t crc = uvg_crc32c_8x8(state->tile->frame->source->y + y * state->tile->frame->width + x, state->tile->frame->width); - - //uint32_t found = uvg_hashmap_search_return_first(state->tile->frame->ibc_hashmap, crc); - //uvg_hashmap_node_t* found = uvg_hashmap_search(state->tile->frame->ibc_hashmap, crc); - - //if (found != NULL) uvg_hashmap_node_free(found); - - uvg_hashmap_insert(state->tile->frame->ibc_hashmap, crc, ((x&0xffff)<<16) | (y&0xffff)); - items++; - } - } - UVG_GET_TIME(&hashmap_end_real_time); - double wall_time = UVG_CLOCK_T_AS_DOUBLE(hashmap_end_real_time) - - UVG_CLOCK_T_AS_DOUBLE(hashmap_start_real_time); - fprintf(stderr, "Hashmap creation time: %f, items: %d, size %d\n", wall_time, items, state->tile->frame->ibc_hashmap->bucket_size); - } - if (state->encoder_control->cfg.lmcs_enable) { uvg_init_lmcs_aps(state->tile->frame->lmcs_aps, state->encoder_control->cfg.width, state->encoder_control->cfg.height, LCU_CU_WIDTH, LCU_CU_WIDTH, state->encoder_control->bitdepth); diff --git a/src/hashmap.c b/src/hashmap.c index c8c1c0fb..c9d88d9c 100644 --- a/src/hashmap.c +++ b/src/hashmap.c @@ -44,6 +44,7 @@ uvg_hashmap_node_t* uvg_hashmap_create_node(uint32_t key, uint32_t value) { new_node->key = key; new_node->value = value; new_node->next = NULL; + new_node->size = 1; return new_node; } @@ -90,6 +91,7 @@ void uvg_hashmap_insert(uvg_hashmap_t* map, uint32_t key, uint32_t value) { uint32_t hash_index = uvg_hashmap_hash(key, map->bucket_size); 
uvg_hashmap_node_t* new_node = uvg_hashmap_create_node(key, value); new_node->next = (void*)map->table[hash_index]; + if (new_node->next != NULL) new_node->size = ((uvg_hashmap_node_t*)new_node->next)->size + 1; map->table[hash_index] = new_node; } diff --git a/src/hashmap.h b/src/hashmap.h index cb84c825..1294bb87 100644 --- a/src/hashmap.h +++ b/src/hashmap.h @@ -37,14 +37,15 @@ #include // The ratio of the hashmap bucket size to the maximum number of elements -#define UVG_HASHMAP_RATIO 6.0 +#define UVG_HASHMAP_RATIO 12.0 // Use Hashmap for 4x4 blocks #define UVG_HASHMAP_BLOCKSIZE 8 typedef struct uvg_hashmap_node { + void* next; uint32_t key; uint32_t value; - void* next; + uint32_t size; } uvg_hashmap_node_t; typedef struct uvg_hashmap { diff --git a/src/videoframe.c b/src/videoframe.c index eef48d68..f5a4d8af 100644 --- a/src/videoframe.c +++ b/src/videoframe.c @@ -102,11 +102,6 @@ int uvg_videoframe_free(videoframe_t * const frame) FREE_POINTER(frame->sao_luma); FREE_POINTER(frame->sao_chroma); - if (frame->ibc_hashmap != NULL) { - uvg_hashmap_free(frame->ibc_hashmap); - frame->ibc_hashmap = NULL; - } - free(frame); diff --git a/src/videoframe.h b/src/videoframe.h index 140affee..7f7e7581 100644 --- a/src/videoframe.h +++ b/src/videoframe.h @@ -81,7 +81,8 @@ typedef struct videoframe uvg_pixel **ibc_buffer_y; //!< \brief Intra Block Copy buffer for each LCU row uvg_pixel **ibc_buffer_u; //!< \brief Intra Block Copy buffer for each LCU row - uvg_pixel **ibc_buffer_v; //!< \brief Intra Block Copy buffer for each LCU row + uvg_pixel **ibc_buffer_v; //!< \brief Intra Block Copy buffer for each LCU row + uvg_hashmap_t **ibc_hashmap_row; //!< \brief Hashmap for IBC hash search for each LCU row cu_info_t* hmvp_lut_ibc; //!< \brief Look-up table for HMVP in IBC, one for each LCU row uint8_t* hmvp_size_ibc; //!< \brief HMVP IBC LUT size @@ -91,8 +92,6 @@ typedef struct videoframe bool lmcs_top_level; //!< \brief Indicate that in this level the LMCS images are 
allocated bool rec_lmcs_mapped; //!< \brief Indicate if rec_lmcs is available and mapped to LMCS - uvg_hashmap_t *ibc_hashmap; //!< \brief Hashmap for IBC hash search - } videoframe_t;