[ibc] Use IBC hashmap on an LCU row basis

This commit is contained in:
Marko Viitanen 2023-06-28 23:06:04 +03:00
parent 76d66591c5
commit 8cec02280f
6 changed files with 29 additions and 40 deletions

View file

@ -131,7 +131,10 @@ static int encoder_state_config_tile_init(encoder_state_t * const state,
state->tile->frame->ibc_buffer_y = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu); state->tile->frame->ibc_buffer_y = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu);
state->tile->frame->ibc_buffer_u = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu); state->tile->frame->ibc_buffer_u = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu);
state->tile->frame->ibc_buffer_v = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu); state->tile->frame->ibc_buffer_v = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu);
state->tile->frame->ibc_hashmap_row = malloc(sizeof(uvg_hashmap_t) * state->tile->frame->height_in_lcu);
for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) { for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) {
state->tile->frame->ibc_hashmap_row[i] = uvg_hashmap_create((LCU_WIDTH * IBC_BUFFER_WIDTH)>>2);
state->tile->frame->ibc_buffer_y[i] = (uvg_pixel*)malloc(IBC_BUFFER_SIZE * 3); // ToDo: we don't need this much, but it would also support 4:4:4 state->tile->frame->ibc_buffer_y[i] = (uvg_pixel*)malloc(IBC_BUFFER_SIZE * 3); // ToDo: we don't need this much, but it would also support 4:4:4
state->tile->frame->ibc_buffer_u[i] = &state->tile->frame->ibc_buffer_y[i][IBC_BUFFER_SIZE]; state->tile->frame->ibc_buffer_u[i] = &state->tile->frame->ibc_buffer_y[i][IBC_BUFFER_SIZE];
state->tile->frame->ibc_buffer_v[i] = &state->tile->frame->ibc_buffer_y[i][IBC_BUFFER_SIZE * 2]; state->tile->frame->ibc_buffer_v[i] = &state->tile->frame->ibc_buffer_y[i][IBC_BUFFER_SIZE * 2];
@ -219,7 +222,9 @@ static void encoder_state_config_tile_finalize(encoder_state_t * const state) {
if (state->encoder_control->cfg.ibc) { if (state->encoder_control->cfg.ibc) {
for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) { for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) {
FREE_POINTER(state->tile->frame->ibc_buffer_y[i]); FREE_POINTER(state->tile->frame->ibc_buffer_y[i]);
uvg_hashmap_free(state->tile->frame->ibc_hashmap_row[i]);
} }
FREE_POINTER(state->tile->frame->ibc_hashmap_row);
FREE_POINTER(state->tile->frame->ibc_buffer_y); FREE_POINTER(state->tile->frame->ibc_buffer_y);
FREE_POINTER(state->tile->frame->ibc_buffer_u); FREE_POINTER(state->tile->frame->ibc_buffer_u);
FREE_POINTER(state->tile->frame->ibc_buffer_v); FREE_POINTER(state->tile->frame->ibc_buffer_v);

View file

@ -288,6 +288,23 @@ static void encoder_state_recdata_to_bufs(encoder_state_t * const state,
const uint32_t ibc_block_width = MIN(LCU_WIDTH, (state->tile->frame->width-lcu->position_px.x)); const uint32_t ibc_block_width = MIN(LCU_WIDTH, (state->tile->frame->width-lcu->position_px.x));
const uint32_t ibc_block_height = MIN(LCU_WIDTH, (state->tile->frame->height-lcu->position_px.y)); const uint32_t ibc_block_height = MIN(LCU_WIDTH, (state->tile->frame->height-lcu->position_px.y));
int items = 0;
// Hash the current LCU to the IBC hashmap
for (int32_t xx = (lcu->position_px.x>8)?-6:0; xx < (int32_t)(ibc_block_width)-7; xx+=2) {
for (int32_t yy = 0; yy < (int32_t)(ibc_block_height)-7; yy++) {
int cur_x = lcu->position_px.x + xx;
int cur_y = lcu->position_px.y + yy;
uint32_t crc = uvg_crc32c_8x8(&frame->rec->y[cur_y * frame->rec->stride + cur_x],frame->rec->stride);
if (state->encoder_control->chroma_format != UVG_CSP_400) {
crc ^= uvg_crc32c_4x4(&frame->rec->u[(cur_y>>1) * (frame->rec->stride>>1) + (cur_x>>1)],frame->rec->stride>>1);
crc ^= uvg_crc32c_4x4(&frame->rec->v[(cur_y>>1) * (frame->rec->stride>>1) + (cur_x>>1)],frame->rec->stride>>1);
}
uvg_hashmap_insert(frame->ibc_hashmap_row[ibc_buffer_row], crc, ((cur_x&0xffff)<<16) | (cur_y&0xffff));
items++;
}
}
//fprintf(stderr, "Inserted %d items to %dx%d at %dx%d\r\n", items, ibc_block_width, ibc_block_height, lcu->position_px.x, lcu->position_px.y);
uvg_pixels_blit(&frame->rec->y[lcu->position_px.y * frame->rec->stride + lcu->position_px.x], uvg_pixels_blit(&frame->rec->y[lcu->position_px.y * frame->rec->stride + lcu->position_px.x],
&frame->ibc_buffer_y[ibc_buffer_row][ibc_buffer_pos_x], &frame->ibc_buffer_y[ibc_buffer_row][ibc_buffer_pos_x],
ibc_block_width, ibc_block_height, ibc_block_width, ibc_block_height,
@ -1939,36 +1956,6 @@ static void encoder_state_init_new_frame(encoder_state_t * const state, uvg_pict
assert(0); assert(0);
} }
if (state->encoder_control->cfg.ibc != 0) {
int items = 0;
UVG_CLOCK_T hashmap_start_real_time;
UVG_CLOCK_T hashmap_end_real_time;
UVG_GET_TIME(&hashmap_start_real_time);
// Create a new hashmap with UVG_HASHMAP_RATIO buckets per 4x4 block
state->tile->frame->ibc_hashmap = uvg_hashmap_create(
(int)(((float)(state->tile->frame->width * state->tile->frame->height) /
(float)(UVG_HASHMAP_BLOCKSIZE * UVG_HASHMAP_BLOCKSIZE)) * UVG_HASHMAP_RATIO));
// Fill the hashmap with the current frame's block information
for (int y = 0; y < state->tile->frame->height; y += 1) {
for (int x = 0; x < state->tile->frame->width; x += 1) {
uint32_t crc = uvg_crc32c_8x8(state->tile->frame->source->y + y * state->tile->frame->width + x, state->tile->frame->width);
//uint32_t found = uvg_hashmap_search_return_first(state->tile->frame->ibc_hashmap, crc);
//uvg_hashmap_node_t* found = uvg_hashmap_search(state->tile->frame->ibc_hashmap, crc);
//if (found != NULL) uvg_hashmap_node_free(found);
uvg_hashmap_insert(state->tile->frame->ibc_hashmap, crc, ((x&0xffff)<<16) | (y&0xffff));
items++;
}
}
UVG_GET_TIME(&hashmap_end_real_time);
double wall_time = UVG_CLOCK_T_AS_DOUBLE(hashmap_end_real_time) -
UVG_CLOCK_T_AS_DOUBLE(hashmap_start_real_time);
fprintf(stderr, "Hashmap creation time: %f, items: %d, size %d\n", wall_time, items, state->tile->frame->ibc_hashmap->bucket_size);
}
if (state->encoder_control->cfg.lmcs_enable) { if (state->encoder_control->cfg.lmcs_enable) {
uvg_init_lmcs_aps(state->tile->frame->lmcs_aps, state->encoder_control->cfg.width, state->encoder_control->cfg.height, LCU_CU_WIDTH, LCU_CU_WIDTH, state->encoder_control->bitdepth); uvg_init_lmcs_aps(state->tile->frame->lmcs_aps, state->encoder_control->cfg.width, state->encoder_control->cfg.height, LCU_CU_WIDTH, LCU_CU_WIDTH, state->encoder_control->bitdepth);

View file

@ -44,6 +44,7 @@ uvg_hashmap_node_t* uvg_hashmap_create_node(uint32_t key, uint32_t value) {
new_node->key = key; new_node->key = key;
new_node->value = value; new_node->value = value;
new_node->next = NULL; new_node->next = NULL;
new_node->size = 1;
return new_node; return new_node;
} }
@ -90,6 +91,7 @@ void uvg_hashmap_insert(uvg_hashmap_t* map, uint32_t key, uint32_t value) {
uint32_t hash_index = uvg_hashmap_hash(key, map->bucket_size); uint32_t hash_index = uvg_hashmap_hash(key, map->bucket_size);
uvg_hashmap_node_t* new_node = uvg_hashmap_create_node(key, value); uvg_hashmap_node_t* new_node = uvg_hashmap_create_node(key, value);
new_node->next = (void*)map->table[hash_index]; new_node->next = (void*)map->table[hash_index];
if (new_node->next != NULL) new_node->size = ((uvg_hashmap_node_t*)new_node->next)->size + 1;
map->table[hash_index] = new_node; map->table[hash_index] = new_node;
} }

View file

@ -37,14 +37,15 @@
#include <stdint.h> #include <stdint.h>
// The ratio of the hashmap bucket size to the maximum number of elements // The ratio of the hashmap bucket size to the maximum number of elements
#define UVG_HASHMAP_RATIO 6.0 #define UVG_HASHMAP_RATIO 12.0
// Use Hashmap for 8x8 blocks // Use Hashmap for 8x8 blocks
#define UVG_HASHMAP_BLOCKSIZE 8 #define UVG_HASHMAP_BLOCKSIZE 8
typedef struct uvg_hashmap_node { typedef struct uvg_hashmap_node {
void* next;
uint32_t key; uint32_t key;
uint32_t value; uint32_t value;
void* next; uint32_t size;
} uvg_hashmap_node_t; } uvg_hashmap_node_t;
typedef struct uvg_hashmap { typedef struct uvg_hashmap {

View file

@ -102,11 +102,6 @@ int uvg_videoframe_free(videoframe_t * const frame)
FREE_POINTER(frame->sao_luma); FREE_POINTER(frame->sao_luma);
FREE_POINTER(frame->sao_chroma); FREE_POINTER(frame->sao_chroma);
if (frame->ibc_hashmap != NULL) {
uvg_hashmap_free(frame->ibc_hashmap);
frame->ibc_hashmap = NULL;
}
free(frame); free(frame);

View file

@ -82,6 +82,7 @@ typedef struct videoframe
uvg_pixel **ibc_buffer_y; //!< \brief Intra Block Copy buffer for each LCU row uvg_pixel **ibc_buffer_y; //!< \brief Intra Block Copy buffer for each LCU row
uvg_pixel **ibc_buffer_u; //!< \brief Intra Block Copy buffer for each LCU row uvg_pixel **ibc_buffer_u; //!< \brief Intra Block Copy buffer for each LCU row
uvg_pixel **ibc_buffer_v; //!< \brief Intra Block Copy buffer for each LCU row uvg_pixel **ibc_buffer_v; //!< \brief Intra Block Copy buffer for each LCU row
uvg_hashmap_t **ibc_hashmap_row; //!< \brief Hashmap for IBC hash search for each LCU row
cu_info_t* hmvp_lut_ibc; //!< \brief Look-up table for HMVP in IBC, one for each LCU row cu_info_t* hmvp_lut_ibc; //!< \brief Look-up table for HMVP in IBC, one for each LCU row
uint8_t* hmvp_size_ibc; //!< \brief HMVP IBC LUT size uint8_t* hmvp_size_ibc; //!< \brief HMVP IBC LUT size
@ -91,8 +92,6 @@ typedef struct videoframe
bool lmcs_top_level; //!< \brief Indicate that in this level the LMCS images are allocated bool lmcs_top_level; //!< \brief Indicate that in this level the LMCS images are allocated
bool rec_lmcs_mapped; //!< \brief Indicate if rec_lmcs is available and mapped to LMCS bool rec_lmcs_mapped; //!< \brief Indicate if rec_lmcs is available and mapped to LMCS
uvg_hashmap_t *ibc_hashmap; //!< \brief Hashmap for IBC hash search
} videoframe_t; } videoframe_t;