[ibc] Fill the IBC hashmap at the start of LCU search and use reverse map for "pos to hash"

This commit is contained in:
Marko Viitanen 2023-07-21 20:14:23 +03:00
parent 457d650f49
commit 8ff184a6b3
6 changed files with 84 additions and 40 deletions

View file

@ -133,6 +133,10 @@ static int encoder_state_config_tile_init(encoder_state_t * const state,
state->tile->frame->ibc_buffer_v = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu);
state->tile->frame->ibc_hashmap_row = malloc(sizeof(uvg_hashmap_t) * state->tile->frame->height_in_lcu);
state->tile->frame->ibc_hashmap_pos_to_hash_stride = ((state->tile->frame->width+UVG_HASHMAP_BLOCKSIZE-1)/ UVG_HASHMAP_BLOCKSIZE);
state->tile->frame->ibc_hashmap_pos_to_hash = malloc(sizeof(uint32_t) *
((state->tile->frame->height+UVG_HASHMAP_BLOCKSIZE-1)/ UVG_HASHMAP_BLOCKSIZE) * state->tile->frame->ibc_hashmap_pos_to_hash_stride);
for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) {
state->tile->frame->ibc_hashmap_row[i] = uvg_hashmap_create((LCU_WIDTH * IBC_BUFFER_WIDTH)>>2);
state->tile->frame->ibc_buffer_y[i] = (uvg_pixel*)malloc(IBC_BUFFER_SIZE * 3); // ToDo: we don't need this much, but it would also support 4:4:4
@ -220,6 +224,8 @@ static void encoder_state_config_tile_finalize(encoder_state_t * const state) {
FREE_POINTER(state->tile->frame->hmvp_size_ibc);
if (state->encoder_control->cfg.ibc) {
FREE_POINTER(state->tile->frame->ibc_hashmap_pos_to_hash);
for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) {
FREE_POINTER(state->tile->frame->ibc_buffer_y[i]);
uvg_hashmap_free(state->tile->frame->ibc_hashmap_row[i]);

View file

@ -288,23 +288,6 @@ static void encoder_state_recdata_to_bufs(encoder_state_t * const state,
const uint32_t ibc_block_width = MIN(LCU_WIDTH, (state->tile->frame->width-lcu->position_px.x));
const uint32_t ibc_block_height = MIN(LCU_WIDTH, (state->tile->frame->height-lcu->position_px.y));
int items = 0;
// Hash the current LCU to the IBC hashmap
for (int32_t xx = (lcu->position_px.x>8)?-6:0; xx < (int32_t)(ibc_block_width)-7; xx+=2) {
for (int32_t yy = 0; yy < (int32_t)(ibc_block_height)-7; yy++) {
int cur_x = lcu->position_px.x + xx;
int cur_y = lcu->position_px.y + yy;
uint32_t crc = uvg_crc32c_8x8(&frame->rec->y[cur_y * frame->rec->stride + cur_x],frame->rec->stride);
if (state->encoder_control->chroma_format != UVG_CSP_400) {
crc ^= uvg_crc32c_4x4(&frame->rec->u[(cur_y>>1) * (frame->rec->stride>>1) + (cur_x>>1)],frame->rec->stride>>1);
crc ^= uvg_crc32c_4x4(&frame->rec->v[(cur_y>>1) * (frame->rec->stride>>1) + (cur_x>>1)],frame->rec->stride>>1);
}
uvg_hashmap_insert(frame->ibc_hashmap_row[ibc_buffer_row], crc, ((cur_x&0xffff)<<16) | (cur_y&0xffff));
items++;
}
}
//fprintf(stderr, "Inserted %d items to %dx%d at %dx%d\r\n", items, ibc_block_width, ibc_block_height, lcu->position_px.x, lcu->position_px.y);
uvg_pixels_blit(&frame->rec->y[lcu->position_px.y * frame->rec->stride + lcu->position_px.x],
&frame->ibc_buffer_y[ibc_buffer_row][ibc_buffer_pos_x],
ibc_block_width, ibc_block_height,
@ -771,6 +754,49 @@ static void encoder_state_worker_encode_lcu_search(void * opaque)
if(state->frame->slicetype != UVG_SLICE_I) memcpy(original_lut, &state->tile->frame->hmvp_lut[ctu_row_mul_five], sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
if(state->encoder_control->cfg.ibc) memcpy(original_lut_ibc, &state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five], sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
if (state->encoder_control->cfg.ibc) {
videoframe_t * const frame = state->tile->frame;
const uint32_t ibc_block_width = MIN(LCU_WIDTH, (state->tile->frame->width-lcu->position_px.x));
const uint32_t ibc_block_height = MIN(LCU_WIDTH, (state->tile->frame->height-lcu->position_px.y));
int items = 0;
// Hash the current LCU to the IBC hashmap
for (int32_t xx = (lcu->position_px.x>8)?-7:0; xx < (int32_t)(ibc_block_width)-7; xx++) {
for (int32_t yy = 0; yy < (int32_t)(ibc_block_height)-7; yy++) {
int cur_x = lcu->position_px.x + xx;
int cur_y = lcu->position_px.y + yy;
// Skip blocks that seem to be the same value for the whole block
uint64_t first_line =
*(uint64_t *)&frame->source->y[cur_y * frame->source->stride + cur_x];
bool same_data = true;
for (int y_temp = 1; y_temp < 8; y_temp++) {
if (*(uint64_t *)&frame->source->y[(cur_y+y_temp) * frame->source->stride + cur_x] != first_line) {
same_data = false;
break;
}
}
if (!same_data || (xx % UVG_HASHMAP_BLOCKSIZE == 0 && yy % UVG_HASHMAP_BLOCKSIZE == 0)) {
uint32_t crc = uvg_crc32c_8x8(&frame->source->y[cur_y * frame->source->stride + cur_x],frame->source->stride);
if (xx % UVG_HASHMAP_BLOCKSIZE == 0 && yy % UVG_HASHMAP_BLOCKSIZE == 0) {
state->tile->frame->ibc_hashmap_pos_to_hash[(cur_y / UVG_HASHMAP_BLOCKSIZE)*state->tile->frame->ibc_hashmap_pos_to_hash_stride + cur_x / UVG_HASHMAP_BLOCKSIZE] = crc;
}
/*
if (state->encoder_control->chroma_format != UVG_CSP_400) {
crc ^= uvg_crc32c_4x4(&frame->rec->u[(cur_y>>1) * (frame->rec->stride>>1) + (cur_x>>1)],frame->rec->stride>>1);
crc ^= uvg_crc32c_4x4(&frame->rec->v[(cur_y>>1) * (frame->rec->stride>>1) + (cur_x>>1)],frame->rec->stride>>1);
}
*/
uvg_hashmap_insert(frame->ibc_hashmap_row[ctu_row], crc, ((cur_x&0xffff)<<16) | (cur_y&0xffff));
items++;
}
}
}
}
//fprintf(stderr, "Inserted %d items to %dx%d at %dx%d\r\n", items, ibc_block_width, ibc_block_height, lcu->position_px.x, lcu->position_px.y);
//This part doesn't write to bitstream, it's only search, deblock and sao
uvg_search_lcu(state, lcu->position_px.x, lcu->position_px.y, state->tile->hor_buf_search, state->tile->ver_buf_search, lcu->coeff);

View file

@ -75,9 +75,9 @@ uvg_hashmap_t* uvg_hashmap_create(uint32_t bucket_size)
*/
static uint32_t uvg_hashmap_hash(uint32_t key, uint32_t bucket_size)
{
// Maps a 32-bit key to a bucket index in the range [0, bucket_size).
// bucket_size must be non-zero: every return expression here takes a modulo by it.
// NOTE(review): this listing carries two variants of the body back to back with
// no change markers. Read literally as C, the bit-mixing return directly below
// is the one that executes and the trailing plain-modulo return is unreachable;
// confirm which variant is intended before relying on either.
key ^= (key >> 20) ^ (key >> 12);
return (key ^ (key >> 7) ^ (key >> 4) ^ 2654435769U) % bucket_size;
//return key % bucket_size;
//key ^= (key >> 20) ^ (key >> 12);
//return (key ^ (key >> 7) ^ (key >> 4) ^ 2654435769U) % bucket_size;
return key % bucket_size;
}
/**

View file

@ -1012,6 +1012,7 @@ static double search_cu(
// Simple IBC search
if (can_use_intra //&& state->frame->slicetype == UVG_SLICE_I
&& state->encoder_control->cfg.ibc
&& cost > 1000
&& cu_width > 4
&& (x >= cu_width || y >= cu_width)
&& !cur_cu->skipped) {
@ -1029,6 +1030,7 @@ static double search_cu(
cost = mode_cost;
inter_bitcost = mode_bitcost;
cur_cu->type = CU_IBC;
cur_cu->inter.mv_dir = 1;
cur_cu->joint_cb_cr = 0;
} else {
*cur_cu = backup_cu;

View file

@ -1082,7 +1082,8 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
info.merge_cand,
lcu);
*inter_cost = MAX_DOUBLE;
*inter_cost = MAX_DOUBLE;
*inter_bitcost = MAX_DOUBLE;
bool valid_mv = false;
@ -1112,11 +1113,12 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
uint32_t ibc_buffer_row = yy / LCU_WIDTH;
//UVG_GET_TIME(&hashmap_start_temp);
uint32_t crc = uvg_crc32c_8x8(&state->tile->frame->source->y[yy * state->tile->frame->source->stride + xx],state->tile->frame->source->stride);
if (state->encoder_control->chroma_format != UVG_CSP_400) {
uint32_t crc = state->tile->frame->ibc_hashmap_pos_to_hash[(yy / UVG_HASHMAP_BLOCKSIZE)*state->tile->frame->ibc_hashmap_pos_to_hash_stride + xx / UVG_HASHMAP_BLOCKSIZE];
//uvg_crc32c_8x8(&state->tile->frame->source->y[yy * state->tile->frame->source->stride + xx],state->tile->frame->source->stride);
/* if (state->encoder_control->chroma_format != UVG_CSP_400) {
crc ^= uvg_crc32c_4x4(&state->tile->frame->source->u[(yy >> 1) * (state->tile->frame->source->stride>>1) + (xx >> 1)],state->tile->frame->source->stride>>1);
crc ^= uvg_crc32c_4x4(&state->tile->frame->source->v[(yy >> 1) * (state->tile->frame->source->stride>>1) + (xx >> 1)],state->tile->frame->source->stride>>1);
}
}*/
/* UVG_GET_TIME(&hashmap_end_temp);
crc_time += UVG_CLOCK_T_AS_DOUBLE(hashmap_end_temp) -
UVG_CLOCK_T_AS_DOUBLE(hashmap_start_temp);*/
@ -1133,11 +1135,11 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
while (result != NULL) {
if (hashes_found == 0 && result->size > 1000) {
fprintf(stderr, "Found a block with %d elements\n", result->size);
//fprintf(stderr, "Found a block with %d elements\n", result->size);
//break;
}
if (result->key == crc && result->value != own_location) {
hashes_found++;
hashes_found++;
hits++;
int pos_x = result->value >> 16;
int pos_y = result->value & 0xffff;
@ -1149,11 +1151,13 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
bool full_block = true; // Is the full block covered by the IBC?
for (int xxx = xx+UVG_HASHMAP_BLOCKSIZE; xxx < xx + width; xxx+=UVG_HASHMAP_BLOCKSIZE) {
for (int yyy = yy; yyy < yy + height; yyy += UVG_HASHMAP_BLOCKSIZE) {
uint32_t crc_other_blocks = uvg_crc32c_8x8(&state->tile->frame->source->y[yyy * state->tile->frame->source->stride + xxx],state->tile->frame->source->stride);
uint32_t crc_other_blocks = state->tile->frame->ibc_hashmap_pos_to_hash[(yyy / UVG_HASHMAP_BLOCKSIZE)*state->tile->frame->ibc_hashmap_pos_to_hash_stride + xxx / UVG_HASHMAP_BLOCKSIZE];
//uvg_crc32c_8x8(&state->tile->frame->source->y[yyy * state->tile->frame->source->stride + xxx],state->tile->frame->source->stride);
/*
if (state->encoder_control->chroma_format != UVG_CSP_400) {
crc_other_blocks ^= uvg_crc32c_4x4(&state->tile->frame->source->u[(yyy >> 1) * (state->tile->frame->source->stride>>1) + (xxx >> 1)],state->tile->frame->source->stride>>1);
crc_other_blocks ^= uvg_crc32c_4x4(&state->tile->frame->source->v[(yyy >> 1) * (state->tile->frame->source->stride>>1) + (xxx >> 1)],state->tile->frame->source->stride>>1);
}
}*/
uvg_hashmap_node_t *result2 = uvg_hashmap_search(state->tile->frame->ibc_hashmap_row[ibc_buffer_row],crc_other_blocks);
evaluations++;
bool found_match = false;
@ -1180,19 +1184,22 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
break;
}
}
double cost = *inter_cost, bits = *inter_bitcost;
vector2d_t mv = { best_mv_x, best_mv_y};
if (full_block && check_mv_cost(&info, mv_x, mv_y, &cost, &bits, &mv)) {
if (full_block) {
double cost = get_ibc_mvd_coding_cost(state, &state->cabac, mv_x,mv_y) * state->lambda_sqrt;
//double cost = get_ibc_mvd_coding_cost(state, &state->cabac, mv_x,mv_y) * state->lambda_sqrt;
//cost +=
bool better_mv = cost < *inter_cost;
if (better_mv) {
best_mv_x = mv_x;
best_mv_y = mv_y;
*inter_cost = cost;
*inter_bitcost = 0.0;
*inter_bitcost = bits;
fprintf(stderr, "Found best IBC!! %dx%d %dx%d: %d,%d\r\n", x,y, width,width, mv_x, mv_y);
found_block = true;
break;
//break;
}
}
}
@ -1256,12 +1263,13 @@ void uvg_search_cu_ibc(encoder_state_t * const state,
*inter_cost = MAX_DOUBLE;
*inter_bitcost = MAX_INT;
// Quick hashmap search
uvg_search_hash_cu_ibc(state,
x, y, depth,
lcu,
inter_cost,
inter_bitcost);
return;
/* uvg_search_hash_cu_ibc(
state,
x, y, depth,
lcu,
inter_cost,
inter_bitcost);
return;*/
// Store information of L0, L1, and bipredictions.
// Best cost will be left at MAX_DOUBLE if no valid CU is found.
// These will be initialized by the following function.
@ -1327,7 +1335,7 @@ void uvg_search_cu_ibc(encoder_state_t * const state,
uvg_inter_recon_cu(state, lcu, x, y, CU_WIDTH_FROM_DEPTH(depth),
true, state->encoder_control->chroma_format != UVG_CSP_400);
if (*inter_cost < MAX_DOUBLE) {
if (*inter_cost < MAX_DOUBLE) {
assert(fracmv_within_ibc_range(&info, cur_pu->inter.mv[0][0], cur_pu->inter.mv[0][1]));
}
}

View file

@ -83,6 +83,8 @@ typedef struct videoframe
uvg_pixel **ibc_buffer_u; //!< \brief Intra Block Copy buffer for each LCU row
uvg_pixel **ibc_buffer_v; //!< \brief Intra Block Copy buffer for each LCU row
uvg_hashmap_t **ibc_hashmap_row; //!< \brief Hashmap for IBC hash search for each LCU row
uint32_t *ibc_hashmap_pos_to_hash; //!< \brief Hashmap reverse search for position to hash
uint32_t ibc_hashmap_pos_to_hash_stride; //!< \brief Hashmap position to hash stride
cu_info_t* hmvp_lut_ibc; //!< \brief Look-up table for HMVP in IBC, one for each LCU row
uint8_t* hmvp_size_ibc; //!< \brief HMVP IBC LUT size