mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-23 18:14:06 +00:00
[ibc] Fill the IBC hashmap at the start of LCU search and use reverse map for "pos to hash"
This commit is contained in:
parent
457d650f49
commit
8ff184a6b3
|
@ -133,6 +133,10 @@ static int encoder_state_config_tile_init(encoder_state_t * const state,
|
|||
state->tile->frame->ibc_buffer_v = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu);
|
||||
state->tile->frame->ibc_hashmap_row = malloc(sizeof(uvg_hashmap_t) * state->tile->frame->height_in_lcu);
|
||||
|
||||
state->tile->frame->ibc_hashmap_pos_to_hash_stride = ((state->tile->frame->width+UVG_HASHMAP_BLOCKSIZE-1)/ UVG_HASHMAP_BLOCKSIZE);
|
||||
state->tile->frame->ibc_hashmap_pos_to_hash = malloc(sizeof(uint32_t) *
|
||||
((state->tile->frame->height+UVG_HASHMAP_BLOCKSIZE-1)/ UVG_HASHMAP_BLOCKSIZE) * state->tile->frame->ibc_hashmap_pos_to_hash_stride);
|
||||
|
||||
for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) {
|
||||
state->tile->frame->ibc_hashmap_row[i] = uvg_hashmap_create((LCU_WIDTH * IBC_BUFFER_WIDTH)>>2);
|
||||
state->tile->frame->ibc_buffer_y[i] = (uvg_pixel*)malloc(IBC_BUFFER_SIZE * 3); // ToDo: we don't need this much, but it would also support 4:4:4
|
||||
|
@ -220,6 +224,8 @@ static void encoder_state_config_tile_finalize(encoder_state_t * const state) {
|
|||
FREE_POINTER(state->tile->frame->hmvp_size_ibc);
|
||||
|
||||
if (state->encoder_control->cfg.ibc) {
|
||||
FREE_POINTER(state->tile->frame->ibc_hashmap_pos_to_hash);
|
||||
|
||||
for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) {
|
||||
FREE_POINTER(state->tile->frame->ibc_buffer_y[i]);
|
||||
uvg_hashmap_free(state->tile->frame->ibc_hashmap_row[i]);
|
||||
|
|
|
@ -288,23 +288,6 @@ static void encoder_state_recdata_to_bufs(encoder_state_t * const state,
|
|||
const uint32_t ibc_block_width = MIN(LCU_WIDTH, (state->tile->frame->width-lcu->position_px.x));
|
||||
const uint32_t ibc_block_height = MIN(LCU_WIDTH, (state->tile->frame->height-lcu->position_px.y));
|
||||
|
||||
int items = 0;
|
||||
// Hash the current LCU to the IBC hashmap
|
||||
for (int32_t xx = (lcu->position_px.x>8)?-6:0; xx < (int32_t)(ibc_block_width)-7; xx+=2) {
|
||||
for (int32_t yy = 0; yy < (int32_t)(ibc_block_height)-7; yy++) {
|
||||
int cur_x = lcu->position_px.x + xx;
|
||||
int cur_y = lcu->position_px.y + yy;
|
||||
uint32_t crc = uvg_crc32c_8x8(&frame->rec->y[cur_y * frame->rec->stride + cur_x],frame->rec->stride);
|
||||
if (state->encoder_control->chroma_format != UVG_CSP_400) {
|
||||
crc ^= uvg_crc32c_4x4(&frame->rec->u[(cur_y>>1) * (frame->rec->stride>>1) + (cur_x>>1)],frame->rec->stride>>1);
|
||||
crc ^= uvg_crc32c_4x4(&frame->rec->v[(cur_y>>1) * (frame->rec->stride>>1) + (cur_x>>1)],frame->rec->stride>>1);
|
||||
}
|
||||
uvg_hashmap_insert(frame->ibc_hashmap_row[ibc_buffer_row], crc, ((cur_x&0xffff)<<16) | (cur_y&0xffff));
|
||||
items++;
|
||||
}
|
||||
}
|
||||
//fprintf(stderr, "Inserted %d items to %dx%d at %dx%d\r\n", items, ibc_block_width, ibc_block_height, lcu->position_px.x, lcu->position_px.y);
|
||||
|
||||
uvg_pixels_blit(&frame->rec->y[lcu->position_px.y * frame->rec->stride + lcu->position_px.x],
|
||||
&frame->ibc_buffer_y[ibc_buffer_row][ibc_buffer_pos_x],
|
||||
ibc_block_width, ibc_block_height,
|
||||
|
@ -771,6 +754,49 @@ static void encoder_state_worker_encode_lcu_search(void * opaque)
|
|||
if(state->frame->slicetype != UVG_SLICE_I) memcpy(original_lut, &state->tile->frame->hmvp_lut[ctu_row_mul_five], sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
|
||||
if(state->encoder_control->cfg.ibc) memcpy(original_lut_ibc, &state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five], sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
|
||||
|
||||
|
||||
if (state->encoder_control->cfg.ibc) {
|
||||
videoframe_t * const frame = state->tile->frame;
|
||||
const uint32_t ibc_block_width = MIN(LCU_WIDTH, (state->tile->frame->width-lcu->position_px.x));
|
||||
const uint32_t ibc_block_height = MIN(LCU_WIDTH, (state->tile->frame->height-lcu->position_px.y));
|
||||
int items = 0;
|
||||
// Hash the current LCU to the IBC hashmap
|
||||
for (int32_t xx = (lcu->position_px.x>8)?-7:0; xx < (int32_t)(ibc_block_width)-7; xx++) {
|
||||
for (int32_t yy = 0; yy < (int32_t)(ibc_block_height)-7; yy++) {
|
||||
int cur_x = lcu->position_px.x + xx;
|
||||
int cur_y = lcu->position_px.y + yy;
|
||||
|
||||
// Skip blocks that seem to be the same value for the whole block
|
||||
uint64_t first_line =
|
||||
*(uint64_t *)&frame->source->y[cur_y * frame->source->stride + cur_x];
|
||||
bool same_data = true;
|
||||
for (int y_temp = 1; y_temp < 8; y_temp++) {
|
||||
if (*(uint64_t *)&frame->source->y[(cur_y+y_temp) * frame->source->stride + cur_x] != first_line) {
|
||||
same_data = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!same_data || (xx % UVG_HASHMAP_BLOCKSIZE == 0 && yy % UVG_HASHMAP_BLOCKSIZE == 0)) {
|
||||
uint32_t crc = uvg_crc32c_8x8(&frame->source->y[cur_y * frame->source->stride + cur_x],frame->source->stride);
|
||||
if (xx % UVG_HASHMAP_BLOCKSIZE == 0 && yy % UVG_HASHMAP_BLOCKSIZE == 0) {
|
||||
state->tile->frame->ibc_hashmap_pos_to_hash[(cur_y / UVG_HASHMAP_BLOCKSIZE)*state->tile->frame->ibc_hashmap_pos_to_hash_stride + cur_x / UVG_HASHMAP_BLOCKSIZE] = crc;
|
||||
}
|
||||
/*
|
||||
if (state->encoder_control->chroma_format != UVG_CSP_400) {
|
||||
crc ^= uvg_crc32c_4x4(&frame->rec->u[(cur_y>>1) * (frame->rec->stride>>1) + (cur_x>>1)],frame->rec->stride>>1);
|
||||
crc ^= uvg_crc32c_4x4(&frame->rec->v[(cur_y>>1) * (frame->rec->stride>>1) + (cur_x>>1)],frame->rec->stride>>1);
|
||||
}
|
||||
*/
|
||||
uvg_hashmap_insert(frame->ibc_hashmap_row[ctu_row], crc, ((cur_x&0xffff)<<16) | (cur_y&0xffff));
|
||||
items++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
//fprintf(stderr, "Inserted %d items to %dx%d at %dx%d\r\n", items, ibc_block_width, ibc_block_height, lcu->position_px.x, lcu->position_px.y);
|
||||
|
||||
|
||||
//This part doesn't write to bitstream, it's only search, deblock and sao
|
||||
uvg_search_lcu(state, lcu->position_px.x, lcu->position_px.y, state->tile->hor_buf_search, state->tile->ver_buf_search, lcu->coeff);
|
||||
|
||||
|
|
|
@ -75,9 +75,9 @@ uvg_hashmap_t* uvg_hashmap_create(uint32_t bucket_size)
|
|||
*/
|
||||
static uint32_t uvg_hashmap_hash(uint32_t key, uint32_t bucket_size)
|
||||
{
|
||||
key ^= (key >> 20) ^ (key >> 12);
|
||||
return (key ^ (key >> 7) ^ (key >> 4) ^ 2654435769U) % bucket_size;
|
||||
//return key % bucket_size;
|
||||
//key ^= (key >> 20) ^ (key >> 12);
|
||||
//return (key ^ (key >> 7) ^ (key >> 4) ^ 2654435769U) % bucket_size;
|
||||
return key % bucket_size;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -1012,6 +1012,7 @@ static double search_cu(
|
|||
// Simple IBC search
|
||||
if (can_use_intra //&& state->frame->slicetype == UVG_SLICE_I
|
||||
&& state->encoder_control->cfg.ibc
|
||||
&& cost > 1000
|
||||
&& cu_width > 4
|
||||
&& (x >= cu_width || y >= cu_width)
|
||||
&& !cur_cu->skipped) {
|
||||
|
@ -1029,6 +1030,7 @@ static double search_cu(
|
|||
cost = mode_cost;
|
||||
inter_bitcost = mode_bitcost;
|
||||
cur_cu->type = CU_IBC;
|
||||
cur_cu->inter.mv_dir = 1;
|
||||
cur_cu->joint_cb_cr = 0;
|
||||
} else {
|
||||
*cur_cu = backup_cu;
|
||||
|
|
|
@ -1082,7 +1082,8 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
|
|||
info.merge_cand,
|
||||
lcu);
|
||||
|
||||
*inter_cost = MAX_DOUBLE;
|
||||
*inter_cost = MAX_DOUBLE;
|
||||
*inter_bitcost = MAX_DOUBLE;
|
||||
|
||||
bool valid_mv = false;
|
||||
|
||||
|
@ -1112,11 +1113,12 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
|
|||
uint32_t ibc_buffer_row = yy / LCU_WIDTH;
|
||||
|
||||
//UVG_GET_TIME(&hashmap_start_temp);
|
||||
uint32_t crc = uvg_crc32c_8x8(&state->tile->frame->source->y[yy * state->tile->frame->source->stride + xx],state->tile->frame->source->stride);
|
||||
if (state->encoder_control->chroma_format != UVG_CSP_400) {
|
||||
uint32_t crc = state->tile->frame->ibc_hashmap_pos_to_hash[(yy / UVG_HASHMAP_BLOCKSIZE)*state->tile->frame->ibc_hashmap_pos_to_hash_stride + xx / UVG_HASHMAP_BLOCKSIZE];
|
||||
//uvg_crc32c_8x8(&state->tile->frame->source->y[yy * state->tile->frame->source->stride + xx],state->tile->frame->source->stride);
|
||||
/* if (state->encoder_control->chroma_format != UVG_CSP_400) {
|
||||
crc ^= uvg_crc32c_4x4(&state->tile->frame->source->u[(yy >> 1) * (state->tile->frame->source->stride>>1) + (xx >> 1)],state->tile->frame->source->stride>>1);
|
||||
crc ^= uvg_crc32c_4x4(&state->tile->frame->source->v[(yy >> 1) * (state->tile->frame->source->stride>>1) + (xx >> 1)],state->tile->frame->source->stride>>1);
|
||||
}
|
||||
}*/
|
||||
/* UVG_GET_TIME(&hashmap_end_temp);
|
||||
crc_time += UVG_CLOCK_T_AS_DOUBLE(hashmap_end_temp) -
|
||||
UVG_CLOCK_T_AS_DOUBLE(hashmap_start_temp);*/
|
||||
|
@ -1133,7 +1135,7 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
|
|||
|
||||
while (result != NULL) {
|
||||
if (hashes_found == 0 && result->size > 1000) {
|
||||
fprintf(stderr, "Found a block with %d elements\n", result->size);
|
||||
//fprintf(stderr, "Found a block with %d elements\n", result->size);
|
||||
//break;
|
||||
}
|
||||
if (result->key == crc && result->value != own_location) {
|
||||
|
@ -1149,11 +1151,13 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
|
|||
bool full_block = true; // Is the full block covered by the IBC?
|
||||
for (int xxx = xx+UVG_HASHMAP_BLOCKSIZE; xxx < xx + width; xxx+=UVG_HASHMAP_BLOCKSIZE) {
|
||||
for (int yyy = yy; yyy < yy + height; yyy += UVG_HASHMAP_BLOCKSIZE) {
|
||||
uint32_t crc_other_blocks = uvg_crc32c_8x8(&state->tile->frame->source->y[yyy * state->tile->frame->source->stride + xxx],state->tile->frame->source->stride);
|
||||
uint32_t crc_other_blocks = state->tile->frame->ibc_hashmap_pos_to_hash[(yyy / UVG_HASHMAP_BLOCKSIZE)*state->tile->frame->ibc_hashmap_pos_to_hash_stride + xxx / UVG_HASHMAP_BLOCKSIZE];
|
||||
//uvg_crc32c_8x8(&state->tile->frame->source->y[yyy * state->tile->frame->source->stride + xxx],state->tile->frame->source->stride);
|
||||
/*
|
||||
if (state->encoder_control->chroma_format != UVG_CSP_400) {
|
||||
crc_other_blocks ^= uvg_crc32c_4x4(&state->tile->frame->source->u[(yyy >> 1) * (state->tile->frame->source->stride>>1) + (xxx >> 1)],state->tile->frame->source->stride>>1);
|
||||
crc_other_blocks ^= uvg_crc32c_4x4(&state->tile->frame->source->v[(yyy >> 1) * (state->tile->frame->source->stride>>1) + (xxx >> 1)],state->tile->frame->source->stride>>1);
|
||||
}
|
||||
}*/
|
||||
uvg_hashmap_node_t *result2 = uvg_hashmap_search(state->tile->frame->ibc_hashmap_row[ibc_buffer_row],crc_other_blocks);
|
||||
evaluations++;
|
||||
bool found_match = false;
|
||||
|
@ -1180,19 +1184,22 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
|
|||
break;
|
||||
}
|
||||
}
|
||||
double cost = *inter_cost, bits = *inter_bitcost;
|
||||
vector2d_t mv = { best_mv_x, best_mv_y};
|
||||
|
||||
if (full_block) {
|
||||
if (full_block && check_mv_cost(&info, mv_x, mv_y, &cost, &bits, &mv)) {
|
||||
|
||||
double cost = get_ibc_mvd_coding_cost(state, &state->cabac, mv_x,mv_y) * state->lambda_sqrt;
|
||||
//double cost = get_ibc_mvd_coding_cost(state, &state->cabac, mv_x,mv_y) * state->lambda_sqrt;
|
||||
//cost +=
|
||||
bool better_mv = cost < *inter_cost;
|
||||
if (better_mv) {
|
||||
best_mv_x = mv_x;
|
||||
best_mv_y = mv_y;
|
||||
*inter_cost = cost;
|
||||
*inter_bitcost = 0.0;
|
||||
*inter_bitcost = bits;
|
||||
fprintf(stderr, "Found best IBC!! %dx%d %dx%d: %d,%d\r\n", x,y, width,width, mv_x, mv_y);
|
||||
found_block = true;
|
||||
break;
|
||||
//break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1256,12 +1263,13 @@ void uvg_search_cu_ibc(encoder_state_t * const state,
|
|||
*inter_cost = MAX_DOUBLE;
|
||||
*inter_bitcost = MAX_INT;
|
||||
// Quick hashmap search
|
||||
uvg_search_hash_cu_ibc(state,
|
||||
x, y, depth,
|
||||
lcu,
|
||||
inter_cost,
|
||||
inter_bitcost);
|
||||
return;
|
||||
/* uvg_search_hash_cu_ibc(
|
||||
state,
|
||||
x, y, depth,
|
||||
lcu,
|
||||
inter_cost,
|
||||
inter_bitcost);
|
||||
return;*/
|
||||
// Store information of L0, L1, and bipredictions.
|
||||
// Best cost will be left at MAX_DOUBLE if no valid CU is found.
|
||||
// These will be initialized by the following function.
|
||||
|
|
|
@ -83,6 +83,8 @@ typedef struct videoframe
|
|||
uvg_pixel **ibc_buffer_u; //!< \brief Intra Block Copy buffer for each LCU row
|
||||
uvg_pixel **ibc_buffer_v; //!< \brief Intra Block Copy buffer for each LCU row
|
||||
uvg_hashmap_t **ibc_hashmap_row; //!< \brief Hashmap for IBC hash search for each LCU row
|
||||
uint32_t *ibc_hashmap_pos_to_hash; //!< \brief Hashmap reverse search for position to hash
|
||||
uint32_t ibc_hashmap_pos_to_hash_stride; //!< \brief Hashmap position to hash stride
|
||||
cu_info_t* hmvp_lut_ibc; //!< \brief Look-up table for HMVP in IBC, one for each LCU row
|
||||
uint8_t* hmvp_size_ibc; //!< \brief HMVP IBC LUT size
|
||||
|
||||
|
|
Loading…
Reference in a new issue