From 8ff184a6b3bec76dbf2be63606c985d4f960c0de Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Fri, 21 Jul 2023 20:14:23 +0300 Subject: [PATCH] [ibc] Fill the IBC hashmap at the start of LCU search and use reverse map for "pos to hash" --- src/encoder_state-ctors_dtors.c | 6 ++++ src/encoderstate.c | 60 +++++++++++++++++++++++---------- src/hashmap.c | 6 ++-- src/search.c | 2 ++ src/search_ibc.c | 48 +++++++++++++++----------- src/videoframe.h | 2 ++ 6 files changed, 84 insertions(+), 40 deletions(-) diff --git a/src/encoder_state-ctors_dtors.c b/src/encoder_state-ctors_dtors.c index 965b3d08..526c3bc5 100644 --- a/src/encoder_state-ctors_dtors.c +++ b/src/encoder_state-ctors_dtors.c @@ -133,6 +133,10 @@ static int encoder_state_config_tile_init(encoder_state_t * const state, state->tile->frame->ibc_buffer_v = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu); state->tile->frame->ibc_hashmap_row = malloc(sizeof(uvg_hashmap_t) * state->tile->frame->height_in_lcu); + state->tile->frame->ibc_hashmap_pos_to_hash_stride = ((state->tile->frame->width+UVG_HASHMAP_BLOCKSIZE-1)/ UVG_HASHMAP_BLOCKSIZE); + state->tile->frame->ibc_hashmap_pos_to_hash = malloc(sizeof(uint32_t) * + ((state->tile->frame->height+UVG_HASHMAP_BLOCKSIZE-1)/ UVG_HASHMAP_BLOCKSIZE) * state->tile->frame->ibc_hashmap_pos_to_hash_stride); + for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) { state->tile->frame->ibc_hashmap_row[i] = uvg_hashmap_create((LCU_WIDTH * IBC_BUFFER_WIDTH)>>2); state->tile->frame->ibc_buffer_y[i] = (uvg_pixel*)malloc(IBC_BUFFER_SIZE * 3); // ToDo: we don't need this much, but it would also support 4:4:4 @@ -220,6 +224,8 @@ static void encoder_state_config_tile_finalize(encoder_state_t * const state) { FREE_POINTER(state->tile->frame->hmvp_size_ibc); if (state->encoder_control->cfg.ibc) { + FREE_POINTER(state->tile->frame->ibc_hashmap_pos_to_hash); + for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) { FREE_POINTER(state->tile->frame->ibc_buffer_y[i]); uvg_hashmap_free(state->tile->frame->ibc_hashmap_row[i]); diff --git a/src/encoderstate.c b/src/encoderstate.c index e5c0c4d8..dd60fd03 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -288,23 +288,6 @@ static void encoder_state_recdata_to_bufs(encoder_state_t * const state, const uint32_t ibc_block_width = MIN(LCU_WIDTH, (state->tile->frame->width-lcu->position_px.x)); const uint32_t ibc_block_height = MIN(LCU_WIDTH, (state->tile->frame->height-lcu->position_px.y)); - int items = 0; - // Hash the current LCU to the IBC hashmap - for (int32_t xx = (lcu->position_px.x>8)?-6:0; xx < (int32_t)(ibc_block_width)-7; xx+=2) { - for (int32_t yy = 0; yy < (int32_t)(ibc_block_height)-7; yy++) { - int cur_x = lcu->position_px.x + xx; - int cur_y = lcu->position_px.y + yy; - uint32_t crc = uvg_crc32c_8x8(&frame->rec->y[cur_y * frame->rec->stride + cur_x],frame->rec->stride); - if (state->encoder_control->chroma_format != UVG_CSP_400) { - crc ^= uvg_crc32c_4x4(&frame->rec->u[(cur_y>>1) * (frame->rec->stride>>1) + (cur_x>>1)],frame->rec->stride>>1); - crc ^= uvg_crc32c_4x4(&frame->rec->v[(cur_y>>1) * (frame->rec->stride>>1) + (cur_x>>1)],frame->rec->stride>>1); - } - uvg_hashmap_insert(frame->ibc_hashmap_row[ibc_buffer_row], crc, ((cur_x&0xffff)<<16) | (cur_y&0xffff)); - items++; - } - } - //fprintf(stderr, "Inserted %d items to %dx%d at %dx%d\r\n", items, ibc_block_width, ibc_block_height, lcu->position_px.x, lcu->position_px.y); - uvg_pixels_blit(&frame->rec->y[lcu->position_px.y * frame->rec->stride + lcu->position_px.x], &frame->ibc_buffer_y[ibc_buffer_row][ibc_buffer_pos_x], ibc_block_width, ibc_block_height, @@ -771,6 +754,49 @@ static void encoder_state_worker_encode_lcu_search(void * opaque) if(state->frame->slicetype != UVG_SLICE_I) memcpy(original_lut, &state->tile->frame->hmvp_lut[ctu_row_mul_five], sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS); if(state->encoder_control->cfg.ibc) memcpy(original_lut_ibc, &state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five], sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS); + + if (state->encoder_control->cfg.ibc) { + videoframe_t * const frame = state->tile->frame; + const uint32_t ibc_block_width = MIN(LCU_WIDTH, (state->tile->frame->width-lcu->position_px.x)); + const uint32_t ibc_block_height = MIN(LCU_WIDTH, (state->tile->frame->height-lcu->position_px.y)); + int items = 0; + // Hash the current LCU to the IBC hashmap + for (int32_t xx = (lcu->position_px.x>8)?-7:0; xx < (int32_t)(ibc_block_width)-7; xx++) { + for (int32_t yy = 0; yy < (int32_t)(ibc_block_height)-7; yy++) { + int cur_x = lcu->position_px.x + xx; + int cur_y = lcu->position_px.y + yy; + + // Skip blocks that seem to be the same value for the whole block + uint64_t first_line = + *(uint64_t *)&frame->source->y[cur_y * frame->source->stride + cur_x]; + bool same_data = true; + for (int y_temp = 1; y_temp < 8; y_temp++) { + if (*(uint64_t *)&frame->source->y[(cur_y+y_temp) * frame->source->stride + cur_x] != first_line) { + same_data = false; + break; + } + } + + if (!same_data || (xx % UVG_HASHMAP_BLOCKSIZE == 0 && yy % UVG_HASHMAP_BLOCKSIZE == 0)) { + uint32_t crc = uvg_crc32c_8x8(&frame->source->y[cur_y * frame->source->stride + cur_x],frame->source->stride); + if (xx % UVG_HASHMAP_BLOCKSIZE == 0 && yy % UVG_HASHMAP_BLOCKSIZE == 0) { + state->tile->frame->ibc_hashmap_pos_to_hash[(cur_y / UVG_HASHMAP_BLOCKSIZE)*state->tile->frame->ibc_hashmap_pos_to_hash_stride + cur_x / UVG_HASHMAP_BLOCKSIZE] = crc; + } + /* + if (state->encoder_control->chroma_format != UVG_CSP_400) { + crc ^= uvg_crc32c_4x4(&frame->rec->u[(cur_y>>1) * (frame->rec->stride>>1) + (cur_x>>1)],frame->rec->stride>>1); + crc ^= uvg_crc32c_4x4(&frame->rec->v[(cur_y>>1) * (frame->rec->stride>>1) + (cur_x>>1)],frame->rec->stride>>1); + } + */ + uvg_hashmap_insert(frame->ibc_hashmap_row[ctu_row], crc, ((cur_x&0xffff)<<16) | (cur_y&0xffff)); + items++; + } + } + } + } + //fprintf(stderr, "Inserted %d items to %dx%d at %dx%d\r\n", items, ibc_block_width, ibc_block_height, lcu->position_px.x, lcu->position_px.y); + + //This part doesn't write to bitstream, it's only search, deblock and sao uvg_search_lcu(state, lcu->position_px.x, lcu->position_px.y, state->tile->hor_buf_search, state->tile->ver_buf_search, lcu->coeff); diff --git a/src/hashmap.c b/src/hashmap.c index c9d88d9c..73d8f891 100644 --- a/src/hashmap.c +++ b/src/hashmap.c @@ -75,9 +75,9 @@ uvg_hashmap_t* uvg_hashmap_create(uint32_t bucket_size) */ static uint32_t uvg_hashmap_hash(uint32_t key, uint32_t bucket_size) { - key ^= (key >> 20) ^ (key >> 12); - return (key ^ (key >> 7) ^ (key >> 4) ^ 2654435769U) % bucket_size; - //return key % bucket_size; + //key ^= (key >> 20) ^ (key >> 12); + //return (key ^ (key >> 7) ^ (key >> 4) ^ 2654435769U) % bucket_size; + return key % bucket_size; } /** diff --git a/src/search.c b/src/search.c index f4f040eb..ec803c1b 100644 --- a/src/search.c +++ b/src/search.c @@ -1012,6 +1012,7 @@ static double search_cu( // Simple IBC search if (can_use_intra //&& state->frame->slicetype == UVG_SLICE_I && state->encoder_control->cfg.ibc + && cost > 1000 && cu_width > 4 && (x >= cu_width || y >= cu_width) && !cur_cu->skipped) { @@ -1029,6 +1030,7 @@ static double search_cu( cost = mode_cost; inter_bitcost = mode_bitcost; cur_cu->type = CU_IBC; + cur_cu->inter.mv_dir = 1; cur_cu->joint_cb_cr = 0; } else { *cur_cu = backup_cu; diff --git a/src/search_ibc.c b/src/search_ibc.c index a981f6ca..c6bef680 100644 --- a/src/search_ibc.c +++ b/src/search_ibc.c @@ -1082,7 +1082,8 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state, info.merge_cand, lcu); - *inter_cost = MAX_DOUBLE; + *inter_cost = MAX_DOUBLE; + *inter_bitcost = MAX_DOUBLE; bool valid_mv = false; @@ -1112,11 +1113,12 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state, uint32_t ibc_buffer_row = yy / LCU_WIDTH; //UVG_GET_TIME(&hashmap_start_temp); - uint32_t crc = uvg_crc32c_8x8(&state->tile->frame->source->y[yy * state->tile->frame->source->stride + xx],state->tile->frame->source->stride); - if (state->encoder_control->chroma_format != UVG_CSP_400) { + uint32_t crc = state->tile->frame->ibc_hashmap_pos_to_hash[(yy / UVG_HASHMAP_BLOCKSIZE)*state->tile->frame->ibc_hashmap_pos_to_hash_stride + xx / UVG_HASHMAP_BLOCKSIZE]; + //uvg_crc32c_8x8(&state->tile->frame->source->y[yy * state->tile->frame->source->stride + xx],state->tile->frame->source->stride); + /* if (state->encoder_control->chroma_format != UVG_CSP_400) { crc ^= uvg_crc32c_4x4(&state->tile->frame->source->u[(yy >> 1) * (state->tile->frame->source->stride>>1) + (xx >> 1)],state->tile->frame->source->stride>>1); crc ^= uvg_crc32c_4x4(&state->tile->frame->source->v[(yy >> 1) * (state->tile->frame->source->stride>>1) + (xx >> 1)],state->tile->frame->source->stride>>1); - } + }*/ /* UVG_GET_TIME(&hashmap_end_temp); crc_time += UVG_CLOCK_T_AS_DOUBLE(hashmap_end_temp) - UVG_CLOCK_T_AS_DOUBLE(hashmap_start_temp);*/ @@ -1133,11 +1135,11 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state, while (result != NULL) { if (hashes_found == 0 && result->size > 1000) { - fprintf(stderr, "Found a block with %d elements\n", result->size); + //fprintf(stderr, "Found a block with %d elements\n", result->size); //break; } if (result->key == crc && result->value != own_location) { - hashes_found++; + hashes_found++; hits++; int pos_x = result->value >> 16; int pos_y = result->value & 0xffff; @@ -1149,11 +1151,13 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state, bool full_block = true; // Is the full block covered by the IBC? for (int xxx = xx+UVG_HASHMAP_BLOCKSIZE; xxx < xx + width; xxx+=UVG_HASHMAP_BLOCKSIZE) { for (int yyy = yy; yyy < yy + height; yyy += UVG_HASHMAP_BLOCKSIZE) { - uint32_t crc_other_blocks = uvg_crc32c_8x8(&state->tile->frame->source->y[yyy * state->tile->frame->source->stride + xxx],state->tile->frame->source->stride); + uint32_t crc_other_blocks = state->tile->frame->ibc_hashmap_pos_to_hash[(yyy / UVG_HASHMAP_BLOCKSIZE)*state->tile->frame->ibc_hashmap_pos_to_hash_stride + xxx / UVG_HASHMAP_BLOCKSIZE]; + //uvg_crc32c_8x8(&state->tile->frame->source->y[yyy * state->tile->frame->source->stride + xxx],state->tile->frame->source->stride); + /* if (state->encoder_control->chroma_format != UVG_CSP_400) { crc_other_blocks ^= uvg_crc32c_4x4(&state->tile->frame->source->u[(yyy >> 1) * (state->tile->frame->source->stride>>1) + (xxx >> 1)],state->tile->frame->source->stride>>1); crc_other_blocks ^= uvg_crc32c_4x4(&state->tile->frame->source->v[(yyy >> 1) * (state->tile->frame->source->stride>>1) + (xxx >> 1)],state->tile->frame->source->stride>>1); - } + }*/ uvg_hashmap_node_t *result2 = uvg_hashmap_search(state->tile->frame->ibc_hashmap_row[ibc_buffer_row],crc_other_blocks); evaluations++; bool found_match = false; @@ -1180,19 +1184,22 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state, break; } } + double cost = *inter_cost, bits = *inter_bitcost; + vector2d_t mv = { best_mv_x, best_mv_y}; + + if (full_block && check_mv_cost(&info, mv_x, mv_y, &cost, &bits, &mv)) { - if (full_block) { - - double cost = get_ibc_mvd_coding_cost(state, &state->cabac, mv_x,mv_y) * state->lambda_sqrt; + //double cost = get_ibc_mvd_coding_cost(state, &state->cabac, mv_x,mv_y) * state->lambda_sqrt; + //cost += bool better_mv = cost < *inter_cost; if (better_mv) { best_mv_x = mv_x; best_mv_y = mv_y; *inter_cost = cost; - *inter_bitcost = 0.0; + *inter_bitcost = bits; fprintf(stderr, "Found best IBC!! %dx%d %dx%d: %d,%d\r\n", x,y, width,width, mv_x, mv_y); found_block = true; - break; + //break; } } } @@ -1256,12 +1263,13 @@ void uvg_search_cu_ibc(encoder_state_t * const state, *inter_cost = MAX_DOUBLE; *inter_bitcost = MAX_INT; // Quick hashmap search - uvg_search_hash_cu_ibc(state, - x, y, depth, - lcu, - inter_cost, - inter_bitcost); - return; + /* uvg_search_hash_cu_ibc( + state, + x, y, depth, + lcu, + inter_cost, + inter_bitcost); + return;*/ // Store information of L0, L1, and bipredictions. // Best cost will be left at MAX_DOUBLE if no valid CU is found. // These will be initialized by the following function. @@ -1327,7 +1335,7 @@ void uvg_search_cu_ibc(encoder_state_t * const state, uvg_inter_recon_cu(state, lcu, x, y, CU_WIDTH_FROM_DEPTH(depth), true, state->encoder_control->chroma_format != UVG_CSP_400); - if (*inter_cost < MAX_DOUBLE) { + if (*inter_cost < MAX_DOUBLE) { assert(fracmv_within_ibc_range(&info, cur_pu->inter.mv[0][0], cur_pu->inter.mv[0][1])); } } diff --git a/src/videoframe.h b/src/videoframe.h index 7f7e7581..0a7509c6 100644 --- a/src/videoframe.h +++ b/src/videoframe.h @@ -83,6 +83,8 @@ typedef struct videoframe uvg_pixel **ibc_buffer_u; //!< \brief Intra Block Copy buffer for each LCU row uvg_pixel **ibc_buffer_v; //!< \brief Intra Block Copy buffer for each LCU row uvg_hashmap_t **ibc_hashmap_row; //!< \brief Hashmap for IBC hash search for each LCU row + uint32_t *ibc_hashmap_pos_to_hash; //!< \brief Hashmap reverse search for position to hash + uint32_t ibc_hashmap_pos_to_hash_stride; //!< \brief Hashmap position to hash stride cu_info_t* hmvp_lut_ibc; //!< \brief Look-up table for HMVP in IBC, one for each LCU row uint8_t* hmvp_size_ibc; //!< \brief HMVP IBC LUT size