From 6f4d538f4fe12990b69f1dbd3f106ea6d83fca2f Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Mon, 24 Jul 2023 22:07:22 +0300 Subject: [PATCH] [ibc] Clean up the ibc search, utilize hash based starting points if ibc=2 --- src/encoder_state-ctors_dtors.c | 12 ++- src/encoderstate.c | 6 +- src/search.c | 2 +- src/search_ibc.c | 149 +++++++++++++++++++++----------- 4 files changed, 111 insertions(+), 58 deletions(-) diff --git a/src/encoder_state-ctors_dtors.c b/src/encoder_state-ctors_dtors.c index 526c3bc5..e951e27c 100644 --- a/src/encoder_state-ctors_dtors.c +++ b/src/encoder_state-ctors_dtors.c @@ -133,9 +133,11 @@ static int encoder_state_config_tile_init(encoder_state_t * const state, state->tile->frame->ibc_buffer_v = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu); state->tile->frame->ibc_hashmap_row = malloc(sizeof(uvg_hashmap_t) * state->tile->frame->height_in_lcu); - state->tile->frame->ibc_hashmap_pos_to_hash_stride = ((state->tile->frame->width+UVG_HASHMAP_BLOCKSIZE-1)/ UVG_HASHMAP_BLOCKSIZE); - state->tile->frame->ibc_hashmap_pos_to_hash = malloc(sizeof(uint32_t) * - ((state->tile->frame->height+UVG_HASHMAP_BLOCKSIZE-1)/ UVG_HASHMAP_BLOCKSIZE) * state->tile->frame->ibc_hashmap_pos_to_hash_stride); + if (state->encoder_control->cfg.ibc & 2) { + state->tile->frame->ibc_hashmap_pos_to_hash_stride = ((state->tile->frame->width+UVG_HASHMAP_BLOCKSIZE-1)/ UVG_HASHMAP_BLOCKSIZE); + state->tile->frame->ibc_hashmap_pos_to_hash = malloc(sizeof(uint32_t) * + ((state->tile->frame->height+UVG_HASHMAP_BLOCKSIZE-1)/ UVG_HASHMAP_BLOCKSIZE) * state->tile->frame->ibc_hashmap_pos_to_hash_stride); + } for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) { state->tile->frame->ibc_hashmap_row[i] = uvg_hashmap_create((LCU_WIDTH * IBC_BUFFER_WIDTH)>>2); @@ -224,7 +226,9 @@ static void encoder_state_config_tile_finalize(encoder_state_t * const state) { FREE_POINTER(state->tile->frame->hmvp_size_ibc); if (state->encoder_control->cfg.ibc) { - FREE_POINTER(state->tile->frame->ibc_hashmap_pos_to_hash); + if (state->encoder_control->cfg.ibc & 2) { + FREE_POINTER(state->tile->frame->ibc_hashmap_pos_to_hash); + } for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) { FREE_POINTER(state->tile->frame->ibc_buffer_y[i]); diff --git a/src/encoderstate.c b/src/encoderstate.c index dd60fd03..383f5fa2 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -755,14 +755,14 @@ static void encoder_state_worker_encode_lcu_search(void * opaque) if(state->encoder_control->cfg.ibc) memcpy(original_lut_ibc, &state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five], sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS); - if (state->encoder_control->cfg.ibc) { + if (state->encoder_control->cfg.ibc & 2) { videoframe_t * const frame = state->tile->frame; const uint32_t ibc_block_width = MIN(LCU_WIDTH, (state->tile->frame->width-lcu->position_px.x)); const uint32_t ibc_block_height = MIN(LCU_WIDTH, (state->tile->frame->height-lcu->position_px.y)); int items = 0; // Hash the current LCU to the IBC hashmap - for (int32_t xx = (lcu->position_px.x>8)?-7:0; xx < (int32_t)(ibc_block_width)-7; xx++) { - for (int32_t yy = 0; yy < (int32_t)(ibc_block_height)-7; yy++) { + for (int32_t xx = 0; xx < (int32_t)(ibc_block_width)-7; xx+=UVG_HASHMAP_BLOCKSIZE) { + for (int32_t yy = 0; yy < (int32_t)(ibc_block_height)-7; yy+=UVG_HASHMAP_BLOCKSIZE) { int cur_x = lcu->position_px.x + xx; int cur_y = lcu->position_px.y + yy; diff --git a/src/search.c b/src/search.c index e7b9b737..ec803c1b 100644 --- a/src/search.c +++ b/src/search.c @@ -1010,7 +1010,7 @@ static double search_cu( } // Simple IBC search - if (cost > 500 && can_use_intra //&& state->frame->slicetype == UVG_SLICE_I + if (can_use_intra //&& state->frame->slicetype == UVG_SLICE_I && state->encoder_control->cfg.ibc && cost > 1000 && cu_width > 4 diff --git a/src/search_ibc.c b/src/search_ibc.c index 6f8ff41b..ba5fffba 100644 --- a/src/search_ibc.c +++ b/src/search_ibc.c @@ -322,11 +322,60 @@ static void select_starting_point(ibc_search_info_t *info, extra_mv.x >>= INTERNAL_MV_PREC; extra_mv.y >>= INTERNAL_MV_PREC; + int origin_x = info->origin.x; + int origin_y = info->origin.y; + + int ibc_origin_x = origin_x / UVG_HASHMAP_BLOCKSIZE; + int ibc_origin_y = origin_y / UVG_HASHMAP_BLOCKSIZE; + // Check mv_in if it's not one of the merge candidates. if ((extra_mv.x != 0 || extra_mv.y != 0) && !mv_in_merge(info, extra_mv)) { check_mv_cost(info, extra_mv.x, extra_mv.y, best_cost, best_bits, best_mv); } + if (info->state->encoder_control->cfg.ibc & 2) { + int own_location = ((origin_x & 0xffff) << 16) | (origin_y & 0xffff); + + uint32_t ibc_buffer_row = origin_y / LCU_WIDTH; + + uint32_t crc = info->state->tile->frame->ibc_hashmap_pos_to_hash + [(origin_y / UVG_HASHMAP_BLOCKSIZE) * + info->state->tile->frame->ibc_hashmap_pos_to_hash_stride + + origin_x / UVG_HASHMAP_BLOCKSIZE]; + + uvg_hashmap_node_t *result = uvg_hashmap_search( + info->state->tile->frame->ibc_hashmap_row[ibc_buffer_row], crc); + + while (result != NULL) { + if (result->key == crc && result->value != own_location) { + int pos_x = result->value >> 16; + int pos_y = result->value & 0xffff; + int mv_x = pos_x - origin_x; + int mv_y = pos_y - origin_y; + + int ibc_pos_x = pos_x / UVG_HASHMAP_BLOCKSIZE; + int ibc_pos_y = pos_y / UVG_HASHMAP_BLOCKSIZE; + + bool full_block = true; + for (int ibc_x = 0; ibc_x < info->width / UVG_HASHMAP_BLOCKSIZE; ibc_x++) { + for (int ibc_y = 0; ibc_y < info->height / UVG_HASHMAP_BLOCKSIZE; ibc_y++) { + uint32_t neighbor_crc = info->state->tile->frame->ibc_hashmap_pos_to_hash + [(ibc_pos_y+ibc_y) * info->state->tile->frame->ibc_hashmap_pos_to_hash_stride + ibc_pos_x + ibc_x]; + uint32_t other_crc = info->state->tile->frame->ibc_hashmap_pos_to_hash + [(ibc_origin_y+ibc_y) * info->state->tile->frame->ibc_hashmap_pos_to_hash_stride + ibc_origin_x + ibc_x]; + if (other_crc != neighbor_crc) { + full_block = false; + break; + } + } + if (!full_block) break; + } + if (full_block) check_mv_cost(info, mv_x, mv_y, best_cost, best_bits, best_mv); + } + result = result->next; + } + } + // Go through candidates for (int32_t i = 0; i < info->num_merge_cand; ++i) { int32_t x = (info->merge_cand[i].mv[info->merge_cand[i].dir - 1][0] + (1 << (INTERNAL_MV_PREC - 1)) ) >> INTERNAL_MV_PREC; @@ -896,7 +945,7 @@ static void search_pu_ibc(encoder_state_t * const state, cur_pu->skipped = true; merge->size = 1; - merge->cost[0] = 0.0; // TODO: Check this + merge->cost[0] = (merge_idx )* state->lambda_sqrt; // TODO: Check this merge->bits[0] = merge_idx; // TODO: Check this merge->unit[0] = *cur_pu; return; @@ -1010,11 +1059,13 @@ static void search_pu_ibc(encoder_state_t * const state, if(cfg->rdo < 2) { int predmode_ctx; + + const int ibc_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.ibc_flag[0], 1) * 3; const int skip_contest = uvg_get_skip_context(x, y, lcu, NULL, &predmode_ctx); const double no_skip_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_skip_flag_model[skip_contest], 0); const double pred_mode_bits = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_pred_mode_model[predmode_ctx], 0); - const double total_bits = no_skip_flag + pred_mode_bits; + const double total_bits = ibc_flag + no_skip_flag + pred_mode_bits; if(amvp[0].size > 0) { const uint8_t best_key = amvp[0].keys[0]; amvp[0].bits[best_key] += total_bits; @@ -1083,8 +1134,8 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state, info.merge_cand, lcu); - *inter_cost = MAX_DOUBLE; - *inter_bitcost = MAX_DOUBLE; + double ibc_cost = MAX_DOUBLE; + double ibc_bitcost = MAX_DOUBLE; bool valid_mv = false; @@ -1113,22 +1164,10 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state, uint32_t ibc_buffer_row = yy / LCU_WIDTH; - //UVG_GET_TIME(&hashmap_start_temp); uint32_t crc = state->tile->frame->ibc_hashmap_pos_to_hash[(yy / UVG_HASHMAP_BLOCKSIZE)*state->tile->frame->ibc_hashmap_pos_to_hash_stride + xx / UVG_HASHMAP_BLOCKSIZE]; - //uvg_crc32c_8x8(&state->tile->frame->source->y[yy * state->tile->frame->source->stride + xx],state->tile->frame->source->stride); - /* if (state->encoder_control->chroma_format != UVG_CSP_400) { - crc ^= uvg_crc32c_4x4(&state->tile->frame->source->u[(yy >> 1) * (state->tile->frame->source->stride>>1) + (xx >> 1)],state->tile->frame->source->stride>>1); - crc ^= uvg_crc32c_4x4(&state->tile->frame->source->v[(yy >> 1) * (state->tile->frame->source->stride>>1) + (xx >> 1)],state->tile->frame->source->stride>>1); - }*/ - /* UVG_GET_TIME(&hashmap_end_temp); - crc_time += UVG_CLOCK_T_AS_DOUBLE(hashmap_end_temp) - - UVG_CLOCK_T_AS_DOUBLE(hashmap_start_temp);*/ uvg_hashmap_node_t *result = uvg_hashmap_search(state->tile->frame->ibc_hashmap_row[ibc_buffer_row],crc); - - /* UVG_GET_TIME(&hashmap_start_temp); - search_time += UVG_CLOCK_T_AS_DOUBLE(hashmap_start_temp) - - UVG_CLOCK_T_AS_DOUBLE(hashmap_end_temp);*/ + bool found_block = false; @@ -1150,33 +1189,15 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state, valid_mv = intmv_within_ibc_range(&info, mv_x, mv_y); if (valid_mv) { bool full_block = true; // Is the full block covered by the IBC? - for (int xxx = xx+UVG_HASHMAP_BLOCKSIZE; xxx < xx + width; xxx+=UVG_HASHMAP_BLOCKSIZE) { - for (int yyy = yy; yyy < yy + height; yyy += UVG_HASHMAP_BLOCKSIZE) { - uint32_t crc_other_blocks = state->tile->frame->ibc_hashmap_pos_to_hash[(yyy / UVG_HASHMAP_BLOCKSIZE)*state->tile->frame->ibc_hashmap_pos_to_hash_stride + xxx / UVG_HASHMAP_BLOCKSIZE]; - //uvg_crc32c_8x8(&state->tile->frame->source->y[yyy * state->tile->frame->source->stride + xxx],state->tile->frame->source->stride); - /* - if (state->encoder_control->chroma_format != UVG_CSP_400) { - crc_other_blocks ^= uvg_crc32c_4x4(&state->tile->frame->source->u[(yyy >> 1) * (state->tile->frame->source->stride>>1) + (xxx >> 1)],state->tile->frame->source->stride>>1); - crc_other_blocks ^= uvg_crc32c_4x4(&state->tile->frame->source->v[(yyy >> 1) * (state->tile->frame->source->stride>>1) + (xxx >> 1)],state->tile->frame->source->stride>>1); - }*/ - uvg_hashmap_node_t *result2 = uvg_hashmap_search(state->tile->frame->ibc_hashmap_row[ibc_buffer_row],crc_other_blocks); - evaluations++; - bool found_match = false; - while (result2) { - if (result2->key == crc_other_blocks) { - int pos_x_temp = (uint16_t)(result2->value >> 16); - int pos_y_temp = (uint16_t)(result2->value & 0xffff); - int mv_x_temp = pos_x_temp - xxx; - int mv_y_temp = pos_y_temp - yyy; + for (int offset_x = UVG_HASHMAP_BLOCKSIZE; offset_x < width; offset_x+=UVG_HASHMAP_BLOCKSIZE) { + for (int offset_y = 0; offset_y < height; offset_y += UVG_HASHMAP_BLOCKSIZE) { + uint32_t crc_other_blocks = state->tile->frame->ibc_hashmap_pos_to_hash[ + ((yy+offset_y) / UVG_HASHMAP_BLOCKSIZE)*state->tile->frame->ibc_hashmap_pos_to_hash_stride + (xx+offset_x) / UVG_HASHMAP_BLOCKSIZE]; - if (mv_x_temp == mv_x && mv_y_temp == mv_y) { - found_match = true; - break; - } - } - result2 = result2->next; - } - if (!found_match) { + uint32_t crc_neighbor = state->tile->frame->ibc_hashmap_pos_to_hash[((pos_y+offset_y) / UVG_HASHMAP_BLOCKSIZE)*state->tile->frame->ibc_hashmap_pos_to_hash_stride + (pos_x+offset_x) / UVG_HASHMAP_BLOCKSIZE]; + + bool found_match = false; + if (crc_neighbor != crc_other_blocks) { full_block = false; break; } @@ -1185,19 +1206,20 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state, break; } } - double cost = *inter_cost, bits = *inter_bitcost; - vector2d_t mv = { best_mv_x, best_mv_y}; - - if (full_block && check_mv_cost(&info, mv_x, mv_y, &cost, &bits, &mv)) { + + if (full_block) { + double cost = ibc_cost, bits = ibc_bitcost; + vector2d_t mv = { best_mv_x, best_mv_y}; + cost = calc_ibc_mvd_cost(state, mv_x, mv_y,INTERNAL_MV_PREC,info.mv_cand, info.merge_cand, info.num_merge_cand, NULL, &bits); //double cost = get_ibc_mvd_coding_cost(state, &state->cabac, mv_x,mv_y) * state->lambda_sqrt; //cost += - bool better_mv = cost < *inter_cost; + bool better_mv = cost < ibc_cost; if (better_mv) { best_mv_x = mv_x; best_mv_y = mv_y; - *inter_cost = cost; - *inter_bitcost = bits; + ibc_cost = cost; + ibc_bitcost = bits; fprintf(stderr, "Found best IBC!! %dx%d %dx%d: %d,%d\r\n", x,y, width,width, mv_x, mv_y); found_block = true; //break; @@ -1218,10 +1240,37 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state, if (!found_block) return; + *inter_cost = 2; + *inter_bitcost = ibc_bitcost; + + uint32_t merge_idx; + int8_t merged = 0; + uint32_t temp_bitcost = 0; + + cur_pu->inter.mv[0][0] = best_mv_x << INTERNAL_MV_PREC; cur_pu->inter.mv[0][1] = best_mv_y << INTERNAL_MV_PREC; + + // Check every candidate to find a match + for(merge_idx = 0; merge_idx < (uint32_t)info.num_merge_cand; merge_idx++) { + if (info.merge_cand[merge_idx].dir == 1 && info.merge_cand[merge_idx].mv[0][0] == cur_pu->inter.mv[0][0] && + info.merge_cand[merge_idx].mv[0][1] == cur_pu->inter.mv[0][1]) { + temp_bitcost += merge_idx; + merged = 1; + fprintf(stderr, "Merged!\r\n"); + break; + } + } + + cur_pu->merged = merged; + cur_pu->merge_idx = merge_idx; + cur_pu->skipped = merged; + const int ibc_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.ibc_flag[0], 1); + ibc_cost += ibc_flag * state->lambda_sqrt; + ibc_bitcost += ibc_flag; + uvg_inter_recon_cu( state, lcu,