diff --git a/CMakeLists.txt b/CMakeLists.txt index 99fa8a88..c0ec99c7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -145,6 +145,7 @@ target_include_directories(uvg266 PUBLIC src/strategies) file(GLOB LIB_SOURCES_STRATEGIES_AVX2 RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/avx2/*.c") file(GLOB LIB_SOURCES_STRATEGIES_SSE41 RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/sse41/*.c") +file(GLOB LIB_SOURCES_STRATEGIES_SSE42 RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/sse42/*.c") set(CLI_SOURCES "src/encmain.c" "src/cli.c" "src/cli.h" "src/yuv_io.c" "src/yuv_io.h") @@ -175,7 +176,8 @@ else() list(APPEND ALLOW_AVX2 "x86_64" "AMD64") if(${CMAKE_SYSTEM_PROCESSOR} IN_LIST ALLOW_AVX2) set_property( SOURCE ${LIB_SOURCES_STRATEGIES_AVX2} APPEND PROPERTY COMPILE_FLAGS "-mavx2 -mbmi -mpopcnt -mlzcnt -mbmi2" ) - set_property( SOURCE ${LIB_SOURCES_STRATEGIES_SSE41} APPEND PROPERTY COMPILE_FLAGS "-msse4.1" ) + set_property( SOURCE ${LIB_SOURCES_STRATEGIES_SSE41} APPEND PROPERTY COMPILE_FLAGS "-msse4.1" ) + set_property( SOURCE ${LIB_SOURCES_STRATEGIES_SSE42} APPEND PROPERTY COMPILE_FLAGS "-msse4.2" ) endif() set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) @@ -200,7 +202,7 @@ file(GLOB SOURCE_GROUP_CABAC RELATIVE ${PROJECT_SOURCE_DIR} "src/bitstream.*" "s file(GLOB SOURCE_GROUP_COMPRESSION RELATIVE ${PROJECT_SOURCE_DIR} "src/search*" "src/rdo.*" "src/fast_coeff*") file(GLOB SOURCE_GROUP_CONSTRAINT RELATIVE ${PROJECT_SOURCE_DIR} "src/constraint.*" "src/ml_*") file(GLOB SOURCE_GROUP_CONTROL RELATIVE ${PROJECT_SOURCE_DIR} "src/cfg.*" "src/encoder.*" "src/encoder_state-c*" "src/encoder_state-g*" "src/encoderstate*" "src/gop.*" "src/input_frame_buffer.*" "src/uvg266*" "src/rate_control.*" "src/mip_data.h") -file(GLOB SOURCE_GROUP_DATA_STRUCTURES RELATIVE ${PROJECT_SOURCE_DIR} "src/cu.*" "src/image.*" "src/imagelist.*" "src/videoframe.*") +file(GLOB SOURCE_GROUP_DATA_STRUCTURES RELATIVE ${PROJECT_SOURCE_DIR} "src/cu.*" "src/image.*" "src/imagelist.*" 
"src/videoframe.*" "src/hashmap.*") file(GLOB SOURCE_GROUP_EXTRAS RELATIVE ${PROJECT_SOURCE_DIR} "src/extras/*.h" "src/extras/*.c") file(GLOB_RECURSE SOURCE_GROUP_STRATEGIES RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/*.h" "src/strategies/*.c") file(GLOB SOURCE_GROUP_RECON RELATIVE ${PROJECT_SOURCE_DIR} "src/alf.*" "src/filter.*" "src/inter.*" "src/intra.*" "src/reshape.*" "src/sao.*" "src/scalinglist.*" "src/tables.*" "src/transform.*") diff --git a/src/cabac.h b/src/cabac.h index 6f7aaa78..be249ba2 100644 --- a/src/cabac.h +++ b/src/cabac.h @@ -122,6 +122,7 @@ typedef struct cabac_ctx_t transform_skip_gt2[5]; cabac_ctx_t cclm_flag; cabac_ctx_t cclm_model; + cabac_ctx_t ibc_flag[3]; } ctx; } cabac_data_t; diff --git a/src/cfg.c b/src/cfg.c index 843729a6..cafadcb2 100644 --- a/src/cfg.c +++ b/src/cfg.c @@ -222,6 +222,9 @@ int uvg_config_init(uvg_config *cfg) cfg->dual_tree = 0; cfg->intra_rough_search_levels = 2; + + cfg->ibc = 0; + return 1; } @@ -1479,7 +1482,14 @@ int uvg_config_parse(uvg_config *cfg, const char *name, const char *value) else if OPT("intra-rough-granularity") { cfg->intra_rough_search_levels = atoi(value); } - else { + else if OPT ("ibc") { + int ibc_value = atoi(value); + if (ibc_value < 0 || ibc_value > 2) { + fprintf(stderr, "ibc supports only range from 0 to 2\n"); + return 0; + } + cfg->ibc = (uint8_t)ibc_value; + } else { return 0; } #undef OPT diff --git a/src/cli.c b/src/cli.c index 6d0c13f6..fa6ee6df 100644 --- a/src/cli.c +++ b/src/cli.c @@ -192,6 +192,7 @@ static const struct option long_options[] = { { "no-dual-tree", no_argument, NULL, 0 }, { "cabac-debug-file", required_argument, NULL, 0 }, { "intra-rough-granularity",required_argument, NULL, 0 }, + { "ibc", required_argument, NULL, 0 }, {0, 0, 0, 0} }; diff --git a/src/context.c b/src/context.c index 8e042cc2..83bd5502 100644 --- a/src/context.c +++ b/src/context.c @@ -423,6 +423,13 @@ static const uint8_t INIT_CCLM_MODEL[4] = { 9, }; +static const uint8_t INIT_IBC_FLAG[4][3] 
= { + { 0, 43, 45, }, + { 0, 57, 44, }, + { 17, 42, 36, }, + { 1, 5, 8, }, +}; + /* static const uint16_t g_inistateToCount[128] = { 614, 647, 681, 718, 756, 797, 839, 884, 932, 982, 1034, 1089, 1148, 1209, 1274, 1342, @@ -514,6 +521,7 @@ void uvg_init_contexts(encoder_state_t *state, int8_t QP, int8_t slice) uvg_ctx_init(&cabac->ctx.lfnst_idx_model[i], QP, INIT_LFNST_IDX[slice][i], INIT_LFNST_IDX[3][i]); uvg_ctx_init(&cabac->ctx.transform_skip_sig_coeff_group[i], QP, INIT_TRANSFORM_SKIP_SIG_COEFF_GROUP[slice][i], INIT_TRANSFORM_SKIP_SIG_COEFF_GROUP[3][i]); uvg_ctx_init(&cabac->ctx.transform_skip_sig[i], QP, INIT_TRANSFORM_SKIP_SIG[slice][i], INIT_TRANSFORM_SKIP_SIG[3][i]); + uvg_ctx_init(&cabac->ctx.ibc_flag[i], QP, INIT_IBC_FLAG[slice][i], INIT_IBC_FLAG[3][i]); } for (i = 0; i < 4; i++) { diff --git a/src/cu.h b/src/cu.h index 74ff25a6..ddddaf55 100644 --- a/src/cu.h +++ b/src/cu.h @@ -52,6 +52,7 @@ typedef enum { CU_INTRA = 1, CU_INTER = 2, CU_PCM = 3, + CU_IBC = 4, } cu_type_t; typedef enum { @@ -146,7 +147,7 @@ enum uvg_tree_type { */ typedef struct { - uint8_t type : 2; //!< \brief block type, one of cu_type_t values + uint8_t type : 3; //!< \brief block type, one of cu_type_t values uint8_t depth : 3; //!< \brief depth / size of this block uint8_t part_size : 3; //!< \brief partition mode, one of part_mode_t values uint8_t tr_depth : 3; //!< \brief transform depth diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index 0552e211..7a3f401c 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -834,7 +834,7 @@ int uvg_encode_inter_prediction_unit(encoder_state_t * const state, if (cur_cu->inter.mv_dir & 2) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVMERGE_L1, abs_x, abs_y, width, height, cur_cu->inter.mv[1][0], cur_cu->inter.mv[1][1]); #endif } else { - if (state->frame->slicetype == UVG_SLICE_B) { + if (state->frame->slicetype == UVG_SLICE_B && cur_cu->type != CU_IBC) { // Code Inter Dir uint8_t inter_dir = 
cur_cu->inter.mv_dir; @@ -860,7 +860,7 @@ int uvg_encode_inter_prediction_unit(encoder_state_t * const state, // size of the current reference index list (L0/L1) uint8_t ref_LX_size = state->frame->ref_LX_size[ref_list_idx]; - if (ref_LX_size > 1) { + if (ref_LX_size > 1 && cur_cu->type != CU_IBC) { // parseRefFrmIdx int32_t ref_frame = cur_cu->inter.mv_ref[ref_list_idx]; @@ -906,7 +906,7 @@ int uvg_encode_inter_prediction_unit(encoder_state_t * const state, mv_t mvd_hor = cur_cu->inter.mv[ref_list_idx][0] - mv_cand[cu_mv_cand][0]; mv_t mvd_ver = cur_cu->inter.mv[ref_list_idx][1] - mv_cand[cu_mv_cand][1]; - uvg_change_precision(INTERNAL_MV_PREC, uvg_g_imv_to_prec[UVG_IMV_OFF], &mvd_hor, &mvd_ver); + uvg_change_precision(INTERNAL_MV_PREC, uvg_g_imv_to_prec[(cur_cu->type == CU_IBC)?UVG_IMV_FPEL:UVG_IMV_OFF], &mvd_hor, &mvd_ver); uvg_encode_mvd(state, cabac, mvd_hor, mvd_ver, bits_out); non_zero_mvd |= (mvd_hor != 0) || (mvd_ver != 0); @@ -1262,95 +1262,6 @@ void uvg_encode_intra_luma_coding_unit(const encoder_state_t * const state, if (cabac->only_count && bits_out) *bits_out += bits; } -/** -static void encode_part_mode(encoder_state_t * const state, - cabac_data_t * const cabac, - const cu_info_t * const cur_cu, - int depth) -{ - // Binarization from Table 9-34 of the HEVC spec: - // - // | log2CbSize > | log2CbSize == - // | MinCbLog2SizeY | MinCbLog2SizeY - // -------+-------+----------+---------+-----------+---------- - // pred | part | AMP | AMP | | - // mode | mode | disabled | enabled | size == 8 | size > 8 - // -------+-------+----------+---------+-----------+---------- - // intra | 2Nx2N | - - | 1 1 - // | NxN | - - | 0 0 - // -------+-------+--------------------+---------------------- - // inter | 2Nx2N | 1 1 | 1 1 - // | 2NxN | 01 011 | 01 01 - // | Nx2N | 00 001 | 00 001 - // | NxN | - - | - 000 - // | 2NxnU | - 0100 | - - - // | 2NxnD | - 0101 | - - - // | nLx2N | - 0000 | - - - // | nRx2N | - 0001 | - - - // 
-------+-------+--------------------+---------------------- - // - // - // Context indices from Table 9-37 of the HEVC spec: - // - // binIdx - // | 0 1 2 3 - // ------------------------------+------------------ - // log2CbSize == MinCbLog2SizeY | 0 1 2 bypass - // log2CbSize > MinCbLog2SizeY | 0 1 3 bypass - // ------------------------------+------------------ - double bits = 0; - if (cur_cu->type == CU_INTRA) { - if (depth == MAX_DEPTH) { - cabac->cur_ctx = &(cabac->ctx.part_size_model[0]); - if (cur_cu->part_size == SIZE_2Nx2N) { - CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[0]), 1, bits, "part_mode 2Nx2N"); - } else { - CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[0]), 0, bits, "part_mode NxN"); - } - } - } else { - - cabac->cur_ctx = &(cabac->ctx.part_size_model[0]); - if (cur_cu->part_size == SIZE_2Nx2N) { - CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[0]), 1, bits, "part_mode 2Nx2N"); - return bits; - } - CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[0]), 0, bits, "part_mode split"); - - cabac->cur_ctx = &(cabac->ctx.part_size_model[1]); - if (cur_cu->part_size == SIZE_2NxN || - cur_cu->part_size == SIZE_2NxnU || - cur_cu->part_size == SIZE_2NxnD) { - CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[1]), 1, bits, "part_mode vertical"); - } else { - CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[1]), 0, bits, "part_mode horizontal"); - } - - if (state->encoder_control->cfg.amp_enable && depth < MAX_DEPTH) { - cabac->cur_ctx = &(cabac->ctx.part_size_model[3]); - - if (cur_cu->part_size == SIZE_2NxN || - cur_cu->part_size == SIZE_Nx2N) { - CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[3]), 1, bits, "part_mode SMP"); - return bits; - } - CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[3]), 0, bits, "part_mode AMP"); - - if (cur_cu->part_size == SIZE_2NxnU || - cur_cu->part_size == SIZE_nLx2N) { - CABAC_BINS_EP(cabac, 0, 1, "part_mode AMP"); - if(cabac->only_count) bits += 1; - } else { - 
CABAC_BINS_EP(cabac, 1, 1, "part_mode AMP"); - if(cabac->only_count) bits += 1; - } - } - } - return bits; -} -**/ - - bool uvg_write_split_flag( const encoder_state_t * const state, cabac_data_t* cabac, @@ -1547,7 +1458,7 @@ void uvg_encode_coding_tree( // CABAC_BIN(cabac, 0, "split_transform_flag"); } - DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_CU_TYPE, abs_x, abs_y, cu_width, cu_width, (cur_cu->type == CU_INTRA)?0:1); + DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_CU_TYPE, abs_x, abs_y, cu_width, cu_width, cur_cu->type-1); if (ctrl->cfg.lossless) { cabac->cur_ctx = &cabac->ctx.cu_transquant_bypass; @@ -1555,7 +1466,7 @@ void uvg_encode_coding_tree( } // Encode skip flag - if (state->frame->slicetype != UVG_SLICE_I && cu_width != 4) { + if ((state->frame->slicetype != UVG_SLICE_I || state->encoder_control->cfg.ibc)) { int8_t ctx_skip = 0; @@ -1565,11 +1476,22 @@ void uvg_encode_coding_tree( if (above_cu && above_cu->skipped) { ctx_skip++; } - - cabac->cur_ctx = &(cabac->ctx.cu_skip_flag_model[ctx_skip]); - CABAC_BIN(cabac, cur_cu->skipped, "SkipFlag"); + if (cu_width > 4 || state->encoder_control->cfg.ibc) { + cabac->cur_ctx = &(cabac->ctx.cu_skip_flag_model[ctx_skip]); + CABAC_BIN(cabac, cur_cu->skipped, "SkipFlag"); + } if (cur_cu->skipped) { + + if (state->encoder_control->cfg.ibc && state->frame->slicetype != UVG_SLICE_I) + { // ToDo: Only for luma channel + // ToDo: Disable for blocks over 64x64 pixels + int8_t ctx_ibc = 0; + if (left_cu && left_cu->type == CU_IBC) ctx_ibc++; + if (above_cu && above_cu->type == CU_IBC) ctx_ibc++; + cabac->cur_ctx = &(cabac->ctx.ibc_flag[ctx_ibc]); + CABAC_BIN(cabac, (cur_cu->type == CU_IBC), "IBCFlag"); + } DBG_PRINT_MV(state, x, y, (uint32_t)cu_width, (uint32_t)cu_width, cur_cu); uvg_hmvp_add_mv(state, x, y, (uint32_t)cu_width, (uint32_t)cu_width, cur_cu); int16_t num_cand = state->encoder_control->cfg.max_merge; @@ -1597,6 +1519,15 @@ void uvg_encode_coding_tree( } // Prediction mode + if ((state->frame->slicetype 
== UVG_SLICE_I || cu_width == 4) && state->encoder_control->cfg.ibc) { // ToDo: Only for luma channel + // ToDo: Disable for blocks over 64x64 pixels + int8_t ctx_ibc = 0; + if (left_cu && left_cu->type == CU_IBC) ctx_ibc++; + if (above_cu && above_cu->type == CU_IBC) ctx_ibc++; + cabac->cur_ctx = &(cabac->ctx.ibc_flag[ctx_ibc]); + CABAC_BIN(cabac, (cur_cu->type == CU_IBC), "IBCFlag"); + } + if (state->frame->slicetype != UVG_SLICE_I && cu_width != 4) { int8_t ctx_predmode = 0; @@ -1607,6 +1538,15 @@ void uvg_encode_coding_tree( cabac->cur_ctx = &(cabac->ctx.cu_pred_mode_model[ctx_predmode]); CABAC_BIN(cabac, (cur_cu->type == CU_INTRA), "PredMode"); + + // We need IBC flag if the mode is signalled as Inter + if (state->encoder_control->cfg.ibc && cur_cu->type != CU_INTRA) { + int8_t ctx_ibc = 0; + if (left_cu && left_cu->type == CU_IBC) ctx_ibc++; + if (above_cu && above_cu->type == CU_IBC) ctx_ibc++; + cabac->cur_ctx = &(cabac->ctx.ibc_flag[ctx_ibc]); + CABAC_BIN(cabac, (cur_cu->type == CU_IBC), "IBCFlag"); + } } // part_mode @@ -1657,7 +1597,7 @@ void uvg_encode_coding_tree( } else #endif - if (cur_cu->type == CU_INTER) { + if (cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) { uint8_t imv_mode = UVG_IMV_OFF; const int num_pu = uvg_part_mode_num_parts[cur_cu->part_size]; @@ -1679,10 +1619,10 @@ void uvg_encode_coding_tree( // 0 = off, 1 = fullpel, 2 = 4-pel, 3 = half-pel if (ctrl->cfg.amvr && non_zero_mvd) { cabac->cur_ctx = &(cabac->ctx.imv_flag[0]); - CABAC_BIN(cabac, (imv_mode > UVG_IMV_OFF), "imv_flag"); + if(cur_cu->type != CU_IBC) CABAC_BIN(cabac, (imv_mode > UVG_IMV_OFF), "imv_flag"); if (imv_mode > UVG_IMV_OFF) { cabac->cur_ctx = &(cabac->ctx.imv_flag[4]); - CABAC_BIN(cabac, (imv_mode < UVG_IMV_HPEL), "imv_flag"); + if(cur_cu->type != CU_IBC) CABAC_BIN(cabac, (imv_mode < UVG_IMV_HPEL), "imv_flag"); if (imv_mode < UVG_IMV_HPEL) { cabac->cur_ctx = &(cabac->ctx.imv_flag[1]); CABAC_BIN(cabac, (imv_mode > UVG_IMV_FPEL), "imv_flag"); // 1 indicates 4PEL, 0 
FPEL @@ -1860,7 +1800,7 @@ double uvg_mock_encode_coding_unit( CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_pred_mode_model[ctx_predmode]), (cur_cu->type == CU_INTRA), bits, "PredMode"); } - if (cur_cu->type == CU_INTER) { + if (cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) { const uint8_t imv_mode = UVG_IMV_OFF; const int non_zero_mvd = uvg_encode_inter_prediction_unit(state, cabac, cur_cu, x, y, cu_width, cu_width, depth, lcu, &bits); if (ctrl->cfg.amvr && non_zero_mvd) { @@ -1897,35 +1837,38 @@ void uvg_encode_mvd(encoder_state_t * const state, const int8_t ver_abs_gr0 = mvd_ver != 0; const uint32_t mvd_hor_abs = abs(mvd_hor); const uint32_t mvd_ver_abs = abs(mvd_ver); + double temp_bits_out = 0.0; cabac->cur_ctx = &cabac->ctx.cu_mvd_model[0]; - CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[0], (mvd_hor != 0), *bits_out, "abs_mvd_greater0_flag_hor"); - CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[0], (mvd_ver != 0), *bits_out, "abs_mvd_greater0_flag_ver"); + CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[0], (mvd_hor != 0), temp_bits_out, "abs_mvd_greater0_flag_hor"); + CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[0], (mvd_ver != 0), temp_bits_out, "abs_mvd_greater0_flag_ver"); cabac->cur_ctx = &cabac->ctx.cu_mvd_model[1]; if (hor_abs_gr0) { - CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[1], (mvd_hor_abs>1), *bits_out,"abs_mvd_greater1_flag_hor"); + CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[1], (mvd_hor_abs>1), temp_bits_out,"abs_mvd_greater1_flag_hor"); } if (ver_abs_gr0) { - CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[1], (mvd_ver_abs>1), *bits_out, "abs_mvd_greater1_flag_ver"); + CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[1], (mvd_ver_abs>1), temp_bits_out, "abs_mvd_greater1_flag_ver"); } if (hor_abs_gr0) { if (mvd_hor_abs > 1) { uint32_t bits = uvg_cabac_write_ep_ex_golomb(state, cabac, mvd_hor_abs - 2, 1); - if(cabac->only_count) *bits_out += bits; + if(cabac->only_count) temp_bits_out += bits; } 
uint32_t mvd_hor_sign = (mvd_hor > 0) ? 0 : 1; CABAC_BIN_EP(cabac, mvd_hor_sign, "mvd_sign_flag_hor"); - if (cabac->only_count) *bits_out += 1; + if (cabac->only_count) temp_bits_out += 1; } if (ver_abs_gr0) { if (mvd_ver_abs > 1) { uint32_t bits = uvg_cabac_write_ep_ex_golomb(state, cabac, mvd_ver_abs - 2, 1); - if (cabac->only_count) *bits_out += bits; + if (cabac->only_count) temp_bits_out += bits; } uint32_t mvd_ver_sign = mvd_ver > 0 ? 0 : 1; CABAC_BIN_EP(cabac, mvd_ver_sign, "mvd_sign_flag_ver"); - if (cabac->only_count) *bits_out += 1; + if (cabac->only_count) temp_bits_out += 1; } + + if(bits_out) *bits_out += temp_bits_out; /* accumulate: callers pass a running bit total, as the removed "*bits_out +=" lines did */ } diff --git a/src/encoder_state-bitstream.c b/src/encoder_state-bitstream.c index 3ef5c64e..832969fc 100644 --- a/src/encoder_state-bitstream.c +++ b/src/encoder_state-bitstream.c @@ -154,9 +154,8 @@ static void encoder_state_write_bitstream_PTL(bitstream_t *stream, // end Profile Tier //uint8_t level = state->encoder_control->cfg.level; - // ToDo: level hardcoded to 5.2 - WRITE_U(stream, 86, 8, "general_level_idc"); - + // ToDo: level hardcoded to 6.3 + WRITE_U(stream, 105, 8, "general_level_idc"); WRITE_U(stream, 0, 1, "ptl_frame_only_constraint_flag"); WRITE_U(stream, 0, 1, "ptl_multilayer_enabled_flag"); @@ -694,7 +693,11 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream, WRITE_UE(stream, 0, "sps_internal_bit_depth_minus_input_bit_depth"); } - WRITE_U(stream, 0, 1, "sps_ibc_enabled_flag"); + WRITE_U(stream, encoder->cfg.ibc > 0 ? 
1 : 0, 1, "sps_ibc_enabled_flag"); + + if (encoder->cfg.ibc) { + WRITE_UE(stream,6 - IBC_MRG_MAX_NUM_CANDS, "sps_six_minus_max_num_ibc_merge_cand"); + } #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET // if(!no_ladf_constraint_flag) diff --git a/src/encoder_state-ctors_dtors.c b/src/encoder_state-ctors_dtors.c index bb1300af..e951e27c 100644 --- a/src/encoder_state-ctors_dtors.c +++ b/src/encoder_state-ctors_dtors.c @@ -122,6 +122,31 @@ static int encoder_state_config_tile_init(encoder_state_t * const state, state->tile->frame->hmvp_lut = malloc(sizeof(cu_info_t) * height_in_lcu * MAX_NUM_HMVP_CANDS); state->tile->frame->hmvp_size = calloc(1, sizeof(uint8_t) * height_in_lcu); + // Allocate the HMVP for IBC in any case + state->tile->frame->hmvp_lut_ibc = malloc(sizeof(cu_info_t) * height_in_lcu * MAX_NUM_HMVP_CANDS); + state->tile->frame->hmvp_size_ibc = calloc(1, sizeof(uint8_t) * height_in_lcu); + + if (state->encoder_control->cfg.ibc) { + // Allocate pixel buffer for each LCU row + state->tile->frame->ibc_buffer_y = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu); + state->tile->frame->ibc_buffer_u = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu); + state->tile->frame->ibc_buffer_v = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu); + state->tile->frame->ibc_hashmap_row = malloc(sizeof(uvg_hashmap_t) * state->tile->frame->height_in_lcu); + + if (state->encoder_control->cfg.ibc & 2) { + state->tile->frame->ibc_hashmap_pos_to_hash_stride = ((state->tile->frame->width+UVG_HASHMAP_BLOCKSIZE-1)/ UVG_HASHMAP_BLOCKSIZE); + state->tile->frame->ibc_hashmap_pos_to_hash = malloc(sizeof(uint32_t) * + ((state->tile->frame->height+UVG_HASHMAP_BLOCKSIZE-1)/ UVG_HASHMAP_BLOCKSIZE) * state->tile->frame->ibc_hashmap_pos_to_hash_stride); + } + + for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) { + state->tile->frame->ibc_hashmap_row[i] = uvg_hashmap_create((LCU_WIDTH * IBC_BUFFER_WIDTH)>>2); + 
state->tile->frame->ibc_buffer_y[i] = (uvg_pixel*)malloc(IBC_BUFFER_SIZE * 3); // ToDo: we don't need this much, but it would also support 4:4:4 + state->tile->frame->ibc_buffer_u[i] = &state->tile->frame->ibc_buffer_y[i][IBC_BUFFER_SIZE]; + state->tile->frame->ibc_buffer_v[i] = &state->tile->frame->ibc_buffer_y[i][IBC_BUFFER_SIZE * 2]; + } + } + state->tile->frame->rec = NULL; state->tile->frame->source = NULL; @@ -197,6 +222,24 @@ static void encoder_state_config_tile_finalize(encoder_state_t * const state) { FREE_POINTER(state->tile->frame->hmvp_lut); FREE_POINTER(state->tile->frame->hmvp_size); + FREE_POINTER(state->tile->frame->hmvp_lut_ibc); + FREE_POINTER(state->tile->frame->hmvp_size_ibc); + + if (state->encoder_control->cfg.ibc) { + if (state->encoder_control->cfg.ibc & 2) { + FREE_POINTER(state->tile->frame->ibc_hashmap_pos_to_hash); + } + + for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) { + FREE_POINTER(state->tile->frame->ibc_buffer_y[i]); + uvg_hashmap_free(state->tile->frame->ibc_hashmap_row[i]); + } + FREE_POINTER(state->tile->frame->ibc_hashmap_row); + FREE_POINTER(state->tile->frame->ibc_buffer_y); + FREE_POINTER(state->tile->frame->ibc_buffer_u); + FREE_POINTER(state->tile->frame->ibc_buffer_v); + } + uvg_videoframe_free(state->tile->frame); state->tile->frame = NULL; FREE_POINTER(state->tile->wf_jobs); diff --git a/src/encoderstate.c b/src/encoderstate.c index 9bed1b86..cdadccf4 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -45,17 +45,20 @@ #include "encode_coding_tree.h" #include "encoder_state-bitstream.h" #include "filter.h" +#include "hashmap.h" #include "image.h" #include "rate_control.h" #include "sao.h" #include "search.h" #include "tables.h" +#include "threads.h" #include "threadqueue.h" #include "alf.h" #include "reshape.h" #include "strategies/strategies-picture.h" + /** * \brief Strength of QP adjustments when using adaptive QP for 360 video. 
* @@ -250,6 +253,58 @@ static void encoder_state_recdata_to_bufs(encoder_state_t * const state, frame->rec->stride / 2, 1); } } + + // Fill IBC buffer + if (state->encoder_control->cfg.ibc) { + + uint32_t ibc_buffer_pos_x = lcu->position_px.x + LCU_WIDTH >= IBC_BUFFER_WIDTH ? IBC_BUFFER_WIDTH - LCU_WIDTH: lcu->position_px.x; + uint32_t ibc_buffer_pos_x_c = ibc_buffer_pos_x >> 1; + uint32_t ibc_buffer_row = lcu->position_px.y / LCU_WIDTH; + + // If the buffer is full shift all the lines LCU_WIDTH left + if (lcu->position_px.x + LCU_WIDTH > IBC_BUFFER_WIDTH) { + for (uint32_t i = 0; i < LCU_WIDTH; i++) { + memmove( + &frame->ibc_buffer_y[ibc_buffer_row][i * IBC_BUFFER_WIDTH], + &frame->ibc_buffer_y[ibc_buffer_row][i * IBC_BUFFER_WIDTH + LCU_WIDTH], + sizeof(uvg_pixel) * (IBC_BUFFER_WIDTH - LCU_WIDTH)); + } + if (state->encoder_control->chroma_format != UVG_CSP_400) { + for (uint32_t i = 0; i < LCU_WIDTH_C; i++) { + memmove( + &frame->ibc_buffer_u[ibc_buffer_row][i * IBC_BUFFER_WIDTH_C], + &frame->ibc_buffer_u[ibc_buffer_row] + [i * IBC_BUFFER_WIDTH_C + LCU_WIDTH_C], + sizeof(uvg_pixel) * (IBC_BUFFER_WIDTH_C - LCU_WIDTH_C)); + memmove( + &frame->ibc_buffer_v[ibc_buffer_row][i * IBC_BUFFER_WIDTH_C], + &frame->ibc_buffer_v[ibc_buffer_row] + [i * IBC_BUFFER_WIDTH_C + LCU_WIDTH_C], + sizeof(uvg_pixel) * (IBC_BUFFER_WIDTH_C - LCU_WIDTH_C)); + } + } + } + + const uint32_t ibc_block_width = MIN(LCU_WIDTH, (state->tile->frame->width-lcu->position_px.x)); + const uint32_t ibc_block_height = MIN(LCU_WIDTH, (state->tile->frame->height-lcu->position_px.y)); + + uvg_pixels_blit(&frame->rec->y[lcu->position_px.y * frame->rec->stride + lcu->position_px.x], + &frame->ibc_buffer_y[ibc_buffer_row][ibc_buffer_pos_x], + ibc_block_width, ibc_block_height, + frame->rec->stride, IBC_BUFFER_WIDTH); + + if (state->encoder_control->chroma_format != UVG_CSP_400) { + uvg_pixels_blit(&frame->rec->u[(lcu->position_px.y >> 1) * (frame->rec->stride >> 1) + (lcu->position_px.x >> 1)], + 
&frame->ibc_buffer_u[ibc_buffer_row][ibc_buffer_pos_x_c], + ibc_block_width>>1, ibc_block_height>>1, + frame->rec->stride >> 1, IBC_BUFFER_WIDTH_C); + uvg_pixels_blit(&frame->rec->v[(lcu->position_px.y >> 1) * (frame->rec->stride >> 1) + (lcu->position_px.x >> 1)], + &frame->ibc_buffer_v[ibc_buffer_row][ibc_buffer_pos_x_c], + ibc_block_width>>1, ibc_block_height>>1, + frame->rec->stride >> 1, IBC_BUFFER_WIDTH_C); + + } + } } @@ -692,9 +747,53 @@ static void encoder_state_worker_encode_lcu_search(void * opaque) cu_info_t original_lut[MAX_NUM_HMVP_CANDS]; uint8_t original_lut_size = state->tile->frame->hmvp_size[ctu_row]; + cu_info_t original_lut_ibc[MAX_NUM_HMVP_CANDS]; + uint8_t original_lut_size_ibc = state->tile->frame->hmvp_size_ibc[ctu_row]; // Store original HMVP lut before search and restore after, since it's modified if(state->frame->slicetype != UVG_SLICE_I) memcpy(original_lut, &state->tile->frame->hmvp_lut[ctu_row_mul_five], sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS); + if(state->encoder_control->cfg.ibc) memcpy(original_lut_ibc, &state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five], sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS); + + + if (state->encoder_control->cfg.ibc & 2) { + videoframe_t * const frame = state->tile->frame; + const uint32_t ibc_block_width = MIN(LCU_WIDTH, (state->tile->frame->width-lcu->position_px.x)); + const uint32_t ibc_block_height = MIN(LCU_WIDTH, (state->tile->frame->height-lcu->position_px.y)); + int items = 0; + // Hash the current LCU to the IBC hashmap + for (int32_t xx = 0; xx < (int32_t)(ibc_block_width)-7; xx+=UVG_HASHMAP_BLOCKSIZE>>1) { + for (int32_t yy = 0; yy < (int32_t)(ibc_block_height)-7; yy+=UVG_HASHMAP_BLOCKSIZE>>1) { + int cur_x = lcu->position_px.x + xx; + int cur_y = lcu->position_px.y + yy; + + // Skip blocks that seem to be the same value for the whole block + uint64_t first_line = + *(uint64_t *)&frame->source->y[cur_y * frame->source->stride + cur_x]; + bool same_data = true; + for (int y_temp = 1; y_temp < 8; 
y_temp++) { + if (*(uint64_t *)&frame->source->y[(cur_y+y_temp) * frame->source->stride + cur_x] != first_line) { + same_data = false; + break; + } + } + + if (!same_data || (xx % UVG_HASHMAP_BLOCKSIZE == 0 && yy % UVG_HASHMAP_BLOCKSIZE == 0)) { + uint32_t crc = uvg_crc32c_8x8(&frame->source->y[cur_y * frame->source->stride + cur_x],frame->source->stride); + if (state->encoder_control->chroma_format != UVG_CSP_400) { + crc += uvg_crc32c_4x4(&frame->source->u[(cur_y>>1) * (frame->source->stride>>1) + (cur_x>>1)],frame->source->stride>>1); + crc += uvg_crc32c_4x4(&frame->source->v[(cur_y>>1) * (frame->source->stride>>1) + (cur_x>>1)],frame->source->stride>>1); + } + if (xx % UVG_HASHMAP_BLOCKSIZE == 0 && yy % UVG_HASHMAP_BLOCKSIZE == 0) { + state->tile->frame->ibc_hashmap_pos_to_hash[(cur_y / UVG_HASHMAP_BLOCKSIZE)*state->tile->frame->ibc_hashmap_pos_to_hash_stride + cur_x / UVG_HASHMAP_BLOCKSIZE] = crc; + } + uvg_hashmap_insert(frame->ibc_hashmap_row[ctu_row], crc, ((cur_x&0xffff)<<16) | (cur_y&0xffff)); + items++; + } + } + } + } + //fprintf(stderr, "Inserted %d items to %dx%d at %dx%d\r\n", items, ibc_block_width, ibc_block_height, lcu->position_px.x, lcu->position_px.y); + //This part doesn't write to bitstream, it's only search, deblock and sao uvg_search_lcu(state, lcu->position_px.x, lcu->position_px.y, state->tile->hor_buf_search, state->tile->ver_buf_search, lcu->coeff); @@ -703,6 +802,10 @@ static void encoder_state_worker_encode_lcu_search(void * opaque) memcpy(&state->tile->frame->hmvp_lut[ctu_row_mul_five], original_lut, sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS); state->tile->frame->hmvp_size[ctu_row] = original_lut_size; } + if (state->encoder_control->cfg.ibc) { + memcpy(&state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five], original_lut_ibc, sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS); + state->tile->frame->hmvp_size_ibc[ctu_row] = original_lut_size_ibc; + } encoder_state_recdata_to_bufs(state, lcu, state->tile->hor_buf_search, state->tile->ver_buf_search); 
@@ -899,8 +1002,13 @@ static void encoder_state_encode_leaf(encoder_state_t * const state) bool wavefront = state->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW; // Clear hmvp lut size before each leaf - if (!wavefront) memset(state->tile->frame->hmvp_size, 0, sizeof(uint8_t) * state->tile->frame->height_in_lcu); - else state->tile->frame->hmvp_size[state->wfrow->lcu_offset_y] = 0; + if (!wavefront) { + memset(state->tile->frame->hmvp_size, 0, sizeof(uint8_t) * state->tile->frame->height_in_lcu); + if(cfg->ibc) memset(state->tile->frame->hmvp_size_ibc, 0, sizeof(uint8_t) * state->tile->frame->height_in_lcu); + } else { + state->tile->frame->hmvp_size[state->wfrow->lcu_offset_y] = 0; + state->tile->frame->hmvp_size_ibc[state->wfrow->lcu_offset_y] = 0; + } bool use_parallel_encoding = (wavefront && state->parent->children[1].encoder_control); if (!use_parallel_encoding) { @@ -1644,6 +1752,7 @@ static void encoder_state_init_new_frame(encoder_state_t * const state, uvg_pict if (!state->encoder_control->tiles_enable) { memset(state->tile->frame->hmvp_size, 0, sizeof(uint8_t) * state->tile->frame->height_in_lcu); + memset(state->tile->frame->hmvp_size_ibc, 0, sizeof(uint8_t) * state->tile->frame->height_in_lcu); } // ROI / delta QP maps diff --git a/src/encoderstate.h b/src/encoderstate.h index 40e1dc24..55d265e3 100644 --- a/src/encoderstate.h +++ b/src/encoderstate.h @@ -192,9 +192,6 @@ typedef struct encoder_state_config_frame_t { double *c_para; double *k_para; - - cu_info_t* hmvp_lut; //!< \brief Look-up table for HMVP, one for each LCU row - uint8_t* hmvp_size; //!< \brief HMVP LUT size bool jccr_sign; } encoder_state_config_frame_t; diff --git a/src/filter.c b/src/filter.c index edc9f1e1..2d51a17c 100644 --- a/src/filter.c +++ b/src/filter.c @@ -789,10 +789,10 @@ static void filter_deblock_edge_luma(encoder_state_t * const state, cu_p->inter.mv[1][0] = 0; cu_p->inter.mv[1][1] = 0; } - const int refP0 = (cu_p->inter.mv_dir & 1) ? 
state->frame->ref_LX[0][cu_p->inter.mv_ref[0]] : -1; - const int refP1 = (cu_p->inter.mv_dir & 2) ? state->frame->ref_LX[1][cu_p->inter.mv_ref[1]] : -1; - const int refQ0 = (cu_q->inter.mv_dir & 1) ? state->frame->ref_LX[0][cu_q->inter.mv_ref[0]] : -1; - const int refQ1 = (cu_q->inter.mv_dir & 2) ? state->frame->ref_LX[1][cu_q->inter.mv_ref[1]] : -1; + const int refP0 = (cu_p->type == CU_IBC)?-2:(cu_p->inter.mv_dir & 1) ? state->frame->ref_LX[0][cu_p->inter.mv_ref[0]] : -1; + const int refP1 = (cu_p->type == CU_IBC)?-2:(cu_p->inter.mv_dir & 2) ? state->frame->ref_LX[1][cu_p->inter.mv_ref[1]] : -1; + const int refQ0 = (cu_q->type == CU_IBC)?-2:(cu_q->inter.mv_dir & 1) ? state->frame->ref_LX[0][cu_q->inter.mv_ref[0]] : -1; + const int refQ1 = (cu_q->type == CU_IBC)?-2:(cu_q->inter.mv_dir & 2) ? state->frame->ref_LX[1][cu_q->inter.mv_ref[1]] : -1; const mv_t* mvQ0 = cu_q->inter.mv[0]; const mv_t* mvQ1 = cu_q->inter.mv[1]; @@ -830,12 +830,14 @@ static void filter_deblock_edge_luma(encoder_state_t * const state, } } else /*if (cu_p->inter.mv_dir != 3 && cu_q->inter.mv_dir != 3)*/ { //is P-slice - if (cu_q->inter.mv_ref[cu_q->inter.mv_dir - 1] != cu_p->inter.mv_ref[cu_p->inter.mv_dir - 1]) { + const int refP = (cu_p->type == CU_IBC)?-2:state->frame->ref_LX[0][cu_p->inter.mv_ref[0]]; + const int refQ = (cu_q->type == CU_IBC)?-2:state->frame->ref_LX[0][cu_q->inter.mv_ref[0]]; + if (refP != refQ) { // Reference pictures are different strength = 1; } else if ( - ((abs(cu_q->inter.mv[cu_q->inter.mv_dir - 1][0] - cu_p->inter.mv[cu_p->inter.mv_dir - 1][0]) >= mvdThreashold) || - (abs(cu_q->inter.mv[cu_q->inter.mv_dir - 1][1] - cu_p->inter.mv[cu_p->inter.mv_dir - 1][1]) >= mvdThreashold))) { + ((abs(cu_q->inter.mv[0][0] - cu_p->inter.mv[0][0]) >= mvdThreashold) || + (abs(cu_q->inter.mv[0][1] - cu_p->inter.mv[0][1]) >= mvdThreashold))) { // Absolute motion vector diff between blocks >= 0.5 (Integer pixel) strength = 1; } diff --git a/src/global.h b/src/global.h index 
448ea1f1..65ca2fa9 100644 --- a/src/global.h +++ b/src/global.h @@ -176,7 +176,6 @@ typedef int32_t mv_t; //! pow(2, MIN_SIZE) #define CU_MIN_SIZE_PIXELS (1 << MIN_SIZE) -//! Round frame size up to this interval (8 pixels) #define CONF_WINDOW_PAD_IN_PIXELS ((1 << MIN_SIZE)<<1) //! spec: CtbSizeY @@ -254,6 +253,15 @@ typedef int32_t mv_t; #define AMVP_MAX_NUM_CANDS 2 #define AMVP_MAX_NUM_CANDS_MEM 3 #define MRG_MAX_NUM_CANDS 6 +/** + * \brief Max number of merge candidates in Intra Block Copy + * + */ +#define IBC_MRG_MAX_NUM_CANDS 6 +#define IBC_BUFFER_SIZE (128*128) +#define IBC_BUFFER_WIDTH (IBC_BUFFER_SIZE / LCU_WIDTH) +#define IBC_BUFFER_WIDTH_C ((IBC_BUFFER_SIZE / LCU_WIDTH) >> 1) + #define MAX_NUM_HMVP_CANDS 5 diff --git a/src/hashmap.c b/src/hashmap.c new file mode 100644 index 00000000..73d8f891 --- /dev/null +++ b/src/hashmap.c @@ -0,0 +1,150 @@ +/***************************************************************************** + * This file is part of uvg266 VVC encoder. + * + * Copyright (c) 2023, Tampere University, ITU/ISO/IEC, project contributors + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, this + * list of conditions and the following disclaimer in the documentation and/or + * other materials provided with the distribution. + * + * * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS + ****************************************************************************/ + +#include "hashmap.h" + +/** + * \brief This function creates a node for the uvg_hashmap. + * + * \param key the key of the node to be created + * \param value the value of the node to be created + * \return uvg_hashmap_node a node with the given key and value + */ +uvg_hashmap_node_t* uvg_hashmap_create_node(uint32_t key, uint32_t value) { + uvg_hashmap_node_t* new_node = (uvg_hashmap_node_t*)malloc(sizeof(uvg_hashmap_node_t)); + new_node->key = key; + new_node->value = value; + new_node->next = NULL; + new_node->size = 1; + return new_node; +} + +/** + * \brief This function creates a new uvg_hashmap with a given bucket size. 
+ * + * \param bucket_size the size of the hashmap bucket + * \return uvg_hashmap a new uvg_hashmap with the given bucket size + */ +uvg_hashmap_t* uvg_hashmap_create(uint32_t bucket_size) +{ + uvg_hashmap_t* new_hashmap = (uvg_hashmap_t*)malloc(sizeof(uvg_hashmap_t)); + new_hashmap->bucket_size = bucket_size; + new_hashmap->table = (uvg_hashmap_node_t**)malloc(sizeof(uvg_hashmap_node_t*) * bucket_size); + for (int i = 0; i < bucket_size; i++) { + new_hashmap->table[i] = NULL; + } + return new_hashmap; +} + +/** + * \brief This function calculates the hash index for a given + * key and bucket size using the Jenkins hash function. + * + * \param key the key to be hashed + * \param bucket_size the size of the hashmap bucket + * \return the hashed index for the given key and bucket size. + */ +static uint32_t uvg_hashmap_hash(uint32_t key, uint32_t bucket_size) +{ + //key ^= (key >> 20) ^ (key >> 12); + //return (key ^ (key >> 7) ^ (key >> 4) ^ 2654435769U) % bucket_size; + return key % bucket_size; +} + +/** + * \brief This function inserts a new node into the hashmap. + * + * \param map the hashmap to insert the new node into + * \param key the key of the new node + * \param value the value of the new node + */ +void uvg_hashmap_insert(uvg_hashmap_t* map, uint32_t key, uint32_t value) { + uint32_t hash_index = uvg_hashmap_hash(key, map->bucket_size); + uvg_hashmap_node_t* new_node = uvg_hashmap_create_node(key, value); + new_node->next = (void*)map->table[hash_index]; + if (new_node->next != NULL) new_node->size = ((uvg_hashmap_node_t*)new_node->next)->size + 1; + map->table[hash_index] = new_node; +} + +/** + * \brief This function searches the hashmap for the given key. + * + * \param map the hashmap to search in + * \param key the key to search for + * \return uvg_hashmap_node the node with the given key, NULL if not found. 
+ */ +uvg_hashmap_node_t* uvg_hashmap_search(uvg_hashmap_t* map, uint32_t key) { + uint32_t hashIndex = uvg_hashmap_hash(key, map->bucket_size); + return map->table[hashIndex]; +} + +uint32_t uvg_hashmap_search_return_first(uvg_hashmap_t* map, uint32_t key) +{ + uint32_t hashIndex = uvg_hashmap_hash(key, map->bucket_size); + uvg_hashmap_node_t* temp = map->table[hashIndex]; + // Search key in chain and return the first match + while (temp) { + if (temp->key == key) { + return temp->value; + } + temp = (uvg_hashmap_node_t*)temp->next; + } + return -1; +} + +/** + * \brief This function frees the memory of a given hashmap node. + * + * \param node the node to free the memory of. + */ +void uvg_hashmap_node_free(uvg_hashmap_node_t* node) +{ + while (node) { + uvg_hashmap_node_t* to_delete = node; + node = (uvg_hashmap_node_t*)node->next; + free(to_delete); + } +} + +/** + * \brief This function frees the memory of a given hashmap. + * + * \param map the hashmap to free the memory of. + */ +void uvg_hashmap_free(uvg_hashmap_t* map) { + for (int i = 0; i < map->bucket_size; i++) { + uvg_hashmap_node_t* temp = map->table[i]; + uvg_hashmap_node_free(temp); + } + free(map->table); + free(map); +} diff --git a/src/hashmap.h b/src/hashmap.h new file mode 100644 index 00000000..1294bb87 --- /dev/null +++ b/src/hashmap.h @@ -0,0 +1,70 @@ +#pragma once + +/***************************************************************************** + * This file is part of uvg266 VVC encoder. + * + * Copyright (c) 2023, Tampere University, ITU/ISO/IEC, project contributors + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright notice, this + * list of conditions and the following disclaimer in the documentation and/or + * other materials provided with the distribution. + * + * * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS + ****************************************************************************/ + +#include +#include +#include + +// The ratio of the hashmap bucket size to the maximum number of elements +#define UVG_HASHMAP_RATIO 12.0 +// Use Hashmap for 4x4 blocks +#define UVG_HASHMAP_BLOCKSIZE 8 + +typedef struct uvg_hashmap_node { + void* next; + uint32_t key; + uint32_t value; + uint32_t size; +} uvg_hashmap_node_t; + +typedef struct uvg_hashmap { + uint32_t bucket_size; + uvg_hashmap_node_t** table; +} uvg_hashmap_t; + +uvg_hashmap_node_t* uvg_hashmap_create_node(uint32_t key, uint32_t value); + +uvg_hashmap_t* uvg_hashmap_create(uint32_t bucket_size); + +//uint32_t uvg_hashmap_hash(uint32_t key, uint32_t bucket_size); + +void uvg_hashmap_insert(uvg_hashmap_t* map, uint32_t key, uint32_t value); + +uvg_hashmap_node_t* uvg_hashmap_search(uvg_hashmap_t* map, uint32_t key); + +uint32_t uvg_hashmap_search_return_first(uvg_hashmap_t* map, uint32_t key); + +void uvg_hashmap_node_free(uvg_hashmap_node_t* node); + +void uvg_hashmap_free(uvg_hashmap_t* map); diff --git a/src/inter.c b/src/inter.c index f89ddf50..3bbef427 100644 --- a/src/inter.c +++ b/src/inter.c @@ -593,6 +593,67 @@ void uvg_inter_recon_cu(const encoder_state_t * const state, } } +static void ibc_recon_cu(const encoder_state_t * const state, + lcu_t *lcu, + int32_t x, + int32_t y, + int32_t width, + bool predict_luma, + bool predict_chroma, + int i_pu) +{ + const int x_scu = SUB_SCU(x); + const int y_scu = SUB_SCU(y); + uint32_t offset = x_scu + y_scu * LCU_WIDTH; + uint32_t offset_c = x_scu / 2 + y_scu / 2 * LCU_WIDTH_C; + cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, x_scu, y_scu); + + int32_t mv_x = cu->inter.mv[0][0] >> INTERNAL_MV_PREC; + int32_t mv_y = cu->inter.mv[0][1] >> INTERNAL_MV_PREC; + uint32_t ibc_row = y / LCU_WIDTH; + + int32_t buffer_x = ((x - x_scu) + LCU_WIDTH <= IBC_BUFFER_WIDTH ? 
+ x : + x - (((x - x_scu)) - IBC_BUFFER_WIDTH)) + mv_x; + int32_t buffer_y = y_scu + mv_y; + + // The whole block must be to the left of the current position + assert((-mv_x >= width || -mv_y >= width) && x >= 0 && y >= 0); + + // Predicted block completely outside of this LCU + if (mv_x + x_scu + width <= 0) { + if(predict_luma) uvg_pixels_blit(&state->tile->frame->ibc_buffer_y[ibc_row][buffer_y * IBC_BUFFER_WIDTH + buffer_x], lcu->rec.y + offset, width, width, IBC_BUFFER_WIDTH, LCU_WIDTH); + if (predict_chroma) { + uvg_pixels_blit(&state->tile->frame->ibc_buffer_u[ibc_row][(buffer_y / 2) * IBC_BUFFER_WIDTH_C + (buffer_x / 2)], lcu->rec.u + offset_c, width / 2, width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C); + uvg_pixels_blit(&state->tile->frame->ibc_buffer_v[ibc_row][(buffer_y / 2) * IBC_BUFFER_WIDTH_C + (buffer_x / 2)], lcu->rec.v + offset_c, width / 2, width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C); + } + } else if (mv_x + x_scu + width >= width) { // Completely in current LCU + if(predict_luma) uvg_pixels_blit(&lcu->rec.y[(y_scu + mv_y) * LCU_WIDTH + x_scu + mv_x], lcu->rec.y + offset, width, width, LCU_WIDTH, LCU_WIDTH); + if (predict_chroma) { + uvg_pixels_blit(&lcu->rec.u[((y_scu+mv_y) / 2) * LCU_WIDTH_C + (x_scu + mv_x) / 2], lcu->rec.u + offset_c, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C); + uvg_pixels_blit(&lcu->rec.v[((y_scu+mv_y) / 2) * LCU_WIDTH_C + (x_scu + mv_x) / 2], lcu->rec.v + offset_c, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C); + } + } else { // Partly on the buffer and party on the current LCU rec + + uint32_t width_buffer = -(mv_x + x_scu); + uint32_t width_lcu = width - width_buffer; + if(predict_luma) uvg_pixels_blit(&state->tile->frame->ibc_buffer_y[ibc_row][buffer_y * IBC_BUFFER_WIDTH + buffer_x], lcu->rec.y + offset, width_buffer, width, IBC_BUFFER_WIDTH, LCU_WIDTH); + if (predict_chroma) { + uvg_pixels_blit(&state->tile->frame->ibc_buffer_u[ibc_row][(buffer_y / 2) * IBC_BUFFER_WIDTH_C + (buffer_x / 2)], lcu->rec.u + offset_c, 
width_buffer / 2 + (width_buffer&1), width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C); + uvg_pixels_blit(&state->tile->frame->ibc_buffer_v[ibc_row][(buffer_y / 2) * IBC_BUFFER_WIDTH_C + (buffer_x / 2)], lcu->rec.v + offset_c, width_buffer / 2 + (width_buffer&1), width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C); + } + + offset += width_buffer; + offset_c += width_buffer/2 + (width_buffer&1); + + if(predict_luma) uvg_pixels_blit(&lcu->rec.y[(y_scu + mv_y) * LCU_WIDTH + x_scu + mv_x + width_buffer], lcu->rec.y + offset, width_lcu, width, LCU_WIDTH, LCU_WIDTH); + if (predict_chroma && (width_lcu / 2)) { + uvg_pixels_blit(&lcu->rec.u[((y_scu+mv_y) / 2) * LCU_WIDTH_C + (x_scu + mv_x + width_buffer) / 2], lcu->rec.u + offset_c, width_lcu / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C); + uvg_pixels_blit(&lcu->rec.v[((y_scu+mv_y) / 2) * LCU_WIDTH_C + (x_scu + mv_x + width_buffer) / 2], lcu->rec.v + offset_c, width_lcu / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C); + } + } +} + /** * Predict a single PU. * @@ -626,49 +687,56 @@ void uvg_inter_pred_pu(const encoder_state_t * const state, const int pu_h = PU_GET_H(cu->part_size, width, i_pu); cu_info_t *pu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(pu_x), SUB_SCU(pu_y)); - if (pu->inter.mv_dir == 3) { - const uvg_picture *const refs[2] = { - state->frame->ref->images[ - state->frame->ref_LX[0][ - pu->inter.mv_ref[0]]], - state->frame->ref->images[ - state->frame->ref_LX[1][ - pu->inter.mv_ref[1]]], - }; - uvg_inter_recon_bipred(state, - refs[0], refs[1], - pu_x, pu_y, - pu_w, pu_h, - pu->inter.mv, - lcu, - predict_luma, predict_chroma); + if (cu->type == CU_IBC) { + ibc_recon_cu(state, lcu, x, y, width, predict_luma, predict_chroma, i_pu); + } else { + + if (pu->inter.mv_dir == 3) { + const uvg_picture * const refs[2] = { + state->frame->ref->images[state->frame->ref_LX[0][pu->inter.mv_ref[0]]], + state->frame->ref->images[state->frame->ref_LX[1][pu->inter.mv_ref[1]]], + }; + uvg_inter_recon_bipred( + state, + refs[0], + refs[1], + pu_x, + pu_y, + pu_w, + 
pu_h, + pu->inter.mv, + lcu, + predict_luma, + predict_chroma); + } else { + const int mv_idx = pu->inter.mv_dir - 1; + const uvg_picture * const ref = + state->frame->ref->images[state->frame->ref_LX[mv_idx][pu->inter.mv_ref[mv_idx]]]; + + const unsigned offset_luma = SUB_SCU(pu_y) * LCU_WIDTH + SUB_SCU(pu_x); + const unsigned offset_chroma = + SUB_SCU(pu_y) / 2 * LCU_WIDTH_C + SUB_SCU(pu_x) / 2; + yuv_t lcu_adapter; + lcu_adapter.size = pu_w * pu_h; + lcu_adapter.y = lcu->rec.y + offset_luma, + lcu_adapter.u = lcu->rec.u + offset_chroma, + lcu_adapter.v = lcu->rec.v + offset_chroma, + + inter_recon_unipred( + state, + ref, + pu_x, + pu_y, + pu_w, + pu_h, + LCU_WIDTH, + pu->inter.mv[mv_idx], + &lcu_adapter, + NULL, + predict_luma, + predict_chroma); + } } - else { - const int mv_idx = pu->inter.mv_dir - 1; - const uvg_picture *const ref = - state->frame->ref->images[ - state->frame->ref_LX[mv_idx][ - pu->inter.mv_ref[mv_idx]]]; - - const unsigned offset_luma = SUB_SCU(pu_y) * LCU_WIDTH + SUB_SCU(pu_x); - const unsigned offset_chroma = SUB_SCU(pu_y) / 2 * LCU_WIDTH_C + SUB_SCU(pu_x) / 2; - yuv_t lcu_adapter; - lcu_adapter.size = pu_w * pu_h; - lcu_adapter.y = lcu->rec.y + offset_luma, - lcu_adapter.u = lcu->rec.u + offset_chroma, - lcu_adapter.v = lcu->rec.v + offset_chroma, - - inter_recon_unipred(state, - ref, - pu_x, pu_y, - pu_w, pu_h, - LCU_WIDTH, - pu->inter.mv[mv_idx], - &lcu_adapter, - NULL, - predict_luma, predict_chroma); - } - if (predict_chroma && state->encoder_control->cfg.jccr) { const int offset = x_scu / 2 + y_scu / 2 * LCU_WIDTH_C; uvg_pixels_blit(lcu->rec.u + offset, lcu->rec.joint_u + offset, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C); @@ -917,6 +985,259 @@ static void get_temporal_merge_candidates(const encoder_state_t * const state, } } +static INLINE mv_t get_scaled_mv(mv_t mv, int scale) +{ + int32_t scaled = scale * mv; + return CLIP(-131072, 131071, (scaled + 127 + (scaled < 0)) >> 8); +} + +#define MV_EXPONENT_BITCOUNT 4 +#define 
MV_MANTISSA_BITCOUNT 6 +#define MV_MANTISSA_UPPER_LIMIT ((1 << (MV_MANTISSA_BITCOUNT - 1)) - 1) +#define MV_MANTISSA_LIMIT (1 << (MV_MANTISSA_BITCOUNT - 1)) +#define MV_EXPONENT_MASK ((1 << MV_EXPONENT_BITCOUNT) - 1) + +static int convert_mv_fixed_to_float(int32_t val) +{ + uint32_t sign = val >> 31; + int scale = uvg_math_floor_log2((val ^ sign) | MV_MANTISSA_UPPER_LIMIT) - (MV_MANTISSA_BITCOUNT - 1); + + int exponent; + uint32_t mantissa; + if (scale >= 0) + { + int round = (1 << scale) >> 1; + int n = (val + round) >> scale; + exponent = scale + ((n ^ sign) >> (MV_MANTISSA_BITCOUNT - 1)); + mantissa = (n & MV_MANTISSA_UPPER_LIMIT) | (sign << (MV_MANTISSA_BITCOUNT - 1)); + } + else + { + exponent = 0; + mantissa = val; + } + + return exponent | (mantissa << MV_EXPONENT_BITCOUNT); +} + +static int convert_mv_float_to_fixed(int val) +{ + int exponent = val & MV_EXPONENT_MASK; + uint32_t mantissa = val >> MV_EXPONENT_BITCOUNT; + return exponent == 0 ? mantissa : (mantissa ^ MV_MANTISSA_LIMIT) << (exponent - 1); +} + +static int round_mv_comp(int x) +{ + return convert_mv_float_to_fixed(convert_mv_fixed_to_float(x)); +} + +static void apply_mv_scaling_pocs(int32_t current_poc, + int32_t current_ref_poc, + int32_t neighbor_poc, + int32_t neighbor_ref_poc, + mv_t mv_cand[2]) +{ + int32_t diff_current = current_poc - current_ref_poc; + int32_t diff_neighbor = neighbor_poc - neighbor_ref_poc; + + if (diff_current == diff_neighbor) return; + + diff_current = CLIP(-128, 127, diff_current); + diff_neighbor = CLIP(-128, 127, diff_neighbor); + + int scale = CLIP(-4096, 4095, + (diff_current * ((0x4000 + (abs(diff_neighbor) >> 1)) / diff_neighbor) + 32) >> 6); + + mv_cand[0] = get_scaled_mv(mv_cand[0], scale); + mv_cand[1] = get_scaled_mv(mv_cand[1], scale); +} + +static INLINE void apply_mv_scaling(const encoder_state_t *state, + const cu_info_t *current_cu, + const cu_info_t *neighbor_cu, + int8_t current_reflist, + int8_t neighbor_reflist, + mv_t mv_cand[2]) +{ + 
apply_mv_scaling_pocs(state->frame->poc, + state->frame->ref->pocs[ + state->frame->ref_LX[current_reflist][ + current_cu->inter.mv_ref[current_reflist]]], + state->frame->poc, + state->frame->ref->pocs[ + state->frame->ref_LX[neighbor_reflist][ + neighbor_cu->inter.mv_ref[neighbor_reflist]]], + mv_cand); +} + +static INLINE bool add_mvp_candidate(const encoder_state_t *state, + const cu_info_t *cur_cu, + const cu_info_t *cand, + int8_t reflist, + bool scaling, + mv_t mv_cand_out[2]) +{ + if (!cand) return false; + + assert(cand->inter.mv_dir != 0); + + for (int i = 0; i < 2; i++) { + const int cand_list = i == 0 ? reflist : !reflist; + + if ((cand->inter.mv_dir & (1 << cand_list)) == 0) continue; + + if (scaling) { + mv_cand_out[0] = cand->inter.mv[cand_list][0]; + mv_cand_out[1] = cand->inter.mv[cand_list][1]; + apply_mv_scaling(state, cur_cu, cand, reflist, cand_list, mv_cand_out); + return true; + } + + if (state->frame->ref_LX[cand_list][cand->inter.mv_ref[cand_list]] == + state->frame->ref_LX[reflist][cur_cu->inter.mv_ref[reflist]]) + { + mv_cand_out[0] = cand->inter.mv[cand_list][0]; + mv_cand_out[1] = cand->inter.mv[cand_list][1]; + return true; + } + } + + return false; +} + + +static bool is_duplicate_candidate_ibc(const cu_info_t* cu1, const cu_info_t* cu2) +{ + if (!cu2) return false; + + if (cu1->inter.mv[0][0] != cu2->inter.mv[0][0] || + cu1->inter.mv[0][1] != cu2->inter.mv[0][1]) { + return false; + } + + + return true; +} + +/** + * \brief Get merge candidates for current block. + * + * The output parameters b0, b1, b2, a0, a1 are pointed to the + * corresponding cu_info_t struct in lcu->cu, or set to NULL, if the + * candidate is not available. 
+ * + * \param x block x position in pixels + * \param y block y position in pixels + * \param width block width in pixels + * \param height block height in pixels + * \param picture_width tile width in pixels + * \param picture_height tile height in pixels + * \param lcu current LCU + * \param cand_out will be filled with A and B candidates + */ +static void get_ibc_merge_candidates(const encoder_state_t * const state, + const cu_info_t * const cur_cu, + lcu_t *lcu, + const cu_array_t *cua, + int32_t x, + int32_t y, + int32_t width, + int32_t height, + mv_t mv_cand[IBC_MRG_MAX_NUM_CANDS][2] + ) +{ + /* + Predictor block locations + ____ _______ + |B2|______|B1|B0| + | | + | Cur CU | + __| | + |A1|_________| + |A0| + */ + int32_t x_local = SUB_SCU(x); //!< coordinates from top-left of this LCU + int32_t y_local = SUB_SCU(y); + + cu_info_t *a1 = NULL; + cu_info_t *b1 = NULL; + + uint8_t candidates = 0; + + // A1 availability testing + if (x != 0) { + a1 = lcu != NULL?LCU_GET_CU_AT_PX(lcu, x_local - 1, y_local + height - 1): uvg_cu_array_at_const(cua, x - 1, y + height - 1); + // Do not check a1->coded because the block above is always coded before + // the current one and the flag is not set when searching an SMP block. + if (a1->type == CU_IBC) { + inter_clear_cu_unused(a1); + mv_cand[candidates][0] = a1->inter.mv[0][0]; + mv_cand[candidates][1] = a1->inter.mv[0][1]; + candidates++; + } else { + a1 = NULL; + } + } + + // B1 availability testing + if (y != 0) { + b1 = lcu != NULL?LCU_GET_CU_AT_PX(lcu, x_local + width - 1, y_local - 1): uvg_cu_array_at_const(cua, x + width - 1, y - 1); + // Do not check b1->coded because the block to the left is always coded + // before the current one and the flag is not set when searching an SMP + // block. 
+ if (b1->type == CU_IBC) { + if(!is_duplicate_candidate_ibc(b1, a1)) { + inter_clear_cu_unused(b1); + mv_cand[candidates][0] = b1->inter.mv[0][0]; + mv_cand[candidates][1] = b1->inter.mv[0][1]; + candidates++; + } + } else { + b1 = NULL; + } + } + + if (candidates > 0) + uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[0][0], &mv_cand[0][1]); + if (candidates > 1) + uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[1][0], &mv_cand[1][1]); + + if (candidates < IBC_MRG_MAX_NUM_CANDS) + { + const uint32_t ctu_row = (y >> LOG2_LCU_WIDTH); + const uint32_t ctu_row_mul_five = ctu_row * MAX_NUM_HMVP_CANDS; + int32_t num_cand = state->tile->frame->hmvp_size_ibc[ctu_row]; + for (int i = 0; i < MIN(MAX_NUM_HMVP_CANDS,num_cand); i++) { + cu_info_t* cand = &state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five + i]; + bool duplicate = false; + + // Check that the HMVP candidate is not duplicate + if (is_duplicate_candidate_ibc(cand, a1)) { + duplicate = true; + } else if(is_duplicate_candidate_ibc(cand, b1)) { + duplicate = true; + } + + // allow duplicates after the first hmvp lut item + if (!duplicate || i > 0) { + mv_cand[candidates][0] = cand->inter.mv[0][0]; + mv_cand[candidates][1] = cand->inter.mv[0][1]; + candidates++; + if (candidates == IBC_MRG_MAX_NUM_CANDS) return; + } + } + } + + // Fill with (0,0) + while (candidates < IBC_MRG_MAX_NUM_CANDS) { + mv_cand[candidates][0] = 0; + mv_cand[candidates][1] = 0; + candidates++; + } +} + + /** * \brief Get merge candidates for current block. 
* @@ -1093,92 +1414,6 @@ static void get_spatial_merge_candidates_cua(const cu_array_t *cua, } } -static INLINE mv_t get_scaled_mv(mv_t mv, int scale) -{ - int32_t scaled = scale * mv; - return CLIP(-131072, 131071, (scaled + 127 + (scaled < 0)) >> 8); -} - -#define MV_EXPONENT_BITCOUNT 4 -#define MV_MANTISSA_BITCOUNT 6 -#define MV_MANTISSA_UPPER_LIMIT ((1 << (MV_MANTISSA_BITCOUNT - 1)) - 1) -#define MV_MANTISSA_LIMIT (1 << (MV_MANTISSA_BITCOUNT - 1)) -#define MV_EXPONENT_MASK ((1 << MV_EXPONENT_BITCOUNT) - 1) - -static int convert_mv_fixed_to_float(int32_t val) -{ - uint32_t sign = val >> 31; - int scale = uvg_math_floor_log2((val ^ sign) | MV_MANTISSA_UPPER_LIMIT) - (MV_MANTISSA_BITCOUNT - 1); - - int exponent; - uint32_t mantissa; - if (scale >= 0) - { - int round = (1 << scale) >> 1; - int n = (val + round) >> scale; - exponent = scale + ((n ^ sign) >> (MV_MANTISSA_BITCOUNT - 1)); - mantissa = (n & MV_MANTISSA_UPPER_LIMIT) | (sign << (MV_MANTISSA_BITCOUNT - 1)); - } - else - { - exponent = 0; - mantissa = val; - } - - return exponent | (mantissa << MV_EXPONENT_BITCOUNT); -} - -static int convert_mv_float_to_fixed(int val) -{ - int exponent = val & MV_EXPONENT_MASK; - uint32_t mantissa = val >> MV_EXPONENT_BITCOUNT; - return exponent == 0 ? 
mantissa : (mantissa ^ MV_MANTISSA_LIMIT) << (exponent - 1); -} - -static int round_mv_comp(int x) -{ - return convert_mv_float_to_fixed(convert_mv_fixed_to_float(x)); -} - -static void apply_mv_scaling_pocs(int32_t current_poc, - int32_t current_ref_poc, - int32_t neighbor_poc, - int32_t neighbor_ref_poc, - mv_t mv_cand[2]) -{ - int32_t diff_current = current_poc - current_ref_poc; - int32_t diff_neighbor = neighbor_poc - neighbor_ref_poc; - - if (diff_current == diff_neighbor) return; - - diff_current = CLIP(-128, 127, diff_current); - diff_neighbor = CLIP(-128, 127, diff_neighbor); - - int scale = CLIP(-4096, 4095, - (diff_current * ((0x4000 + (abs(diff_neighbor) >> 1)) / diff_neighbor) + 32) >> 6); - - mv_cand[0] = get_scaled_mv(mv_cand[0], scale); - mv_cand[1] = get_scaled_mv(mv_cand[1], scale); -} - -static INLINE void apply_mv_scaling(const encoder_state_t *state, - const cu_info_t *current_cu, - const cu_info_t *neighbor_cu, - int8_t current_reflist, - int8_t neighbor_reflist, - mv_t mv_cand[2]) -{ - apply_mv_scaling_pocs(state->frame->poc, - state->frame->ref->pocs[ - state->frame->ref_LX[current_reflist][ - current_cu->inter.mv_ref[current_reflist]]], - state->frame->poc, - state->frame->ref->pocs[ - state->frame->ref_LX[neighbor_reflist][ - neighbor_cu->inter.mv_ref[neighbor_reflist]]], - mv_cand); -} - /** * \brief Try to add a temporal MVP or merge candidate. * @@ -1246,41 +1481,6 @@ static bool add_temporal_candidate(const encoder_state_t *state, return true; } -static INLINE bool add_mvp_candidate(const encoder_state_t *state, - const cu_info_t *cur_cu, - const cu_info_t *cand, - int8_t reflist, - bool scaling, - mv_t mv_cand_out[2]) -{ - if (!cand) return false; - - assert(cand->inter.mv_dir != 0); - - for (int i = 0; i < 2; i++) { - const int cand_list = i == 0 ? 
reflist : !reflist; - - if ((cand->inter.mv_dir & (1 << cand_list)) == 0) continue; - - if (scaling) { - mv_cand_out[0] = cand->inter.mv[cand_list][0]; - mv_cand_out[1] = cand->inter.mv[cand_list][1]; - apply_mv_scaling(state, cur_cu, cand, reflist, cand_list, mv_cand_out); - return true; - } - - if (state->frame->ref_LX[cand_list][cand->inter.mv_ref[cand_list]] == - state->frame->ref_LX[reflist][cur_cu->inter.mv_ref[reflist]]) - { - mv_cand_out[0] = cand->inter.mv[cand_list][0]; - mv_cand_out[1] = cand->inter.mv[cand_list][1]; - return true; - } - } - - return false; -} - /** * \brief Pick two mv candidates from the spatial and temporal candidates. */ @@ -1407,14 +1607,20 @@ void uvg_inter_get_mv_cand(const encoder_state_t * const state, { merge_candidates_t merge_cand = { 0 }; const uint8_t parallel_merge_level = state->encoder_control->cfg.log2_parallel_merge_level; - get_spatial_merge_candidates(x, y, width, height, - state->tile->frame->width, - state->tile->frame->height, - lcu, - &merge_cand, parallel_merge_level,state->encoder_control->cfg.wpp); - get_temporal_merge_candidates(state, x, y, width, height, 1, 0, &merge_cand); - get_mv_cand_from_candidates(state, x, y, width, height, &merge_cand, cur_cu, reflist, mv_cand); - + if (cur_cu->type == CU_IBC) { + mv_t ibc_mv_cand[IBC_MRG_MAX_NUM_CANDS][2]; + get_ibc_merge_candidates(state, cur_cu,lcu,NULL, x, y, width, height,ibc_mv_cand); + memcpy(mv_cand[0], ibc_mv_cand[0], sizeof(mv_t) * 2); + memcpy(mv_cand[1], ibc_mv_cand[1], sizeof(mv_t) * 2); + } else { + get_spatial_merge_candidates(x, y, width, height, + state->tile->frame->width, + state->tile->frame->height, + lcu, + &merge_cand, parallel_merge_level,state->encoder_control->cfg.wpp); + get_temporal_merge_candidates(state, x, y, width, height, 1, 0, &merge_cand); + get_mv_cand_from_candidates(state, x, y, width, height, &merge_cand, cur_cu, reflist, mv_cand); + } uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[0][0], &mv_cand[0][1]); 
uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[1][0], &mv_cand[1][1]); } @@ -1443,17 +1649,30 @@ void uvg_inter_get_mv_cand_cua(const encoder_state_t * const state, merge_candidates_t merge_cand = { 0 }; const cu_array_t *cua = state->tile->frame->cu_array; - get_spatial_merge_candidates_cua(cua, - x, y, width, height, - state->tile->frame->width, state->tile->frame->height, - &merge_cand, state->encoder_control->cfg.wpp); - get_temporal_merge_candidates(state, x, y, width, height, 1, 0, &merge_cand); - get_mv_cand_from_candidates(state, x, y, width, height, &merge_cand, cur_cu, reflist, mv_cand); - + if (cur_cu->type == CU_IBC) { + mv_t ibc_mv_cand[IBC_MRG_MAX_NUM_CANDS][2]; + get_ibc_merge_candidates(state, cur_cu, NULL,cua,x, y, width, height,ibc_mv_cand); + memcpy(mv_cand[0], ibc_mv_cand[0], sizeof(mv_t) * 2); + memcpy(mv_cand[1], ibc_mv_cand[1], sizeof(mv_t) * 2); + } else { + get_spatial_merge_candidates_cua(cua, + x, y, width, height, + state->tile->frame->width, state->tile->frame->height, + &merge_cand, state->encoder_control->cfg.wpp); + get_temporal_merge_candidates(state, x, y, width, height, 1, 0, &merge_cand); + get_mv_cand_from_candidates(state, x, y, width, height, &merge_cand, cur_cu, reflist, mv_cand); + } uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[0][0], &mv_cand[0][1]); uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[1][0], &mv_cand[1][1]); } +/** +• \brief Checks if two CUs have similar motion vectors. The function takes two CUs and compares their motion vectors. +• \param cu1 first CU +• \param cu2 second CU +• \return returns 0 if the two CUs have dissimilar motion vectors, and 1 if the motions are similar. +*/ + static bool is_duplicate_candidate(const cu_info_t* cu1, const cu_info_t* cu2) { if (!cu2) return false; @@ -1472,6 +1691,16 @@ static bool is_duplicate_candidate(const cu_info_t* cu1, const cu_info_t* cu2) return true; } +/** +* Adds a merge candidate to the list of possible candidates, if it is not a duplicate. 
+* +* \param cand The candidate to be added. +* \param possible_duplicate1 The first possible duplicate candidate to check for duplication. +* \param possible_duplicate2 The second possible duplicate candidate to check for duplication. +* \param merge_cand_out The output parameter to store the merge candidate information. +* +* @return Returns true if the merge candidate was added successfully, false otherwise. +*/ static bool add_merge_candidate(const cu_info_t *cand, const cu_info_t *possible_duplicate1, const cu_info_t *possible_duplicate2, @@ -1503,14 +1732,23 @@ static void hmvp_shift_lut(cu_info_t* lut, int32_t size, int32_t start, int32_t } } -static bool hmvp_push_lut_item(cu_info_t* lut, int32_t size, const cu_info_t* cu) { +static bool hmvp_push_lut_item(cu_info_t* lut, int32_t size, const cu_info_t* cu, bool ibc) { int8_t duplicate = -1; - for (int i = 0; i < size; i++) { - if (is_duplicate_candidate(cu, (const cu_info_t*)&lut[i])) { - duplicate = i; - break; + if (ibc) { + for (int i = 0; i < size; i++) { + if (is_duplicate_candidate_ibc(cu, (const cu_info_t *)&lut[i])) { + duplicate = i; + break; + } + } + } else { + for (int i = 0; i < size; i++) { + if (is_duplicate_candidate(cu, (const cu_info_t *)&lut[i])) { + duplicate = i; + break; + } } } // If duplicate found, shift the whole lut up to the duplicate, otherwise to the end @@ -1534,21 +1772,28 @@ static bool hmvp_push_lut_item(cu_info_t* lut, int32_t size, const cu_info_t* cu void uvg_hmvp_add_mv(const encoder_state_t* const state, uint32_t pic_x, uint32_t pic_y, uint32_t block_width, uint32_t block_height, const cu_info_t* cu) { //if (!cu.geoFlag && !cu.affine) - if(cu->type == CU_INTER) + if(cu->type != CU_INTRA) { const uint8_t parallel_merge_level = state->encoder_control->cfg.log2_parallel_merge_level; const uint32_t xBr = block_width + pic_x; const uint32_t yBr = block_height + pic_y; bool hmvp_possible = ((xBr >> parallel_merge_level) > (pic_x >> parallel_merge_level)) && ((yBr >> 
parallel_merge_level) > (pic_y >> parallel_merge_level)); - if (hmvp_possible) { // ToDo: check for IBC + if (hmvp_possible || cu->type == CU_IBC) { const uint32_t ctu_row = (pic_y >> LOG2_LCU_WIDTH); const uint32_t ctu_row_mul_five = ctu_row * MAX_NUM_HMVP_CANDS; - bool add_row = hmvp_push_lut_item(&state->tile->frame->hmvp_lut[ctu_row_mul_five], state->tile->frame->hmvp_size[ctu_row], cu); - if(add_row && state->tile->frame->hmvp_size[ctu_row] < MAX_NUM_HMVP_CANDS) { - state->tile->frame->hmvp_size[ctu_row]++; + if (cu->type == CU_IBC) { + bool add_row = hmvp_push_lut_item(&state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five], state->tile->frame->hmvp_size_ibc[ctu_row], cu, true); + if(add_row && state->tile->frame->hmvp_size_ibc[ctu_row] < MAX_NUM_HMVP_CANDS) { + state->tile->frame->hmvp_size_ibc[ctu_row]++; + } + } else { + bool add_row = hmvp_push_lut_item(&state->tile->frame->hmvp_lut[ctu_row_mul_five], state->tile->frame->hmvp_size[ctu_row], cu, false); + if(add_row && state->tile->frame->hmvp_size[ctu_row] < MAX_NUM_HMVP_CANDS) { + state->tile->frame->hmvp_size[ctu_row]++; + } } } } @@ -1652,6 +1897,19 @@ uint8_t uvg_inter_get_merge_cand(const encoder_state_t * const state, const uint8_t parallel_merge_level = state->encoder_control->cfg.log2_parallel_merge_level; merge_candidates_t merge_cand = { 0 }; const uint8_t max_num_cands = state->encoder_control->cfg.max_merge; + + cu_info_t *cur_cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y)); + if(cur_cu->type == CU_IBC) { + mv_t ibc_mv_cand[IBC_MRG_MAX_NUM_CANDS][2]; + get_ibc_merge_candidates(state, cur_cu,lcu,NULL, x, y, width, height,ibc_mv_cand); + for (int i = 0; i < IBC_MRG_MAX_NUM_CANDS; i++) { + mv_cand[i].dir = 1; + mv_cand[i].mv[0][0] = ibc_mv_cand[i][0]; + mv_cand[i].mv[0][1] = ibc_mv_cand[i][1]; + } + return IBC_MRG_MAX_NUM_CANDS; + } + get_spatial_merge_candidates(x, y, width, height, state->tile->frame->width, state->tile->frame->height, @@ -1721,7 +1979,6 @@ uint8_t uvg_inter_get_merge_cand(const 
encoder_state_t * const state, for (int i = 0; i < num_cand; i++) { const cu_info_t* hmvp_cand = &state->tile->frame->hmvp_lut[ctu_row_mul_five + i]; - // ToDo: Add IBC condition if (i > 1 || ((!is_duplicate_candidate(hmvp_cand, a[1])) && (!is_duplicate_candidate(hmvp_cand, b[1]))) ) { mv_cand[candidates].mv[0][0] = state->tile->frame->hmvp_lut[ctu_row_mul_five + i].inter.mv[0][0]; diff --git a/src/rdo.c b/src/rdo.c index 51131c6a..f8ebacdf 100644 --- a/src/rdo.c +++ b/src/rdo.c @@ -1773,6 +1773,109 @@ double uvg_get_mvd_coding_cost_cabac(const encoder_state_t* state, return bits; } + +/** MVD cost calculation with CABAC +* \returns int +* Calculates Motion Vector cost and related costs using CABAC coding +*/ +double uvg_calc_ibc_mvd_cost_cabac(const encoder_state_t * state, + int x, + int y, + int mv_shift, + mv_t mv_cand[2][2], + inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], + int16_t num_cand, + int32_t ref_idx, + double* bitcost) +{ + cabac_data_t state_cabac_copy; + cabac_data_t* cabac; + uint32_t merge_idx; + vector2d_t mvd = { 0, 0 }; + int8_t merged = 0; + int8_t cur_mv_cand = 0; + + x *= 1 << mv_shift; + y *= 1 << mv_shift; + + // Check every candidate to find a match + for (merge_idx = 0; merge_idx < (uint32_t)num_cand; merge_idx++) { + if (merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][0] == x && + merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][1] == y) + { + merged = 1; + break; + } + } + + // Store cabac state and contexts + memcpy(&state_cabac_copy, &state->search_cabac, sizeof(cabac_data_t)); + + // Clear bytes and bits and set mode to "count" + state_cabac_copy.only_count = 1; + + cabac = &state_cabac_copy; + double bits = 0; + + if (!merged) { + vector2d_t mvd1 = { + x - mv_cand[0][0], + y - mv_cand[0][1], + }; + vector2d_t mvd2 = { + x - mv_cand[1][0], + y - mv_cand[1][1], + }; + + uvg_change_precision_vector2d(INTERNAL_MV_PREC, 2, &mvd1); + uvg_change_precision_vector2d(INTERNAL_MV_PREC, 2, &mvd2); + + double cand1_cost = 
uvg_get_mvd_coding_cost_cabac(state, cabac, mvd1.x, mvd1.y); + double cand2_cost = uvg_get_mvd_coding_cost_cabac(state, cabac, mvd2.x, mvd2.y); + + // Select candidate 1 if it has lower cost + if (cand2_cost < cand1_cost) { + cur_mv_cand = 1; + mvd = mvd2; + } else { + mvd = mvd1; + } + } + + cabac->cur_ctx = &(cabac->ctx.cu_merge_flag_ext_model); + + CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_merge_flag_ext_model), merged, bits, "MergeFlag"); + num_cand = state->encoder_control->cfg.max_merge; + if (merged) { + if (num_cand > 1) { + int32_t ui; + for (ui = 0; ui < num_cand - 1; ui++) { + int32_t symbol = (ui != merge_idx); + if (ui == 0) { + CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_merge_idx_ext_model), symbol, bits, "MergeIndex"); + } else { + CABAC_BIN_EP(cabac, symbol, "MergeIndex"); + bits += 1; + } + if (symbol == 0) break; + } + } + } else { + + // It is safe to drop const here because cabac->only_count is set. + uvg_encode_mvd((encoder_state_t*) state, cabac, mvd.x, mvd.y, &bits); + + // Signal which candidate MV to use + cabac->cur_ctx = &(cabac->ctx.mvp_idx_model); + CABAC_BIN(cabac, cur_mv_cand, "mvp_flag"); + } + + *bitcost = bits; + + // Store bitcost before restoring cabac + return *bitcost * state->lambda_sqrt; +} + /** MVD cost calculation with CABAC * \returns int * Calculates Motion Vector cost and related costs using CABAC coding diff --git a/src/rdo.h b/src/rdo.h index b7f93729..7f325cfd 100644 --- a/src/rdo.h +++ b/src/rdo.h @@ -88,6 +88,7 @@ uint32_t uvg_get_coded_level(encoder_state_t * state, double* coded_cost, double int32_t q_bits,double temp, int8_t last, int8_t type); uvg_mvd_cost_func uvg_calc_mvd_cost_cabac; +uvg_mvd_cost_func uvg_calc_ibc_mvd_cost_cabac; double uvg_get_mvd_coding_cost_cabac(const encoder_state_t* state, const cabac_data_t* cabac, diff --git a/src/search.c b/src/search.c index b76c169a..cb9fc1d1 100644 --- a/src/search.c +++ b/src/search.c @@ -45,6 +45,7 @@ #include "rdo.h" #include "search_inter.h" #include 
"search_intra.h" +#include "search_ibc.h" #include "threadqueue.h" #include "transform.h" #include "videoframe.h" @@ -179,7 +180,7 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in } } -static void lcu_fill_inter(lcu_t *lcu, int x_local, int y_local, int cu_width) +static void lcu_fill_inter(lcu_t *lcu, int x_local, int y_local, int cu_width, uint8_t type) { const part_mode_t part_mode = LCU_GET_CU_AT_PX(lcu, x_local, y_local)->part_size; const int num_pu = uvg_part_mode_num_parts[part_mode]; @@ -191,7 +192,7 @@ static void lcu_fill_inter(lcu_t *lcu, int x_local, int y_local, int cu_width) const int height_pu = PU_GET_H(part_mode, cu_width, i); cu_info_t *pu = LCU_GET_CU_AT_PX(lcu, x_pu, y_pu); - pu->type = CU_INTER; + pu->type = type; lcu_fill_cu_info(lcu, x_pu, y_pu, width_pu, height_pu, pu); } } @@ -306,7 +307,7 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state, lcu_t *const lcu) { const int width = LCU_WIDTH >> depth; - const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0); + const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0); cabac_data_t* cabac = (cabac_data_t *)&state->search_cabac; // cur_cu is used for TU parameters. @@ -380,7 +381,7 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state, const vector2d_t lcu_px = { (x_px & ~7) / 2, (y_px & ~7) / 2 }; const int width = (depth < MAX_DEPTH) ? 
LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth; cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px); - const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0); + const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0); double tr_tree_bits = 0; double coeff_bits = 0; @@ -477,7 +478,7 @@ static double cu_rd_cost_tr_split_accurate( enum uvg_tree_type tree_type) { const int width = LCU_WIDTH >> depth; - const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0); + const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0); // cur_cu is used for TU parameters. cu_info_t* const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px); @@ -499,7 +500,7 @@ static double cu_rd_cost_tr_split_accurate( int cbf = cbf_is_set_any(pred_cu->cbf, depth); // Only need to signal coded block flag if not skipped or merged // skip = no coded residual, merge = coded residual - if (pred_cu->type == CU_INTER && (pred_cu->part_size != SIZE_2Nx2N || !pred_cu->merged)) { + if (pred_cu->type != CU_INTRA && (pred_cu->part_size != SIZE_2Nx2N || !pred_cu->merged)) { CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_qt_root_cbf_model), cbf, tr_tree_bits, "rqt_root_cbf"); } @@ -803,9 +804,12 @@ static double search_cu( cu_info_t hmvp_lut[MAX_NUM_HMVP_CANDS]; uint8_t hmvp_lut_size = state->tile->frame->hmvp_size[ctu_row]; + cu_info_t hmvp_lut_ibc[MAX_NUM_HMVP_CANDS]; + uint8_t hmvp_lut_size_ibc = state->tile->frame->hmvp_size_ibc[ctu_row]; // Store original HMVP lut before search and restore after, since it's modified if (state->frame->slicetype != UVG_SLICE_I) memcpy(hmvp_lut, &state->tile->frame->hmvp_lut[ctu_row_mul_five], sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS); + if(state->encoder_control->cfg.ibc) memcpy(hmvp_lut_ibc, &state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five], sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS); struct { int32_t min; @@ 
-1006,6 +1010,34 @@ static double search_cu( } } + // Simple IBC search + if (can_use_intra //&& state->frame->slicetype == UVG_SLICE_I + && state->encoder_control->cfg.ibc + && cost > 1000 + && cu_width > 4 + && (x >= cu_width || y >= cu_width) + && !cur_cu->skipped) { + + cu_info_t backup_cu = *cur_cu; + + double mode_cost; + double mode_bitcost; + uvg_search_cu_ibc(state, + x, y, + depth, + lcu, + &mode_cost, &mode_bitcost); + if (mode_cost < cost) { + cost = mode_cost; + inter_bitcost = mode_bitcost; + cur_cu->type = CU_IBC; + cur_cu->inter.mv_dir = 1; + cur_cu->joint_cb_cr = 0; + } else { + *cur_cu = backup_cu; + } + } + // Reconstruct best mode because we need the reconstructed pixels for // mode search of adjacent CUs. if (cur_cu->type == CU_INTRA) { @@ -1035,7 +1067,7 @@ static double search_cu( lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); - } else if (cur_cu->type == CU_INTER) { + } else if (cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) { if (!cur_cu->skipped) { @@ -1081,12 +1113,12 @@ static double search_cu( inter_bitcost += cur_cu->merge_idx; } } - lcu_fill_inter(lcu, x_local, y_local, cu_width); + lcu_fill_inter(lcu, x_local, y_local, cu_width, cur_cu->type); lcu_fill_cbf(lcu, x_local, y_local, cu_width, cur_cu); } } - if (cur_cu->type == CU_INTRA || cur_cu->type == CU_INTER) { + if (cur_cu->type == CU_INTRA || cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) { double bits = 0; cabac_data_t* cabac = &state->search_cabac; cabac->update = 1; @@ -1289,7 +1321,14 @@ static double search_cu( if (state->frame->slicetype != UVG_SLICE_I) { // Reset HMVP to the beginning of this CU level search and add this CU as the mvp memcpy(&state->tile->frame->hmvp_lut[ctu_row_mul_five], hmvp_lut, sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS); - state->tile->frame->hmvp_size[ctu_row] = hmvp_lut_size; + state->tile->frame->hmvp_size[ctu_row] = hmvp_lut_size; + } + if (state->encoder_control->cfg.ibc) { + 
memcpy(&state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five], hmvp_lut_ibc, sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS); + state->tile->frame->hmvp_size_ibc[ctu_row] = hmvp_lut_size_ibc; + } + // Add candidate when in inter slice or ibc is enabled + if(state->frame->slicetype != UVG_SLICE_I || state->encoder_control->cfg.ibc) { uvg_hmvp_add_mv(state, x, y, cu_width, cu_width, cur_cu); } } @@ -1311,7 +1350,14 @@ static double search_cu( if (state->frame->slicetype != UVG_SLICE_I) { // Reset HMVP to the beginning of this CU level search and add this CU as the mvp memcpy(&state->tile->frame->hmvp_lut[ctu_row_mul_five], hmvp_lut, sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS); - state->tile->frame->hmvp_size[ctu_row] = hmvp_lut_size; + state->tile->frame->hmvp_size[ctu_row] = hmvp_lut_size; + } + if (state->encoder_control->cfg.ibc) { + memcpy(&state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five], hmvp_lut_ibc, sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS); + state->tile->frame->hmvp_size_ibc[ctu_row] = hmvp_lut_size_ibc; + } + // Add candidate when in inter slice or ibc is enabled + if(state->frame->slicetype != UVG_SLICE_I || state->encoder_control->cfg.ibc) { uvg_hmvp_add_mv(state, x, y, cu_width, cu_width, cur_cu); } } diff --git a/src/search_ibc.c b/src/search_ibc.c new file mode 100644 index 00000000..44f9ac50 --- /dev/null +++ b/src/search_ibc.c @@ -0,0 +1,1389 @@ +/***************************************************************************** + * This file is part of uvg266 VVC encoder. + * + * Copyright (c) 2022, Tampere University, ITU/ISO/IEC, project contributors + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright notice, this + * list of conditions and the following disclaimer in the documentation and/or + * other materials provided with the distribution. + * + * * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS + ****************************************************************************/ + +#include "search_ibc.h" +#include "search_inter.h" + +#include +#include + +#include "cabac.h" +#include "encoder.h" +#include "encode_coding_tree.h" +#include "image.h" +#include "imagelist.h" +#include "inter.h" +#include "uvg266.h" +#include "rdo.h" +#include "search.h" +#include "strategies/strategies-ipol.h" +#include "strategies/strategies-picture.h" +#include "transform.h" +#include "videoframe.h" + +typedef struct { + encoder_state_t *state; + + /** + * \brief Current frame + */ + const uvg_picture *pic; + + /** + * \brief Top-left corner of the PU + */ + vector2d_t origin; + int32_t width; + int32_t height; + + mv_t mv_cand[2][2]; + inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS]; + int32_t num_merge_cand; + + uvg_mvd_cost_func *mvd_cost_func; + + /** + * \brief Possible optimized SAD implementation for the width, leave as + * NULL for arbitrary-width blocks + */ + optimized_sad_func_ptr_t optimized_sad; + + lcu_t *lcu; + +} ibc_search_info_t; + + + + + +/** + * \return True if referred block is within current tile. 
+ */ +static INLINE bool intmv_within_ibc_range(const ibc_search_info_t *info, int x, int y) +{ + bool negative_values = x <= 0 && y <= 0; + bool mv_range_valid = ((-y >= info->height) || (-x >= info->width)) && // Must be block height/width away from the block + SUB_SCU(info->origin.y) >= -y && // Y vector must be inside the current CTU + (-x <= IBC_BUFFER_WIDTH-LCU_WIDTH) && // X must be inside the buffer + info->origin.x + x >= 0; // Don't go outside of the frame + + + return negative_values && mv_range_valid; +} + +static INLINE bool fracmv_within_ibc_range(const ibc_search_info_t *info, int x, int y) +{ + return intmv_within_ibc_range( + info, + x >> INTERNAL_MV_PREC, + y >> INTERNAL_MV_PREC); +} + + +static uint32_t calculate_ibc_cost_satd(const encoder_state_t *state, lcu_t* lcu, int32_t x, int32_t y, int32_t width, int32_t mv_x, int32_t mv_y) +{ + const int x_scu = SUB_SCU(x); + const int y_scu = SUB_SCU(y); + + cu_info_t *cur_cu = LCU_GET_CU_AT_PX(lcu, x_scu, y_scu); + + cu_info_t cu_backup = *cur_cu; + uint32_t cost = MAX_INT; + + + const uint32_t offset = x_scu + y_scu * LCU_WIDTH; + const uint32_t offset_c = x_scu / 2 + y_scu / 2 * LCU_WIDTH_C; + + cur_cu->type = CU_IBC; + cur_cu->inter.mv_dir = 1; + cur_cu->skipped = false; + cur_cu->merged = false; + cur_cu->inter.mv_cand0 = 0; + cur_cu->joint_cb_cr = 0; + cur_cu->inter.mv[0][0] = mv_x * (1 << INTERNAL_MV_PREC);; + cur_cu->inter.mv[0][1] = mv_y * (1 << INTERNAL_MV_PREC);; + + uvg_inter_recon_cu(state, lcu, x, y, width, true, state->encoder_control->chroma_format != UVG_CSP_400); + + *cur_cu = cu_backup; + + cost = uvg_satd_any_size(width, + width, + lcu->rec.y + offset, + LCU_WIDTH, + &state->tile->frame->source->y[y * state->tile->frame->source->stride + x], + state->tile->frame->source->stride) >> (UVG_BIT_DEPTH - 8); + + if(state->encoder_control->chroma_format != UVG_CSP_400) { + cost += uvg_satd_any_size(width / 2, + width / 2, + lcu->rec.u + offset_c, + LCU_WIDTH_C, + 
&state->tile->frame->source->u[(y / 2) * (state->tile->frame->source->stride / 2) + (x / 2)], + state->tile->frame->source->stride / 2) >> (UVG_BIT_DEPTH - 8); + cost += uvg_satd_any_size(width / 2, + width / 2, + lcu->rec.v + offset_c, + LCU_WIDTH_C, + &state->tile->frame->source->v[(y / 2) * (state->tile->frame->source->stride / 2) + (x / 2)], + state->tile->frame->source->stride / 2) >> (UVG_BIT_DEPTH - 8); + } + + return cost; +} + + +static uint32_t calculate_ibc_cost_sad(const encoder_state_t *state, optimized_sad_func_ptr_t optimized_sad, lcu_t* lcu, int32_t x, int32_t y, int32_t width, int32_t mv_x, int32_t mv_y) +{ + cu_info_t *cur_cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y)); + + cu_info_t cu_backup = *cur_cu; + uint32_t cost = MAX_INT; + + const int x_scu = SUB_SCU(x); + const int y_scu = SUB_SCU(y); + const uint32_t offset = x_scu + y_scu * LCU_WIDTH; + const uint32_t offset_c = x_scu / 2 + y_scu / 2 * LCU_WIDTH_C; + + cur_cu->type = CU_IBC; + cur_cu->inter.mv_dir = 1; + cur_cu->skipped = false; + cur_cu->merged = false; + cur_cu->inter.mv_cand0 = 0; + cur_cu->joint_cb_cr = 0; + cur_cu->inter.mv[0][0] = mv_x * (1 << INTERNAL_MV_PREC);; + cur_cu->inter.mv[0][1] = mv_y * (1 << INTERNAL_MV_PREC);; + + uvg_inter_recon_cu(state, lcu, x, y, width, true, state->encoder_control->chroma_format != UVG_CSP_400); + + *cur_cu = cu_backup; + + if (optimized_sad != NULL) { + cost = optimized_sad(lcu->rec.y + offset, &state->tile->frame->source->y[y * state->tile->frame->source->stride + x], width, LCU_WIDTH, state->tile->frame->source->stride); + if(state->encoder_control->chroma_format != UVG_CSP_400) { + cost += optimized_sad(lcu->rec.u + offset_c, &state->tile->frame->source->u[(y / 2) * state->tile->frame->source->stride / 2 + x / 2], width / 2, LCU_WIDTH_C, state->tile->frame->source->stride / 2); + cost += optimized_sad(lcu->rec.v + offset_c, &state->tile->frame->source->v[(y / 2) * state->tile->frame->source->stride / 2 + x / 2], width / 2, LCU_WIDTH_C, 
state->tile->frame->source->stride / 2); + } + } else { + cost = uvg_reg_sad(lcu->rec.y + offset, &state->tile->frame->source->y[y * state->tile->frame->source->stride + x], width,width, LCU_WIDTH, state->tile->frame->source->stride); + if(state->encoder_control->chroma_format != UVG_CSP_400) { + cost += uvg_reg_sad(lcu->rec.u + offset_c, &state->tile->frame->source->u[(y / 2) * state->tile->frame->source->stride / 2 + x / 2], width / 2, width / 2, LCU_WIDTH_C, state->tile->frame->source->stride / 2); + cost += uvg_reg_sad(lcu->rec.v + offset_c, &state->tile->frame->source->v[(y / 2) * state->tile->frame->source->stride / 2 + x / 2], width / 2, width / 2, LCU_WIDTH_C, state->tile->frame->source->stride / 2); + } + } + + return cost; +} + +static bool check_mv_cost_satd(ibc_search_info_t *info, + int x, + int y, + double *best_cost, + double* best_bits, + vector2d_t *best_mv) +{ + + } +/** + * \brief Calculate cost for an integer motion vector. + * + * Updates best_mv, best_cost and best_bitcost to the new + * motion vector if it yields a lower cost than the current one. + * + * If the motion vector violates the MV constraints for tiles or WPP, the + * cost is not set. + * + * \return true if best_mv was changed, false otherwise + */ +static bool check_mv_cost(ibc_search_info_t *info, + int x, + int y, + double *best_cost, + double* best_bits, + vector2d_t *best_mv) +{ + if (!intmv_within_ibc_range(info, x, y)) return false; + + double bitcost = 0; + double cost = MAX_DOUBLE; + + cost = calculate_ibc_cost_sad(info->state, info->optimized_sad, info->lcu, info->origin.x, info->origin.y, info->width, x, y); + + if (cost >= *best_cost) return false; + + cost += info->mvd_cost_func( + info->state, + x, y, INTERNAL_MV_PREC, + info->mv_cand, + NULL, + 0, + NULL, + &bitcost + ); + + if (cost >= *best_cost) return false; + + // Set to motion vector in internal pixel precision. 
+ best_mv->x = x * (1 << INTERNAL_MV_PREC); + best_mv->y = y * (1 << INTERNAL_MV_PREC); + *best_cost = cost; + *best_bits = bitcost; + + return true; +} + + +static unsigned get_ep_ex_golomb_bitcost(unsigned symbol) +{ + // Calculate 2 * log2(symbol ) + + unsigned bins = 0; + symbol += 0; + if (symbol >= 1 << 8) { bins += 16; symbol >>= 8; } + if (symbol >= 1 << 4) { bins += 8; symbol >>= 4; } + if (symbol >= 1 << 2) { bins += 4; symbol >>= 2; } + if (symbol >= 1 << 1) { bins += 2; } + + // TODO: It might be a good idea to put a small slope on this function to + // make sure any search function that follows the gradient heads towards + // a smaller MVD, but that would require fractinal costs and bits being + // used everywhere in inter search. + // return num_bins + 0.001 * symbol; + + return bins; +} + + +/** + * \brief Checks if mv is one of the merge candidates. + * \return true if found else return false + */ +static bool mv_in_merge(const ibc_search_info_t *info, vector2d_t mv) +{ + for (int i = 0; i < info->num_merge_cand; ++i) { + if (info->merge_cand[i].dir == 3) continue; + const vector2d_t merge_mv = { + info->merge_cand[i].mv[info->merge_cand[i].dir - 1][0], + info->merge_cand[i].mv[info->merge_cand[i].dir - 1][1] + }; + if (merge_mv.x == mv.x * (1 << (INTERNAL_MV_PREC)) && merge_mv.y == mv.y * (1 << (INTERNAL_MV_PREC))) { + return true; + } + } + return false; +} + + +/** + * \brief Select starting point for integer motion estimation search. + * + * Checks the zero vector, extra_mv and merge candidates and updates + * best_mv to the best one. + */ +static void select_starting_point(ibc_search_info_t *info, + vector2d_t extra_mv, + double *best_cost, + double* best_bits, + vector2d_t *best_mv) +{ + // Check the 0-vector, so we can ignore all 0-vectors in the merge cand list. + check_mv_cost(info, -info->width, 0, best_cost, best_bits, best_mv); + + // Change to integer precision. 
+ extra_mv.x >>= INTERNAL_MV_PREC; + extra_mv.y >>= INTERNAL_MV_PREC; + + int origin_x = info->origin.x; + int origin_y = info->origin.y; + + int ibc_origin_x = origin_x / UVG_HASHMAP_BLOCKSIZE; + int ibc_origin_y = origin_y / UVG_HASHMAP_BLOCKSIZE; + + // Check mv_in if it's not one of the merge candidates. + if ((extra_mv.x != 0 || extra_mv.y != 0) && !mv_in_merge(info, extra_mv)) { + check_mv_cost(info, extra_mv.x, extra_mv.y, best_cost, best_bits, best_mv); + } + + if (info->state->encoder_control->cfg.ibc & 2) { + int own_location = ((origin_x & 0xffff) << 16) | (origin_y & 0xffff); + + uint32_t ibc_buffer_row = origin_y / LCU_WIDTH; + + uint32_t crc = info->state->tile->frame->ibc_hashmap_pos_to_hash + [(origin_y / UVG_HASHMAP_BLOCKSIZE) * + info->state->tile->frame->ibc_hashmap_pos_to_hash_stride + + origin_x / UVG_HASHMAP_BLOCKSIZE]; + + uvg_hashmap_node_t *result = uvg_hashmap_search( + info->state->tile->frame->ibc_hashmap_row[ibc_buffer_row], crc); + + while (result != NULL) { + if (result->key == crc && result->value != own_location) { + int pos_x = result->value >> 16; + int pos_y = result->value & 0xffff; + int mv_x = pos_x - origin_x; + int mv_y = pos_y - origin_y; + + int ibc_pos_x = pos_x / UVG_HASHMAP_BLOCKSIZE; + int ibc_pos_y = pos_y / UVG_HASHMAP_BLOCKSIZE; + + bool full_block = true; + for (int ibc_x = 0; ibc_x < info->width / UVG_HASHMAP_BLOCKSIZE; ibc_x++) { + for (int ibc_y = 0; ibc_y < info->height / UVG_HASHMAP_BLOCKSIZE; ibc_y++) { + uint32_t neighbor_crc = info->state->tile->frame->ibc_hashmap_pos_to_hash + [(ibc_pos_y+ibc_y) * info->state->tile->frame->ibc_hashmap_pos_to_hash_stride + ibc_pos_x + ibc_x]; + uint32_t other_crc = info->state->tile->frame->ibc_hashmap_pos_to_hash + [(ibc_origin_y+ibc_y) * info->state->tile->frame->ibc_hashmap_pos_to_hash_stride + ibc_origin_x + ibc_x]; + if (other_crc != neighbor_crc) { + full_block = false; + break; + } + } + if (!full_block) break; + } + if (full_block) check_mv_cost(info, mv_x, mv_y, 
best_cost, best_bits, best_mv); + } + result = result->next; + } + } + + // Go through candidates + for (int32_t i = 0; i < info->num_merge_cand; ++i) { + int32_t x = (info->merge_cand[i].mv[info->merge_cand[i].dir - 1][0] + (1 << (INTERNAL_MV_PREC - 1)) ) >> INTERNAL_MV_PREC; + int32_t y = (info->merge_cand[i].mv[info->merge_cand[i].dir - 1][1] + (1 << (INTERNAL_MV_PREC - 1)) ) >> INTERNAL_MV_PREC; + + check_mv_cost(info, x, y, best_cost, best_bits, best_mv); + } +} + +static double get_ibc_mvd_coding_cost(const encoder_state_t* state, + const cabac_data_t* cabac, + const int32_t mvd_hor, + const int32_t mvd_ver) +{ + double bitcost = 4 << CTX_FRAC_BITS; + const vector2d_t abs_mvd = { abs(mvd_hor), abs(mvd_ver) }; + bitcost += abs_mvd.x == 1 ? 1 << CTX_FRAC_BITS : (0 * (1 << CTX_FRAC_BITS)); + bitcost += abs_mvd.y == 1 ? 1 << CTX_FRAC_BITS : (0 * (1 << CTX_FRAC_BITS)); + + bitcost += get_ep_ex_golomb_bitcost(abs_mvd.x) << CTX_FRAC_BITS; + bitcost += get_ep_ex_golomb_bitcost(abs_mvd.y) << CTX_FRAC_BITS; + + // Round and shift back to integer bits. + return bitcost / (1 << CTX_FRAC_BITS); +} + + +static int select_ibc_mv_cand(const encoder_state_t *state, + mv_t mv_cand[2][2], + int32_t mv_x, + int32_t mv_y, + double*cost_out) +{ + const bool same_cand = + (mv_cand[0][0] == mv_cand[1][0] && mv_cand[0][1] == mv_cand[1][1]); + + if (same_cand && !cost_out) { + // Pick the first one if both candidates are the same. 
+ return 0; + } + + double (*mvd_coding_cost)(const encoder_state_t * const state, + const cabac_data_t*, + int32_t, int32_t); + if (state->encoder_control->cfg.mv_rdo) { + mvd_coding_cost = uvg_get_mvd_coding_cost_cabac; + } else { + mvd_coding_cost = get_ibc_mvd_coding_cost; + } + + vector2d_t mvd = { mv_x - mv_cand[0][0], mv_y - mv_cand[0][1] }; + + uvg_change_precision_vector2d(INTERNAL_MV_PREC, UVG_IMV_FPEL, &mvd); + + double cand1_cost = mvd_coding_cost( + state, &state->cabac, + mvd.x, + mvd.y); + + double cand2_cost; + if (same_cand) { + cand2_cost = cand1_cost; + } else { + vector2d_t mvd2 = { mv_x - mv_cand[1][0], mv_y - mv_cand[1][1] }; + uvg_change_precision_vector2d(INTERNAL_MV_PREC, UVG_IMV_FPEL, &mvd2); + cand2_cost = mvd_coding_cost( + state, &state->cabac, + mvd2.x, + mvd2.y); + } + + if (cost_out) { + *cost_out = MIN(cand1_cost, cand2_cost); + } + + // Pick the second candidate if it has lower cost. + return cand2_cost < cand1_cost ? 1 : 0; +} + +static double calc_ibc_mvd_cost(const encoder_state_t *state, + int x, + int y, + int mv_shift, + mv_t mv_cand[2][2], + inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], + int16_t num_cand, + int32_t ref_idx, + double* bitcost) +{ + double temp_bitcost = 0; + uint32_t merge_idx; + int8_t merged = 0; + + x *= 1 << mv_shift; + y *= 1 << mv_shift; + + // Check every candidate to find a match + for(merge_idx = 0; merge_idx < (uint32_t)num_cand; merge_idx++) { + if (merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][0] == x && + merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][1] == y) { + temp_bitcost += merge_idx; + merged = 1; + break; + } + } + + // Check mvd cost only if mv is not merged + if (!merged) { + double mvd_cost = 0; + select_ibc_mv_cand(state, mv_cand, x, y, &mvd_cost); + temp_bitcost += mvd_cost; + } + *bitcost = temp_bitcost; + return temp_bitcost * state->lambda; +} + + +static bool early_terminate(ibc_search_info_t *info, + double *best_cost, + double* best_bits, + vector2d_t 
*best_mv) +{ + static const vector2d_t small_hexbs[7] = { + { 0, -1 }, { -1, 0 }, { 0, 1 }, { 1, 0 }, + { 0, -1 }, { -1, 0 }, { 0, 0 }, + }; + + vector2d_t mv = { best_mv->x >> INTERNAL_MV_PREC, best_mv->y >> INTERNAL_MV_PREC }; + + int first_index = 0; + int last_index = 3; + + for (int k = 0; k < 2; ++k) { + double threshold; + if (info->state->encoder_control->cfg.me_early_termination == + UVG_ME_EARLY_TERMINATION_SENSITIVE) + { + threshold = *best_cost * 0.95; + } else { + threshold = *best_cost; + } + + int best_index = 6; + for (int i = first_index; i <= last_index; i++) { + int x = mv.x + small_hexbs[i].x; + int y = mv.y + small_hexbs[i].y; + + if (check_mv_cost(info, x, y, best_cost, best_bits, best_mv)) { + best_index = i; + } + } + + // Adjust the movement vector + mv.x += small_hexbs[best_index].x; + mv.y += small_hexbs[best_index].y; + + // If best match is not better than threshold, we stop the search. + if (*best_cost >= threshold) { + return true; + } + + first_index = (best_index + 3) % 4; + last_index = first_index + 2; + } + return false; +} + + + +/** + * \brief Do motion search using the HEXBS algorithm. + * + * \param info search info + * \param extra_mv extra motion vector to check + * \param steps how many steps are done at maximum before exiting, does not affect the final step + * + * Motion vector is searched by first searching iteratively with the large + * hexagon pattern until the best match is at the center of the hexagon. + * As a final step a smaller hexagon is used to check the adjacent pixels. + * + * If a non 0,0 predicted motion vector predictor is given as extra_mv, + * the 0,0 vector is also tried. This is hoped to help in the case where + * the predicted motion vector is way off. In the future even more additional + * points like 0,0 might be used, such as vectors from top or left. 
+ */ +static void hexagon_search(ibc_search_info_t *info, + vector2d_t extra_mv, + uint32_t steps, + double *best_cost, + double* best_bits, + vector2d_t *best_mv) +{ + // The start of the hexagonal pattern has been repeated at the end so that + // the indices between 1-6 can be used as the start of a 3-point list of new + // points to search. + // 6--1,7 + // / \ =) + // 5 0 2,8 + // \ / + // 4---3 + static const vector2d_t large_hexbs[9] = { + { 0, 0 }, + { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 }, { -1, -2 }, + { 1, -2 }, { 2, 0 } + }; + // This is used as the last step of the hexagon search. + // 1 + // 2 0 3 + // 4 + static const vector2d_t small_hexbs[9] = { + { 0, 0 }, + { 0, -1 }, { -1, 0 }, { 1, 0 }, { 0, 1 }, + { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } + }; + + vector2d_t mv = { best_mv->x >> INTERNAL_MV_PREC, best_mv->y >> INTERNAL_MV_PREC }; + + // Current best index, either to merge_cands, large_hexbs or small_hexbs. + int best_index = 0; + + // Search the initial 7 points of the hexagon. + for (int i = 1; i < 7; ++i) { + if (check_mv_cost(info, mv.x + large_hexbs[i].x, mv.y + large_hexbs[i].y, best_cost, best_bits, best_mv)) { + best_index = i; + } + } + + // Iteratively search the 3 new points around the best match, until the best + // match is in the center. + while (best_index != 0 && steps != 0) { + // decrement count if enabled + if (steps > 0) steps -= 1; + + // Starting point of the 3 offsets to be searched. + unsigned start; + if (best_index == 1) { + start = 6; + } else if (best_index == 8) { + start = 1; + } else { + start = best_index - 1; + } + + // Move the center to the best match. + mv.x += large_hexbs[best_index].x; + mv.y += large_hexbs[best_index].y; + best_index = 0; + + // Iterate through the next 3 points. 
+ for (int i = 0; i < 3; ++i) { + vector2d_t offset = large_hexbs[start + i]; + if (check_mv_cost(info, mv.x + offset.x, mv.y + offset.y, best_cost, best_bits, best_mv)) { + best_index = start + i; + } + } + } + + // Move the center to the best match. + //mv.x += large_hexbs[best_index].x; + //mv.y += large_hexbs[best_index].y; + + // Do the final step of the search with a small pattern. + for (int i = 1; i < 9; ++i) { + check_mv_cost(info, mv.x + small_hexbs[i].x, mv.y + small_hexbs[i].y, best_cost, best_bits, best_mv); + } +} + +/** +* \brief Do motion search using the diamond algorithm. +* +* \param info search info +* \param extra_mv extra motion vector to check +* \param steps how many steps are done at maximum before exiting +* +* Motion vector is searched by searching iteratively with a diamond-shaped +* pattern. We take care of not checking the direction we came from, but +* further checking for avoiding visits to already visited points is not done. +* +* If a non 0,0 predicted motion vector predictor is given as extra_mv, +* the 0,0 vector is also tried. This is hoped to help in the case where +* the predicted motion vector is way off. In the future even more additional +* points like 0,0 might be used, such as vectors from top or left. 
+**/ +static void diamond_search(ibc_search_info_t *info, + vector2d_t extra_mv, + uint32_t steps, + double *best_cost, + double* best_bits, + vector2d_t *best_mv) +{ + enum diapos { + DIA_UP = 0, + DIA_RIGHT = 1, + DIA_LEFT = 2, + DIA_DOWN = 3, + DIA_CENTER = 4, + }; + + // a diamond shape with the center included + // 0 + // 2 4 1 + // 3 + static const vector2d_t diamond[5] = { + {0, -1}, {1, 0}, {0, 1}, {-1, 0}, + {0, 0} + }; + + // current motion vector + vector2d_t mv = { best_mv->x >> INTERNAL_MV_PREC, best_mv->y >> INTERNAL_MV_PREC }; + + // current best index + enum diapos best_index = DIA_CENTER; + + // initial search of the points of the diamond + for (int i = 0; i < 5; ++i) { + if (check_mv_cost(info, mv.x + diamond[i].x, mv.y + diamond[i].y, best_cost, best_bits, best_mv)) { + best_index = i; + } + } + + if (best_index == DIA_CENTER) { + // the center point was the best in initial check + return; + } + + // Move the center to the best match. + mv.x += diamond[best_index].x; + mv.y += diamond[best_index].y; + + // the arrival direction, the index of the diamond member that will be excluded + enum diapos from_dir = DIA_CENTER; + + // whether we found a better candidate this iteration + uint8_t better_found; + + do { + better_found = 0; + // decrement count if enabled + if (steps > 0) steps -= 1; + + // search the points of the diamond + for (int i = 0; i < 4; ++i) { + // this is where we came from so it's checked already + if (i == from_dir) continue; + + if (check_mv_cost(info, mv.x + diamond[i].x, mv.y + diamond[i].y, best_cost, best_bits, best_mv)) { + best_index = i; + better_found = 1; + } + } + + if (better_found) { + // Move the center to the best match. 
+ mv.x += diamond[best_index].x; + mv.y += diamond[best_index].y; + + // record where we came from to the next iteration + // the xor operation flips the orientation + from_dir = best_index ^ 0x3; + } + } while (better_found && steps != 0); + // and we're done +} + + +/** + * \brief Check if an identical merge candidate exists in a list + * + * \param all_cand Full list of available merge candidates + * \param cand_to_add Merge candidate to be checked for duplicates + * \param added_idx_list List of indices of unique merge candidates + * \param list_size Size of the list + * + * \return Does an identical candidate exist in list + */ +static bool merge_candidate_in_list(inter_merge_cand_t *all_cands, + inter_merge_cand_t *cand_to_add, + unit_stats_map_t *merge) +{ + bool found = false; + for (int i = 0; i < merge->size && !found; ++i) { + int key = merge->keys[i]; + inter_merge_cand_t * list_cand = &all_cands[merge->unit[key].merge_idx]; + + found = + cand_to_add->mv[0][0] == list_cand->mv[0][0] && + cand_to_add->mv[0][1] == list_cand->mv[0][1]; + } + + return found; +} + +/** + * \brief Collect PU parameters and costs at this depth. 
+ * + * \param state encoder state + * \param x_cu x-coordinate of the containing CU + * \param y_cu y-coordinate of the containing CU + * \param depth depth of the CU in the quadtree + * \param part_mode partition mode of the CU + * \param i_pu index of the PU in the CU + * \param lcu containing LCU + * + * \param amvp Return searched AMVP PUs sorted by costs + * \param merge Return searched Merge PUs sorted by costs + */ +static void search_pu_ibc(encoder_state_t * const state, + int x_cu, int y_cu, + int depth, + part_mode_t part_mode, + int i_pu, + unit_stats_map_t *amvp, + unit_stats_map_t *merge, + ibc_search_info_t *info) +{ + const uvg_config *cfg = &state->encoder_control->cfg; + const videoframe_t * const frame = state->tile->frame; + const int width_cu = LCU_WIDTH >> depth; + const int x = PU_GET_X(part_mode, width_cu, x_cu, i_pu); + const int y = PU_GET_Y(part_mode, width_cu, y_cu, i_pu); + const int width = PU_GET_W(part_mode, width_cu, i_pu); + const int height = PU_GET_H(part_mode, width_cu, i_pu); + + // Merge candidate A1 may not be used for the second PU of Nx2N, nLx2N and + // nRx2N partitions. + const bool merge_a1 = i_pu == 0 || width >= height; + // Merge candidate B1 may not be used for the second PU of 2NxN, 2NxnU and + // 2NxnD partitions. + const bool merge_b1 = i_pu == 0 || width <= height; + + + lcu_t *lcu = info->lcu; + const int x_local = SUB_SCU(x); + const int y_local = SUB_SCU(y); + cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, x_local, y_local); + cur_pu->type = CU_IBC; + cur_pu->part_size = part_mode; + cur_pu->depth = depth; + cur_pu->tr_depth = depth; + cur_pu->qp = state->qp; + cur_pu->inter.mv_dir = 1; + + // Default to candidate 0 + CU_SET_MV_CAND(cur_pu, 0, 0); + + FILL(*info, 0); + + info->state = state; + info->pic = frame->source; + info->origin.x = x; + info->origin.y = y; + info->width = width; + info->height = height; + info->mvd_cost_func = cfg->mv_rdo ? 
uvg_calc_ibc_mvd_cost_cabac : calc_ibc_mvd_cost; + info->optimized_sad = uvg_get_optimized_sad(width); + info->lcu = lcu; + + // Search for merge mode candidates + info->num_merge_cand = uvg_inter_get_merge_cand( + state, + x, y, + width, height, + merge_a1, merge_b1, + info->merge_cand, + lcu); + + // Merge Analysis starts here + merge->size = 0; + for (int i = 0; i < MRG_MAX_NUM_CANDS; ++i) { + merge->keys[i] = -1; + merge->cost[i] = MAX_DOUBLE; + } + + const double merge_flag_cost = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_merge_flag_ext_model, 1); +#ifdef COMPLETE_PRED_MODE_BITS + // Technically counting these bits would be correct, however counting + // them universally degrades quality so this block is disabled by default + const double no_skip_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_skip_flag_model[uvg_get_skip_context(x, y, lcu, NULL)], 0); +#else + const double no_skip_flag = 0; +#endif + // Check motion vector constraints and perform rough search + for (int merge_idx = 0; merge_idx < info->num_merge_cand; ++merge_idx) { + + inter_merge_cand_t *cur_cand = &info->merge_cand[merge_idx]; + cur_pu->inter.mv_dir = cur_cand->dir; + cur_pu->inter.mv[0][0] = cur_cand->mv[0][0]; + cur_pu->inter.mv[0][1] = cur_cand->mv[0][1]; + + + bool is_duplicate = merge_candidate_in_list(info->merge_cand, cur_cand, merge); + + // Don't try merge candidates that don't satisfy mv constraints. 
+ // Don't add duplicates to list + if ((!fracmv_within_ibc_range(info, cur_pu->inter.mv[0][0], cur_pu->inter.mv[0][1])) || + is_duplicate) + { + continue; + } + uvg_inter_pred_pu(state, info->lcu, x_cu, y_cu, width_cu, true, false, i_pu); + merge->unit[merge->size] = *cur_pu; + merge->unit[merge->size].type = CU_IBC; + merge->unit[merge->size].merge_idx = merge_idx; + merge->unit[merge->size].merged = true; + merge->unit[merge->size].skipped = false; + + double bits = merge_flag_cost + merge_idx + CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.cu_merge_idx_ext_model), merge_idx != 0); + if(state->encoder_control->cfg.rdo >= 2 && cur_pu->part_size == SIZE_2Nx2N) { + uvg_cu_cost_inter_rd2(state, x, y, depth, &merge->unit[merge->size], lcu, &merge->cost[merge->size], &bits); + } + else { + merge->cost[merge->size] = uvg_satd_any_size(width, height, + lcu->rec.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH, + lcu->ref.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH); + bits += no_skip_flag; + merge->cost[merge->size] += bits * info->state->lambda_sqrt; + } + // Add cost of coding the merge index + merge->bits[merge->size] = bits; + merge->keys[merge->size] = merge->size; + + + merge->size++; + } + + assert(merge->size <= MAX_UNIT_STATS_MAP_SIZE); + uvg_sort_keys_by_cost(merge); + + // Try early skip decision on just one merge candidate if available + int num_rdo_cands = MIN(1, merge->size); + + // Early Skip Mode Decision + bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400; + if (cfg->early_skip && cur_pu->part_size == SIZE_2Nx2N) { + for (int merge_key = 0; merge_key < num_rdo_cands; ++merge_key) { + if(cfg->rdo >= 2 && merge->unit[merge->keys[merge_key]].skipped) { + merge->size = 1; + merge->bits[0] = merge->bits[merge->keys[merge_key]]; + merge->cost[0] = merge->cost[merge->keys[merge_key]]; + merge->unit[0] = merge->unit[merge->keys[merge_key]]; + merge->keys[0] = 0; + } + else if(cfg->rdo < 2) { + // Reconstruct blocks with merge candidate. 
+ // Check luma CBF. Then, check chroma CBFs if luma CBF is not set + // and chroma exists. + // Early terminate if merge candidate with zero CBF is found. + int merge_idx = merge->unit[merge->keys[merge_key]].merge_idx; + cur_pu->inter.mv_dir = info->merge_cand[merge_idx].dir; + cur_pu->inter.mv[0][0] = info->merge_cand[merge_idx].mv[0][0]; + cur_pu->inter.mv[0][1] = info->merge_cand[merge_idx].mv[0][1]; + uvg_lcu_fill_trdepth(lcu, x, y, depth, MAX(1, depth), UVG_BOTH_T); + uvg_inter_recon_cu(state, lcu, x, y, width, true, false); + uvg_quantize_lcu_residual(state, true, false, false, x, y, depth, cur_pu, lcu, true, UVG_BOTH_T); + + if (cbf_is_set(cur_pu->cbf, depth, COLOR_Y)) { + continue; + } + else if (has_chroma) { + uvg_inter_recon_cu(state, lcu, x, y, width, false, has_chroma); + uvg_quantize_lcu_residual(state, false, has_chroma, + false, /*we are only checking for lack of coeffs so no need to check jccr*/ + x, y, depth, cur_pu, lcu, true, UVG_BOTH_T); + if (!cbf_is_set_any(cur_pu->cbf, depth)) { + cur_pu->type = CU_IBC; + cur_pu->merge_idx = merge_idx; + cur_pu->skipped = true; + + merge->size = 1; + merge->cost[0] = (merge_idx )* state->lambda_sqrt; // TODO: Check this + merge->bits[0] = merge_idx; // TODO: Check this + merge->unit[0] = *cur_pu; + return; + } + } + } + } + } + + // AMVP search starts here + amvp[0].size = 0; + amvp[1].size = 0; + amvp[2].size = 0; + amvp[0].cost[0] = MAX_DOUBLE; + + + // Do the motion search + + uvg_inter_get_mv_cand(info->state, + info->origin.x, + info->origin.y, + info->width, + info->height, + info->mv_cand, + cur_pu, + lcu, + NULL); + + vector2d_t best_mv = { 0, 0 }; + + double best_cost = MAX_DOUBLE; + double best_bits = MAX_INT; + + // Select starting point from among merge candidates. These should + // include both mv_cand vectors and (0, 0). 
+ select_starting_point(info, best_mv, &best_cost, &best_bits, &best_mv); + bool skip_me = early_terminate(info, &best_cost, &best_bits, &best_mv); + + if (!(info->state->encoder_control->cfg.me_early_termination && skip_me)) { + + switch (cfg->ime_algorithm) { + case UVG_IME_DIA: + diamond_search(info, best_mv, info->state->encoder_control->cfg.me_max_steps, + &best_cost, &best_bits, &best_mv); + break; + default: + hexagon_search(info, best_mv, info->state->encoder_control->cfg.me_max_steps, + &best_cost, &best_bits, &best_mv); + break; + } + } + + if (best_cost < MAX_DOUBLE) { + // Recalculate inter cost with SATD. + best_cost = calculate_ibc_cost_satd( + info->state, + lcu, + info->origin.x, + info->origin.y, + info->width, + (best_mv.x >> INTERNAL_MV_PREC), + (best_mv.y >> INTERNAL_MV_PREC)); + best_cost += best_bits * info->state->lambda; + } + + + int cu_mv_cand = select_ibc_mv_cand(info->state, info->mv_cand, best_mv.x, best_mv.y, NULL); + + // Update best unipreds for biprediction + bool valid_mv = fracmv_within_ibc_range(info, best_mv.x, best_mv.y); + if (valid_mv && best_cost < MAX_DOUBLE) { + + // Map reference index to L0/L1 pictures + unit_stats_map_t *cur_map = &amvp[0]; + int entry = cur_map->size; + cu_info_t *unipred_pu = &cur_map->unit[entry]; + *unipred_pu = *cur_pu; + unipred_pu->type = CU_IBC; + unipred_pu->merged = false; + unipred_pu->skipped = false; + unipred_pu->inter.mv_dir = 1; + unipred_pu->inter.mv[0][0] = (mv_t)best_mv.x; + unipred_pu->inter.mv[0][1] = (mv_t)best_mv.y; + CU_SET_MV_CAND(unipred_pu, 0, cu_mv_cand); + + cur_map->cost[entry] = best_cost; + cur_map->bits[entry] = best_bits; + cur_map->keys[entry] = entry; + cur_map->size++; + } + + + assert(amvp[0].size <= MAX_UNIT_STATS_MAP_SIZE); + uvg_sort_keys_by_cost(&amvp[0]); + + int best_keys[2] = { + amvp[0].size > 0 ? amvp[0].keys[0] : 0, + amvp[1].size > 0 ? 
amvp[1].keys[0] : 0 + }; + + cu_info_t *best_unipred[2] = { + &amvp[0].unit[best_keys[0]], + &amvp[1].unit[best_keys[1]] + }; + + + if (state->encoder_control->cfg.rdo >= 2 && cur_pu->part_size == SIZE_2Nx2N) { + if (amvp[0].size) uvg_cu_cost_inter_rd2(state, x, y, depth, &amvp[0].unit[best_keys[0]], lcu, &amvp[0].cost[best_keys[0]], &amvp[0].bits[best_keys[0]]); + } + + + if(cfg->rdo < 2) { + int predmode_ctx; + + const int ibc_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.ibc_flag[0], 1) * 3; + const int skip_contest = uvg_get_skip_context(x, y, lcu, NULL, &predmode_ctx); + const double no_skip_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_skip_flag_model[skip_contest], 0); + + const double pred_mode_bits = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_pred_mode_model[predmode_ctx], 0); + const double total_bits = ibc_flag + no_skip_flag + pred_mode_bits; + if(amvp[0].size > 0) { + const uint8_t best_key = amvp[0].keys[0]; + amvp[0].bits[best_key] += total_bits; + amvp[0].cost[best_key] += (total_bits)* state->lambda; + } + } +} + +#include "threads.h" + +static int uvg_search_hash_cu_ibc(encoder_state_t* const state, + int x, int y, int depth, + lcu_t* lcu, + double* inter_cost, + double* inter_bitcost) +{ + const int x_cu = x; + const int y_cu = y; + const int part_mode = SIZE_2Nx2N; + const uvg_config *cfg = &state->encoder_control->cfg; + const videoframe_t * const frame = state->tile->frame; + const int width_cu = LCU_WIDTH >> depth; + const int width = PU_GET_W(part_mode, width_cu, 0); + const int height = PU_GET_H(part_mode, width_cu, 0); + + const bool merge_a1 = true; + const bool merge_b1 = true; + + ibc_search_info_t info; + + const int x_local = SUB_SCU(x); + const int y_local = SUB_SCU(y); + cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, x_local, y_local); + + cur_pu->type = CU_IBC; + cur_pu->part_size = part_mode; + cur_pu->depth = depth; + cur_pu->tr_depth = depth; + cur_pu->qp = state->qp; + + // Default to candidate 0 + 
CU_SET_MV_CAND(cur_pu, 0, 0); + + FILL(info, 0); + + info.state = state; + info.pic = frame->source; + info.origin.x = x; + info.origin.y = y; + info.width = width; + info.height = height; + info.mvd_cost_func = + cfg->mv_rdo ? uvg_calc_ibc_mvd_cost_cabac : calc_ibc_mvd_cost; + info.optimized_sad = uvg_get_optimized_sad(width); + info.lcu = lcu; + + // Search for merge mode candidates + info.num_merge_cand = uvg_inter_get_merge_cand( + state, + x, + y, + width, + height, + merge_a1, + merge_b1, + info.merge_cand, + lcu); + + double ibc_cost = MAX_DOUBLE; + double ibc_bitcost = MAX_DOUBLE; + + bool valid_mv = false; + + static double time_spent = 0.0; + static double search_time = 0.0; + static double crc_time = 0.0; + static int evaluations = 0; + static int hits = 0; + + + UVG_CLOCK_T hashmap_start_temp; + UVG_CLOCK_T hashmap_end_temp; + + + UVG_CLOCK_T hashmap_start_real_time; + UVG_CLOCK_T hashmap_end_real_time; + UVG_GET_TIME(&hashmap_start_real_time); + + int xx = x; + int yy = y; + + int best_mv_x = INT_MAX>>2; + int best_mv_y = INT_MAX>>2; + + int own_location = ((xx & 0xffff) << 16) | (yy & 0xffff); + + uint32_t ibc_buffer_row = yy / LCU_WIDTH; + + uint32_t crc = state->tile->frame->ibc_hashmap_pos_to_hash[(yy / UVG_HASHMAP_BLOCKSIZE)*state->tile->frame->ibc_hashmap_pos_to_hash_stride + xx / UVG_HASHMAP_BLOCKSIZE]; + + uvg_hashmap_node_t *result = uvg_hashmap_search(state->tile->frame->ibc_hashmap_row[ibc_buffer_row],crc); + + + bool found_block = false; + + int hashes_found = 0; + + while (result != NULL) { + if (hashes_found == 0 && result->size > 1000) { + //fprintf(stderr, "Found a block with %d elements\n", result->size); + //break; + } + if (result->key == crc && result->value != own_location) { + hashes_found++; + hits++; + int pos_x = result->value >> 16; + int pos_y = result->value & 0xffff; + int mv_x = pos_x - xx; + int mv_y = pos_y - yy; + if (pos_x <= xx - width && pos_y <= yy - height) { + valid_mv = intmv_within_ibc_range(&info, mv_x, mv_y); 
+ if (valid_mv) { + bool full_block = true; // Is the full block covered by the IBC? + for (int offset_x = UVG_HASHMAP_BLOCKSIZE; offset_x < width; offset_x+=UVG_HASHMAP_BLOCKSIZE) { + for (int offset_y = 0; offset_y < height; offset_y += UVG_HASHMAP_BLOCKSIZE) { + uint32_t crc_other_blocks = state->tile->frame->ibc_hashmap_pos_to_hash[ + ((yy+offset_y) / UVG_HASHMAP_BLOCKSIZE)*state->tile->frame->ibc_hashmap_pos_to_hash_stride + (xx+offset_x) / UVG_HASHMAP_BLOCKSIZE]; + + uint32_t crc_neighbor = state->tile->frame->ibc_hashmap_pos_to_hash[((pos_y+offset_y) / UVG_HASHMAP_BLOCKSIZE)*state->tile->frame->ibc_hashmap_pos_to_hash_stride + (pos_x+offset_x) / UVG_HASHMAP_BLOCKSIZE]; + + bool found_match = false; + if (crc_neighbor != crc_other_blocks) { + full_block = false; + break; + } + } + if (!full_block) { + break; + } + } + + + if (full_block) { + double cost = ibc_cost, bits = ibc_bitcost; + vector2d_t mv = { best_mv_x, best_mv_y}; + cost = calc_ibc_mvd_cost(state, mv_x, mv_y,INTERNAL_MV_PREC,info.mv_cand, info.merge_cand, info.num_merge_cand, NULL, &bits); + //double cost = get_ibc_mvd_coding_cost(state, &state->cabac, mv_x,mv_y) * state->lambda_sqrt; + //cost += + bool better_mv = cost < ibc_cost; + if (better_mv) { + best_mv_x = mv_x; + best_mv_y = mv_y; + ibc_cost = cost; + ibc_bitcost = bits; + fprintf(stderr, "Found best IBC!! 
%dx%d %dx%d: %d,%d\r\n", x,y, width,width, mv_x, mv_y); + found_block = true; + //break; + } + } + } + } + } + result = result->next; + } + + + UVG_GET_TIME(&hashmap_end_real_time); + time_spent += UVG_CLOCK_T_AS_DOUBLE(hashmap_end_real_time) - + UVG_CLOCK_T_AS_DOUBLE(hashmap_start_real_time); + //if (x > state->tile->frame->width-64 && y > state->tile->frame->height-64) + //fprintf(stderr, "Hashmap time: %f (crc: %f, search: %f) Evaluations: %d Hits: %d, hashed in this block: %d\n", time_spent,crc_time, search_time, evaluations, hits,hashes_found); + + if (!found_block) return; + + *inter_cost = ibc_cost; + *inter_bitcost = ibc_bitcost; + + uint32_t merge_idx; + int8_t merged = 0; + uint32_t temp_bitcost = 0; + + + cur_pu->inter.mv[0][0] = best_mv_x << INTERNAL_MV_PREC; + cur_pu->inter.mv[0][1] = best_mv_y << INTERNAL_MV_PREC; + + // Check every candidate to find a match + for(merge_idx = 0; merge_idx < (uint32_t)info.num_merge_cand; merge_idx++) { + if (info.merge_cand[merge_idx].dir == 1 && info.merge_cand[merge_idx].mv[0][0] == cur_pu->inter.mv[0][0] && + info.merge_cand[merge_idx].mv[0][1] == cur_pu->inter.mv[0][1]) { + temp_bitcost += merge_idx; + merged = 1; + fprintf(stderr, "Merged!\r\n"); + break; + } + } + + cur_pu->merged = merged; + cur_pu->merge_idx = merge_idx; + cur_pu->skipped = merged; + + + const int ibc_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.ibc_flag[0], 1); + ibc_cost += ibc_flag * state->lambda; + ibc_bitcost += ibc_flag; + + uvg_inter_recon_cu( + state, + lcu, + x, + y, + CU_WIDTH_FROM_DEPTH(depth), + true, + state->encoder_control->chroma_format != UVG_CSP_400); + + if (*inter_cost < MAX_DOUBLE) { + assert(fracmv_within_ibc_range( + &info, + cur_pu->inter.mv[0][0], + cur_pu->inter.mv[0][1])); + } + +} + + +/** + * \brief Update CU to have best modes at this depth. + * + * Only searches the 2Nx2N partition mode. 
+ * + * \param state encoder state + * \param x x-coordinate of the CU + * \param y y-coordinate of the CU + * \param depth depth of the CU in the quadtree + * \param lcu containing LCU + * + * \param inter_cost Return inter cost + * \param inter_bitcost Return inter bitcost + */ +void uvg_search_cu_ibc(encoder_state_t * const state, + int x, int y, int depth, + lcu_t *lcu, + double *inter_cost, + double* inter_bitcost) +{ + *inter_cost = MAX_DOUBLE; + *inter_bitcost = MAX_INT; + // Quick hashmap search + /* uvg_search_hash_cu_ibc( + state, + x, y, depth, + lcu, + inter_cost, + inter_bitcost); + return;*/ + // Store information of L0, L1, and bipredictions. + // Best cost will be left at MAX_DOUBLE if no valid CU is found. + // These will be initialized by the following function. + unit_stats_map_t amvp[3]; + unit_stats_map_t merge; + ibc_search_info_t info; + + info.lcu = lcu; + + search_pu_ibc(state, + x, y, depth, + SIZE_2Nx2N, 0, + amvp, + &merge, + &info); + + // Early Skip CU decision + if (merge.size == 1 && merge.unit[0].skipped) { + *inter_cost = merge.cost[0]; + *inter_bitcost = merge.bits[0]; + return; + } + + cu_info_t *best_inter_pu = NULL; + + + int best_key = amvp[0].keys[0]; + + if (amvp[0].size > 0 && + amvp[0].cost[best_key] < *inter_cost) { + + best_inter_pu = &amvp[0].unit[best_key]; + *inter_cost = amvp[0].cost[best_key]; + *inter_bitcost = amvp[0].bits[best_key]; + } + + + // Compare best AMVP against best Merge mode + int best_merge_key = merge.keys[0]; + + if (merge.size > 0 && merge.cost[best_merge_key] < *inter_cost) { + + best_inter_pu = &merge.unit[best_merge_key]; + *inter_cost = merge.cost[best_merge_key]; + *inter_bitcost = 0; // TODO: Check this + } + + if (*inter_cost == MAX_DOUBLE) { + // Could not find any motion vector. 
+ *inter_cost = MAX_DOUBLE; + *inter_bitcost = MAX_INT; + return; + } + + const int x_local = SUB_SCU(x); + const int y_local = SUB_SCU(y); + cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, x_local, y_local); + *cur_pu = *best_inter_pu; + cur_pu->type = CU_IBC; + + uvg_inter_recon_cu(state, lcu, x, y, CU_WIDTH_FROM_DEPTH(depth), + true, state->encoder_control->chroma_format != UVG_CSP_400); + + if (*inter_cost < MAX_DOUBLE) { + assert(fracmv_within_ibc_range(&info, cur_pu->inter.mv[0][0], cur_pu->inter.mv[0][1])); + } +} diff --git a/src/search_ibc.h b/src/search_ibc.h new file mode 100644 index 00000000..14ce3b6f --- /dev/null +++ b/src/search_ibc.h @@ -0,0 +1,55 @@ +#pragma once + +/***************************************************************************** + * This file is part of uvg266 VVC encoder. + * + * Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, this + * list of conditions and the following disclaimer in the documentation and/or + * other materials provided with the distribution. + * + * * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS + ****************************************************************************/ + +/** + * \ingroup Compression + * \file + * Inter prediction parameter search. + */ + +#include "cu.h" +#include "encoderstate.h" +#include "global.h" // IWYU pragma: keep +#include "inter.h" +#include "uvg266.h" + + +void uvg_search_cu_ibc(encoder_state_t * const state, + int x, int y, int depth, + lcu_t *lcu, + double *inter_cost, + double* inter_bitcost); + + + diff --git a/src/search_inter.c b/src/search_inter.c index 345a83e9..6508995f 100644 --- a/src/search_inter.c +++ b/src/search_inter.c @@ -312,6 +312,55 @@ static void select_starting_point(inter_search_info_t *info, check_mv_cost(info, extra_mv.x, extra_mv.y, best_cost, best_bits, best_mv); } + if (info->state->encoder_control->cfg.ibc & 2) { + int origin_x = info->origin.x; + int origin_y = info->origin.y; + + int ibc_origin_x = origin_x / UVG_HASHMAP_BLOCKSIZE; + int ibc_origin_y = origin_y / UVG_HASHMAP_BLOCKSIZE; + + int own_location = ((origin_x & 0xffff) << 16) | (origin_y & 0xffff); + + uint32_t ibc_buffer_row = origin_y / LCU_WIDTH; + + uint32_t crc = info->state->tile->frame->ibc_hashmap_pos_to_hash + [(origin_y / UVG_HASHMAP_BLOCKSIZE) * + info->state->tile->frame->ibc_hashmap_pos_to_hash_stride + + origin_x / UVG_HASHMAP_BLOCKSIZE]; + + uvg_hashmap_node_t *result = uvg_hashmap_search( + info->state->tile->frame->ibc_hashmap_row[ibc_buffer_row], crc); + + while (result != NULL) { + if 
(result->key == crc && result->value != own_location) { + int pos_x = result->value >> 16; + int pos_y = result->value & 0xffff; + int mv_x = pos_x - origin_x; + int mv_y = pos_y - origin_y; + + int ibc_pos_x = pos_x / UVG_HASHMAP_BLOCKSIZE; + int ibc_pos_y = pos_y / UVG_HASHMAP_BLOCKSIZE; + + bool full_block = true; + for (int ibc_x = 0; ibc_x < info->width / UVG_HASHMAP_BLOCKSIZE; ibc_x++) { + for (int ibc_y = 0; ibc_y < info->height / UVG_HASHMAP_BLOCKSIZE; ibc_y++) { + uint32_t neighbor_crc = info->state->tile->frame->ibc_hashmap_pos_to_hash + [(ibc_pos_y+ibc_y) * info->state->tile->frame->ibc_hashmap_pos_to_hash_stride + ibc_pos_x + ibc_x]; + uint32_t other_crc = info->state->tile->frame->ibc_hashmap_pos_to_hash + [(ibc_origin_y+ibc_y) * info->state->tile->frame->ibc_hashmap_pos_to_hash_stride + ibc_origin_x + ibc_x]; + if (other_crc != neighbor_crc) { + full_block = false; + break; + } + } + if (!full_block) break; + } + if (full_block) check_mv_cost(info, mv_x, mv_y, best_cost, best_bits, best_mv); + } + result = result->next; + } + } + // Go through candidates for (int32_t i = 0; i < info->num_merge_cand; ++i) { if (info->merge_cand[i].dir == 3) continue; diff --git a/src/strategies/generic/picture-generic.c b/src/strategies/generic/picture-generic.c index 09fce28a..817befed 100644 --- a/src/strategies/generic/picture-generic.c +++ b/src/strategies/generic/picture-generic.c @@ -793,9 +793,83 @@ static void generate_residual_generic(const uvg_pixel* ref_in, const uvg_pixel* } } +INLINE static uint32_t uvg_crc32c_4_generic(uint32_t crc, const uvg_pixel *buf) +{ + crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[0]) & 0xFF]; + crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[1]) & 0xFF]; + crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[2]) & 0xFF]; + crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[3]) & 0xFF]; + return crc; +} + + +INLINE static uint32_t uvg_crc32c_8_generic(uint32_t crc, const uvg_pixel *buf) +{ + crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[0]) & 0xFF]; + crc 
= (crc >> 8) ^ uvg_crc_table[(crc ^ buf[1]) & 0xFF]; + crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[2]) & 0xFF]; + crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[3]) & 0xFF]; + crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[4]) & 0xFF]; + crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[5]) & 0xFF]; + crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[6]) & 0xFF]; + crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[7]) & 0xFF]; + return crc; +} + +static uint32_t uvg_crc32c_4x4_8bit_generic(const uvg_pixel *buf, uint32_t pic_stride) +{ + uint32_t crc = 0xFFFFFFFF; + crc = uvg_crc32c_4_generic(crc, &buf[0 * pic_stride]); + crc = uvg_crc32c_4_generic(crc, &buf[1 * pic_stride]); + crc = uvg_crc32c_4_generic(crc, &buf[2 * pic_stride]); + crc = uvg_crc32c_4_generic(crc, &buf[3 * pic_stride]); + return crc ^ 0xFFFFFFFF; +} + +static uint32_t uvg_crc32c_4x4_16bit_generic(const uvg_pixel *buf, uint32_t pic_stride) +{ + uint32_t crc = 0xFFFFFFFF; + crc = uvg_crc32c_4_generic(crc, &buf[0 * pic_stride]); + crc = uvg_crc32c_4_generic(crc, &buf[0 * pic_stride] + 4); + + crc = uvg_crc32c_4_generic(crc, &buf[1 * pic_stride]); + crc = uvg_crc32c_4_generic(crc, &buf[1 * pic_stride] + 4); + + crc = uvg_crc32c_4_generic(crc, &buf[2 * pic_stride]); + crc = uvg_crc32c_4_generic(crc, &buf[2 * pic_stride] + 4); + + crc = uvg_crc32c_4_generic(crc, &buf[3 * pic_stride]); + crc = uvg_crc32c_4_generic(crc, &buf[3 * pic_stride] + 4); + return crc ^ 0xFFFFFFFF; +} + +static uint32_t uvg_crc32c_8x8_8bit_generic(const uvg_pixel *buf, uint32_t pic_stride) +{ + uint32_t crc = 0xFFFFFFFF; + crc = uvg_crc32c_8_generic(crc, &buf[0 * pic_stride]); + crc = uvg_crc32c_8_generic(crc, &buf[1 * pic_stride]); + + crc = uvg_crc32c_8_generic(crc, &buf[2 * pic_stride]); + crc = uvg_crc32c_8_generic(crc, &buf[3 * pic_stride]); + + crc = uvg_crc32c_8_generic(crc, &buf[4 * pic_stride]); + crc = uvg_crc32c_8_generic(crc, &buf[5 * pic_stride]); + + crc = uvg_crc32c_8_generic(crc, &buf[6 * pic_stride]); + crc = uvg_crc32c_8_generic(crc, 
&buf[7 * pic_stride]); + return crc ^ 0xFFFFFFFF; +} + int uvg_strategy_register_picture_generic(void* opaque, uint8_t bitdepth) { bool success = true; + if (bitdepth == 8) { + success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "generic", 0, &uvg_crc32c_4x4_8bit_generic); + success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "generic", 0, &uvg_crc32c_8x8_8bit_generic); + } else { + success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "generic", 0, &uvg_crc32c_4x4_16bit_generic); + } + success &= uvg_strategyselector_register(opaque, "reg_sad", "generic", 0, &reg_sad_generic); diff --git a/src/strategies/sse42/picture-sse42.c b/src/strategies/sse42/picture-sse42.c new file mode 100644 index 00000000..30549cb3 --- /dev/null +++ b/src/strategies/sse42/picture-sse42.c @@ -0,0 +1,95 @@ +/***************************************************************************** + * This file is part of uvg266 VVC encoder. + * + * Copyright (c) 2023, Tampere University, ITU/ISO/IEC, project contributors + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, this + * list of conditions and the following disclaimer in the documentation and/or + * other materials provided with the distribution. + * + * * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS + ****************************************************************************/ + +#include "global.h" + +#if COMPILE_INTEL_SSE42 +#include "uvg266.h" + +#include "strategies/sse42/picture-sse42.h" + +#include <immintrin.h> +#include <stdlib.h> + +#include "strategyselector.h" + + + +static uint32_t uvg_crc32c_4x4_8bit_sse42(const uvg_pixel *buf, uint32_t pic_stride) +{ + uint32_t crc = 0xFFFFFFFF; + crc = _mm_crc32_u32(crc, *((uint32_t *)&buf[0 * pic_stride])); + crc = _mm_crc32_u32(crc, *((uint32_t *)&buf[1 * pic_stride])); + crc = _mm_crc32_u32(crc, *((uint32_t *)&buf[2 * pic_stride])); + crc = _mm_crc32_u32(crc, *((uint32_t *)&buf[3 * pic_stride])); + return crc ^ 0xFFFFFFFF; +} + +static uint32_t uvg_crc32c_4x4_16bit_sse42(const uvg_pixel *buf, uint32_t pic_stride) +{ + uint64_t crc = 0xFFFFFFFF; + crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[0 * pic_stride])); + crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[1 * pic_stride])); + crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[2 * pic_stride])); + crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[3 * pic_stride])); + return (uint32_t)(crc ^ 0xFFFFFFFF); +} + +static uint32_t uvg_crc32c_8x8_8bit_sse42(const uvg_pixel *buf, uint32_t pic_stride) +{ + uint64_t crc =
0xFFFFFFFF; + crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[0 * pic_stride])); + crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[1 * pic_stride])); + crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[2 * pic_stride])); + crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[3 * pic_stride])); + crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[4 * pic_stride])); + crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[5 * pic_stride])); + crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[6 * pic_stride])); + crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[7 * pic_stride])); + return (uint32_t)(crc ^ 0xFFFFFFFF); +} + + +#endif //COMPILE_INTEL_SSE42 + +int uvg_strategy_register_picture_sse42(void* opaque, uint8_t bitdepth) { + bool success = true; +#if COMPILE_INTEL_SSE42 + if (bitdepth == 8){ + success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "sse42", 0, &uvg_crc32c_4x4_8bit_sse42); + success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "sse42", 0, &uvg_crc32c_8x8_8bit_sse42); + } else { + success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "sse42", 0, &uvg_crc32c_4x4_16bit_sse42); + } +#endif + return success; +} diff --git a/src/strategies/sse42/picture-sse42.h b/src/strategies/sse42/picture-sse42.h new file mode 100644 index 00000000..e1828b8c --- /dev/null +++ b/src/strategies/sse42/picture-sse42.h @@ -0,0 +1,45 @@ +#pragma once + +/***************************************************************************** + * This file is part of uvg266 VVC encoder. + * + * Copyright (c) 2022, Tampere University, ITU/ISO/IEC, project contributors + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright notice, this + * list of conditions and the following disclaimer in the documentation and/or + * other materials provided with the distribution. + * + * * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS + ****************************************************************************/ + +/** + * \ingroup Optimization + * \file + * Optimizations for SSE4.2. + */ + +#include "global.h" // IWYU pragma: keep +#include "uvg266.h" + + +int uvg_strategy_register_picture_sse42(void* opaque, uint8_t bitdepth); diff --git a/src/strategies/strategies-picture.c b/src/strategies/strategies-picture.c index 8ff49246..00ad9ccb 100644 --- a/src/strategies/strategies-picture.c +++ b/src/strategies/strategies-picture.c @@ -41,6 +41,8 @@ // Define function pointers. 
+crc32c_4x4_func * uvg_crc32c_4x4 = 0; +crc32c_8x8_func * uvg_crc32c_8x8 = 0; reg_sad_func * uvg_reg_sad = 0; cost_pixel_nxn_func * uvg_sad_4x4 = 0; @@ -83,6 +85,8 @@ pixel_var_func *uvg_pixel_var = 0; generate_residual_func *uvg_generate_residual = 0; + + int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth) { bool success = true; @@ -94,6 +98,9 @@ int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth) { if (uvg_g_hardware_flags.intel_flags.sse41) { success &= uvg_strategy_register_picture_sse41(opaque, bitdepth); } + if (uvg_g_hardware_flags.intel_flags.sse42) { + success &= uvg_strategy_register_picture_sse42(opaque, bitdepth); + } if (uvg_g_hardware_flags.intel_flags.avx2) { success &= uvg_strategy_register_picture_avx2(opaque, bitdepth); } @@ -206,3 +213,50 @@ cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n) return NULL; } } + +// Precomputed CRC32C lookup table for the Castagnoli polynomial 0x1EDC6F41 (reflected form 0x82F63B78) +const uint32_t uvg_crc_table[256] = { + 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, + 0x26a1e7e8, 0xd4ca64eb, 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, + 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, 0x105ec76f, 0xe235446c, + 0xf165b798, 0x030e349b, 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, + 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 0x5d1d08bf, 0xaf768bbc, + 0xbc267848, 0x4e4dfb4b, 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, + 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, 0xaa64d611, 0x580f5512, + 0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, + 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad, + 0x1642ae59, 0xe4292d5a, 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, + 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, 0x417b1dbc, 0xb3109ebf, + 0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, + 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, 0xfe53516f, + 0xed03a29b, 0x1f682198, 0x5125dad3, 0xa34e59d0,
0xb01eaa24, 0x42752927, + 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, 0xdbfc821c, 0x2997011f, + 0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, + 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e, + 0x4767748a, 0xb50cf789, 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, + 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, 0x7198540d, 0x83f3d70e, + 0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, + 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de, + 0xdde0eb2a, 0x2f8b6829, 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, + 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, 0x082f63b7, 0xfa44e0b4, + 0xe9141340, 0x1b7f9043, 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, + 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b, + 0xb4091bff, 0x466298fc, 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, + 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, 0xa24bb5a6, 0x502036a5, + 0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, + 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, + 0x0e330a81, 0xfc588982, 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, + 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, 0x38cc2a06, 0xcaa7a905, + 0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, + 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 0x0417b1db, 0xf67c32d8, + 0xe52cc12c, 0x1747422f, 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, + 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, 0xd3d3e1ab, 0x21b862a8, + 0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, + 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78, + 0x7fab5e8c, 0x8dc0dd8f, 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, + 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, 0x69e9f0d5, 0x9b8273d6, + 0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, + 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 
0x34f4f86a, 0xc69f7b69, + 0xd5cf889d, 0x27a40b9e, 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, + 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351, +}; \ No newline at end of file diff --git a/src/strategies/strategies-picture.h b/src/strategies/strategies-picture.h index eee3ffd5..88f52cfc 100644 --- a/src/strategies/strategies-picture.h +++ b/src/strategies/strategies-picture.h @@ -151,7 +151,16 @@ typedef double (pixel_var_func)(const uvg_pixel *buf, const uint32_t len); typedef void (generate_residual_func)(const uvg_pixel* ref_in, const uvg_pixel* pred_in, int16_t* residual, int width, int ref_stride, int pred_stride); + +extern const uint32_t uvg_crc_table[256]; + +typedef uint32_t(crc32c_4x4_func)(const uvg_pixel *buf, uint32_t pic_stride); +typedef uint32_t(crc32c_8x8_func)(const uvg_pixel *buf, uint32_t pic_stride); + // Declare function pointers. +extern crc32c_4x4_func * uvg_crc32c_4x4; +extern crc32c_8x8_func * uvg_crc32c_8x8; + extern reg_sad_func * uvg_reg_sad; extern cost_pixel_nxn_func * uvg_sad_4x4; @@ -198,6 +207,8 @@ cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned n); cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n); #define STRATEGIES_PICTURE_EXPORTS \ + {"crc32c_4x4", (void**) &uvg_crc32c_4x4}, \ + {"crc32c_8x8", (void **)&uvg_crc32c_8x8}, \ {"reg_sad", (void**) &uvg_reg_sad}, \ {"sad_4x4", (void**) &uvg_sad_4x4}, \ {"sad_8x8", (void**) &uvg_sad_8x8}, \ diff --git a/src/uvg266.h b/src/uvg266.h index e2ad9597..3bec7756 100644 --- a/src/uvg266.h +++ b/src/uvg266.h @@ -543,6 +543,8 @@ typedef struct uvg_config uint8_t dual_tree; uint8_t intra_rough_search_levels; + + uint8_t ibc; /* \brief Intra Block Copy parameter */ } uvg_config; /** diff --git a/src/videoframe.c b/src/videoframe.c index 8b3258ba..f5a4d8af 100644 --- a/src/videoframe.c +++ b/src/videoframe.c @@ -104,6 +104,8 @@ int uvg_videoframe_free(videoframe_t * const frame) free(frame); + + return 1; } diff --git a/src/videoframe.h b/src/videoframe.h 
index e1a82181..0a7509c6 100644 --- a/src/videoframe.h +++ b/src/videoframe.h @@ -41,6 +41,7 @@ #include "cu.h" #include "global.h" // IWYU pragma: keep #include "uvg266.h" +#include "hashmap.h" /** @@ -77,12 +78,22 @@ typedef struct videoframe struct param_set_map* alf_param_set_map; int32_t poc; //!< \brief Picture order count - cu_info_t* hmvp_lut; //!< \brief Look-up table for HMVP, one for each LCU row + + uvg_pixel **ibc_buffer_y; //!< \brief Intra Block Copy buffer for each LCU row + uvg_pixel **ibc_buffer_u; //!< \brief Intra Block Copy buffer for each LCU row + uvg_pixel **ibc_buffer_v; //!< \brief Intra Block Copy buffer for each LCU row + uvg_hashmap_t **ibc_hashmap_row; //!< \brief Hashmap for IBC hash search for each LCU row + uint32_t *ibc_hashmap_pos_to_hash; //!< \brief Hashmap reverse search for position to hash + uint32_t ibc_hashmap_pos_to_hash_stride; //!< \brief Hashmap position to hash stride + cu_info_t* hmvp_lut_ibc; //!< \brief Look-up table for HMVP in IBC, one for each LCU row + uint8_t* hmvp_size_ibc; //!< \brief HMVP IBC LUT size + cu_info_t* hmvp_lut; //!< \brief Look-up table for HMVP, one for each LCU row uint8_t* hmvp_size; //!< \brief HMVP LUT size bool source_lmcs_mapped; //!< \brief Indicate if source_lmcs is available and mapped to LMCS bool lmcs_top_level; //!< \brief Indicate that in this level the LMCS images are allocated bool rec_lmcs_mapped; //!< \brief Indicate if rec_lmcs is available and mapped to LMCS + } videoframe_t; diff --git a/tests/test_tools.sh b/tests/test_tools.sh index 398ef2e2..5f1f3aeb 100755 --- a/tests/test_tools.sh +++ b/tests/test_tools.sh @@ -14,4 +14,5 @@ valgrind_test $common_args --gop=8 --subme=4 --bipred --tmvp valgrind_test $common_args --transform-skip --tr-skip-max-size=5 valgrind_test $common_args --vaq=8 valgrind_test $common_args --vaq=8 --bitrate 350000 -valgrind_test $common_args --vaq=8 --rc-algorithm oba --bitrate 350000 \ No newline at end of file +valgrind_test $common_args --vaq=8 
--rc-algorithm oba --bitrate 350000 +valgrind_test $common_args --ibc=1 \ No newline at end of file