Merge branch 'implement_ibc'

This commit is contained in:
Marko Viitanen 2023-08-09 09:34:29 +03:00
commit 1a1fea1a19
31 changed files with 2875 additions and 332 deletions

View file

@ -145,6 +145,7 @@ target_include_directories(uvg266 PUBLIC src/strategies)
file(GLOB LIB_SOURCES_STRATEGIES_AVX2 RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/avx2/*.c")
file(GLOB LIB_SOURCES_STRATEGIES_SSE41 RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/sse41/*.c")
file(GLOB LIB_SOURCES_STRATEGIES_SSE42 RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/sse42/*.c")
set(CLI_SOURCES "src/encmain.c" "src/cli.c" "src/cli.h" "src/yuv_io.c" "src/yuv_io.h")
@ -175,7 +176,8 @@ else()
list(APPEND ALLOW_AVX2 "x86_64" "AMD64")
if(${CMAKE_SYSTEM_PROCESSOR} IN_LIST ALLOW_AVX2)
set_property( SOURCE ${LIB_SOURCES_STRATEGIES_AVX2} APPEND PROPERTY COMPILE_FLAGS "-mavx2 -mbmi -mpopcnt -mlzcnt -mbmi2" )
set_property( SOURCE ${LIB_SOURCES_STRATEGIES_SSE41} APPEND PROPERTY COMPILE_FLAGS "-msse4.1" )
set_property( SOURCE ${LIB_SOURCES_STRATEGIES_SSE41} APPEND PROPERTY COMPILE_FLAGS "-msse4.1" )
set_property( SOURCE ${LIB_SOURCES_STRATEGIES_SSE42} APPEND PROPERTY COMPILE_FLAGS "-msse4.2" )
endif()
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
@ -200,7 +202,7 @@ file(GLOB SOURCE_GROUP_CABAC RELATIVE ${PROJECT_SOURCE_DIR} "src/bitstream.*" "s
file(GLOB SOURCE_GROUP_COMPRESSION RELATIVE ${PROJECT_SOURCE_DIR} "src/search*" "src/rdo.*" "src/fast_coeff*")
file(GLOB SOURCE_GROUP_CONSTRAINT RELATIVE ${PROJECT_SOURCE_DIR} "src/constraint.*" "src/ml_*")
file(GLOB SOURCE_GROUP_CONTROL RELATIVE ${PROJECT_SOURCE_DIR} "src/cfg.*" "src/encoder.*" "src/encoder_state-c*" "src/encoder_state-g*" "src/encoderstate*" "src/gop.*" "src/input_frame_buffer.*" "src/uvg266*" "src/rate_control.*" "src/mip_data.h")
file(GLOB SOURCE_GROUP_DATA_STRUCTURES RELATIVE ${PROJECT_SOURCE_DIR} "src/cu.*" "src/image.*" "src/imagelist.*" "src/videoframe.*")
file(GLOB SOURCE_GROUP_DATA_STRUCTURES RELATIVE ${PROJECT_SOURCE_DIR} "src/cu.*" "src/image.*" "src/imagelist.*" "src/videoframe.*" "src/hashmap.*")
file(GLOB SOURCE_GROUP_EXTRAS RELATIVE ${PROJECT_SOURCE_DIR} "src/extras/*.h" "src/extras/*.c")
file(GLOB_RECURSE SOURCE_GROUP_STRATEGIES RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/*.h" "src/strategies/*.c")
file(GLOB SOURCE_GROUP_RECON RELATIVE ${PROJECT_SOURCE_DIR} "src/alf.*" "src/filter.*" "src/inter.*" "src/intra.*" "src/reshape.*" "src/sao.*" "src/scalinglist.*" "src/tables.*" "src/transform.*")

View file

@ -122,6 +122,7 @@ typedef struct
cabac_ctx_t transform_skip_gt2[5];
cabac_ctx_t cclm_flag;
cabac_ctx_t cclm_model;
cabac_ctx_t ibc_flag[3];
} ctx;
} cabac_data_t;

View file

@ -222,6 +222,9 @@ int uvg_config_init(uvg_config *cfg)
cfg->dual_tree = 0;
cfg->intra_rough_search_levels = 2;
cfg->ibc = 0;
return 1;
}
@ -1479,7 +1482,14 @@ int uvg_config_parse(uvg_config *cfg, const char *name, const char *value)
else if OPT("intra-rough-granularity") {
cfg->intra_rough_search_levels = atoi(value);
}
else {
else if OPT ("ibc") {
int ibc_value = atoi(value);
if (ibc_value < 0 || ibc_value > 2) {
fprintf(stderr, "ibc supports only range from 0 to 2\n");
return 0;
}
cfg->ibc = (uint8_t)ibc_value;
} else {
return 0;
}
#undef OPT

View file

@ -192,6 +192,7 @@ static const struct option long_options[] = {
{ "no-dual-tree", no_argument, NULL, 0 },
{ "cabac-debug-file", required_argument, NULL, 0 },
{ "intra-rough-granularity",required_argument, NULL, 0 },
{ "ibc", required_argument, NULL, 0 },
{0, 0, 0, 0}
};

View file

@ -423,6 +423,13 @@ static const uint8_t INIT_CCLM_MODEL[4] = {
9,
};
// CABAC initialisation values for the three ibc_flag contexts (one column per context).
// uvg_init_contexts() selects a row with its slice argument and additionally passes the
// matching entry of row 3 as the last argument of uvg_ctx_init().
// NOTE(review): row 3 presumably holds the per-context adaptation rate - confirm against uvg_ctx_init().
static const uint8_t INIT_IBC_FLAG[4][3] = {
{ 0, 43, 45, },
{ 0, 57, 44, },
{ 17, 42, 36, },
{ 1, 5, 8, },
};
/*
static const uint16_t g_inistateToCount[128] = {
614, 647, 681, 718, 756, 797, 839, 884, 932, 982, 1034, 1089, 1148, 1209, 1274, 1342,
@ -514,6 +521,7 @@ void uvg_init_contexts(encoder_state_t *state, int8_t QP, int8_t slice)
uvg_ctx_init(&cabac->ctx.lfnst_idx_model[i], QP, INIT_LFNST_IDX[slice][i], INIT_LFNST_IDX[3][i]);
uvg_ctx_init(&cabac->ctx.transform_skip_sig_coeff_group[i], QP, INIT_TRANSFORM_SKIP_SIG_COEFF_GROUP[slice][i], INIT_TRANSFORM_SKIP_SIG_COEFF_GROUP[3][i]);
uvg_ctx_init(&cabac->ctx.transform_skip_sig[i], QP, INIT_TRANSFORM_SKIP_SIG[slice][i], INIT_TRANSFORM_SKIP_SIG[3][i]);
uvg_ctx_init(&cabac->ctx.ibc_flag[i], QP, INIT_IBC_FLAG[slice][i], INIT_IBC_FLAG[3][i]);
}
for (i = 0; i < 4; i++) {

View file

@ -52,6 +52,7 @@ typedef enum {
CU_INTRA = 1,
CU_INTER = 2,
CU_PCM = 3,
CU_IBC = 4,
} cu_type_t;
typedef enum {
@ -146,7 +147,7 @@ enum uvg_tree_type {
*/
typedef struct
{
uint8_t type : 2; //!< \brief block type, one of cu_type_t values
uint8_t type : 3; //!< \brief block type, one of cu_type_t values
uint8_t depth : 3; //!< \brief depth / size of this block
uint8_t part_size : 3; //!< \brief partition mode, one of part_mode_t values
uint8_t tr_depth : 3; //!< \brief transform depth

View file

@ -834,7 +834,7 @@ int uvg_encode_inter_prediction_unit(encoder_state_t * const state,
if (cur_cu->inter.mv_dir & 2) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVMERGE_L1, abs_x, abs_y, width, height, cur_cu->inter.mv[1][0], cur_cu->inter.mv[1][1]);
#endif
} else {
if (state->frame->slicetype == UVG_SLICE_B) {
if (state->frame->slicetype == UVG_SLICE_B && cur_cu->type != CU_IBC) {
// Code Inter Dir
uint8_t inter_dir = cur_cu->inter.mv_dir;
@ -860,7 +860,7 @@ int uvg_encode_inter_prediction_unit(encoder_state_t * const state,
// size of the current reference index list (L0/L1)
uint8_t ref_LX_size = state->frame->ref_LX_size[ref_list_idx];
if (ref_LX_size > 1) {
if (ref_LX_size > 1 && cur_cu->type != CU_IBC) {
// parseRefFrmIdx
int32_t ref_frame = cur_cu->inter.mv_ref[ref_list_idx];
@ -906,7 +906,7 @@ int uvg_encode_inter_prediction_unit(encoder_state_t * const state,
mv_t mvd_hor = cur_cu->inter.mv[ref_list_idx][0] - mv_cand[cu_mv_cand][0];
mv_t mvd_ver = cur_cu->inter.mv[ref_list_idx][1] - mv_cand[cu_mv_cand][1];
uvg_change_precision(INTERNAL_MV_PREC, uvg_g_imv_to_prec[UVG_IMV_OFF], &mvd_hor, &mvd_ver);
uvg_change_precision(INTERNAL_MV_PREC, uvg_g_imv_to_prec[(cur_cu->type == CU_IBC)?UVG_IMV_FPEL:UVG_IMV_OFF], &mvd_hor, &mvd_ver);
uvg_encode_mvd(state, cabac, mvd_hor, mvd_ver, bits_out);
non_zero_mvd |= (mvd_hor != 0) || (mvd_ver != 0);
@ -1262,95 +1262,6 @@ void uvg_encode_intra_luma_coding_unit(const encoder_state_t * const state,
if (cabac->only_count && bits_out) *bits_out += bits;
}
/**
static void encode_part_mode(encoder_state_t * const state,
cabac_data_t * const cabac,
const cu_info_t * const cur_cu,
int depth)
{
// Binarization from Table 9-34 of the HEVC spec:
//
// | log2CbSize > | log2CbSize ==
// | MinCbLog2SizeY | MinCbLog2SizeY
// -------+-------+----------+---------+-----------+----------
// pred | part | AMP | AMP | |
// mode | mode | disabled | enabled | size == 8 | size > 8
// -------+-------+----------+---------+-----------+----------
// intra | 2Nx2N | - - | 1 1
// | NxN | - - | 0 0
// -------+-------+--------------------+----------------------
// inter | 2Nx2N | 1 1 | 1 1
// | 2NxN | 01 011 | 01 01
// | Nx2N | 00 001 | 00 001
// | NxN | - - | - 000
// | 2NxnU | - 0100 | - -
// | 2NxnD | - 0101 | - -
// | nLx2N | - 0000 | - -
// | nRx2N | - 0001 | - -
// -------+-------+--------------------+----------------------
//
//
// Context indices from Table 9-37 of the HEVC spec:
//
// binIdx
// | 0 1 2 3
// ------------------------------+------------------
// log2CbSize == MinCbLog2SizeY | 0 1 2 bypass
// log2CbSize > MinCbLog2SizeY | 0 1 3 bypass
// ------------------------------+------------------
double bits = 0;
if (cur_cu->type == CU_INTRA) {
if (depth == MAX_DEPTH) {
cabac->cur_ctx = &(cabac->ctx.part_size_model[0]);
if (cur_cu->part_size == SIZE_2Nx2N) {
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[0]), 1, bits, "part_mode 2Nx2N");
} else {
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[0]), 0, bits, "part_mode NxN");
}
}
} else {
cabac->cur_ctx = &(cabac->ctx.part_size_model[0]);
if (cur_cu->part_size == SIZE_2Nx2N) {
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[0]), 1, bits, "part_mode 2Nx2N");
return bits;
}
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[0]), 0, bits, "part_mode split");
cabac->cur_ctx = &(cabac->ctx.part_size_model[1]);
if (cur_cu->part_size == SIZE_2NxN ||
cur_cu->part_size == SIZE_2NxnU ||
cur_cu->part_size == SIZE_2NxnD) {
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[1]), 1, bits, "part_mode vertical");
} else {
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[1]), 0, bits, "part_mode horizontal");
}
if (state->encoder_control->cfg.amp_enable && depth < MAX_DEPTH) {
cabac->cur_ctx = &(cabac->ctx.part_size_model[3]);
if (cur_cu->part_size == SIZE_2NxN ||
cur_cu->part_size == SIZE_Nx2N) {
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[3]), 1, bits, "part_mode SMP");
return bits;
}
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[3]), 0, bits, "part_mode AMP");
if (cur_cu->part_size == SIZE_2NxnU ||
cur_cu->part_size == SIZE_nLx2N) {
CABAC_BINS_EP(cabac, 0, 1, "part_mode AMP");
if(cabac->only_count) bits += 1;
} else {
CABAC_BINS_EP(cabac, 1, 1, "part_mode AMP");
if(cabac->only_count) bits += 1;
}
}
}
return bits;
}
**/
bool uvg_write_split_flag(
const encoder_state_t * const state,
cabac_data_t* cabac,
@ -1547,7 +1458,7 @@ void uvg_encode_coding_tree(
// CABAC_BIN(cabac, 0, "split_transform_flag");
}
DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_CU_TYPE, abs_x, abs_y, cu_width, cu_width, (cur_cu->type == CU_INTRA)?0:1);
DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_CU_TYPE, abs_x, abs_y, cu_width, cu_width, cur_cu->type-1);
if (ctrl->cfg.lossless) {
cabac->cur_ctx = &cabac->ctx.cu_transquant_bypass;
@ -1555,7 +1466,7 @@ void uvg_encode_coding_tree(
}
// Encode skip flag
if (state->frame->slicetype != UVG_SLICE_I && cu_width != 4) {
if ((state->frame->slicetype != UVG_SLICE_I || state->encoder_control->cfg.ibc)) {
int8_t ctx_skip = 0;
@ -1565,11 +1476,22 @@ void uvg_encode_coding_tree(
if (above_cu && above_cu->skipped) {
ctx_skip++;
}
cabac->cur_ctx = &(cabac->ctx.cu_skip_flag_model[ctx_skip]);
CABAC_BIN(cabac, cur_cu->skipped, "SkipFlag");
if (cu_width > 4 || state->encoder_control->cfg.ibc) {
cabac->cur_ctx = &(cabac->ctx.cu_skip_flag_model[ctx_skip]);
CABAC_BIN(cabac, cur_cu->skipped, "SkipFlag");
}
if (cur_cu->skipped) {
if (state->encoder_control->cfg.ibc && state->frame->slicetype != UVG_SLICE_I)
{ // ToDo: Only for luma channel
// ToDo: Disable for blocks over 64x64 pixels
int8_t ctx_ibc = 0;
if (left_cu && left_cu->type == CU_IBC) ctx_ibc++;
if (above_cu && above_cu->type == CU_IBC) ctx_ibc++;
cabac->cur_ctx = &(cabac->ctx.ibc_flag[ctx_ibc]);
CABAC_BIN(cabac, (cur_cu->type == CU_IBC), "IBCFlag");
}
DBG_PRINT_MV(state, x, y, (uint32_t)cu_width, (uint32_t)cu_width, cur_cu);
uvg_hmvp_add_mv(state, x, y, (uint32_t)cu_width, (uint32_t)cu_width, cur_cu);
int16_t num_cand = state->encoder_control->cfg.max_merge;
@ -1597,6 +1519,15 @@ void uvg_encode_coding_tree(
}
// Prediction mode
if ((state->frame->slicetype == UVG_SLICE_I || cu_width == 4) && state->encoder_control->cfg.ibc) { // ToDo: Only for luma channel
// ToDo: Disable for blocks over 64x64 pixels
int8_t ctx_ibc = 0;
if (left_cu && left_cu->type == CU_IBC) ctx_ibc++;
if (above_cu && above_cu->type == CU_IBC) ctx_ibc++;
cabac->cur_ctx = &(cabac->ctx.ibc_flag[ctx_ibc]);
CABAC_BIN(cabac, (cur_cu->type == CU_IBC), "IBCFlag");
}
if (state->frame->slicetype != UVG_SLICE_I && cu_width != 4) {
int8_t ctx_predmode = 0;
@ -1607,6 +1538,15 @@ void uvg_encode_coding_tree(
cabac->cur_ctx = &(cabac->ctx.cu_pred_mode_model[ctx_predmode]);
CABAC_BIN(cabac, (cur_cu->type == CU_INTRA), "PredMode");
// We need IBC flag if the mode is signalled as Inter
if (state->encoder_control->cfg.ibc && cur_cu->type != CU_INTRA) {
int8_t ctx_ibc = 0;
if (left_cu && left_cu->type == CU_IBC) ctx_ibc++;
if (above_cu && above_cu->type == CU_IBC) ctx_ibc++;
cabac->cur_ctx = &(cabac->ctx.ibc_flag[ctx_ibc]);
CABAC_BIN(cabac, (cur_cu->type == CU_IBC), "IBCFlag");
}
}
// part_mode
@ -1657,7 +1597,7 @@ void uvg_encode_coding_tree(
} else
#endif
if (cur_cu->type == CU_INTER) {
if (cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) {
uint8_t imv_mode = UVG_IMV_OFF;
const int num_pu = uvg_part_mode_num_parts[cur_cu->part_size];
@ -1679,10 +1619,10 @@ void uvg_encode_coding_tree(
// 0 = off, 1 = fullpel, 2 = 4-pel, 3 = half-pel
if (ctrl->cfg.amvr && non_zero_mvd) {
cabac->cur_ctx = &(cabac->ctx.imv_flag[0]);
CABAC_BIN(cabac, (imv_mode > UVG_IMV_OFF), "imv_flag");
if(cur_cu->type != CU_IBC) CABAC_BIN(cabac, (imv_mode > UVG_IMV_OFF), "imv_flag");
if (imv_mode > UVG_IMV_OFF) {
cabac->cur_ctx = &(cabac->ctx.imv_flag[4]);
CABAC_BIN(cabac, (imv_mode < UVG_IMV_HPEL), "imv_flag");
if(cur_cu->type != CU_IBC) CABAC_BIN(cabac, (imv_mode < UVG_IMV_HPEL), "imv_flag");
if (imv_mode < UVG_IMV_HPEL) {
cabac->cur_ctx = &(cabac->ctx.imv_flag[1]);
CABAC_BIN(cabac, (imv_mode > UVG_IMV_FPEL), "imv_flag"); // 1 indicates 4PEL, 0 FPEL
@ -1860,7 +1800,7 @@ double uvg_mock_encode_coding_unit(
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_pred_mode_model[ctx_predmode]), (cur_cu->type == CU_INTRA), bits, "PredMode");
}
if (cur_cu->type == CU_INTER) {
if (cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) {
const uint8_t imv_mode = UVG_IMV_OFF;
const int non_zero_mvd = uvg_encode_inter_prediction_unit(state, cabac, cur_cu, x, y, cu_width, cu_width, depth, lcu, &bits);
if (ctrl->cfg.amvr && non_zero_mvd) {
@ -1897,35 +1837,38 @@ void uvg_encode_mvd(encoder_state_t * const state,
const int8_t ver_abs_gr0 = mvd_ver != 0;
const uint32_t mvd_hor_abs = abs(mvd_hor);
const uint32_t mvd_ver_abs = abs(mvd_ver);
double temp_bits_out = 0.0;
cabac->cur_ctx = &cabac->ctx.cu_mvd_model[0];
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[0], (mvd_hor != 0), *bits_out, "abs_mvd_greater0_flag_hor");
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[0], (mvd_ver != 0), *bits_out, "abs_mvd_greater0_flag_ver");
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[0], (mvd_hor != 0), temp_bits_out, "abs_mvd_greater0_flag_hor");
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[0], (mvd_ver != 0), temp_bits_out, "abs_mvd_greater0_flag_ver");
cabac->cur_ctx = &cabac->ctx.cu_mvd_model[1];
if (hor_abs_gr0) {
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[1], (mvd_hor_abs>1), *bits_out,"abs_mvd_greater1_flag_hor");
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[1], (mvd_hor_abs>1), temp_bits_out,"abs_mvd_greater1_flag_hor");
}
if (ver_abs_gr0) {
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[1], (mvd_ver_abs>1), *bits_out, "abs_mvd_greater1_flag_ver");
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[1], (mvd_ver_abs>1), temp_bits_out, "abs_mvd_greater1_flag_ver");
}
if (hor_abs_gr0) {
if (mvd_hor_abs > 1) {
uint32_t bits = uvg_cabac_write_ep_ex_golomb(state, cabac, mvd_hor_abs - 2, 1);
if(cabac->only_count) *bits_out += bits;
if(cabac->only_count) temp_bits_out += bits;
}
uint32_t mvd_hor_sign = (mvd_hor > 0) ? 0 : 1;
CABAC_BIN_EP(cabac, mvd_hor_sign, "mvd_sign_flag_hor");
if (cabac->only_count) *bits_out += 1;
if (cabac->only_count) temp_bits_out += 1;
}
if (ver_abs_gr0) {
if (mvd_ver_abs > 1) {
uint32_t bits = uvg_cabac_write_ep_ex_golomb(state, cabac, mvd_ver_abs - 2, 1);
if (cabac->only_count) *bits_out += bits;
if (cabac->only_count) temp_bits_out += bits;
}
uint32_t mvd_ver_sign = mvd_ver > 0 ? 0 : 1;
CABAC_BIN_EP(cabac, mvd_ver_sign, "mvd_sign_flag_ver");
if (cabac->only_count) *bits_out += 1;
if (cabac->only_count) temp_bits_out += 1;
}
if(bits_out) *bits_out = temp_bits_out;
}

View file

@ -154,9 +154,8 @@ static void encoder_state_write_bitstream_PTL(bitstream_t *stream,
// end Profile Tier
//uint8_t level = state->encoder_control->cfg.level;
// ToDo: level hardcoded to 5.2
WRITE_U(stream, 86, 8, "general_level_idc");
// ToDo: level hardcoded to 6.3
WRITE_U(stream, 105, 8, "general_level_idc");
WRITE_U(stream, 0, 1, "ptl_frame_only_constraint_flag");
WRITE_U(stream, 0, 1, "ptl_multilayer_enabled_flag");
@ -694,7 +693,11 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream,
WRITE_UE(stream, 0, "sps_internal_bit_depth_minus_input_bit_depth");
}
WRITE_U(stream, 0, 1, "sps_ibc_enabled_flag");
WRITE_U(stream, encoder->cfg.ibc > 0 ? 1 : 0, 1, "sps_ibc_enabled_flag");
if (encoder->cfg.ibc) {
WRITE_UE(stream,6 - IBC_MRG_MAX_NUM_CANDS, "sps_six_minus_max_num_ibc_merge_cand");
}
#if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET
// if(!no_ladf_constraint_flag)

View file

@ -122,6 +122,31 @@ static int encoder_state_config_tile_init(encoder_state_t * const state,
state->tile->frame->hmvp_lut = malloc(sizeof(cu_info_t) * height_in_lcu * MAX_NUM_HMVP_CANDS);
state->tile->frame->hmvp_size = calloc(1, sizeof(uint8_t) * height_in_lcu);
// Allocate the HMVP for IBC in any case
state->tile->frame->hmvp_lut_ibc = malloc(sizeof(cu_info_t) * height_in_lcu * MAX_NUM_HMVP_CANDS);
state->tile->frame->hmvp_size_ibc = calloc(1, sizeof(uint8_t) * height_in_lcu);
if (state->encoder_control->cfg.ibc) {
// Allocate pixel buffer for each LCU row
state->tile->frame->ibc_buffer_y = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu);
state->tile->frame->ibc_buffer_u = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu);
state->tile->frame->ibc_buffer_v = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu);
state->tile->frame->ibc_hashmap_row = malloc(sizeof(uvg_hashmap_t) * state->tile->frame->height_in_lcu);
if (state->encoder_control->cfg.ibc & 2) {
state->tile->frame->ibc_hashmap_pos_to_hash_stride = ((state->tile->frame->width+UVG_HASHMAP_BLOCKSIZE-1)/ UVG_HASHMAP_BLOCKSIZE);
state->tile->frame->ibc_hashmap_pos_to_hash = malloc(sizeof(uint32_t) *
((state->tile->frame->height+UVG_HASHMAP_BLOCKSIZE-1)/ UVG_HASHMAP_BLOCKSIZE) * state->tile->frame->ibc_hashmap_pos_to_hash_stride);
}
for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) {
state->tile->frame->ibc_hashmap_row[i] = uvg_hashmap_create((LCU_WIDTH * IBC_BUFFER_WIDTH)>>2);
state->tile->frame->ibc_buffer_y[i] = (uvg_pixel*)malloc(IBC_BUFFER_SIZE * 3); // ToDo: we don't need this much, but it would also support 4:4:4
state->tile->frame->ibc_buffer_u[i] = &state->tile->frame->ibc_buffer_y[i][IBC_BUFFER_SIZE];
state->tile->frame->ibc_buffer_v[i] = &state->tile->frame->ibc_buffer_y[i][IBC_BUFFER_SIZE * 2];
}
}
state->tile->frame->rec = NULL;
state->tile->frame->source = NULL;
@ -197,6 +222,24 @@ static void encoder_state_config_tile_finalize(encoder_state_t * const state) {
FREE_POINTER(state->tile->frame->hmvp_lut);
FREE_POINTER(state->tile->frame->hmvp_size);
FREE_POINTER(state->tile->frame->hmvp_lut_ibc);
FREE_POINTER(state->tile->frame->hmvp_size_ibc);
if (state->encoder_control->cfg.ibc) {
if (state->encoder_control->cfg.ibc & 2) {
FREE_POINTER(state->tile->frame->ibc_hashmap_pos_to_hash);
}
for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) {
FREE_POINTER(state->tile->frame->ibc_buffer_y[i]);
uvg_hashmap_free(state->tile->frame->ibc_hashmap_row[i]);
}
FREE_POINTER(state->tile->frame->ibc_hashmap_row);
FREE_POINTER(state->tile->frame->ibc_buffer_y);
FREE_POINTER(state->tile->frame->ibc_buffer_u);
FREE_POINTER(state->tile->frame->ibc_buffer_v);
}
uvg_videoframe_free(state->tile->frame);
state->tile->frame = NULL;
FREE_POINTER(state->tile->wf_jobs);

View file

@ -45,17 +45,20 @@
#include "encode_coding_tree.h"
#include "encoder_state-bitstream.h"
#include "filter.h"
#include "hashmap.h"
#include "image.h"
#include "rate_control.h"
#include "sao.h"
#include "search.h"
#include "tables.h"
#include "threads.h"
#include "threadqueue.h"
#include "alf.h"
#include "reshape.h"
#include "strategies/strategies-picture.h"
/**
* \brief Strength of QP adjustments when using adaptive QP for 360 video.
*
@ -250,6 +253,58 @@ static void encoder_state_recdata_to_bufs(encoder_state_t * const state,
frame->rec->stride / 2, 1);
}
}
// Fill IBC buffer
if (state->encoder_control->cfg.ibc) {
uint32_t ibc_buffer_pos_x = lcu->position_px.x + LCU_WIDTH >= IBC_BUFFER_WIDTH ? IBC_BUFFER_WIDTH - LCU_WIDTH: lcu->position_px.x;
uint32_t ibc_buffer_pos_x_c = ibc_buffer_pos_x >> 1;
uint32_t ibc_buffer_row = lcu->position_px.y / LCU_WIDTH;
// If the buffer is full shift all the lines LCU_WIDTH left
if (lcu->position_px.x + LCU_WIDTH > IBC_BUFFER_WIDTH) {
for (uint32_t i = 0; i < LCU_WIDTH; i++) {
memmove(
&frame->ibc_buffer_y[ibc_buffer_row][i * IBC_BUFFER_WIDTH],
&frame->ibc_buffer_y[ibc_buffer_row][i * IBC_BUFFER_WIDTH + LCU_WIDTH],
sizeof(uvg_pixel) * (IBC_BUFFER_WIDTH - LCU_WIDTH));
}
if (state->encoder_control->chroma_format != UVG_CSP_400) {
for (uint32_t i = 0; i < LCU_WIDTH_C; i++) {
memmove(
&frame->ibc_buffer_u[ibc_buffer_row][i * IBC_BUFFER_WIDTH_C],
&frame->ibc_buffer_u[ibc_buffer_row]
[i * IBC_BUFFER_WIDTH_C + LCU_WIDTH_C],
sizeof(uvg_pixel) * (IBC_BUFFER_WIDTH_C - LCU_WIDTH_C));
memmove(
&frame->ibc_buffer_v[ibc_buffer_row][i * IBC_BUFFER_WIDTH_C],
&frame->ibc_buffer_v[ibc_buffer_row]
[i * IBC_BUFFER_WIDTH_C + LCU_WIDTH_C],
sizeof(uvg_pixel) * (IBC_BUFFER_WIDTH_C - LCU_WIDTH_C));
}
}
}
const uint32_t ibc_block_width = MIN(LCU_WIDTH, (state->tile->frame->width-lcu->position_px.x));
const uint32_t ibc_block_height = MIN(LCU_WIDTH, (state->tile->frame->height-lcu->position_px.y));
uvg_pixels_blit(&frame->rec->y[lcu->position_px.y * frame->rec->stride + lcu->position_px.x],
&frame->ibc_buffer_y[ibc_buffer_row][ibc_buffer_pos_x],
ibc_block_width, ibc_block_height,
frame->rec->stride, IBC_BUFFER_WIDTH);
if (state->encoder_control->chroma_format != UVG_CSP_400) {
uvg_pixels_blit(&frame->rec->u[(lcu->position_px.y >> 1) * (frame->rec->stride >> 1) + (lcu->position_px.x >> 1)],
&frame->ibc_buffer_u[ibc_buffer_row][ibc_buffer_pos_x_c],
ibc_block_width>>1, ibc_block_height>>1,
frame->rec->stride >> 1, IBC_BUFFER_WIDTH_C);
uvg_pixels_blit(&frame->rec->v[(lcu->position_px.y >> 1) * (frame->rec->stride >> 1) + (lcu->position_px.x >> 1)],
&frame->ibc_buffer_v[ibc_buffer_row][ibc_buffer_pos_x_c],
ibc_block_width>>1, ibc_block_height>>1,
frame->rec->stride >> 1, IBC_BUFFER_WIDTH_C);
}
}
}
@ -692,9 +747,53 @@ static void encoder_state_worker_encode_lcu_search(void * opaque)
cu_info_t original_lut[MAX_NUM_HMVP_CANDS];
uint8_t original_lut_size = state->tile->frame->hmvp_size[ctu_row];
cu_info_t original_lut_ibc[MAX_NUM_HMVP_CANDS];
uint8_t original_lut_size_ibc = state->tile->frame->hmvp_size_ibc[ctu_row];
// Store original HMVP lut before search and restore after, since it's modified
if(state->frame->slicetype != UVG_SLICE_I) memcpy(original_lut, &state->tile->frame->hmvp_lut[ctu_row_mul_five], sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
if(state->encoder_control->cfg.ibc) memcpy(original_lut_ibc, &state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five], sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
if (state->encoder_control->cfg.ibc & 2) {
videoframe_t * const frame = state->tile->frame;
const uint32_t ibc_block_width = MIN(LCU_WIDTH, (state->tile->frame->width-lcu->position_px.x));
const uint32_t ibc_block_height = MIN(LCU_WIDTH, (state->tile->frame->height-lcu->position_px.y));
int items = 0;
// Hash the current LCU to the IBC hashmap
for (int32_t xx = 0; xx < (int32_t)(ibc_block_width)-7; xx+=UVG_HASHMAP_BLOCKSIZE>>1) {
for (int32_t yy = 0; yy < (int32_t)(ibc_block_height)-7; yy+=UVG_HASHMAP_BLOCKSIZE>>1) {
int cur_x = lcu->position_px.x + xx;
int cur_y = lcu->position_px.y + yy;
// Skip blocks that seem to be the same value for the whole block
uint64_t first_line =
*(uint64_t *)&frame->source->y[cur_y * frame->source->stride + cur_x];
bool same_data = true;
for (int y_temp = 1; y_temp < 8; y_temp++) {
if (*(uint64_t *)&frame->source->y[(cur_y+y_temp) * frame->source->stride + cur_x] != first_line) {
same_data = false;
break;
}
}
if (!same_data || (xx % UVG_HASHMAP_BLOCKSIZE == 0 && yy % UVG_HASHMAP_BLOCKSIZE == 0)) {
uint32_t crc = uvg_crc32c_8x8(&frame->source->y[cur_y * frame->source->stride + cur_x],frame->source->stride);
if (state->encoder_control->chroma_format != UVG_CSP_400) {
crc += uvg_crc32c_4x4(&frame->source->u[(cur_y>>1) * (frame->source->stride>>1) + (cur_x>>1)],frame->source->stride>>1);
crc += uvg_crc32c_4x4(&frame->source->v[(cur_y>>1) * (frame->source->stride>>1) + (cur_x>>1)],frame->source->stride>>1);
}
if (xx % UVG_HASHMAP_BLOCKSIZE == 0 && yy % UVG_HASHMAP_BLOCKSIZE == 0) {
state->tile->frame->ibc_hashmap_pos_to_hash[(cur_y / UVG_HASHMAP_BLOCKSIZE)*state->tile->frame->ibc_hashmap_pos_to_hash_stride + cur_x / UVG_HASHMAP_BLOCKSIZE] = crc;
}
uvg_hashmap_insert(frame->ibc_hashmap_row[ctu_row], crc, ((cur_x&0xffff)<<16) | (cur_y&0xffff));
items++;
}
}
}
}
//fprintf(stderr, "Inserted %d items to %dx%d at %dx%d\r\n", items, ibc_block_width, ibc_block_height, lcu->position_px.x, lcu->position_px.y);
//This part doesn't write to bitstream, it's only search, deblock and sao
uvg_search_lcu(state, lcu->position_px.x, lcu->position_px.y, state->tile->hor_buf_search, state->tile->ver_buf_search, lcu->coeff);
@ -703,6 +802,10 @@ static void encoder_state_worker_encode_lcu_search(void * opaque)
memcpy(&state->tile->frame->hmvp_lut[ctu_row_mul_five], original_lut, sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
state->tile->frame->hmvp_size[ctu_row] = original_lut_size;
}
if (state->encoder_control->cfg.ibc) {
memcpy(&state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five], original_lut_ibc, sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
state->tile->frame->hmvp_size_ibc[ctu_row] = original_lut_size_ibc;
}
encoder_state_recdata_to_bufs(state, lcu, state->tile->hor_buf_search, state->tile->ver_buf_search);
@ -899,8 +1002,13 @@ static void encoder_state_encode_leaf(encoder_state_t * const state)
bool wavefront = state->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW;
// Clear hmvp lut size before each leaf
if (!wavefront) memset(state->tile->frame->hmvp_size, 0, sizeof(uint8_t) * state->tile->frame->height_in_lcu);
else state->tile->frame->hmvp_size[state->wfrow->lcu_offset_y] = 0;
if (!wavefront) {
memset(state->tile->frame->hmvp_size, 0, sizeof(uint8_t) * state->tile->frame->height_in_lcu);
if(cfg->ibc) memset(state->tile->frame->hmvp_size_ibc, 0, sizeof(uint8_t) * state->tile->frame->height_in_lcu);
} else {
state->tile->frame->hmvp_size[state->wfrow->lcu_offset_y] = 0;
state->tile->frame->hmvp_size_ibc[state->wfrow->lcu_offset_y] = 0;
}
bool use_parallel_encoding = (wavefront && state->parent->children[1].encoder_control);
if (!use_parallel_encoding) {
@ -1644,6 +1752,7 @@ static void encoder_state_init_new_frame(encoder_state_t * const state, uvg_pict
if (!state->encoder_control->tiles_enable) {
memset(state->tile->frame->hmvp_size, 0, sizeof(uint8_t) * state->tile->frame->height_in_lcu);
memset(state->tile->frame->hmvp_size_ibc, 0, sizeof(uint8_t) * state->tile->frame->height_in_lcu);
}
// ROI / delta QP maps

View file

@ -192,9 +192,6 @@ typedef struct encoder_state_config_frame_t {
double *c_para;
double *k_para;
cu_info_t* hmvp_lut; //!< \brief Look-up table for HMVP, one for each LCU row
uint8_t* hmvp_size; //!< \brief HMVP LUT size
bool jccr_sign;
} encoder_state_config_frame_t;

View file

@ -789,10 +789,10 @@ static void filter_deblock_edge_luma(encoder_state_t * const state,
cu_p->inter.mv[1][0] = 0;
cu_p->inter.mv[1][1] = 0;
}
const int refP0 = (cu_p->inter.mv_dir & 1) ? state->frame->ref_LX[0][cu_p->inter.mv_ref[0]] : -1;
const int refP1 = (cu_p->inter.mv_dir & 2) ? state->frame->ref_LX[1][cu_p->inter.mv_ref[1]] : -1;
const int refQ0 = (cu_q->inter.mv_dir & 1) ? state->frame->ref_LX[0][cu_q->inter.mv_ref[0]] : -1;
const int refQ1 = (cu_q->inter.mv_dir & 2) ? state->frame->ref_LX[1][cu_q->inter.mv_ref[1]] : -1;
const int refP0 = (cu_p->type == CU_IBC)?-2:(cu_p->inter.mv_dir & 1) ? state->frame->ref_LX[0][cu_p->inter.mv_ref[0]] : -1;
const int refP1 = (cu_p->type == CU_IBC)?-2:(cu_p->inter.mv_dir & 2) ? state->frame->ref_LX[1][cu_p->inter.mv_ref[1]] : -1;
const int refQ0 = (cu_q->type == CU_IBC)?-2:(cu_q->inter.mv_dir & 1) ? state->frame->ref_LX[0][cu_q->inter.mv_ref[0]] : -1;
const int refQ1 = (cu_q->type == CU_IBC)?-2:(cu_q->inter.mv_dir & 2) ? state->frame->ref_LX[1][cu_q->inter.mv_ref[1]] : -1;
const mv_t* mvQ0 = cu_q->inter.mv[0];
const mv_t* mvQ1 = cu_q->inter.mv[1];
@ -830,12 +830,14 @@ static void filter_deblock_edge_luma(encoder_state_t * const state,
}
}
else /*if (cu_p->inter.mv_dir != 3 && cu_q->inter.mv_dir != 3)*/ { //is P-slice
if (cu_q->inter.mv_ref[cu_q->inter.mv_dir - 1] != cu_p->inter.mv_ref[cu_p->inter.mv_dir - 1]) {
const int refP = (cu_p->type == CU_IBC)?-2:state->frame->ref_LX[0][cu_p->inter.mv_ref[0]];
const int refQ = (cu_q->type == CU_IBC)?-2:state->frame->ref_LX[0][cu_q->inter.mv_ref[0]];
if (refP != refQ) {
// Reference pictures are different
strength = 1;
} else if (
((abs(cu_q->inter.mv[cu_q->inter.mv_dir - 1][0] - cu_p->inter.mv[cu_p->inter.mv_dir - 1][0]) >= mvdThreashold) ||
(abs(cu_q->inter.mv[cu_q->inter.mv_dir - 1][1] - cu_p->inter.mv[cu_p->inter.mv_dir - 1][1]) >= mvdThreashold))) {
((abs(cu_q->inter.mv[0][0] - cu_p->inter.mv[0][0]) >= mvdThreashold) ||
(abs(cu_q->inter.mv[0][1] - cu_p->inter.mv[0][1]) >= mvdThreashold))) {
// Absolute motion vector diff between blocks >= 0.5 (Integer pixel)
strength = 1;
}

View file

@ -176,7 +176,6 @@ typedef int32_t mv_t;
//! pow(2, MIN_SIZE)
#define CU_MIN_SIZE_PIXELS (1 << MIN_SIZE)
//! Round frame size up to this interval (8 pixels)
#define CONF_WINDOW_PAD_IN_PIXELS ((1 << MIN_SIZE)<<1)
//! spec: CtbSizeY
@ -254,6 +253,15 @@ typedef int32_t mv_t;
#define AMVP_MAX_NUM_CANDS 2
#define AMVP_MAX_NUM_CANDS_MEM 3
#define MRG_MAX_NUM_CANDS 6
/**
* \brief Max number of merge candidates in Intra Block Copy
*
* Written to the SPS as 6 - IBC_MRG_MAX_NUM_CANDS (sps_six_minus_max_num_ibc_merge_cand).
*/
#define IBC_MRG_MAX_NUM_CANDS 6
//! Number of samples in one plane of the per-LCU-row IBC reference buffer.
//! The tile init code allocates IBC_BUFFER_SIZE * 3 per LCU row and places the
//! U and V planes at offsets IBC_BUFFER_SIZE and IBC_BUFFER_SIZE * 2.
#define IBC_BUFFER_SIZE (128*128)
//! Line stride, in samples, of the luma IBC buffer
#define IBC_BUFFER_WIDTH (IBC_BUFFER_SIZE / LCU_WIDTH)
//! Line stride, in samples, of the chroma IBC buffers (half of the luma stride)
#define IBC_BUFFER_WIDTH_C ((IBC_BUFFER_SIZE / LCU_WIDTH) >> 1)
#define MAX_NUM_HMVP_CANDS 5

150
src/hashmap.c Normal file
View file

@ -0,0 +1,150 @@
/*****************************************************************************
* This file is part of uvg266 VVC encoder.
*
* Copyright (c) 2023, Tampere University, ITU/ISO/IEC, project contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this
* list of conditions and the following disclaimer in the documentation and/or
* other materials provided with the distribution.
*
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
****************************************************************************/
#include "hashmap.h"
/**
 * \brief Allocate and initialize a single hashmap node.
 *
 * The node is created detached (next == NULL) with a chain size of 1.
 * Ownership passes to the caller; release it with uvg_hashmap_node_free().
 *
 * \param key    the key of the node to be created
 * \param value  the value of the node to be created
 * \return a newly allocated node, or NULL if the allocation failed
 */
uvg_hashmap_node_t* uvg_hashmap_create_node(uint32_t key, uint32_t value) {
  uvg_hashmap_node_t* new_node = (uvg_hashmap_node_t*)malloc(sizeof(*new_node));
  if (new_node == NULL) {
    // Out of memory: report it to the caller instead of dereferencing NULL.
    return NULL;
  }
  new_node->key = key;
  new_node->value = value;
  new_node->next = NULL;
  new_node->size = 1;
  return new_node;
}
/**
 * \brief Create an empty hashmap with the given number of buckets.
 *
 * Every bucket starts out as an empty chain (NULL). The map must be released
 * with uvg_hashmap_free().
 *
 * \param bucket_size  number of buckets in the table
 * \return a new hashmap, or NULL if memory could not be allocated
 */
uvg_hashmap_t* uvg_hashmap_create(uint32_t bucket_size)
{
  uvg_hashmap_t* new_hashmap = (uvg_hashmap_t*)malloc(sizeof(*new_hashmap));
  if (new_hashmap == NULL) {
    return NULL;
  }

  new_hashmap->bucket_size = bucket_size;
  // calloc checks the count * size multiplication for overflow.
  new_hashmap->table = (uvg_hashmap_node_t**)calloc(bucket_size, sizeof(*new_hashmap->table));
  if (new_hashmap->table == NULL && bucket_size != 0) {
    // Allocation of the bucket table failed; do not leak the map header.
    free(new_hashmap);
    return NULL;
  }

  // Explicitly store NULL pointers; the index type matches bucket_size so the
  // comparison is not a signed/unsigned mix.
  for (uint32_t i = 0; i < bucket_size; i++) {
    new_hashmap->table[i] = NULL;
  }
  return new_hashmap;
}
/**
 * \brief Map a key to a bucket index.
 *
 * The index is currently the plain remainder key % bucket_size. The bit-mixing
 * variant is kept below, commented out, and is not in use.
 *
 * \param key          the key to be hashed
 * \param bucket_size  number of buckets; used as the divisor, so it must be non-zero
 * \return the bucket index for the given key, in the range [0, bucket_size)
 */
static uint32_t uvg_hashmap_hash(uint32_t key, uint32_t bucket_size)
{
//key ^= (key >> 20) ^ (key >> 12);
//return (key ^ (key >> 7) ^ (key >> 4) ^ 2654435769U) % bucket_size;
return key % bucket_size;
}
/**
 * \brief Insert a key/value pair at the head of its bucket chain.
 *
 * Existing entries with the same key are not replaced; the new node is simply
 * pushed in front of them. The new head's size field is set to the resulting
 * chain length (previous head's size + 1, or 1 for an empty bucket).
 *
 * If the node cannot be allocated, the map is left unchanged.
 *
 * \param map    the hashmap to insert the new node into
 * \param key    the key of the new node
 * \param value  the value of the new node
 */
void uvg_hashmap_insert(uvg_hashmap_t* map, uint32_t key, uint32_t value) {
  uvg_hashmap_node_t* new_node = uvg_hashmap_create_node(key, value);
  if (new_node == NULL) {
    // Allocation failed: drop the entry rather than dereference NULL.
    return;
  }

  const uint32_t hash_index = uvg_hashmap_hash(key, map->bucket_size);
  uvg_hashmap_node_t* old_head = map->table[hash_index];

  if (old_head != NULL) {
    new_node->size = old_head->size + 1;
  }
  new_node->next = (void*)old_head;
  map->table[hash_index] = new_node;
}
/**
 * \brief Return the bucket chain that a key hashes to.
 *
 * Note that this does NOT compare keys: the returned node is the head of the
 * bucket selected by the hash of \p key, and the chain may contain entries
 * with other keys that share the bucket. Callers must walk the chain via
 * node->next and compare node->key themselves.
 *
 * \param map  the hashmap to search in
 * \param key  the key whose bucket is looked up
 * \return head node of the bucket chain, or NULL if the bucket is empty
 */
uvg_hashmap_node_t* uvg_hashmap_search(uvg_hashmap_t* map, uint32_t key) {
uint32_t hashIndex = uvg_hashmap_hash(key, map->bucket_size);
return map->table[hashIndex];
}
/**
 * \brief Look up a key and return the value of the first matching entry.
 *
 * The bucket chain is scanned from its head, so the entry inserted last for
 * this key is the one that is found.
 *
 * \param map  the hashmap to search in
 * \param key  the key to search for
 * \return the stored value, or -1 converted to uint32_t when the key is absent
 */
uint32_t uvg_hashmap_search_return_first(uvg_hashmap_t* map, uint32_t key)
{
  const uint32_t bucket = uvg_hashmap_hash(key, map->bucket_size);

  for (uvg_hashmap_node_t* node = map->table[bucket];
       node != NULL;
       node = (uvg_hashmap_node_t*)node->next) {
    if (node->key == key) {
      return node->value;
    }
  }

  // No entry with this key in the chain.
  return -1;
}
/**
 * \brief Release a node together with every node linked after it.
 *
 * Passing NULL is allowed and does nothing.
 *
 * \param node  head of the chain to release
 */
void uvg_hashmap_node_free(uvg_hashmap_node_t* node)
{
  for (uvg_hashmap_node_t* cursor = node; cursor != NULL; ) {
    // Remember the successor before the current node is released.
    uvg_hashmap_node_t* const following = (uvg_hashmap_node_t*)cursor->next;
    free(cursor);
    cursor = following;
  }
}
/**
 * \brief Release a hashmap, including all of its bucket chains.
 *
 * Passing NULL is allowed and does nothing, mirroring free().
 *
 * \param map  the hashmap to free the memory of
 */
void uvg_hashmap_free(uvg_hashmap_t* map) {
  if (map == NULL) {
    return;
  }

  if (map->table != NULL) {
    // Index type matches bucket_size to avoid a signed/unsigned comparison.
    for (uint32_t i = 0; i < map->bucket_size; i++) {
      uvg_hashmap_node_free(map->table[i]);
    }
    free(map->table);
  }
  free(map);
}

70
src/hashmap.h Normal file
View file

@ -0,0 +1,70 @@
#pragma once
/*****************************************************************************
* This file is part of uvg266 VVC encoder.
*
* Copyright (c) 2023, Tampere University, ITU/ISO/IEC, project contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this
* list of conditions and the following disclaimer in the documentation and/or
* other materials provided with the distribution.
*
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
// The ratio of the hashmap bucket size to the maximum number of elements
#define UVG_HASHMAP_RATIO 12.0
// Block size used with the hashmap.
// NOTE(review): the previous comment said "4x4 blocks" while the value is 8 — confirm which is intended.
#define UVG_HASHMAP_BLOCKSIZE 8
// One entry of a bucket chain (singly linked list).
typedef struct uvg_hashmap_node {
// Next entry in the same bucket, or NULL; stored as void* and cast back to uvg_hashmap_node_t* by users.
void* next;
// Key this entry was inserted with.
uint32_t key;
// Value associated with the key.
uint32_t value;
// Length of the chain starting at this node (1 for the last node).
uint32_t size;
} uvg_hashmap_node_t;
// Hashmap with separate chaining.
typedef struct uvg_hashmap {
// Number of entries in table.
uint32_t bucket_size;
// Array of bucket_size chain heads; NULL marks an empty bucket.
uvg_hashmap_node_t** table;
} uvg_hashmap_t;
// Allocate a detached node holding key/value.
uvg_hashmap_node_t* uvg_hashmap_create_node(uint32_t key, uint32_t value);
// Allocate a map with bucket_size empty buckets.
uvg_hashmap_t* uvg_hashmap_create(uint32_t bucket_size);
//uint32_t uvg_hashmap_hash(uint32_t key, uint32_t bucket_size);
// Push a new key/value entry to the front of its bucket chain.
void uvg_hashmap_insert(uvg_hashmap_t* map, uint32_t key, uint32_t value);
// Return the head of the bucket chain for key (keys are not compared).
uvg_hashmap_node_t* uvg_hashmap_search(uvg_hashmap_t* map, uint32_t key);
// Return the value of the first entry matching key, or (uint32_t)-1 if none.
uint32_t uvg_hashmap_search_return_first(uvg_hashmap_t* map, uint32_t key);
// Free a node and all nodes linked after it.
void uvg_hashmap_node_free(uvg_hashmap_node_t* node);
// Free the map, its table and all chains.
void uvg_hashmap_free(uvg_hashmap_t* map);

View file

@ -593,6 +593,67 @@ void uvg_inter_recon_cu(const encoder_state_t * const state,
}
}
/**
 * \brief Build the IBC (intra block copy) prediction for a CU into lcu->rec.
 *
 * The block vector is read from the CU's inter.mv[0] and reduced by
 * INTERNAL_MV_PREC bits. Depending on where the referenced block lies
 * horizontally, samples are copied from the frame's per-row IBC buffers
 * (ibc_buffer_y/u/v), from the reconstruction of the current LCU, or from both.
 *
 * \param state           encoder state; provides the IBC buffers via state->tile->frame
 * \param lcu             current LCU; source of the CU info and destination of the prediction
 * \param x               block x position in pixels
 * \param y               block y position in pixels
 * \param width           block width in pixels (also used as the height)
 * \param predict_luma    copy the luma plane
 * \param predict_chroma  copy both chroma planes (half width and height)
 * \param i_pu            not used by this function
 */
static void ibc_recon_cu(const encoder_state_t * const state,
lcu_t *lcu,
int32_t x,
int32_t y,
int32_t width,
bool predict_luma,
bool predict_chroma,
int i_pu)
{
// Position of the block relative to the top-left corner of the LCU.
const int x_scu = SUB_SCU(x);
const int y_scu = SUB_SCU(y);
// Destination offsets into the LCU reconstruction planes.
uint32_t offset = x_scu + y_scu * LCU_WIDTH;
uint32_t offset_c = x_scu / 2 + y_scu / 2 * LCU_WIDTH_C;
cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, x_scu, y_scu);
// Block vector with the INTERNAL_MV_PREC fractional bits dropped.
int32_t mv_x = cu->inter.mv[0][0] >> INTERNAL_MV_PREC;
int32_t mv_y = cu->inter.mv[0][1] >> INTERNAL_MV_PREC;
// Each LCU row has its own IBC buffer.
uint32_t ibc_row = y / LCU_WIDTH;
// Horizontal source position inside the IBC buffer. While the current LCU
// still fits within IBC_BUFFER_WIDTH the picture x coordinate is used as is;
// otherwise it is rebased to x_scu + IBC_BUFFER_WIDTH.
int32_t buffer_x = ((x - x_scu) + LCU_WIDTH <= IBC_BUFFER_WIDTH ?
x :
x - (((x - x_scu)) - IBC_BUFFER_WIDTH)) + mv_x;
int32_t buffer_y = y_scu + mv_y;
// The whole block must be to the left of the current position
assert((-mv_x >= width || -mv_y >= width) && x >= 0 && y >= 0);
// Predicted block completely outside of this LCU
if (mv_x + x_scu + width <= 0) {
if(predict_luma) uvg_pixels_blit(&state->tile->frame->ibc_buffer_y[ibc_row][buffer_y * IBC_BUFFER_WIDTH + buffer_x], lcu->rec.y + offset, width, width, IBC_BUFFER_WIDTH, LCU_WIDTH);
if (predict_chroma) {
uvg_pixels_blit(&state->tile->frame->ibc_buffer_u[ibc_row][(buffer_y / 2) * IBC_BUFFER_WIDTH_C + (buffer_x / 2)], lcu->rec.u + offset_c, width / 2, width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C);
uvg_pixels_blit(&state->tile->frame->ibc_buffer_v[ibc_row][(buffer_y / 2) * IBC_BUFFER_WIDTH_C + (buffer_x / 2)], lcu->rec.v + offset_c, width / 2, width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C);
}
// The condition below is equivalent to mv_x + x_scu >= 0: the left edge of
// the referenced block is inside the current LCU.
} else if (mv_x + x_scu + width >= width) { // Completely in current LCU
if(predict_luma) uvg_pixels_blit(&lcu->rec.y[(y_scu + mv_y) * LCU_WIDTH + x_scu + mv_x], lcu->rec.y + offset, width, width, LCU_WIDTH, LCU_WIDTH);
if (predict_chroma) {
uvg_pixels_blit(&lcu->rec.u[((y_scu+mv_y) / 2) * LCU_WIDTH_C + (x_scu + mv_x) / 2], lcu->rec.u + offset_c, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C);
uvg_pixels_blit(&lcu->rec.v[((y_scu+mv_y) / 2) * LCU_WIDTH_C + (x_scu + mv_x) / 2], lcu->rec.v + offset_c, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C);
}
} else { // Partly on the buffer and partly on the current LCU rec
// Number of columns that lie left of the LCU boundary (taken from the IBC
// buffer) and the remaining columns (taken from the LCU reconstruction).
uint32_t width_buffer = -(mv_x + x_scu);
uint32_t width_lcu = width - width_buffer;
if(predict_luma) uvg_pixels_blit(&state->tile->frame->ibc_buffer_y[ibc_row][buffer_y * IBC_BUFFER_WIDTH + buffer_x], lcu->rec.y + offset, width_buffer, width, IBC_BUFFER_WIDTH, LCU_WIDTH);
if (predict_chroma) {
// Chroma width of the buffer part is rounded up when width_buffer is odd.
uvg_pixels_blit(&state->tile->frame->ibc_buffer_u[ibc_row][(buffer_y / 2) * IBC_BUFFER_WIDTH_C + (buffer_x / 2)], lcu->rec.u + offset_c, width_buffer / 2 + (width_buffer&1), width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C);
uvg_pixels_blit(&state->tile->frame->ibc_buffer_v[ibc_row][(buffer_y / 2) * IBC_BUFFER_WIDTH_C + (buffer_x / 2)], lcu->rec.v + offset_c, width_buffer / 2 + (width_buffer&1), width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C);
}
// Advance the destination past the columns already filled from the buffer.
offset += width_buffer;
offset_c += width_buffer/2 + (width_buffer&1);
if(predict_luma) uvg_pixels_blit(&lcu->rec.y[(y_scu + mv_y) * LCU_WIDTH + x_scu + mv_x + width_buffer], lcu->rec.y + offset, width_lcu, width, LCU_WIDTH, LCU_WIDTH);
// Skip the chroma copy when the LCU part is narrower than one chroma sample.
if (predict_chroma && (width_lcu / 2)) {
uvg_pixels_blit(&lcu->rec.u[((y_scu+mv_y) / 2) * LCU_WIDTH_C + (x_scu + mv_x + width_buffer) / 2], lcu->rec.u + offset_c, width_lcu / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C);
uvg_pixels_blit(&lcu->rec.v[((y_scu+mv_y) / 2) * LCU_WIDTH_C + (x_scu + mv_x + width_buffer) / 2], lcu->rec.v + offset_c, width_lcu / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C);
}
}
}
/**
* Predict a single PU.
*
@ -626,49 +687,56 @@ void uvg_inter_pred_pu(const encoder_state_t * const state,
const int pu_h = PU_GET_H(cu->part_size, width, i_pu);
cu_info_t *pu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(pu_x), SUB_SCU(pu_y));
if (pu->inter.mv_dir == 3) {
const uvg_picture *const refs[2] = {
state->frame->ref->images[
state->frame->ref_LX[0][
pu->inter.mv_ref[0]]],
state->frame->ref->images[
state->frame->ref_LX[1][
pu->inter.mv_ref[1]]],
};
uvg_inter_recon_bipred(state,
refs[0], refs[1],
pu_x, pu_y,
pu_w, pu_h,
pu->inter.mv,
lcu,
predict_luma, predict_chroma);
if (cu->type == CU_IBC) {
ibc_recon_cu(state, lcu, x, y, width, predict_luma, predict_chroma, i_pu);
} else {
if (pu->inter.mv_dir == 3) {
const uvg_picture * const refs[2] = {
state->frame->ref->images[state->frame->ref_LX[0][pu->inter.mv_ref[0]]],
state->frame->ref->images[state->frame->ref_LX[1][pu->inter.mv_ref[1]]],
};
uvg_inter_recon_bipred(
state,
refs[0],
refs[1],
pu_x,
pu_y,
pu_w,
pu_h,
pu->inter.mv,
lcu,
predict_luma,
predict_chroma);
} else {
const int mv_idx = pu->inter.mv_dir - 1;
const uvg_picture * const ref =
state->frame->ref->images[state->frame->ref_LX[mv_idx][pu->inter.mv_ref[mv_idx]]];
const unsigned offset_luma = SUB_SCU(pu_y) * LCU_WIDTH + SUB_SCU(pu_x);
const unsigned offset_chroma =
SUB_SCU(pu_y) / 2 * LCU_WIDTH_C + SUB_SCU(pu_x) / 2;
yuv_t lcu_adapter;
lcu_adapter.size = pu_w * pu_h;
lcu_adapter.y = lcu->rec.y + offset_luma,
lcu_adapter.u = lcu->rec.u + offset_chroma,
lcu_adapter.v = lcu->rec.v + offset_chroma,
inter_recon_unipred(
state,
ref,
pu_x,
pu_y,
pu_w,
pu_h,
LCU_WIDTH,
pu->inter.mv[mv_idx],
&lcu_adapter,
NULL,
predict_luma,
predict_chroma);
}
}
else {
const int mv_idx = pu->inter.mv_dir - 1;
const uvg_picture *const ref =
state->frame->ref->images[
state->frame->ref_LX[mv_idx][
pu->inter.mv_ref[mv_idx]]];
const unsigned offset_luma = SUB_SCU(pu_y) * LCU_WIDTH + SUB_SCU(pu_x);
const unsigned offset_chroma = SUB_SCU(pu_y) / 2 * LCU_WIDTH_C + SUB_SCU(pu_x) / 2;
yuv_t lcu_adapter;
lcu_adapter.size = pu_w * pu_h;
lcu_adapter.y = lcu->rec.y + offset_luma,
lcu_adapter.u = lcu->rec.u + offset_chroma,
lcu_adapter.v = lcu->rec.v + offset_chroma,
inter_recon_unipred(state,
ref,
pu_x, pu_y,
pu_w, pu_h,
LCU_WIDTH,
pu->inter.mv[mv_idx],
&lcu_adapter,
NULL,
predict_luma, predict_chroma);
}
if (predict_chroma && state->encoder_control->cfg.jccr) {
const int offset = x_scu / 2 + y_scu / 2 * LCU_WIDTH_C;
uvg_pixels_blit(lcu->rec.u + offset, lcu->rec.joint_u + offset, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C);
@ -917,6 +985,259 @@ static void get_temporal_merge_candidates(const encoder_state_t * const state,
}
}
/**
 * \brief Scale one motion vector component by a fixed-point factor.
 *
 * The product is rounded (with one extra unit added for negative products),
 * shifted right by 8 bits and clipped to [-131072, 131071].
 *
 * \param mv     motion vector component
 * \param scale  scaling factor with 8 fractional bits
 * \return the scaled and clipped component
 */
static INLINE mv_t get_scaled_mv(mv_t mv, int scale)
{
  const int32_t product = scale * mv;
  const int32_t rounding = 127 + (product < 0);
  return CLIP(-131072, 131071, (product + rounding) >> 8);
}
// Layout of the compressed ("float") motion vector representation used by
// convert_mv_fixed_to_float / convert_mv_float_to_fixed: the exponent occupies
// the low MV_EXPONENT_BITCOUNT bits and the mantissa sits above it.
#define MV_EXPONENT_BITCOUNT 4
#define MV_MANTISSA_BITCOUNT 6
// Mask of the mantissa bits below its top bit: 2^(MV_MANTISSA_BITCOUNT-1) - 1.
#define MV_MANTISSA_UPPER_LIMIT ((1 << (MV_MANTISSA_BITCOUNT - 1)) - 1)
// Top bit of the mantissa: 2^(MV_MANTISSA_BITCOUNT-1).
#define MV_MANTISSA_LIMIT (1 << (MV_MANTISSA_BITCOUNT - 1))
// Mask selecting the exponent bits.
#define MV_EXPONENT_MASK ((1 << MV_EXPONENT_BITCOUNT) - 1)
/**
 * \brief Pack a fixed-point MV component into the exponent/mantissa form.
 *
 * \param val  fixed-point motion vector component
 * \return exponent in the low MV_EXPONENT_BITCOUNT bits, mantissa above it
 */
static int convert_mv_fixed_to_float(int32_t val)
{
// All ones for negative val, zero otherwise (relies on an arithmetic right shift of a negative value).
uint32_t sign = val >> 31;
// OR-ing in MV_MANTISSA_UPPER_LIMIT bounds the log2 from below, so scale is never less than -1.
int scale = uvg_math_floor_log2((val ^ sign) | MV_MANTISSA_UPPER_LIMIT) - (MV_MANTISSA_BITCOUNT - 1);
int exponent;
uint32_t mantissa;
if (scale >= 0)
{
// Round to nearest at the bit position that is about to be dropped.
int round = (1 << scale) >> 1;
int n = (val + round) >> scale;
// Rounding may carry into the next bit; that carry is folded into the exponent.
exponent = scale + ((n ^ sign) >> (MV_MANTISSA_BITCOUNT - 1));
// Low mantissa bits from n, top mantissa bit from the sign.
mantissa = (n & MV_MANTISSA_UPPER_LIMIT) | (sign << (MV_MANTISSA_BITCOUNT - 1));
}
else
{
// Small magnitude: stored as is with a zero exponent.
exponent = 0;
mantissa = val;
}
return exponent | (mantissa << MV_EXPONENT_BITCOUNT);
}
/**
 * \brief Unpack an exponent/mantissa MV component back to fixed point.
 *
 * \param val  value produced by convert_mv_fixed_to_float
 * \return the fixed-point component
 */
static int convert_mv_float_to_fixed(int val)
{
int exponent = val & MV_EXPONENT_MASK;
uint32_t mantissa = val >> MV_EXPONENT_BITCOUNT;
// A zero exponent means the mantissa is the value itself; otherwise the top
// mantissa bit is flipped and the result is scaled by 2^(exponent - 1).
return exponent == 0 ? mantissa : (mantissa ^ MV_MANTISSA_LIMIT) << (exponent - 1);
}
/**
 * \brief Round an MV component to the precision of the packed representation.
 *
 * The value is packed with convert_mv_fixed_to_float and immediately unpacked
 * again with convert_mv_float_to_fixed.
 *
 * \param x  fixed-point motion vector component
 * \return the component after the pack/unpack round trip
 */
static int round_mv_comp(int x)
{
  const int packed = convert_mv_fixed_to_float(x);
  return convert_mv_float_to_fixed(packed);
}
/**
 * \brief Scale a motion vector candidate by the ratio of two POC distances.
 *
 * The candidate is left untouched when both distances are equal, or when the
 * neighbor's distance is zero (no valid scale factor exists in that case).
 *
 * \param current_poc       POC of the current picture
 * \param current_ref_poc   POC of the current block's reference picture
 * \param neighbor_poc      POC of the picture containing the neighbor
 * \param neighbor_ref_poc  POC of the neighbor's reference picture
 * \param mv_cand           motion vector (x, y) to scale in place
 */
static void apply_mv_scaling_pocs(int32_t current_poc,
                                  int32_t current_ref_poc,
                                  int32_t neighbor_poc,
                                  int32_t neighbor_ref_poc,
                                  mv_t mv_cand[2])
{
  int32_t diff_current  = current_poc - current_ref_poc;
  int32_t diff_neighbor = neighbor_poc - neighbor_ref_poc;

  // Equal distances: the scale factor would be 1.
  if (diff_current == diff_neighbor) return;

  // A zero neighbor distance would be used as a divisor below.
  if (diff_neighbor == 0) return;

  diff_current  = CLIP(-128, 127, diff_current);
  diff_neighbor = CLIP(-128, 127, diff_neighbor);

  // Fixed-point ratio diff_current / diff_neighbor with 8 fractional bits.
  int scale = CLIP(-4096, 4095,
    (diff_current * ((0x4000 + (abs(diff_neighbor) >> 1)) / diff_neighbor) + 32) >> 6);

  mv_cand[0] = get_scaled_mv(mv_cand[0], scale);
  mv_cand[1] = get_scaled_mv(mv_cand[1], scale);
}
/**
 * \brief Scale a neighbor's motion vector to the current CU's reference picture.
 *
 * Looks up the POCs of both reference pictures and delegates to
 * apply_mv_scaling_pocs; the current frame's POC is used for both blocks.
 *
 * \param state             encoder state
 * \param current_cu        CU being predicted
 * \param neighbor_cu       CU the candidate vector comes from
 * \param current_reflist   reference list used by the current CU
 * \param neighbor_reflist  reference list the candidate vector belongs to
 * \param mv_cand           motion vector (x, y) to scale in place
 */
static INLINE void apply_mv_scaling(const encoder_state_t *state,
                                    const cu_info_t *current_cu,
                                    const cu_info_t *neighbor_cu,
                                    int8_t current_reflist,
                                    int8_t neighbor_reflist,
                                    mv_t mv_cand[2])
{
  const int32_t cur_ref_poc =
    state->frame->ref->pocs[
      state->frame->ref_LX[current_reflist][
        current_cu->inter.mv_ref[current_reflist]]];

  const int32_t nb_ref_poc =
    state->frame->ref->pocs[
      state->frame->ref_LX[neighbor_reflist][
        neighbor_cu->inter.mv_ref[neighbor_reflist]]];

  apply_mv_scaling_pocs(state->frame->poc, cur_ref_poc,
                        state->frame->poc, nb_ref_poc,
                        mv_cand);
}
/**
 * \brief Try to derive an MVP candidate from a neighboring CU.
 *
 * The neighbor's vector in list \p reflist is tried first, then the one in the
 * other list. Without scaling, a vector is only accepted if it points to the
 * same reference picture as the current CU; with scaling, the first available
 * vector is taken and scaled.
 *
 * \param state        encoder state
 * \param cur_cu       CU being predicted
 * \param cand         neighboring CU, or NULL if not available
 * \param reflist      reference list of the current CU
 * \param scaling      whether to accept and scale vectors of other references
 * \param mv_cand_out  receives the candidate vector (x, y)
 * \return true if a candidate was written to mv_cand_out
 */
static INLINE bool add_mvp_candidate(const encoder_state_t *state,
                                     const cu_info_t *cur_cu,
                                     const cu_info_t *cand,
                                     int8_t reflist,
                                     bool scaling,
                                     mv_t mv_cand_out[2])
{
  if (cand == NULL) {
    return false;
  }

  assert(cand->inter.mv_dir != 0);

  // Requested list first, the opposite list second.
  const int list_order[2] = { reflist, !reflist };

  for (int pass = 0; pass < 2; pass++) {
    const int cand_list = list_order[pass];

    // Skip lists the neighbor does not use.
    if (!(cand->inter.mv_dir & (1 << cand_list))) {
      continue;
    }

    // Without scaling the reference pictures have to match.
    if (!scaling &&
        state->frame->ref_LX[cand_list][cand->inter.mv_ref[cand_list]] !=
        state->frame->ref_LX[reflist][cur_cu->inter.mv_ref[reflist]]) {
      continue;
    }

    mv_cand_out[0] = cand->inter.mv[cand_list][0];
    mv_cand_out[1] = cand->inter.mv[cand_list][1];
    if (scaling) {
      apply_mv_scaling(state, cur_cu, cand, reflist, cand_list, mv_cand_out);
    }
    return true;
  }

  return false;
}
/**
 * \brief Check whether two CUs carry the same IBC block vector.
 *
 * Only inter.mv[0] is compared.
 *
 * \param cu1  first CU
 * \param cu2  second CU, may be NULL
 * \return true if cu2 is non-NULL and both vector components are equal
 */
static bool is_duplicate_candidate_ibc(const cu_info_t* cu1, const cu_info_t* cu2)
{
  // A missing second candidate can never be a duplicate.
  if (cu2 == NULL) {
    return false;
  }

  const bool same_x = cu1->inter.mv[0][0] == cu2->inter.mv[0][0];
  const bool same_y = cu1->inter.mv[0][1] == cu2->inter.mv[0][1];
  return same_x && same_y;
}
/**
* \brief Get IBC merge candidates for current block.
*
* Fills mv_cand with exactly IBC_MRG_MAX_NUM_CANDS block vectors, in this
* order: the left neighbor A1 and the above neighbor B1 (each only if it is an
* IBC block), then entries from the IBC HMVP table of the current CTU row, and
* finally zero vectors for any remaining slots.
*
* \param state encoder state
* \param cur_cu current CU (not read by this function)
* \param lcu current LCU, or NULL to read the neighbors from cua instead
* \param cua CU array used for the neighbors when lcu is NULL
* \param x block x position in pixels
* \param y block y position in pixels
* \param width block width in pixels
* \param height block height in pixels
* \param mv_cand will be filled with the candidate vectors
*/
static void get_ibc_merge_candidates(const encoder_state_t * const state,
const cu_info_t * const cur_cu,
lcu_t *lcu,
const cu_array_t *cua,
int32_t x,
int32_t y,
int32_t width,
int32_t height,
mv_t mv_cand[IBC_MRG_MAX_NUM_CANDS][2]
)
{
/*
Predictor block locations (only A1 and B1 are used here)
____ _______
|B2|______|B1|B0|
| |
| Cur CU |
__| |
|A1|_________|
|A0|
*/
int32_t x_local = SUB_SCU(x); //!< coordinates from top-left of this LCU
int32_t y_local = SUB_SCU(y);
cu_info_t *a1 = NULL;
cu_info_t *b1 = NULL;
uint8_t candidates = 0;
// A1 availability testing
if (x != 0) {
a1 = lcu != NULL?LCU_GET_CU_AT_PX(lcu, x_local - 1, y_local + height - 1): uvg_cu_array_at_const(cua, x - 1, y + height - 1);
// Do not check a1->coded because the block to the left is always coded
// before the current one and the flag is not set when searching an SMP
// block.
if (a1->type == CU_IBC) {
inter_clear_cu_unused(a1);
mv_cand[candidates][0] = a1->inter.mv[0][0];
mv_cand[candidates][1] = a1->inter.mv[0][1];
candidates++;
} else {
// Not an IBC block: treat A1 as unavailable for the duplicate checks below.
a1 = NULL;
}
}
// B1 availability testing
if (y != 0) {
b1 = lcu != NULL?LCU_GET_CU_AT_PX(lcu, x_local + width - 1, y_local - 1): uvg_cu_array_at_const(cua, x + width - 1, y - 1);
// Do not check b1->coded because the block above is always coded before
// the current one and the flag is not set when searching an SMP block.
if (b1->type == CU_IBC) {
// B1 is only added when its vector differs from A1's.
if(!is_duplicate_candidate_ibc(b1, a1)) {
inter_clear_cu_unused(b1);
mv_cand[candidates][0] = b1->inter.mv[0][0];
mv_cand[candidates][1] = b1->inter.mv[0][1];
candidates++;
}
} else {
b1 = NULL;
}
}
// Round the spatial candidates (at most two at this point).
if (candidates > 0)
uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[0][0], &mv_cand[0][1]);
if (candidates > 1)
uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[1][0], &mv_cand[1][1]);
if (candidates < IBC_MRG_MAX_NUM_CANDS)
{
// The IBC HMVP table holds MAX_NUM_HMVP_CANDS entries per CTU row.
const uint32_t ctu_row = (y >> LOG2_LCU_WIDTH);
const uint32_t ctu_row_mul_five = ctu_row * MAX_NUM_HMVP_CANDS;
int32_t num_cand = state->tile->frame->hmvp_size_ibc[ctu_row];
for (int i = 0; i < MIN(MAX_NUM_HMVP_CANDS,num_cand); i++) {
cu_info_t* cand = &state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five + i];
bool duplicate = false;
// Check that the HMVP candidate is not duplicate
if (is_duplicate_candidate_ibc(cand, a1)) {
duplicate = true;
} else if(is_duplicate_candidate_ibc(cand, b1)) {
duplicate = true;
}
// allow duplicates after the first hmvp lut item
if (!duplicate || i > 0) {
mv_cand[candidates][0] = cand->inter.mv[0][0];
mv_cand[candidates][1] = cand->inter.mv[0][1];
candidates++;
// List is full: nothing left to pad.
if (candidates == IBC_MRG_MAX_NUM_CANDS) return;
}
}
}
// Fill with (0,0)
while (candidates < IBC_MRG_MAX_NUM_CANDS) {
mv_cand[candidates][0] = 0;
mv_cand[candidates][1] = 0;
candidates++;
}
}
/**
* \brief Get merge candidates for current block.
*
@ -1093,92 +1414,6 @@ static void get_spatial_merge_candidates_cua(const cu_array_t *cua,
}
}
static INLINE mv_t get_scaled_mv(mv_t mv, int scale)
{
int32_t scaled = scale * mv;
return CLIP(-131072, 131071, (scaled + 127 + (scaled < 0)) >> 8);
}
#define MV_EXPONENT_BITCOUNT 4
#define MV_MANTISSA_BITCOUNT 6
#define MV_MANTISSA_UPPER_LIMIT ((1 << (MV_MANTISSA_BITCOUNT - 1)) - 1)
#define MV_MANTISSA_LIMIT (1 << (MV_MANTISSA_BITCOUNT - 1))
#define MV_EXPONENT_MASK ((1 << MV_EXPONENT_BITCOUNT) - 1)
static int convert_mv_fixed_to_float(int32_t val)
{
uint32_t sign = val >> 31;
int scale = uvg_math_floor_log2((val ^ sign) | MV_MANTISSA_UPPER_LIMIT) - (MV_MANTISSA_BITCOUNT - 1);
int exponent;
uint32_t mantissa;
if (scale >= 0)
{
int round = (1 << scale) >> 1;
int n = (val + round) >> scale;
exponent = scale + ((n ^ sign) >> (MV_MANTISSA_BITCOUNT - 1));
mantissa = (n & MV_MANTISSA_UPPER_LIMIT) | (sign << (MV_MANTISSA_BITCOUNT - 1));
}
else
{
exponent = 0;
mantissa = val;
}
return exponent | (mantissa << MV_EXPONENT_BITCOUNT);
}
static int convert_mv_float_to_fixed(int val)
{
int exponent = val & MV_EXPONENT_MASK;
uint32_t mantissa = val >> MV_EXPONENT_BITCOUNT;
return exponent == 0 ? mantissa : (mantissa ^ MV_MANTISSA_LIMIT) << (exponent - 1);
}
static int round_mv_comp(int x)
{
return convert_mv_float_to_fixed(convert_mv_fixed_to_float(x));
}
static void apply_mv_scaling_pocs(int32_t current_poc,
int32_t current_ref_poc,
int32_t neighbor_poc,
int32_t neighbor_ref_poc,
mv_t mv_cand[2])
{
int32_t diff_current = current_poc - current_ref_poc;
int32_t diff_neighbor = neighbor_poc - neighbor_ref_poc;
if (diff_current == diff_neighbor) return;
diff_current = CLIP(-128, 127, diff_current);
diff_neighbor = CLIP(-128, 127, diff_neighbor);
int scale = CLIP(-4096, 4095,
(diff_current * ((0x4000 + (abs(diff_neighbor) >> 1)) / diff_neighbor) + 32) >> 6);
mv_cand[0] = get_scaled_mv(mv_cand[0], scale);
mv_cand[1] = get_scaled_mv(mv_cand[1], scale);
}
static INLINE void apply_mv_scaling(const encoder_state_t *state,
const cu_info_t *current_cu,
const cu_info_t *neighbor_cu,
int8_t current_reflist,
int8_t neighbor_reflist,
mv_t mv_cand[2])
{
apply_mv_scaling_pocs(state->frame->poc,
state->frame->ref->pocs[
state->frame->ref_LX[current_reflist][
current_cu->inter.mv_ref[current_reflist]]],
state->frame->poc,
state->frame->ref->pocs[
state->frame->ref_LX[neighbor_reflist][
neighbor_cu->inter.mv_ref[neighbor_reflist]]],
mv_cand);
}
/**
* \brief Try to add a temporal MVP or merge candidate.
*
@ -1246,41 +1481,6 @@ static bool add_temporal_candidate(const encoder_state_t *state,
return true;
}
static INLINE bool add_mvp_candidate(const encoder_state_t *state,
const cu_info_t *cur_cu,
const cu_info_t *cand,
int8_t reflist,
bool scaling,
mv_t mv_cand_out[2])
{
if (!cand) return false;
assert(cand->inter.mv_dir != 0);
for (int i = 0; i < 2; i++) {
const int cand_list = i == 0 ? reflist : !reflist;
if ((cand->inter.mv_dir & (1 << cand_list)) == 0) continue;
if (scaling) {
mv_cand_out[0] = cand->inter.mv[cand_list][0];
mv_cand_out[1] = cand->inter.mv[cand_list][1];
apply_mv_scaling(state, cur_cu, cand, reflist, cand_list, mv_cand_out);
return true;
}
if (state->frame->ref_LX[cand_list][cand->inter.mv_ref[cand_list]] ==
state->frame->ref_LX[reflist][cur_cu->inter.mv_ref[reflist]])
{
mv_cand_out[0] = cand->inter.mv[cand_list][0];
mv_cand_out[1] = cand->inter.mv[cand_list][1];
return true;
}
}
return false;
}
/**
* \brief Pick two mv candidates from the spatial and temporal candidates.
*/
@ -1407,14 +1607,20 @@ void uvg_inter_get_mv_cand(const encoder_state_t * const state,
{
merge_candidates_t merge_cand = { 0 };
const uint8_t parallel_merge_level = state->encoder_control->cfg.log2_parallel_merge_level;
get_spatial_merge_candidates(x, y, width, height,
state->tile->frame->width,
state->tile->frame->height,
lcu,
&merge_cand, parallel_merge_level,state->encoder_control->cfg.wpp);
get_temporal_merge_candidates(state, x, y, width, height, 1, 0, &merge_cand);
get_mv_cand_from_candidates(state, x, y, width, height, &merge_cand, cur_cu, reflist, mv_cand);
if (cur_cu->type == CU_IBC) {
mv_t ibc_mv_cand[IBC_MRG_MAX_NUM_CANDS][2];
get_ibc_merge_candidates(state, cur_cu,lcu,NULL, x, y, width, height,ibc_mv_cand);
memcpy(mv_cand[0], ibc_mv_cand[0], sizeof(mv_t) * 2);
memcpy(mv_cand[1], ibc_mv_cand[1], sizeof(mv_t) * 2);
} else {
get_spatial_merge_candidates(x, y, width, height,
state->tile->frame->width,
state->tile->frame->height,
lcu,
&merge_cand, parallel_merge_level,state->encoder_control->cfg.wpp);
get_temporal_merge_candidates(state, x, y, width, height, 1, 0, &merge_cand);
get_mv_cand_from_candidates(state, x, y, width, height, &merge_cand, cur_cu, reflist, mv_cand);
}
uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[0][0], &mv_cand[0][1]);
uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[1][0], &mv_cand[1][1]);
}
@ -1443,17 +1649,30 @@ void uvg_inter_get_mv_cand_cua(const encoder_state_t * const state,
merge_candidates_t merge_cand = { 0 };
const cu_array_t *cua = state->tile->frame->cu_array;
get_spatial_merge_candidates_cua(cua,
x, y, width, height,
state->tile->frame->width, state->tile->frame->height,
&merge_cand, state->encoder_control->cfg.wpp);
get_temporal_merge_candidates(state, x, y, width, height, 1, 0, &merge_cand);
get_mv_cand_from_candidates(state, x, y, width, height, &merge_cand, cur_cu, reflist, mv_cand);
if (cur_cu->type == CU_IBC) {
mv_t ibc_mv_cand[IBC_MRG_MAX_NUM_CANDS][2];
get_ibc_merge_candidates(state, cur_cu, NULL,cua,x, y, width, height,ibc_mv_cand);
memcpy(mv_cand[0], ibc_mv_cand[0], sizeof(mv_t) * 2);
memcpy(mv_cand[1], ibc_mv_cand[1], sizeof(mv_t) * 2);
} else {
get_spatial_merge_candidates_cua(cua,
x, y, width, height,
state->tile->frame->width, state->tile->frame->height,
&merge_cand, state->encoder_control->cfg.wpp);
get_temporal_merge_candidates(state, x, y, width, height, 1, 0, &merge_cand);
get_mv_cand_from_candidates(state, x, y, width, height, &merge_cand, cur_cu, reflist, mv_cand);
}
uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[0][0], &mv_cand[0][1]);
uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[1][0], &mv_cand[1][1]);
}
/**
\brief Checks if two CUs have similar motion vectors. The function takes two CUs and compares their motion vectors.
\param cu1 first CU
\param cu2 second CU
\return returns 0 if the two CUs have dissimilar motion vectors, and 1 if the motions are similar.
*/
static bool is_duplicate_candidate(const cu_info_t* cu1, const cu_info_t* cu2)
{
if (!cu2) return false;
@ -1472,6 +1691,16 @@ static bool is_duplicate_candidate(const cu_info_t* cu1, const cu_info_t* cu2)
return true;
}
/**
* Adds a merge candidate to the list of possible candidates, if it is not a duplicate.
*
* \param cand The candidate to be added.
* \param possible_duplicate1 The first possible duplicate candidate to check for duplication.
* \param possible_duplicate2 The second possible duplicate candidate to check for duplication.
* \param merge_cand_out The output parameter to store the merge candidate information.
*
* @return Returns true if the merge candidate was added successfully, false otherwise.
*/
static bool add_merge_candidate(const cu_info_t *cand,
const cu_info_t *possible_duplicate1,
const cu_info_t *possible_duplicate2,
@ -1503,14 +1732,23 @@ static void hmvp_shift_lut(cu_info_t* lut, int32_t size, int32_t start, int32_t
}
}
static bool hmvp_push_lut_item(cu_info_t* lut, int32_t size, const cu_info_t* cu) {
static bool hmvp_push_lut_item(cu_info_t* lut, int32_t size, const cu_info_t* cu, bool ibc) {
int8_t duplicate = -1;
for (int i = 0; i < size; i++) {
if (is_duplicate_candidate(cu, (const cu_info_t*)&lut[i])) {
duplicate = i;
break;
if (ibc) {
for (int i = 0; i < size; i++) {
if (is_duplicate_candidate_ibc(cu, (const cu_info_t *)&lut[i])) {
duplicate = i;
break;
}
}
} else {
for (int i = 0; i < size; i++) {
if (is_duplicate_candidate(cu, (const cu_info_t *)&lut[i])) {
duplicate = i;
break;
}
}
}
// If duplicate found, shift the whole lut up to the duplicate, otherwise to the end
@ -1534,21 +1772,28 @@ static bool hmvp_push_lut_item(cu_info_t* lut, int32_t size, const cu_info_t* cu
void uvg_hmvp_add_mv(const encoder_state_t* const state, uint32_t pic_x, uint32_t pic_y, uint32_t block_width, uint32_t block_height, const cu_info_t* cu)
{
//if (!cu.geoFlag && !cu.affine)
if(cu->type == CU_INTER)
if(cu->type != CU_INTRA)
{
const uint8_t parallel_merge_level = state->encoder_control->cfg.log2_parallel_merge_level;
const uint32_t xBr = block_width + pic_x;
const uint32_t yBr = block_height + pic_y;
bool hmvp_possible = ((xBr >> parallel_merge_level) > (pic_x >> parallel_merge_level)) && ((yBr >> parallel_merge_level) > (pic_y >> parallel_merge_level));
if (hmvp_possible) { // ToDo: check for IBC
if (hmvp_possible || cu->type == CU_IBC) {
const uint32_t ctu_row = (pic_y >> LOG2_LCU_WIDTH);
const uint32_t ctu_row_mul_five = ctu_row * MAX_NUM_HMVP_CANDS;
bool add_row = hmvp_push_lut_item(&state->tile->frame->hmvp_lut[ctu_row_mul_five], state->tile->frame->hmvp_size[ctu_row], cu);
if(add_row && state->tile->frame->hmvp_size[ctu_row] < MAX_NUM_HMVP_CANDS) {
state->tile->frame->hmvp_size[ctu_row]++;
if (cu->type == CU_IBC) {
bool add_row = hmvp_push_lut_item(&state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five], state->tile->frame->hmvp_size_ibc[ctu_row], cu, true);
if(add_row && state->tile->frame->hmvp_size_ibc[ctu_row] < MAX_NUM_HMVP_CANDS) {
state->tile->frame->hmvp_size_ibc[ctu_row]++;
}
} else {
bool add_row = hmvp_push_lut_item(&state->tile->frame->hmvp_lut[ctu_row_mul_five], state->tile->frame->hmvp_size[ctu_row], cu, false);
if(add_row && state->tile->frame->hmvp_size[ctu_row] < MAX_NUM_HMVP_CANDS) {
state->tile->frame->hmvp_size[ctu_row]++;
}
}
}
}
@ -1652,6 +1897,19 @@ uint8_t uvg_inter_get_merge_cand(const encoder_state_t * const state,
const uint8_t parallel_merge_level = state->encoder_control->cfg.log2_parallel_merge_level;
merge_candidates_t merge_cand = { 0 };
const uint8_t max_num_cands = state->encoder_control->cfg.max_merge;
cu_info_t *cur_cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y));
if(cur_cu->type == CU_IBC) {
mv_t ibc_mv_cand[IBC_MRG_MAX_NUM_CANDS][2];
get_ibc_merge_candidates(state, cur_cu,lcu,NULL, x, y, width, height,ibc_mv_cand);
for (int i = 0; i < IBC_MRG_MAX_NUM_CANDS; i++) {
mv_cand[i].dir = 1;
mv_cand[i].mv[0][0] = ibc_mv_cand[i][0];
mv_cand[i].mv[0][1] = ibc_mv_cand[i][1];
}
return IBC_MRG_MAX_NUM_CANDS;
}
get_spatial_merge_candidates(x, y, width, height,
state->tile->frame->width,
state->tile->frame->height,
@ -1721,7 +1979,6 @@ uint8_t uvg_inter_get_merge_cand(const encoder_state_t * const state,
for (int i = 0; i < num_cand; i++) {
const cu_info_t* hmvp_cand = &state->tile->frame->hmvp_lut[ctu_row_mul_five + i];
// ToDo: Add IBC condition
if (i > 1 || ((!is_duplicate_candidate(hmvp_cand, a[1]))
&& (!is_duplicate_candidate(hmvp_cand, b[1]))) ) {
mv_cand[candidates].mv[0][0] = state->tile->frame->hmvp_lut[ctu_row_mul_five + i].inter.mv[0][0];

103
src/rdo.c
View file

@ -1773,6 +1773,109 @@ double uvg_get_mvd_coding_cost_cabac(const encoder_state_t* state,
return bits;
}
/** IBC MVD cost calculation with CABAC
* Estimates the bits needed to signal the block vector (x, y), either as a
* merge index (when it equals one of the merge candidates) or as an MVD
* against the cheaper of the two MV candidates, using a counting-only copy of
* state->search_cabac.
*
* \param state encoder state
* \param x horizontal vector component, scaled up by mv_shift inside
* \param y vertical vector component, scaled up by mv_shift inside
* \param mv_shift left shift applied to x and y
* \param mv_cand the two MV predictor candidates
* \param merge_cand merge candidates to match against
* \param num_cand number of valid entries in merge_cand
* \param ref_idx not used by this function
* \param bitcost receives the estimated number of bits
* \returns the estimated bits multiplied by state->lambda_sqrt
*/
double uvg_calc_ibc_mvd_cost_cabac(const encoder_state_t * state,
int x,
int y,
int mv_shift,
mv_t mv_cand[2][2],
inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
int16_t num_cand,
int32_t ref_idx,
double* bitcost)
{
cabac_data_t state_cabac_copy;
cabac_data_t* cabac;
uint32_t merge_idx;
vector2d_t mvd = { 0, 0 };
int8_t merged = 0;
int8_t cur_mv_cand = 0;
// Bring the vector to the precision of the candidates.
x *= 1 << mv_shift;
y *= 1 << mv_shift;
// Check every candidate to find a match
// The candidate's vector is read from list (dir - 1).
for (merge_idx = 0; merge_idx < (uint32_t)num_cand; merge_idx++) {
if (merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][0] == x &&
merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][1] == y)
{
merged = 1;
break;
}
}
// Work on a private copy of the search CABAC state so the original is left untouched
memcpy(&state_cabac_copy, &state->search_cabac, sizeof(cabac_data_t));
// Clear bytes and bits and set mode to "count"
state_cabac_copy.only_count = 1;
cabac = &state_cabac_copy;
double bits = 0;
if (!merged) {
// Differences to both predictor candidates.
vector2d_t mvd1 = {
x - mv_cand[0][0],
y - mv_cand[0][1],
};
vector2d_t mvd2 = {
x - mv_cand[1][0],
y - mv_cand[1][1],
};
uvg_change_precision_vector2d(INTERNAL_MV_PREC, 2, &mvd1);
uvg_change_precision_vector2d(INTERNAL_MV_PREC, 2, &mvd2);
double cand1_cost = uvg_get_mvd_coding_cost_cabac(state, cabac, mvd1.x, mvd1.y);
double cand2_cost = uvg_get_mvd_coding_cost_cabac(state, cabac, mvd2.x, mvd2.y);
// Use the second candidate only if it is strictly cheaper; ties keep the first
if (cand2_cost < cand1_cost) {
cur_mv_cand = 1;
mvd = mvd2;
} else {
mvd = mvd1;
}
}
cabac->cur_ctx = &(cabac->ctx.cu_merge_flag_ext_model);
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_merge_flag_ext_model), merged, bits, "MergeFlag");
// From here on num_cand is the configured maximum number of merge candidates,
// not the number passed in by the caller.
num_cand = state->encoder_control->cfg.max_merge;
if (merged) {
if (num_cand > 1) {
// Merge index as a run of "not this one" bins, ended by a zero bin.
int32_t ui;
for (ui = 0; ui < num_cand - 1; ui++) {
int32_t symbol = (ui != merge_idx);
if (ui == 0) {
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_merge_idx_ext_model), symbol, bits, "MergeIndex");
} else {
// Bins after the first are counted as one bit each.
CABAC_BIN_EP(cabac, symbol, "MergeIndex");
bits += 1;
}
if (symbol == 0) break;
}
}
} else {
// It is safe to drop const here because cabac->only_count is set.
uvg_encode_mvd((encoder_state_t*) state, cabac, mvd.x, mvd.y, &bits);
// Signal which candidate MV to use
// NOTE(review): bits is not passed to CABAC_BIN, so the mvp_flag does not
// appear to be included in the returned cost — confirm this is intended.
cabac->cur_ctx = &(cabac->ctx.mvp_idx_model);
CABAC_BIN(cabac, cur_mv_cand, "mvp_flag");
}
*bitcost = bits;
// Weight the bit estimate by lambda_sqrt
return *bitcost * state->lambda_sqrt;
}
/** MVD cost calculation with CABAC
* \returns int
* Calculates Motion Vector cost and related costs using CABAC coding

View file

@ -88,6 +88,7 @@ uint32_t uvg_get_coded_level(encoder_state_t * state, double* coded_cost, double
int32_t q_bits,double temp, int8_t last, int8_t type);
uvg_mvd_cost_func uvg_calc_mvd_cost_cabac;
uvg_mvd_cost_func uvg_calc_ibc_mvd_cost_cabac;
double uvg_get_mvd_coding_cost_cabac(const encoder_state_t* state,
const cabac_data_t* cabac,

View file

@ -45,6 +45,7 @@
#include "rdo.h"
#include "search_inter.h"
#include "search_intra.h"
#include "search_ibc.h"
#include "threadqueue.h"
#include "transform.h"
#include "videoframe.h"
@ -179,7 +180,7 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in
}
}
static void lcu_fill_inter(lcu_t *lcu, int x_local, int y_local, int cu_width)
static void lcu_fill_inter(lcu_t *lcu, int x_local, int y_local, int cu_width, uint8_t type)
{
const part_mode_t part_mode = LCU_GET_CU_AT_PX(lcu, x_local, y_local)->part_size;
const int num_pu = uvg_part_mode_num_parts[part_mode];
@ -191,7 +192,7 @@ static void lcu_fill_inter(lcu_t *lcu, int x_local, int y_local, int cu_width)
const int height_pu = PU_GET_H(part_mode, cu_width, i);
cu_info_t *pu = LCU_GET_CU_AT_PX(lcu, x_pu, y_pu);
pu->type = CU_INTER;
pu->type = type;
lcu_fill_cu_info(lcu, x_pu, y_pu, width_pu, height_pu, pu);
}
}
@ -306,7 +307,7 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state,
lcu_t *const lcu)
{
const int width = LCU_WIDTH >> depth;
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0);
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0);
cabac_data_t* cabac = (cabac_data_t *)&state->search_cabac;
// cur_cu is used for TU parameters.
@ -380,7 +381,7 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
const vector2d_t lcu_px = { (x_px & ~7) / 2, (y_px & ~7) / 2 };
const int width = (depth < MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth;
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0);
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0);
double tr_tree_bits = 0;
double coeff_bits = 0;
@ -477,7 +478,7 @@ static double cu_rd_cost_tr_split_accurate(
enum uvg_tree_type tree_type) {
const int width = LCU_WIDTH >> depth;
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0);
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0);
// cur_cu is used for TU parameters.
cu_info_t* const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
@ -499,7 +500,7 @@ static double cu_rd_cost_tr_split_accurate(
int cbf = cbf_is_set_any(pred_cu->cbf, depth);
// Only need to signal coded block flag if not skipped or merged
// skip = no coded residual, merge = coded residual
if (pred_cu->type == CU_INTER && (pred_cu->part_size != SIZE_2Nx2N || !pred_cu->merged)) {
if (pred_cu->type != CU_INTRA && (pred_cu->part_size != SIZE_2Nx2N || !pred_cu->merged)) {
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_qt_root_cbf_model), cbf, tr_tree_bits, "rqt_root_cbf");
}
@ -803,9 +804,12 @@ static double search_cu(
cu_info_t hmvp_lut[MAX_NUM_HMVP_CANDS];
uint8_t hmvp_lut_size = state->tile->frame->hmvp_size[ctu_row];
cu_info_t hmvp_lut_ibc[MAX_NUM_HMVP_CANDS];
uint8_t hmvp_lut_size_ibc = state->tile->frame->hmvp_size_ibc[ctu_row];
// Store original HMVP lut before search and restore after, since it's modified
if (state->frame->slicetype != UVG_SLICE_I) memcpy(hmvp_lut, &state->tile->frame->hmvp_lut[ctu_row_mul_five], sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
if(state->encoder_control->cfg.ibc) memcpy(hmvp_lut_ibc, &state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five], sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
struct {
int32_t min;
@ -1006,6 +1010,34 @@ static double search_cu(
}
}
// Simple IBC search
if (can_use_intra //&& state->frame->slicetype == UVG_SLICE_I
&& state->encoder_control->cfg.ibc
&& cost > 1000
&& cu_width > 4
&& (x >= cu_width || y >= cu_width)
&& !cur_cu->skipped) {
cu_info_t backup_cu = *cur_cu;
double mode_cost;
double mode_bitcost;
uvg_search_cu_ibc(state,
x, y,
depth,
lcu,
&mode_cost, &mode_bitcost);
if (mode_cost < cost) {
cost = mode_cost;
inter_bitcost = mode_bitcost;
cur_cu->type = CU_IBC;
cur_cu->inter.mv_dir = 1;
cur_cu->joint_cb_cr = 0;
} else {
*cur_cu = backup_cu;
}
}
// Reconstruct best mode because we need the reconstructed pixels for
// mode search of adjacent CUs.
if (cur_cu->type == CU_INTRA) {
@ -1035,7 +1067,7 @@ static double search_cu(
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
} else if (cur_cu->type == CU_INTER) {
} else if (cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) {
if (!cur_cu->skipped) {
@ -1081,12 +1113,12 @@ static double search_cu(
inter_bitcost += cur_cu->merge_idx;
}
}
lcu_fill_inter(lcu, x_local, y_local, cu_width);
lcu_fill_inter(lcu, x_local, y_local, cu_width, cur_cu->type);
lcu_fill_cbf(lcu, x_local, y_local, cu_width, cur_cu);
}
}
if (cur_cu->type == CU_INTRA || cur_cu->type == CU_INTER) {
if (cur_cu->type == CU_INTRA || cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) {
double bits = 0;
cabac_data_t* cabac = &state->search_cabac;
cabac->update = 1;
@ -1289,7 +1321,14 @@ static double search_cu(
if (state->frame->slicetype != UVG_SLICE_I) {
// Reset HMVP to the beginning of this CU level search and add this CU as the mvp
memcpy(&state->tile->frame->hmvp_lut[ctu_row_mul_five], hmvp_lut, sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
state->tile->frame->hmvp_size[ctu_row] = hmvp_lut_size;
state->tile->frame->hmvp_size[ctu_row] = hmvp_lut_size;
}
if (state->encoder_control->cfg.ibc) {
memcpy(&state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five], hmvp_lut_ibc, sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
state->tile->frame->hmvp_size_ibc[ctu_row] = hmvp_lut_size_ibc;
}
// Add candidate when in inter slice or ibc is enabled
if(state->frame->slicetype != UVG_SLICE_I || state->encoder_control->cfg.ibc) {
uvg_hmvp_add_mv(state, x, y, cu_width, cu_width, cur_cu);
}
}
@ -1311,7 +1350,14 @@ static double search_cu(
if (state->frame->slicetype != UVG_SLICE_I) {
// Reset HMVP to the beginning of this CU level search and add this CU as the mvp
memcpy(&state->tile->frame->hmvp_lut[ctu_row_mul_five], hmvp_lut, sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
state->tile->frame->hmvp_size[ctu_row] = hmvp_lut_size;
state->tile->frame->hmvp_size[ctu_row] = hmvp_lut_size;
}
if (state->encoder_control->cfg.ibc) {
memcpy(&state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five], hmvp_lut_ibc, sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
state->tile->frame->hmvp_size_ibc[ctu_row] = hmvp_lut_size_ibc;
}
// Add candidate when in inter slice or ibc is enabled
if(state->frame->slicetype != UVG_SLICE_I || state->encoder_control->cfg.ibc) {
uvg_hmvp_add_mv(state, x, y, cu_width, cu_width, cur_cu);
}
}

1389
src/search_ibc.c Normal file

File diff suppressed because it is too large Load diff

55
src/search_ibc.h Normal file
View file

@ -0,0 +1,55 @@
#pragma once
/*****************************************************************************
* This file is part of uvg266 VVC encoder.
*
* Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this
* list of conditions and the following disclaimer in the documentation and/or
* other materials provided with the distribution.
*
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
****************************************************************************/
/**
* \ingroup Compression
* \file
* Intra block copy (IBC) prediction parameter search.
*/
#include "cu.h"
#include "encoderstate.h"
#include "global.h" // IWYU pragma: keep
#include "inter.h"
#include "uvg266.h"
/**
 * \brief Search IBC prediction parameters for the CU at (x, y).
 *
 * The result is reported through inter_cost and inter_bitcost; callers pass
 * pointers to doubles and compare the returned cost against the best cost
 * found so far.
 * NOTE(review): x/y are presumably luma pixel coordinates and depth the
 * quadtree depth of the CU -- confirm against the definition.
 */
void uvg_search_cu_ibc(encoder_state_t * const state,
int x, int y, int depth,
lcu_t *lcu,
double *inter_cost,
double* inter_bitcost);

View file

@ -312,6 +312,55 @@ static void select_starting_point(inter_search_info_t *info,
check_mv_cost(info, extra_mv.x, extra_mv.y, best_cost, best_bits, best_mv);
}
// Hash-based IBC candidates: only when bit 1 of cfg.ibc is set.
// Every stored position whose hash equals the hash of the current block's
// top-left hash block is tried as a starting point, provided that all hash
// blocks covered by the current block match as well.
if (info->state->encoder_control->cfg.ibc & 2) {
int origin_x = info->origin.x;
int origin_y = info->origin.y;
// Origin expressed in hash-block units
int ibc_origin_x = origin_x / UVG_HASHMAP_BLOCKSIZE;
int ibc_origin_y = origin_y / UVG_HASHMAP_BLOCKSIZE;
// Position packed as x in the upper 16 bits and y in the lower 16 bits;
// used to recognise the current block's own hashmap entry.
// NOTE(review): the shift is done on a signed int, so this assumes
// origin_x < 0x8000 -- confirm.
int own_location = ((origin_x & 0xffff) << 16) | (origin_y & 0xffff);
uint32_t ibc_buffer_row = origin_y / LCU_WIDTH;
// Hash of the block at the origin, read back from the position-to-hash table
uint32_t crc = info->state->tile->frame->ibc_hashmap_pos_to_hash
[(origin_y / UVG_HASHMAP_BLOCKSIZE) *
info->state->tile->frame->ibc_hashmap_pos_to_hash_stride +
origin_x / UVG_HASHMAP_BLOCKSIZE];
uvg_hashmap_node_t *result = uvg_hashmap_search(
info->state->tile->frame->ibc_hashmap_row[ibc_buffer_row], crc);
// Walk the chain of nodes returned by the search
while (result != NULL) {
// The key is re-checked for every node; the block's own position is skipped
if (result->key == crc && result->value != own_location) {
// Unpack the stored position and turn it into a displacement from the origin
int pos_x = result->value >> 16;
int pos_y = result->value & 0xffff;
int mv_x = pos_x - origin_x;
int mv_y = pos_y - origin_y;
int ibc_pos_x = pos_x / UVG_HASHMAP_BLOCKSIZE;
int ibc_pos_y = pos_y / UVG_HASHMAP_BLOCKSIZE;
bool full_block = true;
// Compare the hashes of every hash block inside the width x height area
// at the candidate position against those at the origin.
// NOTE(review): no bounds check on the table indices is visible here --
// confirm that candidate positions always leave room for the full block.
for (int ibc_x = 0; ibc_x < info->width / UVG_HASHMAP_BLOCKSIZE; ibc_x++) {
for (int ibc_y = 0; ibc_y < info->height / UVG_HASHMAP_BLOCKSIZE; ibc_y++) {
uint32_t neighbor_crc = info->state->tile->frame->ibc_hashmap_pos_to_hash
[(ibc_pos_y+ibc_y) * info->state->tile->frame->ibc_hashmap_pos_to_hash_stride + ibc_pos_x + ibc_x];
uint32_t other_crc = info->state->tile->frame->ibc_hashmap_pos_to_hash
[(ibc_origin_y+ibc_y) * info->state->tile->frame->ibc_hashmap_pos_to_hash_stride + ibc_origin_x + ibc_x];
if (other_crc != neighbor_crc) {
full_block = false;
break;
}
}
// The inner break only leaves the inner loop; leave the outer one too
if (!full_block) break;
}
// Only fully matching blocks are evaluated as starting points
if (full_block) check_mv_cost(info, mv_x, mv_y, best_cost, best_bits, best_mv);
}
result = result->next;
}
}
// Go through candidates
for (int32_t i = 0; i < info->num_merge_cand; ++i) {
if (info->merge_cand[i].dir == 3) continue;

View file

@ -793,9 +793,83 @@ static void generate_residual_generic(const uvg_pixel* ref_in, const uvg_pixel*
}
}
/**
 * \brief Fold four consecutive pixels into a running table-driven CRC.
 *
 * One table step is taken per pixel; the value fed to the table is masked
 * with 0xFF, so only the low eight bits of each pixel take part.
 *
 * \param crc  running CRC value
 * \param buf  pointer to the first of four pixels
 * \returns the updated CRC value
 */
INLINE static uint32_t uvg_crc32c_4_generic(uint32_t crc, const uvg_pixel *buf)
{
  for (int px = 0; px < 4; ++px) {
    const uint32_t index = (crc ^ buf[px]) & 0xFF;
    crc = (crc >> 8) ^ uvg_crc_table[index];
  }
  return crc;
}
/**
 * \brief Fold eight consecutive pixels into a running table-driven CRC.
 *
 * One table step is taken per pixel; the value fed to the table is masked
 * with 0xFF, so only the low eight bits of each pixel take part.
 *
 * \param crc  running CRC value
 * \param buf  pointer to the first of eight pixels
 * \returns the updated CRC value
 */
INLINE static uint32_t uvg_crc32c_8_generic(uint32_t crc, const uvg_pixel *buf)
{
  for (int px = 0; px < 8; ++px) {
    const uint32_t index = (crc ^ buf[px]) & 0xFF;
    crc = (crc >> 8) ^ uvg_crc_table[index];
  }
  return crc;
}
/**
 * \brief CRC of a 4x4 pixel block, portable implementation.
 *
 * The four rows are hashed top to bottom, four pixels each. The CRC starts
 * at 0xFFFFFFFF and is inverted before being returned.
 *
 * \param buf         pointer to the top-left pixel of the block
 * \param pic_stride  distance between rows, in pixels
 * \returns the block hash
 */
static uint32_t uvg_crc32c_4x4_8bit_generic(const uvg_pixel *buf, uint32_t pic_stride)
{
  uint32_t hash = 0xFFFFFFFF;
  for (uint32_t row = 0; row < 4; ++row) {
    hash = uvg_crc32c_4_generic(hash, &buf[row * pic_stride]);
  }
  return hash ^ 0xFFFFFFFF;
}
static uint32_t uvg_crc32c_4x4_16bit_generic(const uvg_pixel *buf, uint32_t pic_stride)
{
uint32_t crc = 0xFFFFFFFF;
crc = uvg_crc32c_4_generic(crc, &buf[0 * pic_stride]);
crc = uvg_crc32c_4_generic(crc, &buf[0 * pic_stride] + 4);
crc = uvg_crc32c_4_generic(crc, &buf[1 * pic_stride]);
crc = uvg_crc32c_4_generic(crc, &buf[1 * pic_stride] + 4);
crc = uvg_crc32c_4_generic(crc, &buf[2 * pic_stride]);
crc = uvg_crc32c_4_generic(crc, &buf[2 * pic_stride] + 4);
crc = uvg_crc32c_4_generic(crc, &buf[3 * pic_stride]);
crc = uvg_crc32c_4_generic(crc, &buf[3 * pic_stride] + 4);
return crc ^ 0xFFFFFFFF;
}
/**
 * \brief CRC of an 8x8 pixel block, portable implementation.
 *
 * The eight rows are hashed top to bottom, eight pixels each. The CRC starts
 * at 0xFFFFFFFF and is inverted before being returned.
 *
 * \param buf         pointer to the top-left pixel of the block
 * \param pic_stride  distance between rows, in pixels
 * \returns the block hash
 */
static uint32_t uvg_crc32c_8x8_8bit_generic(const uvg_pixel *buf, uint32_t pic_stride)
{
  uint32_t hash = 0xFFFFFFFF;
  for (uint32_t row = 0; row < 8; ++row) {
    hash = uvg_crc32c_8_generic(hash, &buf[row * pic_stride]);
  }
  return hash ^ 0xFFFFFFFF;
}
int uvg_strategy_register_picture_generic(void* opaque, uint8_t bitdepth)
{
bool success = true;
if (bitdepth == 8) {
success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "generic", 0, &uvg_crc32c_4x4_8bit_generic);
success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "generic", 0, &uvg_crc32c_8x8_8bit_generic);
} else {
success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "generic", 0, &uvg_crc32c_4x4_16bit_generic);
}
success &= uvg_strategyselector_register(opaque, "reg_sad", "generic", 0, &reg_sad_generic);

View file

@ -0,0 +1,95 @@
/*****************************************************************************
* This file is part of uvg266 VVC encoder.
*
* Copyright (c) 2023, Tampere University, ITU/ISO/IEC, project contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this
* list of conditions and the following disclaimer in the documentation and/or
* other materials provided with the distribution.
*
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
****************************************************************************/
#include "global.h"
#if COMPILE_INTEL_SSE42
#include "uvg266.h"
#include "strategies/sse42/picture-sse42.h"
#include <immintrin.h>
#include <stdlib.h>
#include "strategyselector.h"
/**
 * \brief CRC of a 4x4 block using the SSE4.2 CRC32 instruction.
 *
 * Each of the four rows is consumed with a single 32-bit load, i.e. four
 * bytes per row. The CRC starts at 0xFFFFFFFF and is inverted on return.
 *
 * \param buf         pointer to the top-left pixel of the block
 * \param pic_stride  distance between rows, in pixels
 * \returns the block hash
 */
static uint32_t uvg_crc32c_4x4_8bit_sse42(const uvg_pixel *buf, uint32_t pic_stride)
{
  uint32_t hash = 0xFFFFFFFF;
  for (uint32_t row = 0; row < 4; ++row) {
    const uint32_t row_bits = *((const uint32_t *)&buf[row * pic_stride]);
    hash = _mm_crc32_u32(hash, row_bits);
  }
  return hash ^ 0xFFFFFFFF;
}
/**
 * \brief CRC of a 4x4 block for high bit depth, using the SSE4.2 CRC32
 *        instruction.
 *
 * Each of the four rows is consumed with a single 64-bit load, i.e. eight
 * bytes per row. The CRC starts at 0xFFFFFFFF and the low 32 bits are
 * inverted and returned.
 *
 * \param buf         pointer to the top-left pixel of the block
 * \param pic_stride  distance between rows, in pixels
 * \returns the block hash
 */
static uint32_t uvg_crc32c_4x4_16bit_sse42(const uvg_pixel *buf, uint32_t pic_stride)
{
  uint64_t hash = 0xFFFFFFFF;
  for (uint32_t row = 0; row < 4; ++row) {
    const uint64_t row_bits = *((const uint64_t *)&buf[row * pic_stride]);
    hash = _mm_crc32_u64(hash, row_bits);
  }
  return (uint32_t)(hash ^ 0xFFFFFFFF);
}
/**
 * \brief CRC of an 8x8 block using the SSE4.2 CRC32 instruction.
 *
 * Each of the eight rows is consumed with a single 64-bit load, i.e. eight
 * bytes per row. The CRC starts at 0xFFFFFFFF and the low 32 bits are
 * inverted and returned.
 *
 * \param buf         pointer to the top-left pixel of the block
 * \param pic_stride  distance between rows, in pixels
 * \returns the block hash
 */
static uint32_t uvg_crc32c_8x8_8bit_sse42(const uvg_pixel *buf, uint32_t pic_stride)
{
  uint64_t hash = 0xFFFFFFFF;
  for (uint32_t row = 0; row < 8; ++row) {
    const uint64_t row_bits = *((const uint64_t *)&buf[row * pic_stride]);
    hash = _mm_crc32_u64(hash, row_bits);
  }
  return (uint32_t)(hash ^ 0xFFFFFFFF);
}
#endif //COMPILE_INTEL_SSE42
/**
 * \brief Register the SSE4.2 picture strategies with the strategy selector.
 *
 * For 8-bit content the 4x4 and 8x8 CRC functions are registered; for any
 * other bit depth only the 16-bit 4x4 CRC function is. When the file is
 * built without COMPILE_INTEL_SSE42 nothing is registered.
 *
 * \param opaque    passed through to uvg_strategyselector_register
 * \param bitdepth  bit depth the strategies are selected for
 * \returns non-zero when every registration succeeded
 */
int uvg_strategy_register_picture_sse42(void* opaque, uint8_t bitdepth) {
  bool success = true;

#if COMPILE_INTEL_SSE42
  if (bitdepth != 8) {
    // NOTE(review): no "crc32c_8x8" implementation is registered here for
    // high bit depth -- confirm that this is intended.
    success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "sse42", 0, &uvg_crc32c_4x4_16bit_sse42);
  } else {
    success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "sse42", 0, &uvg_crc32c_4x4_8bit_sse42);
    success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "sse42", 0, &uvg_crc32c_8x8_8bit_sse42);
  }
#endif

  return success;
}

View file

@ -0,0 +1,45 @@
#pragma once
/*****************************************************************************
* This file is part of uvg266 VVC encoder.
*
* Copyright (c) 2022, Tampere University, ITU/ISO/IEC, project contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this
* list of conditions and the following disclaimer in the documentation and/or
* other materials provided with the distribution.
*
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
****************************************************************************/
/**
* \ingroup Optimization
* \file
* Optimizations for SSE4.2.
*/
#include "global.h" // IWYU pragma: keep
#include "uvg266.h"
int uvg_strategy_register_picture_sse42(void* opaque, uint8_t bitdepth);

View file

@ -41,6 +41,8 @@
// Define function pointers.
crc32c_4x4_func * uvg_crc32c_4x4 = 0;
crc32c_8x8_func * uvg_crc32c_8x8 = 0;
reg_sad_func * uvg_reg_sad = 0;
cost_pixel_nxn_func * uvg_sad_4x4 = 0;
@ -83,6 +85,8 @@ pixel_var_func *uvg_pixel_var = 0;
generate_residual_func *uvg_generate_residual = 0;
int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth) {
bool success = true;
@ -94,6 +98,9 @@ int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth) {
if (uvg_g_hardware_flags.intel_flags.sse41) {
success &= uvg_strategy_register_picture_sse41(opaque, bitdepth);
}
if (uvg_g_hardware_flags.intel_flags.sse42) {
success &= uvg_strategy_register_picture_sse42(opaque, bitdepth);
}
if (uvg_g_hardware_flags.intel_flags.avx2) {
success &= uvg_strategy_register_picture_avx2(opaque, bitdepth);
}
@ -206,3 +213,50 @@ cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n)
return NULL;
}
}
// Precomputed CRC32C (Castagnoli) lookup table, polynomial 0x1EDC6F41 (reflected form 0x82F63B78)
const uint32_t uvg_crc_table[256] = {
0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c,
0x26a1e7e8, 0xd4ca64eb, 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b,
0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, 0x105ec76f, 0xe235446c,
0xf165b798, 0x030e349b, 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 0x5d1d08bf, 0xaf768bbc,
0xbc267848, 0x4e4dfb4b, 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a,
0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, 0xaa64d611, 0x580f5512,
0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad,
0x1642ae59, 0xe4292d5a, 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a,
0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, 0x417b1dbc, 0xb3109ebf,
0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, 0xfe53516f,
0xed03a29b, 0x1f682198, 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927,
0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, 0xdbfc821c, 0x2997011f,
0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e,
0x4767748a, 0xb50cf789, 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859,
0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, 0x7198540d, 0x83f3d70e,
0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de,
0xdde0eb2a, 0x2f8b6829, 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c,
0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, 0x082f63b7, 0xfa44e0b4,
0xe9141340, 0x1b7f9043, 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b,
0xb4091bff, 0x466298fc, 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c,
0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, 0xa24bb5a6, 0x502036a5,
0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975,
0x0e330a81, 0xfc588982, 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d,
0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, 0x38cc2a06, 0xcaa7a905,
0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 0x0417b1db, 0xf67c32d8,
0xe52cc12c, 0x1747422f, 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff,
0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, 0xd3d3e1ab, 0x21b862a8,
0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78,
0x7fab5e8c, 0x8dc0dd8f, 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee,
0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, 0x69e9f0d5, 0x9b8273d6,
0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, 0xc69f7b69,
0xd5cf889d, 0x27a40b9e, 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e,
0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,
};

View file

@ -151,7 +151,16 @@ typedef double (pixel_var_func)(const uvg_pixel *buf, const uint32_t len);
typedef void (generate_residual_func)(const uvg_pixel* ref_in, const uvg_pixel* pred_in, int16_t* residual, int width, int ref_stride, int pred_stride);
extern const uint32_t uvg_crc_table[256];
typedef uint32_t(crc32c_4x4_func)(const uvg_pixel *buf, uint32_t pic_stride);
typedef uint32_t(crc32c_8x8_func)(const uvg_pixel *buf, uint32_t pic_stride);
// Declare function pointers.
extern crc32c_4x4_func * uvg_crc32c_4x4;
extern crc32c_8x8_func * uvg_crc32c_8x8;
extern reg_sad_func * uvg_reg_sad;
extern cost_pixel_nxn_func * uvg_sad_4x4;
@ -198,6 +207,8 @@ cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned n);
cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n);
#define STRATEGIES_PICTURE_EXPORTS \
{"crc32c_4x4", (void**) &uvg_crc32c_4x4}, \
{"crc32c_8x8", (void **)&uvg_crc32c_8x8}, \
{"reg_sad", (void**) &uvg_reg_sad}, \
{"sad_4x4", (void**) &uvg_sad_4x4}, \
{"sad_8x8", (void**) &uvg_sad_8x8}, \

View file

@ -543,6 +543,8 @@ typedef struct uvg_config
uint8_t dual_tree;
uint8_t intra_rough_search_levels;
uint8_t ibc; /* \brief Intra Block Copy parameter */
} uvg_config;
/**

View file

@ -104,6 +104,8 @@ int uvg_videoframe_free(videoframe_t * const frame)
free(frame);
return 1;
}

View file

@ -41,6 +41,7 @@
#include "cu.h"
#include "global.h" // IWYU pragma: keep
#include "uvg266.h"
#include "hashmap.h"
/**
@ -77,12 +78,22 @@ typedef struct videoframe
struct param_set_map* alf_param_set_map;
int32_t poc; //!< \brief Picture order count
cu_info_t* hmvp_lut; //!< \brief Look-up table for HMVP, one for each LCU row
uvg_pixel **ibc_buffer_y; //!< \brief Intra Block Copy buffer for each LCU row
uvg_pixel **ibc_buffer_u; //!< \brief Intra Block Copy buffer for each LCU row
uvg_pixel **ibc_buffer_v; //!< \brief Intra Block Copy buffer for each LCU row
uvg_hashmap_t **ibc_hashmap_row; //!< \brief Hashmap for IBC hash search for each LCU row
uint32_t *ibc_hashmap_pos_to_hash; //!< \brief Hashmap reverse search for position to hash
uint32_t ibc_hashmap_pos_to_hash_stride; //!< \brief Hashmap position to hash stride
cu_info_t* hmvp_lut_ibc; //!< \brief Look-up table for HMVP in IBC, one for each LCU row
uint8_t* hmvp_size_ibc; //!< \brief HMVP IBC LUT size
cu_info_t* hmvp_lut; //!< \brief Look-up table for HMVP, one for each LCU row
uint8_t* hmvp_size; //!< \brief HMVP LUT size
bool source_lmcs_mapped; //!< \brief Indicate if source_lmcs is available and mapped to LMCS
bool lmcs_top_level; //!< \brief Indicate that in this level the LMCS images are allocated
bool rec_lmcs_mapped; //!< \brief Indicate if rec_lmcs is available and mapped to LMCS
} videoframe_t;

View file

@ -14,4 +14,5 @@ valgrind_test $common_args --gop=8 --subme=4 --bipred --tmvp
valgrind_test $common_args --transform-skip --tr-skip-max-size=5
valgrind_test $common_args --vaq=8
valgrind_test $common_args --vaq=8 --bitrate 350000
valgrind_test $common_args --vaq=8 --rc-algorithm oba --bitrate 350000
valgrind_test $common_args --vaq=8 --rc-algorithm oba --bitrate 350000
valgrind_test $common_args --ibc=1