mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-30 12:44:07 +00:00
Merge branch 'implement_ibc'
This commit is contained in:
commit
1a1fea1a19
|
@ -145,6 +145,7 @@ target_include_directories(uvg266 PUBLIC src/strategies)
|
||||||
|
|
||||||
file(GLOB LIB_SOURCES_STRATEGIES_AVX2 RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/avx2/*.c")
|
file(GLOB LIB_SOURCES_STRATEGIES_AVX2 RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/avx2/*.c")
|
||||||
file(GLOB LIB_SOURCES_STRATEGIES_SSE41 RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/sse41/*.c")
|
file(GLOB LIB_SOURCES_STRATEGIES_SSE41 RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/sse41/*.c")
|
||||||
|
file(GLOB LIB_SOURCES_STRATEGIES_SSE42 RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/sse42/*.c")
|
||||||
|
|
||||||
set(CLI_SOURCES "src/encmain.c" "src/cli.c" "src/cli.h" "src/yuv_io.c" "src/yuv_io.h")
|
set(CLI_SOURCES "src/encmain.c" "src/cli.c" "src/cli.h" "src/yuv_io.c" "src/yuv_io.h")
|
||||||
|
|
||||||
|
@ -176,6 +177,7 @@ else()
|
||||||
if(${CMAKE_SYSTEM_PROCESSOR} IN_LIST ALLOW_AVX2)
|
if(${CMAKE_SYSTEM_PROCESSOR} IN_LIST ALLOW_AVX2)
|
||||||
set_property( SOURCE ${LIB_SOURCES_STRATEGIES_AVX2} APPEND PROPERTY COMPILE_FLAGS "-mavx2 -mbmi -mpopcnt -mlzcnt -mbmi2" )
|
set_property( SOURCE ${LIB_SOURCES_STRATEGIES_AVX2} APPEND PROPERTY COMPILE_FLAGS "-mavx2 -mbmi -mpopcnt -mlzcnt -mbmi2" )
|
||||||
set_property( SOURCE ${LIB_SOURCES_STRATEGIES_SSE41} APPEND PROPERTY COMPILE_FLAGS "-msse4.1" )
|
set_property( SOURCE ${LIB_SOURCES_STRATEGIES_SSE41} APPEND PROPERTY COMPILE_FLAGS "-msse4.1" )
|
||||||
|
set_property( SOURCE ${LIB_SOURCES_STRATEGIES_SSE42} APPEND PROPERTY COMPILE_FLAGS "-msse4.2" )
|
||||||
endif()
|
endif()
|
||||||
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
||||||
find_package(Threads REQUIRED)
|
find_package(Threads REQUIRED)
|
||||||
|
@ -200,7 +202,7 @@ file(GLOB SOURCE_GROUP_CABAC RELATIVE ${PROJECT_SOURCE_DIR} "src/bitstream.*" "s
|
||||||
file(GLOB SOURCE_GROUP_COMPRESSION RELATIVE ${PROJECT_SOURCE_DIR} "src/search*" "src/rdo.*" "src/fast_coeff*")
|
file(GLOB SOURCE_GROUP_COMPRESSION RELATIVE ${PROJECT_SOURCE_DIR} "src/search*" "src/rdo.*" "src/fast_coeff*")
|
||||||
file(GLOB SOURCE_GROUP_CONSTRAINT RELATIVE ${PROJECT_SOURCE_DIR} "src/constraint.*" "src/ml_*")
|
file(GLOB SOURCE_GROUP_CONSTRAINT RELATIVE ${PROJECT_SOURCE_DIR} "src/constraint.*" "src/ml_*")
|
||||||
file(GLOB SOURCE_GROUP_CONTROL RELATIVE ${PROJECT_SOURCE_DIR} "src/cfg.*" "src/encoder.*" "src/encoder_state-c*" "src/encoder_state-g*" "src/encoderstate*" "src/gop.*" "src/input_frame_buffer.*" "src/uvg266*" "src/rate_control.*" "src/mip_data.h")
|
file(GLOB SOURCE_GROUP_CONTROL RELATIVE ${PROJECT_SOURCE_DIR} "src/cfg.*" "src/encoder.*" "src/encoder_state-c*" "src/encoder_state-g*" "src/encoderstate*" "src/gop.*" "src/input_frame_buffer.*" "src/uvg266*" "src/rate_control.*" "src/mip_data.h")
|
||||||
file(GLOB SOURCE_GROUP_DATA_STRUCTURES RELATIVE ${PROJECT_SOURCE_DIR} "src/cu.*" "src/image.*" "src/imagelist.*" "src/videoframe.*")
|
file(GLOB SOURCE_GROUP_DATA_STRUCTURES RELATIVE ${PROJECT_SOURCE_DIR} "src/cu.*" "src/image.*" "src/imagelist.*" "src/videoframe.*" "src/hashmap.*")
|
||||||
file(GLOB SOURCE_GROUP_EXTRAS RELATIVE ${PROJECT_SOURCE_DIR} "src/extras/*.h" "src/extras/*.c")
|
file(GLOB SOURCE_GROUP_EXTRAS RELATIVE ${PROJECT_SOURCE_DIR} "src/extras/*.h" "src/extras/*.c")
|
||||||
file(GLOB_RECURSE SOURCE_GROUP_STRATEGIES RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/*.h" "src/strategies/*.c")
|
file(GLOB_RECURSE SOURCE_GROUP_STRATEGIES RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/*.h" "src/strategies/*.c")
|
||||||
file(GLOB SOURCE_GROUP_RECON RELATIVE ${PROJECT_SOURCE_DIR} "src/alf.*" "src/filter.*" "src/inter.*" "src/intra.*" "src/reshape.*" "src/sao.*" "src/scalinglist.*" "src/tables.*" "src/transform.*")
|
file(GLOB SOURCE_GROUP_RECON RELATIVE ${PROJECT_SOURCE_DIR} "src/alf.*" "src/filter.*" "src/inter.*" "src/intra.*" "src/reshape.*" "src/sao.*" "src/scalinglist.*" "src/tables.*" "src/transform.*")
|
||||||
|
|
|
@ -122,6 +122,7 @@ typedef struct
|
||||||
cabac_ctx_t transform_skip_gt2[5];
|
cabac_ctx_t transform_skip_gt2[5];
|
||||||
cabac_ctx_t cclm_flag;
|
cabac_ctx_t cclm_flag;
|
||||||
cabac_ctx_t cclm_model;
|
cabac_ctx_t cclm_model;
|
||||||
|
cabac_ctx_t ibc_flag[3];
|
||||||
|
|
||||||
} ctx;
|
} ctx;
|
||||||
} cabac_data_t;
|
} cabac_data_t;
|
||||||
|
|
12
src/cfg.c
12
src/cfg.c
|
@ -222,6 +222,9 @@ int uvg_config_init(uvg_config *cfg)
|
||||||
|
|
||||||
cfg->dual_tree = 0;
|
cfg->dual_tree = 0;
|
||||||
cfg->intra_rough_search_levels = 2;
|
cfg->intra_rough_search_levels = 2;
|
||||||
|
|
||||||
|
cfg->ibc = 0;
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1479,7 +1482,14 @@ int uvg_config_parse(uvg_config *cfg, const char *name, const char *value)
|
||||||
else if OPT("intra-rough-granularity") {
|
else if OPT("intra-rough-granularity") {
|
||||||
cfg->intra_rough_search_levels = atoi(value);
|
cfg->intra_rough_search_levels = atoi(value);
|
||||||
}
|
}
|
||||||
else {
|
else if OPT ("ibc") {
|
||||||
|
int ibc_value = atoi(value);
|
||||||
|
if (ibc_value < 0 || ibc_value > 2) {
|
||||||
|
fprintf(stderr, "ibc supports only range from 0 to 2\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
cfg->ibc = (uint8_t)ibc_value;
|
||||||
|
} else {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
#undef OPT
|
#undef OPT
|
||||||
|
|
|
@ -192,6 +192,7 @@ static const struct option long_options[] = {
|
||||||
{ "no-dual-tree", no_argument, NULL, 0 },
|
{ "no-dual-tree", no_argument, NULL, 0 },
|
||||||
{ "cabac-debug-file", required_argument, NULL, 0 },
|
{ "cabac-debug-file", required_argument, NULL, 0 },
|
||||||
{ "intra-rough-granularity",required_argument, NULL, 0 },
|
{ "intra-rough-granularity",required_argument, NULL, 0 },
|
||||||
|
{ "ibc", required_argument, NULL, 0 },
|
||||||
{0, 0, 0, 0}
|
{0, 0, 0, 0}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -423,6 +423,13 @@ static const uint8_t INIT_CCLM_MODEL[4] = {
|
||||||
9,
|
9,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const uint8_t INIT_IBC_FLAG[4][3] = {
|
||||||
|
{ 0, 43, 45, },
|
||||||
|
{ 0, 57, 44, },
|
||||||
|
{ 17, 42, 36, },
|
||||||
|
{ 1, 5, 8, },
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
static const uint16_t g_inistateToCount[128] = {
|
static const uint16_t g_inistateToCount[128] = {
|
||||||
614, 647, 681, 718, 756, 797, 839, 884, 932, 982, 1034, 1089, 1148, 1209, 1274, 1342,
|
614, 647, 681, 718, 756, 797, 839, 884, 932, 982, 1034, 1089, 1148, 1209, 1274, 1342,
|
||||||
|
@ -514,6 +521,7 @@ void uvg_init_contexts(encoder_state_t *state, int8_t QP, int8_t slice)
|
||||||
uvg_ctx_init(&cabac->ctx.lfnst_idx_model[i], QP, INIT_LFNST_IDX[slice][i], INIT_LFNST_IDX[3][i]);
|
uvg_ctx_init(&cabac->ctx.lfnst_idx_model[i], QP, INIT_LFNST_IDX[slice][i], INIT_LFNST_IDX[3][i]);
|
||||||
uvg_ctx_init(&cabac->ctx.transform_skip_sig_coeff_group[i], QP, INIT_TRANSFORM_SKIP_SIG_COEFF_GROUP[slice][i], INIT_TRANSFORM_SKIP_SIG_COEFF_GROUP[3][i]);
|
uvg_ctx_init(&cabac->ctx.transform_skip_sig_coeff_group[i], QP, INIT_TRANSFORM_SKIP_SIG_COEFF_GROUP[slice][i], INIT_TRANSFORM_SKIP_SIG_COEFF_GROUP[3][i]);
|
||||||
uvg_ctx_init(&cabac->ctx.transform_skip_sig[i], QP, INIT_TRANSFORM_SKIP_SIG[slice][i], INIT_TRANSFORM_SKIP_SIG[3][i]);
|
uvg_ctx_init(&cabac->ctx.transform_skip_sig[i], QP, INIT_TRANSFORM_SKIP_SIG[slice][i], INIT_TRANSFORM_SKIP_SIG[3][i]);
|
||||||
|
uvg_ctx_init(&cabac->ctx.ibc_flag[i], QP, INIT_IBC_FLAG[slice][i], INIT_IBC_FLAG[3][i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < 4; i++) {
|
for (i = 0; i < 4; i++) {
|
||||||
|
|
3
src/cu.h
3
src/cu.h
|
@ -52,6 +52,7 @@ typedef enum {
|
||||||
CU_INTRA = 1,
|
CU_INTRA = 1,
|
||||||
CU_INTER = 2,
|
CU_INTER = 2,
|
||||||
CU_PCM = 3,
|
CU_PCM = 3,
|
||||||
|
CU_IBC = 4,
|
||||||
} cu_type_t;
|
} cu_type_t;
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
|
@ -146,7 +147,7 @@ enum uvg_tree_type {
|
||||||
*/
|
*/
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
uint8_t type : 2; //!< \brief block type, one of cu_type_t values
|
uint8_t type : 3; //!< \brief block type, one of cu_type_t values
|
||||||
uint8_t depth : 3; //!< \brief depth / size of this block
|
uint8_t depth : 3; //!< \brief depth / size of this block
|
||||||
uint8_t part_size : 3; //!< \brief partition mode, one of part_mode_t values
|
uint8_t part_size : 3; //!< \brief partition mode, one of part_mode_t values
|
||||||
uint8_t tr_depth : 3; //!< \brief transform depth
|
uint8_t tr_depth : 3; //!< \brief transform depth
|
||||||
|
|
|
@ -834,7 +834,7 @@ int uvg_encode_inter_prediction_unit(encoder_state_t * const state,
|
||||||
if (cur_cu->inter.mv_dir & 2) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVMERGE_L1, abs_x, abs_y, width, height, cur_cu->inter.mv[1][0], cur_cu->inter.mv[1][1]);
|
if (cur_cu->inter.mv_dir & 2) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVMERGE_L1, abs_x, abs_y, width, height, cur_cu->inter.mv[1][0], cur_cu->inter.mv[1][1]);
|
||||||
#endif
|
#endif
|
||||||
} else {
|
} else {
|
||||||
if (state->frame->slicetype == UVG_SLICE_B) {
|
if (state->frame->slicetype == UVG_SLICE_B && cur_cu->type != CU_IBC) {
|
||||||
// Code Inter Dir
|
// Code Inter Dir
|
||||||
uint8_t inter_dir = cur_cu->inter.mv_dir;
|
uint8_t inter_dir = cur_cu->inter.mv_dir;
|
||||||
|
|
||||||
|
@ -860,7 +860,7 @@ int uvg_encode_inter_prediction_unit(encoder_state_t * const state,
|
||||||
// size of the current reference index list (L0/L1)
|
// size of the current reference index list (L0/L1)
|
||||||
uint8_t ref_LX_size = state->frame->ref_LX_size[ref_list_idx];
|
uint8_t ref_LX_size = state->frame->ref_LX_size[ref_list_idx];
|
||||||
|
|
||||||
if (ref_LX_size > 1) {
|
if (ref_LX_size > 1 && cur_cu->type != CU_IBC) {
|
||||||
// parseRefFrmIdx
|
// parseRefFrmIdx
|
||||||
int32_t ref_frame = cur_cu->inter.mv_ref[ref_list_idx];
|
int32_t ref_frame = cur_cu->inter.mv_ref[ref_list_idx];
|
||||||
|
|
||||||
|
@ -906,7 +906,7 @@ int uvg_encode_inter_prediction_unit(encoder_state_t * const state,
|
||||||
mv_t mvd_hor = cur_cu->inter.mv[ref_list_idx][0] - mv_cand[cu_mv_cand][0];
|
mv_t mvd_hor = cur_cu->inter.mv[ref_list_idx][0] - mv_cand[cu_mv_cand][0];
|
||||||
mv_t mvd_ver = cur_cu->inter.mv[ref_list_idx][1] - mv_cand[cu_mv_cand][1];
|
mv_t mvd_ver = cur_cu->inter.mv[ref_list_idx][1] - mv_cand[cu_mv_cand][1];
|
||||||
|
|
||||||
uvg_change_precision(INTERNAL_MV_PREC, uvg_g_imv_to_prec[UVG_IMV_OFF], &mvd_hor, &mvd_ver);
|
uvg_change_precision(INTERNAL_MV_PREC, uvg_g_imv_to_prec[(cur_cu->type == CU_IBC)?UVG_IMV_FPEL:UVG_IMV_OFF], &mvd_hor, &mvd_ver);
|
||||||
uvg_encode_mvd(state, cabac, mvd_hor, mvd_ver, bits_out);
|
uvg_encode_mvd(state, cabac, mvd_hor, mvd_ver, bits_out);
|
||||||
|
|
||||||
non_zero_mvd |= (mvd_hor != 0) || (mvd_ver != 0);
|
non_zero_mvd |= (mvd_hor != 0) || (mvd_ver != 0);
|
||||||
|
@ -1262,95 +1262,6 @@ void uvg_encode_intra_luma_coding_unit(const encoder_state_t * const state,
|
||||||
if (cabac->only_count && bits_out) *bits_out += bits;
|
if (cabac->only_count && bits_out) *bits_out += bits;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
static void encode_part_mode(encoder_state_t * const state,
|
|
||||||
cabac_data_t * const cabac,
|
|
||||||
const cu_info_t * const cur_cu,
|
|
||||||
int depth)
|
|
||||||
{
|
|
||||||
// Binarization from Table 9-34 of the HEVC spec:
|
|
||||||
//
|
|
||||||
// | log2CbSize > | log2CbSize ==
|
|
||||||
// | MinCbLog2SizeY | MinCbLog2SizeY
|
|
||||||
// -------+-------+----------+---------+-----------+----------
|
|
||||||
// pred | part | AMP | AMP | |
|
|
||||||
// mode | mode | disabled | enabled | size == 8 | size > 8
|
|
||||||
// -------+-------+----------+---------+-----------+----------
|
|
||||||
// intra | 2Nx2N | - - | 1 1
|
|
||||||
// | NxN | - - | 0 0
|
|
||||||
// -------+-------+--------------------+----------------------
|
|
||||||
// inter | 2Nx2N | 1 1 | 1 1
|
|
||||||
// | 2NxN | 01 011 | 01 01
|
|
||||||
// | Nx2N | 00 001 | 00 001
|
|
||||||
// | NxN | - - | - 000
|
|
||||||
// | 2NxnU | - 0100 | - -
|
|
||||||
// | 2NxnD | - 0101 | - -
|
|
||||||
// | nLx2N | - 0000 | - -
|
|
||||||
// | nRx2N | - 0001 | - -
|
|
||||||
// -------+-------+--------------------+----------------------
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// Context indices from Table 9-37 of the HEVC spec:
|
|
||||||
//
|
|
||||||
// binIdx
|
|
||||||
// | 0 1 2 3
|
|
||||||
// ------------------------------+------------------
|
|
||||||
// log2CbSize == MinCbLog2SizeY | 0 1 2 bypass
|
|
||||||
// log2CbSize > MinCbLog2SizeY | 0 1 3 bypass
|
|
||||||
// ------------------------------+------------------
|
|
||||||
double bits = 0;
|
|
||||||
if (cur_cu->type == CU_INTRA) {
|
|
||||||
if (depth == MAX_DEPTH) {
|
|
||||||
cabac->cur_ctx = &(cabac->ctx.part_size_model[0]);
|
|
||||||
if (cur_cu->part_size == SIZE_2Nx2N) {
|
|
||||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[0]), 1, bits, "part_mode 2Nx2N");
|
|
||||||
} else {
|
|
||||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[0]), 0, bits, "part_mode NxN");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
|
|
||||||
cabac->cur_ctx = &(cabac->ctx.part_size_model[0]);
|
|
||||||
if (cur_cu->part_size == SIZE_2Nx2N) {
|
|
||||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[0]), 1, bits, "part_mode 2Nx2N");
|
|
||||||
return bits;
|
|
||||||
}
|
|
||||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[0]), 0, bits, "part_mode split");
|
|
||||||
|
|
||||||
cabac->cur_ctx = &(cabac->ctx.part_size_model[1]);
|
|
||||||
if (cur_cu->part_size == SIZE_2NxN ||
|
|
||||||
cur_cu->part_size == SIZE_2NxnU ||
|
|
||||||
cur_cu->part_size == SIZE_2NxnD) {
|
|
||||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[1]), 1, bits, "part_mode vertical");
|
|
||||||
} else {
|
|
||||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[1]), 0, bits, "part_mode horizontal");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (state->encoder_control->cfg.amp_enable && depth < MAX_DEPTH) {
|
|
||||||
cabac->cur_ctx = &(cabac->ctx.part_size_model[3]);
|
|
||||||
|
|
||||||
if (cur_cu->part_size == SIZE_2NxN ||
|
|
||||||
cur_cu->part_size == SIZE_Nx2N) {
|
|
||||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[3]), 1, bits, "part_mode SMP");
|
|
||||||
return bits;
|
|
||||||
}
|
|
||||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[3]), 0, bits, "part_mode AMP");
|
|
||||||
|
|
||||||
if (cur_cu->part_size == SIZE_2NxnU ||
|
|
||||||
cur_cu->part_size == SIZE_nLx2N) {
|
|
||||||
CABAC_BINS_EP(cabac, 0, 1, "part_mode AMP");
|
|
||||||
if(cabac->only_count) bits += 1;
|
|
||||||
} else {
|
|
||||||
CABAC_BINS_EP(cabac, 1, 1, "part_mode AMP");
|
|
||||||
if(cabac->only_count) bits += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return bits;
|
|
||||||
}
|
|
||||||
**/
|
|
||||||
|
|
||||||
|
|
||||||
bool uvg_write_split_flag(
|
bool uvg_write_split_flag(
|
||||||
const encoder_state_t * const state,
|
const encoder_state_t * const state,
|
||||||
cabac_data_t* cabac,
|
cabac_data_t* cabac,
|
||||||
|
@ -1547,7 +1458,7 @@ void uvg_encode_coding_tree(
|
||||||
// CABAC_BIN(cabac, 0, "split_transform_flag");
|
// CABAC_BIN(cabac, 0, "split_transform_flag");
|
||||||
}
|
}
|
||||||
|
|
||||||
DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_CU_TYPE, abs_x, abs_y, cu_width, cu_width, (cur_cu->type == CU_INTRA)?0:1);
|
DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_CU_TYPE, abs_x, abs_y, cu_width, cu_width, cur_cu->type-1);
|
||||||
|
|
||||||
if (ctrl->cfg.lossless) {
|
if (ctrl->cfg.lossless) {
|
||||||
cabac->cur_ctx = &cabac->ctx.cu_transquant_bypass;
|
cabac->cur_ctx = &cabac->ctx.cu_transquant_bypass;
|
||||||
|
@ -1555,7 +1466,7 @@ void uvg_encode_coding_tree(
|
||||||
}
|
}
|
||||||
|
|
||||||
// Encode skip flag
|
// Encode skip flag
|
||||||
if (state->frame->slicetype != UVG_SLICE_I && cu_width != 4) {
|
if ((state->frame->slicetype != UVG_SLICE_I || state->encoder_control->cfg.ibc)) {
|
||||||
|
|
||||||
int8_t ctx_skip = 0;
|
int8_t ctx_skip = 0;
|
||||||
|
|
||||||
|
@ -1565,11 +1476,22 @@ void uvg_encode_coding_tree(
|
||||||
if (above_cu && above_cu->skipped) {
|
if (above_cu && above_cu->skipped) {
|
||||||
ctx_skip++;
|
ctx_skip++;
|
||||||
}
|
}
|
||||||
|
if (cu_width > 4 || state->encoder_control->cfg.ibc) {
|
||||||
cabac->cur_ctx = &(cabac->ctx.cu_skip_flag_model[ctx_skip]);
|
cabac->cur_ctx = &(cabac->ctx.cu_skip_flag_model[ctx_skip]);
|
||||||
CABAC_BIN(cabac, cur_cu->skipped, "SkipFlag");
|
CABAC_BIN(cabac, cur_cu->skipped, "SkipFlag");
|
||||||
|
}
|
||||||
|
|
||||||
if (cur_cu->skipped) {
|
if (cur_cu->skipped) {
|
||||||
|
|
||||||
|
if (state->encoder_control->cfg.ibc && state->frame->slicetype != UVG_SLICE_I)
|
||||||
|
{ // ToDo: Only for luma channel
|
||||||
|
// ToDo: Disable for blocks over 64x64 pixels
|
||||||
|
int8_t ctx_ibc = 0;
|
||||||
|
if (left_cu && left_cu->type == CU_IBC) ctx_ibc++;
|
||||||
|
if (above_cu && above_cu->type == CU_IBC) ctx_ibc++;
|
||||||
|
cabac->cur_ctx = &(cabac->ctx.ibc_flag[ctx_ibc]);
|
||||||
|
CABAC_BIN(cabac, (cur_cu->type == CU_IBC), "IBCFlag");
|
||||||
|
}
|
||||||
DBG_PRINT_MV(state, x, y, (uint32_t)cu_width, (uint32_t)cu_width, cur_cu);
|
DBG_PRINT_MV(state, x, y, (uint32_t)cu_width, (uint32_t)cu_width, cur_cu);
|
||||||
uvg_hmvp_add_mv(state, x, y, (uint32_t)cu_width, (uint32_t)cu_width, cur_cu);
|
uvg_hmvp_add_mv(state, x, y, (uint32_t)cu_width, (uint32_t)cu_width, cur_cu);
|
||||||
int16_t num_cand = state->encoder_control->cfg.max_merge;
|
int16_t num_cand = state->encoder_control->cfg.max_merge;
|
||||||
|
@ -1597,6 +1519,15 @@ void uvg_encode_coding_tree(
|
||||||
}
|
}
|
||||||
|
|
||||||
// Prediction mode
|
// Prediction mode
|
||||||
|
if ((state->frame->slicetype == UVG_SLICE_I || cu_width == 4) && state->encoder_control->cfg.ibc) { // ToDo: Only for luma channel
|
||||||
|
// ToDo: Disable for blocks over 64x64 pixels
|
||||||
|
int8_t ctx_ibc = 0;
|
||||||
|
if (left_cu && left_cu->type == CU_IBC) ctx_ibc++;
|
||||||
|
if (above_cu && above_cu->type == CU_IBC) ctx_ibc++;
|
||||||
|
cabac->cur_ctx = &(cabac->ctx.ibc_flag[ctx_ibc]);
|
||||||
|
CABAC_BIN(cabac, (cur_cu->type == CU_IBC), "IBCFlag");
|
||||||
|
}
|
||||||
|
|
||||||
if (state->frame->slicetype != UVG_SLICE_I && cu_width != 4) {
|
if (state->frame->slicetype != UVG_SLICE_I && cu_width != 4) {
|
||||||
|
|
||||||
int8_t ctx_predmode = 0;
|
int8_t ctx_predmode = 0;
|
||||||
|
@ -1607,6 +1538,15 @@ void uvg_encode_coding_tree(
|
||||||
|
|
||||||
cabac->cur_ctx = &(cabac->ctx.cu_pred_mode_model[ctx_predmode]);
|
cabac->cur_ctx = &(cabac->ctx.cu_pred_mode_model[ctx_predmode]);
|
||||||
CABAC_BIN(cabac, (cur_cu->type == CU_INTRA), "PredMode");
|
CABAC_BIN(cabac, (cur_cu->type == CU_INTRA), "PredMode");
|
||||||
|
|
||||||
|
// We need IBC flag if the mode is signalled as Inter
|
||||||
|
if (state->encoder_control->cfg.ibc && cur_cu->type != CU_INTRA) {
|
||||||
|
int8_t ctx_ibc = 0;
|
||||||
|
if (left_cu && left_cu->type == CU_IBC) ctx_ibc++;
|
||||||
|
if (above_cu && above_cu->type == CU_IBC) ctx_ibc++;
|
||||||
|
cabac->cur_ctx = &(cabac->ctx.ibc_flag[ctx_ibc]);
|
||||||
|
CABAC_BIN(cabac, (cur_cu->type == CU_IBC), "IBCFlag");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// part_mode
|
// part_mode
|
||||||
|
@ -1657,7 +1597,7 @@ void uvg_encode_coding_tree(
|
||||||
} else
|
} else
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (cur_cu->type == CU_INTER) {
|
if (cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) {
|
||||||
uint8_t imv_mode = UVG_IMV_OFF;
|
uint8_t imv_mode = UVG_IMV_OFF;
|
||||||
|
|
||||||
const int num_pu = uvg_part_mode_num_parts[cur_cu->part_size];
|
const int num_pu = uvg_part_mode_num_parts[cur_cu->part_size];
|
||||||
|
@ -1679,10 +1619,10 @@ void uvg_encode_coding_tree(
|
||||||
// 0 = off, 1 = fullpel, 2 = 4-pel, 3 = half-pel
|
// 0 = off, 1 = fullpel, 2 = 4-pel, 3 = half-pel
|
||||||
if (ctrl->cfg.amvr && non_zero_mvd) {
|
if (ctrl->cfg.amvr && non_zero_mvd) {
|
||||||
cabac->cur_ctx = &(cabac->ctx.imv_flag[0]);
|
cabac->cur_ctx = &(cabac->ctx.imv_flag[0]);
|
||||||
CABAC_BIN(cabac, (imv_mode > UVG_IMV_OFF), "imv_flag");
|
if(cur_cu->type != CU_IBC) CABAC_BIN(cabac, (imv_mode > UVG_IMV_OFF), "imv_flag");
|
||||||
if (imv_mode > UVG_IMV_OFF) {
|
if (imv_mode > UVG_IMV_OFF) {
|
||||||
cabac->cur_ctx = &(cabac->ctx.imv_flag[4]);
|
cabac->cur_ctx = &(cabac->ctx.imv_flag[4]);
|
||||||
CABAC_BIN(cabac, (imv_mode < UVG_IMV_HPEL), "imv_flag");
|
if(cur_cu->type != CU_IBC) CABAC_BIN(cabac, (imv_mode < UVG_IMV_HPEL), "imv_flag");
|
||||||
if (imv_mode < UVG_IMV_HPEL) {
|
if (imv_mode < UVG_IMV_HPEL) {
|
||||||
cabac->cur_ctx = &(cabac->ctx.imv_flag[1]);
|
cabac->cur_ctx = &(cabac->ctx.imv_flag[1]);
|
||||||
CABAC_BIN(cabac, (imv_mode > UVG_IMV_FPEL), "imv_flag"); // 1 indicates 4PEL, 0 FPEL
|
CABAC_BIN(cabac, (imv_mode > UVG_IMV_FPEL), "imv_flag"); // 1 indicates 4PEL, 0 FPEL
|
||||||
|
@ -1860,7 +1800,7 @@ double uvg_mock_encode_coding_unit(
|
||||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_pred_mode_model[ctx_predmode]), (cur_cu->type == CU_INTRA), bits, "PredMode");
|
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_pred_mode_model[ctx_predmode]), (cur_cu->type == CU_INTRA), bits, "PredMode");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cur_cu->type == CU_INTER) {
|
if (cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) {
|
||||||
const uint8_t imv_mode = UVG_IMV_OFF;
|
const uint8_t imv_mode = UVG_IMV_OFF;
|
||||||
const int non_zero_mvd = uvg_encode_inter_prediction_unit(state, cabac, cur_cu, x, y, cu_width, cu_width, depth, lcu, &bits);
|
const int non_zero_mvd = uvg_encode_inter_prediction_unit(state, cabac, cur_cu, x, y, cu_width, cu_width, depth, lcu, &bits);
|
||||||
if (ctrl->cfg.amvr && non_zero_mvd) {
|
if (ctrl->cfg.amvr && non_zero_mvd) {
|
||||||
|
@ -1897,35 +1837,38 @@ void uvg_encode_mvd(encoder_state_t * const state,
|
||||||
const int8_t ver_abs_gr0 = mvd_ver != 0;
|
const int8_t ver_abs_gr0 = mvd_ver != 0;
|
||||||
const uint32_t mvd_hor_abs = abs(mvd_hor);
|
const uint32_t mvd_hor_abs = abs(mvd_hor);
|
||||||
const uint32_t mvd_ver_abs = abs(mvd_ver);
|
const uint32_t mvd_ver_abs = abs(mvd_ver);
|
||||||
|
double temp_bits_out = 0.0;
|
||||||
|
|
||||||
cabac->cur_ctx = &cabac->ctx.cu_mvd_model[0];
|
cabac->cur_ctx = &cabac->ctx.cu_mvd_model[0];
|
||||||
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[0], (mvd_hor != 0), *bits_out, "abs_mvd_greater0_flag_hor");
|
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[0], (mvd_hor != 0), temp_bits_out, "abs_mvd_greater0_flag_hor");
|
||||||
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[0], (mvd_ver != 0), *bits_out, "abs_mvd_greater0_flag_ver");
|
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[0], (mvd_ver != 0), temp_bits_out, "abs_mvd_greater0_flag_ver");
|
||||||
|
|
||||||
cabac->cur_ctx = &cabac->ctx.cu_mvd_model[1];
|
cabac->cur_ctx = &cabac->ctx.cu_mvd_model[1];
|
||||||
if (hor_abs_gr0) {
|
if (hor_abs_gr0) {
|
||||||
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[1], (mvd_hor_abs>1), *bits_out,"abs_mvd_greater1_flag_hor");
|
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[1], (mvd_hor_abs>1), temp_bits_out,"abs_mvd_greater1_flag_hor");
|
||||||
}
|
}
|
||||||
if (ver_abs_gr0) {
|
if (ver_abs_gr0) {
|
||||||
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[1], (mvd_ver_abs>1), *bits_out, "abs_mvd_greater1_flag_ver");
|
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.cu_mvd_model[1], (mvd_ver_abs>1), temp_bits_out, "abs_mvd_greater1_flag_ver");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (hor_abs_gr0) {
|
if (hor_abs_gr0) {
|
||||||
if (mvd_hor_abs > 1) {
|
if (mvd_hor_abs > 1) {
|
||||||
uint32_t bits = uvg_cabac_write_ep_ex_golomb(state, cabac, mvd_hor_abs - 2, 1);
|
uint32_t bits = uvg_cabac_write_ep_ex_golomb(state, cabac, mvd_hor_abs - 2, 1);
|
||||||
if(cabac->only_count) *bits_out += bits;
|
if(cabac->only_count) temp_bits_out += bits;
|
||||||
}
|
}
|
||||||
uint32_t mvd_hor_sign = (mvd_hor > 0) ? 0 : 1;
|
uint32_t mvd_hor_sign = (mvd_hor > 0) ? 0 : 1;
|
||||||
CABAC_BIN_EP(cabac, mvd_hor_sign, "mvd_sign_flag_hor");
|
CABAC_BIN_EP(cabac, mvd_hor_sign, "mvd_sign_flag_hor");
|
||||||
if (cabac->only_count) *bits_out += 1;
|
if (cabac->only_count) temp_bits_out += 1;
|
||||||
}
|
}
|
||||||
if (ver_abs_gr0) {
|
if (ver_abs_gr0) {
|
||||||
if (mvd_ver_abs > 1) {
|
if (mvd_ver_abs > 1) {
|
||||||
uint32_t bits = uvg_cabac_write_ep_ex_golomb(state, cabac, mvd_ver_abs - 2, 1);
|
uint32_t bits = uvg_cabac_write_ep_ex_golomb(state, cabac, mvd_ver_abs - 2, 1);
|
||||||
if (cabac->only_count) *bits_out += bits;
|
if (cabac->only_count) temp_bits_out += bits;
|
||||||
}
|
}
|
||||||
uint32_t mvd_ver_sign = mvd_ver > 0 ? 0 : 1;
|
uint32_t mvd_ver_sign = mvd_ver > 0 ? 0 : 1;
|
||||||
CABAC_BIN_EP(cabac, mvd_ver_sign, "mvd_sign_flag_ver");
|
CABAC_BIN_EP(cabac, mvd_ver_sign, "mvd_sign_flag_ver");
|
||||||
if (cabac->only_count) *bits_out += 1;
|
if (cabac->only_count) temp_bits_out += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(bits_out) *bits_out = temp_bits_out;
|
||||||
}
|
}
|
||||||
|
|
|
@ -154,9 +154,8 @@ static void encoder_state_write_bitstream_PTL(bitstream_t *stream,
|
||||||
// end Profile Tier
|
// end Profile Tier
|
||||||
|
|
||||||
//uint8_t level = state->encoder_control->cfg.level;
|
//uint8_t level = state->encoder_control->cfg.level;
|
||||||
// ToDo: level hardcoded to 5.2
|
// ToDo: level hardcoded to 6.3
|
||||||
WRITE_U(stream, 86, 8, "general_level_idc");
|
WRITE_U(stream, 105, 8, "general_level_idc");
|
||||||
|
|
||||||
|
|
||||||
WRITE_U(stream, 0, 1, "ptl_frame_only_constraint_flag");
|
WRITE_U(stream, 0, 1, "ptl_frame_only_constraint_flag");
|
||||||
WRITE_U(stream, 0, 1, "ptl_multilayer_enabled_flag");
|
WRITE_U(stream, 0, 1, "ptl_multilayer_enabled_flag");
|
||||||
|
@ -694,7 +693,11 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream,
|
||||||
WRITE_UE(stream, 0, "sps_internal_bit_depth_minus_input_bit_depth");
|
WRITE_UE(stream, 0, "sps_internal_bit_depth_minus_input_bit_depth");
|
||||||
}
|
}
|
||||||
|
|
||||||
WRITE_U(stream, 0, 1, "sps_ibc_enabled_flag");
|
WRITE_U(stream, encoder->cfg.ibc > 0 ? 1 : 0, 1, "sps_ibc_enabled_flag");
|
||||||
|
|
||||||
|
if (encoder->cfg.ibc) {
|
||||||
|
WRITE_UE(stream,6 - IBC_MRG_MAX_NUM_CANDS, "sps_six_minus_max_num_ibc_merge_cand");
|
||||||
|
}
|
||||||
|
|
||||||
#if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET
|
#if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET
|
||||||
// if(!no_ladf_constraint_flag)
|
// if(!no_ladf_constraint_flag)
|
||||||
|
|
|
@ -122,6 +122,31 @@ static int encoder_state_config_tile_init(encoder_state_t * const state,
|
||||||
state->tile->frame->hmvp_lut = malloc(sizeof(cu_info_t) * height_in_lcu * MAX_NUM_HMVP_CANDS);
|
state->tile->frame->hmvp_lut = malloc(sizeof(cu_info_t) * height_in_lcu * MAX_NUM_HMVP_CANDS);
|
||||||
state->tile->frame->hmvp_size = calloc(1, sizeof(uint8_t) * height_in_lcu);
|
state->tile->frame->hmvp_size = calloc(1, sizeof(uint8_t) * height_in_lcu);
|
||||||
|
|
||||||
|
// Allocate the HMVP for IBC in any case
|
||||||
|
state->tile->frame->hmvp_lut_ibc = malloc(sizeof(cu_info_t) * height_in_lcu * MAX_NUM_HMVP_CANDS);
|
||||||
|
state->tile->frame->hmvp_size_ibc = calloc(1, sizeof(uint8_t) * height_in_lcu);
|
||||||
|
|
||||||
|
if (state->encoder_control->cfg.ibc) {
|
||||||
|
// Allocate pixel buffer for each LCU row
|
||||||
|
state->tile->frame->ibc_buffer_y = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu);
|
||||||
|
state->tile->frame->ibc_buffer_u = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu);
|
||||||
|
state->tile->frame->ibc_buffer_v = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu);
|
||||||
|
state->tile->frame->ibc_hashmap_row = malloc(sizeof(uvg_hashmap_t) * state->tile->frame->height_in_lcu);
|
||||||
|
|
||||||
|
if (state->encoder_control->cfg.ibc & 2) {
|
||||||
|
state->tile->frame->ibc_hashmap_pos_to_hash_stride = ((state->tile->frame->width+UVG_HASHMAP_BLOCKSIZE-1)/ UVG_HASHMAP_BLOCKSIZE);
|
||||||
|
state->tile->frame->ibc_hashmap_pos_to_hash = malloc(sizeof(uint32_t) *
|
||||||
|
((state->tile->frame->height+UVG_HASHMAP_BLOCKSIZE-1)/ UVG_HASHMAP_BLOCKSIZE) * state->tile->frame->ibc_hashmap_pos_to_hash_stride);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) {
|
||||||
|
state->tile->frame->ibc_hashmap_row[i] = uvg_hashmap_create((LCU_WIDTH * IBC_BUFFER_WIDTH)>>2);
|
||||||
|
state->tile->frame->ibc_buffer_y[i] = (uvg_pixel*)malloc(IBC_BUFFER_SIZE * 3); // ToDo: we don't need this much, but it would also support 4:4:4
|
||||||
|
state->tile->frame->ibc_buffer_u[i] = &state->tile->frame->ibc_buffer_y[i][IBC_BUFFER_SIZE];
|
||||||
|
state->tile->frame->ibc_buffer_v[i] = &state->tile->frame->ibc_buffer_y[i][IBC_BUFFER_SIZE * 2];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
state->tile->frame->rec = NULL;
|
state->tile->frame->rec = NULL;
|
||||||
|
|
||||||
state->tile->frame->source = NULL;
|
state->tile->frame->source = NULL;
|
||||||
|
@ -197,6 +222,24 @@ static void encoder_state_config_tile_finalize(encoder_state_t * const state) {
|
||||||
FREE_POINTER(state->tile->frame->hmvp_lut);
|
FREE_POINTER(state->tile->frame->hmvp_lut);
|
||||||
FREE_POINTER(state->tile->frame->hmvp_size);
|
FREE_POINTER(state->tile->frame->hmvp_size);
|
||||||
|
|
||||||
|
FREE_POINTER(state->tile->frame->hmvp_lut_ibc);
|
||||||
|
FREE_POINTER(state->tile->frame->hmvp_size_ibc);
|
||||||
|
|
||||||
|
if (state->encoder_control->cfg.ibc) {
|
||||||
|
if (state->encoder_control->cfg.ibc & 2) {
|
||||||
|
FREE_POINTER(state->tile->frame->ibc_hashmap_pos_to_hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) {
|
||||||
|
FREE_POINTER(state->tile->frame->ibc_buffer_y[i]);
|
||||||
|
uvg_hashmap_free(state->tile->frame->ibc_hashmap_row[i]);
|
||||||
|
}
|
||||||
|
FREE_POINTER(state->tile->frame->ibc_hashmap_row);
|
||||||
|
FREE_POINTER(state->tile->frame->ibc_buffer_y);
|
||||||
|
FREE_POINTER(state->tile->frame->ibc_buffer_u);
|
||||||
|
FREE_POINTER(state->tile->frame->ibc_buffer_v);
|
||||||
|
}
|
||||||
|
|
||||||
uvg_videoframe_free(state->tile->frame);
|
uvg_videoframe_free(state->tile->frame);
|
||||||
state->tile->frame = NULL;
|
state->tile->frame = NULL;
|
||||||
FREE_POINTER(state->tile->wf_jobs);
|
FREE_POINTER(state->tile->wf_jobs);
|
||||||
|
|
|
@ -45,17 +45,20 @@
|
||||||
#include "encode_coding_tree.h"
|
#include "encode_coding_tree.h"
|
||||||
#include "encoder_state-bitstream.h"
|
#include "encoder_state-bitstream.h"
|
||||||
#include "filter.h"
|
#include "filter.h"
|
||||||
|
#include "hashmap.h"
|
||||||
#include "image.h"
|
#include "image.h"
|
||||||
#include "rate_control.h"
|
#include "rate_control.h"
|
||||||
#include "sao.h"
|
#include "sao.h"
|
||||||
#include "search.h"
|
#include "search.h"
|
||||||
#include "tables.h"
|
#include "tables.h"
|
||||||
|
#include "threads.h"
|
||||||
#include "threadqueue.h"
|
#include "threadqueue.h"
|
||||||
#include "alf.h"
|
#include "alf.h"
|
||||||
#include "reshape.h"
|
#include "reshape.h"
|
||||||
|
|
||||||
#include "strategies/strategies-picture.h"
|
#include "strategies/strategies-picture.h"
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Strength of QP adjustments when using adaptive QP for 360 video.
|
* \brief Strength of QP adjustments when using adaptive QP for 360 video.
|
||||||
*
|
*
|
||||||
|
@ -251,6 +254,58 @@ static void encoder_state_recdata_to_bufs(encoder_state_t * const state,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fill IBC buffer
|
||||||
|
if (state->encoder_control->cfg.ibc) {
|
||||||
|
|
||||||
|
uint32_t ibc_buffer_pos_x = lcu->position_px.x + LCU_WIDTH >= IBC_BUFFER_WIDTH ? IBC_BUFFER_WIDTH - LCU_WIDTH: lcu->position_px.x;
|
||||||
|
uint32_t ibc_buffer_pos_x_c = ibc_buffer_pos_x >> 1;
|
||||||
|
uint32_t ibc_buffer_row = lcu->position_px.y / LCU_WIDTH;
|
||||||
|
|
||||||
|
// If the buffer is full shift all the lines LCU_WIDTH left
|
||||||
|
if (lcu->position_px.x + LCU_WIDTH > IBC_BUFFER_WIDTH) {
|
||||||
|
for (uint32_t i = 0; i < LCU_WIDTH; i++) {
|
||||||
|
memmove(
|
||||||
|
&frame->ibc_buffer_y[ibc_buffer_row][i * IBC_BUFFER_WIDTH],
|
||||||
|
&frame->ibc_buffer_y[ibc_buffer_row][i * IBC_BUFFER_WIDTH + LCU_WIDTH],
|
||||||
|
sizeof(uvg_pixel) * (IBC_BUFFER_WIDTH - LCU_WIDTH));
|
||||||
|
}
|
||||||
|
if (state->encoder_control->chroma_format != UVG_CSP_400) {
|
||||||
|
for (uint32_t i = 0; i < LCU_WIDTH_C; i++) {
|
||||||
|
memmove(
|
||||||
|
&frame->ibc_buffer_u[ibc_buffer_row][i * IBC_BUFFER_WIDTH_C],
|
||||||
|
&frame->ibc_buffer_u[ibc_buffer_row]
|
||||||
|
[i * IBC_BUFFER_WIDTH_C + LCU_WIDTH_C],
|
||||||
|
sizeof(uvg_pixel) * (IBC_BUFFER_WIDTH_C - LCU_WIDTH_C));
|
||||||
|
memmove(
|
||||||
|
&frame->ibc_buffer_v[ibc_buffer_row][i * IBC_BUFFER_WIDTH_C],
|
||||||
|
&frame->ibc_buffer_v[ibc_buffer_row]
|
||||||
|
[i * IBC_BUFFER_WIDTH_C + LCU_WIDTH_C],
|
||||||
|
sizeof(uvg_pixel) * (IBC_BUFFER_WIDTH_C - LCU_WIDTH_C));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const uint32_t ibc_block_width = MIN(LCU_WIDTH, (state->tile->frame->width-lcu->position_px.x));
|
||||||
|
const uint32_t ibc_block_height = MIN(LCU_WIDTH, (state->tile->frame->height-lcu->position_px.y));
|
||||||
|
|
||||||
|
uvg_pixels_blit(&frame->rec->y[lcu->position_px.y * frame->rec->stride + lcu->position_px.x],
|
||||||
|
&frame->ibc_buffer_y[ibc_buffer_row][ibc_buffer_pos_x],
|
||||||
|
ibc_block_width, ibc_block_height,
|
||||||
|
frame->rec->stride, IBC_BUFFER_WIDTH);
|
||||||
|
|
||||||
|
if (state->encoder_control->chroma_format != UVG_CSP_400) {
|
||||||
|
uvg_pixels_blit(&frame->rec->u[(lcu->position_px.y >> 1) * (frame->rec->stride >> 1) + (lcu->position_px.x >> 1)],
|
||||||
|
&frame->ibc_buffer_u[ibc_buffer_row][ibc_buffer_pos_x_c],
|
||||||
|
ibc_block_width>>1, ibc_block_height>>1,
|
||||||
|
frame->rec->stride >> 1, IBC_BUFFER_WIDTH_C);
|
||||||
|
uvg_pixels_blit(&frame->rec->v[(lcu->position_px.y >> 1) * (frame->rec->stride >> 1) + (lcu->position_px.x >> 1)],
|
||||||
|
&frame->ibc_buffer_v[ibc_buffer_row][ibc_buffer_pos_x_c],
|
||||||
|
ibc_block_width>>1, ibc_block_height>>1,
|
||||||
|
frame->rec->stride >> 1, IBC_BUFFER_WIDTH_C);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -692,9 +747,53 @@ static void encoder_state_worker_encode_lcu_search(void * opaque)
|
||||||
|
|
||||||
cu_info_t original_lut[MAX_NUM_HMVP_CANDS];
|
cu_info_t original_lut[MAX_NUM_HMVP_CANDS];
|
||||||
uint8_t original_lut_size = state->tile->frame->hmvp_size[ctu_row];
|
uint8_t original_lut_size = state->tile->frame->hmvp_size[ctu_row];
|
||||||
|
cu_info_t original_lut_ibc[MAX_NUM_HMVP_CANDS];
|
||||||
|
uint8_t original_lut_size_ibc = state->tile->frame->hmvp_size_ibc[ctu_row];
|
||||||
|
|
||||||
// Store original HMVP lut before search and restore after, since it's modified
|
// Store original HMVP lut before search and restore after, since it's modified
|
||||||
if(state->frame->slicetype != UVG_SLICE_I) memcpy(original_lut, &state->tile->frame->hmvp_lut[ctu_row_mul_five], sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
|
if(state->frame->slicetype != UVG_SLICE_I) memcpy(original_lut, &state->tile->frame->hmvp_lut[ctu_row_mul_five], sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
|
||||||
|
if(state->encoder_control->cfg.ibc) memcpy(original_lut_ibc, &state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five], sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
|
||||||
|
|
||||||
|
|
||||||
|
if (state->encoder_control->cfg.ibc & 2) {
|
||||||
|
videoframe_t * const frame = state->tile->frame;
|
||||||
|
const uint32_t ibc_block_width = MIN(LCU_WIDTH, (state->tile->frame->width-lcu->position_px.x));
|
||||||
|
const uint32_t ibc_block_height = MIN(LCU_WIDTH, (state->tile->frame->height-lcu->position_px.y));
|
||||||
|
int items = 0;
|
||||||
|
// Hash the current LCU to the IBC hashmap
|
||||||
|
for (int32_t xx = 0; xx < (int32_t)(ibc_block_width)-7; xx+=UVG_HASHMAP_BLOCKSIZE>>1) {
|
||||||
|
for (int32_t yy = 0; yy < (int32_t)(ibc_block_height)-7; yy+=UVG_HASHMAP_BLOCKSIZE>>1) {
|
||||||
|
int cur_x = lcu->position_px.x + xx;
|
||||||
|
int cur_y = lcu->position_px.y + yy;
|
||||||
|
|
||||||
|
// Skip blocks that seem to be the same value for the whole block
|
||||||
|
uint64_t first_line =
|
||||||
|
*(uint64_t *)&frame->source->y[cur_y * frame->source->stride + cur_x];
|
||||||
|
bool same_data = true;
|
||||||
|
for (int y_temp = 1; y_temp < 8; y_temp++) {
|
||||||
|
if (*(uint64_t *)&frame->source->y[(cur_y+y_temp) * frame->source->stride + cur_x] != first_line) {
|
||||||
|
same_data = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!same_data || (xx % UVG_HASHMAP_BLOCKSIZE == 0 && yy % UVG_HASHMAP_BLOCKSIZE == 0)) {
|
||||||
|
uint32_t crc = uvg_crc32c_8x8(&frame->source->y[cur_y * frame->source->stride + cur_x],frame->source->stride);
|
||||||
|
if (state->encoder_control->chroma_format != UVG_CSP_400) {
|
||||||
|
crc += uvg_crc32c_4x4(&frame->source->u[(cur_y>>1) * (frame->source->stride>>1) + (cur_x>>1)],frame->source->stride>>1);
|
||||||
|
crc += uvg_crc32c_4x4(&frame->source->v[(cur_y>>1) * (frame->source->stride>>1) + (cur_x>>1)],frame->source->stride>>1);
|
||||||
|
}
|
||||||
|
if (xx % UVG_HASHMAP_BLOCKSIZE == 0 && yy % UVG_HASHMAP_BLOCKSIZE == 0) {
|
||||||
|
state->tile->frame->ibc_hashmap_pos_to_hash[(cur_y / UVG_HASHMAP_BLOCKSIZE)*state->tile->frame->ibc_hashmap_pos_to_hash_stride + cur_x / UVG_HASHMAP_BLOCKSIZE] = crc;
|
||||||
|
}
|
||||||
|
uvg_hashmap_insert(frame->ibc_hashmap_row[ctu_row], crc, ((cur_x&0xffff)<<16) | (cur_y&0xffff));
|
||||||
|
items++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//fprintf(stderr, "Inserted %d items to %dx%d at %dx%d\r\n", items, ibc_block_width, ibc_block_height, lcu->position_px.x, lcu->position_px.y);
|
||||||
|
|
||||||
|
|
||||||
//This part doesn't write to bitstream, it's only search, deblock and sao
|
//This part doesn't write to bitstream, it's only search, deblock and sao
|
||||||
uvg_search_lcu(state, lcu->position_px.x, lcu->position_px.y, state->tile->hor_buf_search, state->tile->ver_buf_search, lcu->coeff);
|
uvg_search_lcu(state, lcu->position_px.x, lcu->position_px.y, state->tile->hor_buf_search, state->tile->ver_buf_search, lcu->coeff);
|
||||||
|
@ -703,6 +802,10 @@ static void encoder_state_worker_encode_lcu_search(void * opaque)
|
||||||
memcpy(&state->tile->frame->hmvp_lut[ctu_row_mul_five], original_lut, sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
|
memcpy(&state->tile->frame->hmvp_lut[ctu_row_mul_five], original_lut, sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
|
||||||
state->tile->frame->hmvp_size[ctu_row] = original_lut_size;
|
state->tile->frame->hmvp_size[ctu_row] = original_lut_size;
|
||||||
}
|
}
|
||||||
|
if (state->encoder_control->cfg.ibc) {
|
||||||
|
memcpy(&state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five], original_lut_ibc, sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
|
||||||
|
state->tile->frame->hmvp_size_ibc[ctu_row] = original_lut_size_ibc;
|
||||||
|
}
|
||||||
|
|
||||||
encoder_state_recdata_to_bufs(state, lcu, state->tile->hor_buf_search, state->tile->ver_buf_search);
|
encoder_state_recdata_to_bufs(state, lcu, state->tile->hor_buf_search, state->tile->ver_buf_search);
|
||||||
|
|
||||||
|
@ -899,8 +1002,13 @@ static void encoder_state_encode_leaf(encoder_state_t * const state)
|
||||||
bool wavefront = state->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW;
|
bool wavefront = state->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW;
|
||||||
|
|
||||||
// Clear hmvp lut size before each leaf
|
// Clear hmvp lut size before each leaf
|
||||||
if (!wavefront) memset(state->tile->frame->hmvp_size, 0, sizeof(uint8_t) * state->tile->frame->height_in_lcu);
|
if (!wavefront) {
|
||||||
else state->tile->frame->hmvp_size[state->wfrow->lcu_offset_y] = 0;
|
memset(state->tile->frame->hmvp_size, 0, sizeof(uint8_t) * state->tile->frame->height_in_lcu);
|
||||||
|
if(cfg->ibc) memset(state->tile->frame->hmvp_size_ibc, 0, sizeof(uint8_t) * state->tile->frame->height_in_lcu);
|
||||||
|
} else {
|
||||||
|
state->tile->frame->hmvp_size[state->wfrow->lcu_offset_y] = 0;
|
||||||
|
state->tile->frame->hmvp_size_ibc[state->wfrow->lcu_offset_y] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
bool use_parallel_encoding = (wavefront && state->parent->children[1].encoder_control);
|
bool use_parallel_encoding = (wavefront && state->parent->children[1].encoder_control);
|
||||||
if (!use_parallel_encoding) {
|
if (!use_parallel_encoding) {
|
||||||
|
@ -1644,6 +1752,7 @@ static void encoder_state_init_new_frame(encoder_state_t * const state, uvg_pict
|
||||||
|
|
||||||
if (!state->encoder_control->tiles_enable) {
|
if (!state->encoder_control->tiles_enable) {
|
||||||
memset(state->tile->frame->hmvp_size, 0, sizeof(uint8_t) * state->tile->frame->height_in_lcu);
|
memset(state->tile->frame->hmvp_size, 0, sizeof(uint8_t) * state->tile->frame->height_in_lcu);
|
||||||
|
memset(state->tile->frame->hmvp_size_ibc, 0, sizeof(uint8_t) * state->tile->frame->height_in_lcu);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ROI / delta QP maps
|
// ROI / delta QP maps
|
||||||
|
|
|
@ -192,9 +192,6 @@ typedef struct encoder_state_config_frame_t {
|
||||||
double *c_para;
|
double *c_para;
|
||||||
double *k_para;
|
double *k_para;
|
||||||
|
|
||||||
|
|
||||||
cu_info_t* hmvp_lut; //!< \brief Look-up table for HMVP, one for each LCU row
|
|
||||||
uint8_t* hmvp_size; //!< \brief HMVP LUT size
|
|
||||||
bool jccr_sign;
|
bool jccr_sign;
|
||||||
|
|
||||||
} encoder_state_config_frame_t;
|
} encoder_state_config_frame_t;
|
||||||
|
|
16
src/filter.c
16
src/filter.c
|
@ -789,10 +789,10 @@ static void filter_deblock_edge_luma(encoder_state_t * const state,
|
||||||
cu_p->inter.mv[1][0] = 0;
|
cu_p->inter.mv[1][0] = 0;
|
||||||
cu_p->inter.mv[1][1] = 0;
|
cu_p->inter.mv[1][1] = 0;
|
||||||
}
|
}
|
||||||
const int refP0 = (cu_p->inter.mv_dir & 1) ? state->frame->ref_LX[0][cu_p->inter.mv_ref[0]] : -1;
|
const int refP0 = (cu_p->type == CU_IBC)?-2:(cu_p->inter.mv_dir & 1) ? state->frame->ref_LX[0][cu_p->inter.mv_ref[0]] : -1;
|
||||||
const int refP1 = (cu_p->inter.mv_dir & 2) ? state->frame->ref_LX[1][cu_p->inter.mv_ref[1]] : -1;
|
const int refP1 = (cu_p->type == CU_IBC)?-2:(cu_p->inter.mv_dir & 2) ? state->frame->ref_LX[1][cu_p->inter.mv_ref[1]] : -1;
|
||||||
const int refQ0 = (cu_q->inter.mv_dir & 1) ? state->frame->ref_LX[0][cu_q->inter.mv_ref[0]] : -1;
|
const int refQ0 = (cu_q->type == CU_IBC)?-2:(cu_q->inter.mv_dir & 1) ? state->frame->ref_LX[0][cu_q->inter.mv_ref[0]] : -1;
|
||||||
const int refQ1 = (cu_q->inter.mv_dir & 2) ? state->frame->ref_LX[1][cu_q->inter.mv_ref[1]] : -1;
|
const int refQ1 = (cu_q->type == CU_IBC)?-2:(cu_q->inter.mv_dir & 2) ? state->frame->ref_LX[1][cu_q->inter.mv_ref[1]] : -1;
|
||||||
const mv_t* mvQ0 = cu_q->inter.mv[0];
|
const mv_t* mvQ0 = cu_q->inter.mv[0];
|
||||||
const mv_t* mvQ1 = cu_q->inter.mv[1];
|
const mv_t* mvQ1 = cu_q->inter.mv[1];
|
||||||
|
|
||||||
|
@ -830,12 +830,14 @@ static void filter_deblock_edge_luma(encoder_state_t * const state,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else /*if (cu_p->inter.mv_dir != 3 && cu_q->inter.mv_dir != 3)*/ { //is P-slice
|
else /*if (cu_p->inter.mv_dir != 3 && cu_q->inter.mv_dir != 3)*/ { //is P-slice
|
||||||
if (cu_q->inter.mv_ref[cu_q->inter.mv_dir - 1] != cu_p->inter.mv_ref[cu_p->inter.mv_dir - 1]) {
|
const int refP = (cu_p->type == CU_IBC)?-2:state->frame->ref_LX[0][cu_p->inter.mv_ref[0]];
|
||||||
|
const int refQ = (cu_q->type == CU_IBC)?-2:state->frame->ref_LX[0][cu_q->inter.mv_ref[0]];
|
||||||
|
if (refP != refQ) {
|
||||||
// Reference pictures are different
|
// Reference pictures are different
|
||||||
strength = 1;
|
strength = 1;
|
||||||
} else if (
|
} else if (
|
||||||
((abs(cu_q->inter.mv[cu_q->inter.mv_dir - 1][0] - cu_p->inter.mv[cu_p->inter.mv_dir - 1][0]) >= mvdThreashold) ||
|
((abs(cu_q->inter.mv[0][0] - cu_p->inter.mv[0][0]) >= mvdThreashold) ||
|
||||||
(abs(cu_q->inter.mv[cu_q->inter.mv_dir - 1][1] - cu_p->inter.mv[cu_p->inter.mv_dir - 1][1]) >= mvdThreashold))) {
|
(abs(cu_q->inter.mv[0][1] - cu_p->inter.mv[0][1]) >= mvdThreashold))) {
|
||||||
// Absolute motion vector diff between blocks >= 0.5 (Integer pixel)
|
// Absolute motion vector diff between blocks >= 0.5 (Integer pixel)
|
||||||
strength = 1;
|
strength = 1;
|
||||||
}
|
}
|
||||||
|
|
10
src/global.h
10
src/global.h
|
@ -176,7 +176,6 @@ typedef int32_t mv_t;
|
||||||
//! pow(2, MIN_SIZE)
|
//! pow(2, MIN_SIZE)
|
||||||
#define CU_MIN_SIZE_PIXELS (1 << MIN_SIZE)
|
#define CU_MIN_SIZE_PIXELS (1 << MIN_SIZE)
|
||||||
|
|
||||||
//! Round frame size up to this interval (8 pixels)
|
|
||||||
#define CONF_WINDOW_PAD_IN_PIXELS ((1 << MIN_SIZE)<<1)
|
#define CONF_WINDOW_PAD_IN_PIXELS ((1 << MIN_SIZE)<<1)
|
||||||
|
|
||||||
//! spec: CtbSizeY
|
//! spec: CtbSizeY
|
||||||
|
@ -254,6 +253,15 @@ typedef int32_t mv_t;
|
||||||
#define AMVP_MAX_NUM_CANDS 2
|
#define AMVP_MAX_NUM_CANDS 2
|
||||||
#define AMVP_MAX_NUM_CANDS_MEM 3
|
#define AMVP_MAX_NUM_CANDS_MEM 3
|
||||||
#define MRG_MAX_NUM_CANDS 6
|
#define MRG_MAX_NUM_CANDS 6
|
||||||
|
/**
|
||||||
|
* \brief Max number of merge candidates in Intra Block Copy
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#define IBC_MRG_MAX_NUM_CANDS 6
|
||||||
|
#define IBC_BUFFER_SIZE (128*128)
|
||||||
|
#define IBC_BUFFER_WIDTH (IBC_BUFFER_SIZE / LCU_WIDTH)
|
||||||
|
#define IBC_BUFFER_WIDTH_C ((IBC_BUFFER_SIZE / LCU_WIDTH) >> 1)
|
||||||
|
|
||||||
|
|
||||||
#define MAX_NUM_HMVP_CANDS 5
|
#define MAX_NUM_HMVP_CANDS 5
|
||||||
|
|
||||||
|
|
150
src/hashmap.c
Normal file
150
src/hashmap.c
Normal file
|
@ -0,0 +1,150 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
* This file is part of uvg266 VVC encoder.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2023, Tampere University, ITU/ISO/IEC, project contributors
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
* are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice, this
|
||||||
|
* list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright notice, this
|
||||||
|
* list of conditions and the following disclaimer in the documentation and/or
|
||||||
|
* other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from
|
||||||
|
* this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||||
|
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
* INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
|
||||||
|
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
#include "hashmap.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief This function creates a node for the uvg_hashmap.
|
||||||
|
*
|
||||||
|
* \param key the key of the node to be created
|
||||||
|
* \param value the value of the node to be created
|
||||||
|
* \return uvg_hashmap_node a node with the given key and value
|
||||||
|
*/
|
||||||
|
uvg_hashmap_node_t* uvg_hashmap_create_node(uint32_t key, uint32_t value) {
|
||||||
|
uvg_hashmap_node_t* new_node = (uvg_hashmap_node_t*)malloc(sizeof(uvg_hashmap_node_t));
|
||||||
|
new_node->key = key;
|
||||||
|
new_node->value = value;
|
||||||
|
new_node->next = NULL;
|
||||||
|
new_node->size = 1;
|
||||||
|
return new_node;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief This function creates a new uvg_hashmap with a given bucket size.
|
||||||
|
*
|
||||||
|
* \param bucket_size the size of the hashmap bucket
|
||||||
|
* \return uvg_hashmap a new uvg_hashmap with the given bucket size
|
||||||
|
*/
|
||||||
|
uvg_hashmap_t* uvg_hashmap_create(uint32_t bucket_size)
|
||||||
|
{
|
||||||
|
uvg_hashmap_t* new_hashmap = (uvg_hashmap_t*)malloc(sizeof(uvg_hashmap_t));
|
||||||
|
new_hashmap->bucket_size = bucket_size;
|
||||||
|
new_hashmap->table = (uvg_hashmap_node_t**)malloc(sizeof(uvg_hashmap_node_t*) * bucket_size);
|
||||||
|
for (int i = 0; i < bucket_size; i++) {
|
||||||
|
new_hashmap->table[i] = NULL;
|
||||||
|
}
|
||||||
|
return new_hashmap;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * \brief Map a key to a bucket index.
 *
 * The index is the key reduced modulo the number of buckets; no additional
 * bit mixing is applied to the key.
 *
 * \param key          the key to be hashed
 * \param bucket_size  number of buckets in the table; must be non-zero
 * \return the bucket index for the given key, in the range [0, bucket_size)
 */
static uint32_t uvg_hashmap_hash(uint32_t key, uint32_t bucket_size)
{
  const uint32_t bucket_index = key % bucket_size;
  return bucket_index;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief This function inserts a new node into the hashmap.
|
||||||
|
*
|
||||||
|
* \param map the hashmap to insert the new node into
|
||||||
|
* \param key the key of the new node
|
||||||
|
* \param value the value of the new node
|
||||||
|
*/
|
||||||
|
void uvg_hashmap_insert(uvg_hashmap_t* map, uint32_t key, uint32_t value) {
|
||||||
|
uint32_t hash_index = uvg_hashmap_hash(key, map->bucket_size);
|
||||||
|
uvg_hashmap_node_t* new_node = uvg_hashmap_create_node(key, value);
|
||||||
|
new_node->next = (void*)map->table[hash_index];
|
||||||
|
if (new_node->next != NULL) new_node->size = ((uvg_hashmap_node_t*)new_node->next)->size + 1;
|
||||||
|
map->table[hash_index] = new_node;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief This function searches the hashmap for the given key.
|
||||||
|
*
|
||||||
|
* \param map the hashmap to search in
|
||||||
|
* \param key the key to search for
|
||||||
|
* \return uvg_hashmap_node the node with the given key, NULL if not found.
|
||||||
|
*/
|
||||||
|
uvg_hashmap_node_t* uvg_hashmap_search(uvg_hashmap_t* map, uint32_t key) {
|
||||||
|
uint32_t hashIndex = uvg_hashmap_hash(key, map->bucket_size);
|
||||||
|
return map->table[hashIndex];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * \brief Return the value of the first node in the bucket chain whose key
 *        matches the given key.
 *
 * Nodes are inserted at the head of their chain, so the first match is the
 * most recently inserted node with that key.
 *
 * \param map  the hashmap to search in
 * \param key  the key to search for
 * \return value of the first matching node, or (uint32_t)-1 when no node
 *         matches; a stored value of 0xFFFFFFFF cannot be told apart from
 *         "not found"
 */
uint32_t uvg_hashmap_search_return_first(uvg_hashmap_t* map, uint32_t key)
{
  const uint32_t bucket = uvg_hashmap_hash(key, map->bucket_size);

  // Walk the chain and stop at the first node carrying the requested key
  for (uvg_hashmap_node_t* node = map->table[bucket];
       node != NULL;
       node = (uvg_hashmap_node_t*)node->next) {
    if (node->key == key) {
      return node->value;
    }
  }

  // Not found
  return (uint32_t)-1;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief This function frees the memory of a given hashmap node.
|
||||||
|
*
|
||||||
|
* \param node the node to free the memory of.
|
||||||
|
*/
|
||||||
|
void uvg_hashmap_node_free(uvg_hashmap_node_t* node)
|
||||||
|
{
|
||||||
|
while (node) {
|
||||||
|
uvg_hashmap_node_t* to_delete = node;
|
||||||
|
node = (uvg_hashmap_node_t*)node->next;
|
||||||
|
free(to_delete);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief This function frees the memory of a given hashmap.
|
||||||
|
*
|
||||||
|
* \param map the hashmap to free the memory of.
|
||||||
|
*/
|
||||||
|
void uvg_hashmap_free(uvg_hashmap_t* map) {
|
||||||
|
for (int i = 0; i < map->bucket_size; i++) {
|
||||||
|
uvg_hashmap_node_t* temp = map->table[i];
|
||||||
|
uvg_hashmap_node_free(temp);
|
||||||
|
}
|
||||||
|
free(map->table);
|
||||||
|
free(map);
|
||||||
|
}
|
70
src/hashmap.h
Normal file
70
src/hashmap.h
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
/*****************************************************************************
|
||||||
|
* This file is part of uvg266 VVC encoder.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2023, Tampere University, ITU/ISO/IEC, project contributors
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
* are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice, this
|
||||||
|
* list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright notice, this
|
||||||
|
* list of conditions and the following disclaimer in the documentation and/or
|
||||||
|
* other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from
|
||||||
|
* this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||||
|
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
* INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
|
||||||
|
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
// The ratio of the hashmap bucket size to the maximum number of elements
|
||||||
|
#define UVG_HASHMAP_RATIO 12.0
|
||||||
|
// Side length in pixels of the square luma blocks that are hashed (8x8 blocks)
|
||||||
|
#define UVG_HASHMAP_BLOCKSIZE 8
|
||||||
|
|
||||||
|
// One entry of a bucket chain (singly linked list) in the uvg_hashmap.
typedef struct uvg_hashmap_node {
|
||||||
|
// Next node in the same bucket, NULL at the end of the chain.
// Stored as void*; hashmap.c casts it to uvg_hashmap_node_t* when walking.
void* next;
|
||||||
|
// Key the node was inserted with
uint32_t key;
|
||||||
|
// Value stored for the key
uint32_t value;
|
||||||
|
// Number of nodes in the chain starting at this node (1 for a lone node)
uint32_t size;
|
||||||
|
} uvg_hashmap_node_t;
|
||||||
|
|
||||||
|
// Chained hash table mapping 32-bit keys to 32-bit values.
typedef struct uvg_hashmap {
|
||||||
|
// Number of entries in table; keys are reduced modulo this value
uint32_t bucket_size;
|
||||||
|
// Array of bucket_size chain heads, NULL for an empty bucket
uvg_hashmap_node_t** table;
|
||||||
|
} uvg_hashmap_t;
|
||||||
|
|
||||||
|
uvg_hashmap_node_t* uvg_hashmap_create_node(uint32_t key, uint32_t value);
|
||||||
|
|
||||||
|
uvg_hashmap_t* uvg_hashmap_create(uint32_t bucket_size);
|
||||||
|
|
||||||
|
//uint32_t uvg_hashmap_hash(uint32_t key, uint32_t bucket_size);
|
||||||
|
|
||||||
|
void uvg_hashmap_insert(uvg_hashmap_t* map, uint32_t key, uint32_t value);
|
||||||
|
|
||||||
|
uvg_hashmap_node_t* uvg_hashmap_search(uvg_hashmap_t* map, uint32_t key);
|
||||||
|
|
||||||
|
uint32_t uvg_hashmap_search_return_first(uvg_hashmap_t* map, uint32_t key);
|
||||||
|
|
||||||
|
void uvg_hashmap_node_free(uvg_hashmap_node_t* node);
|
||||||
|
|
||||||
|
void uvg_hashmap_free(uvg_hashmap_t* map);
|
635
src/inter.c
635
src/inter.c
|
@ -593,6 +593,67 @@ void uvg_inter_recon_cu(const encoder_state_t * const state,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * \brief Reconstruct an Intra Block Copy (IBC) predicted block into lcu->rec.
 *
 * The block vector is read from cu->inter.mv[0] of the CU at (x, y) and
 * reduced to whole pixels by shifting out INTERNAL_MV_PREC bits. A
 * width x width luma block (and width/2 x width/2 chroma blocks) is copied
 * to the current position in lcu->rec from one of three sources, depending
 * on where the reference block lies horizontally:
 *  - entirely from the frame's IBC buffer of this LCU row,
 *  - entirely from lcu->rec itself,
 *  - or split column-wise between the IBC buffer and lcu->rec.
 *
 * \param state           encoder state, gives access to the frame IBC buffers
 * \param lcu             LCU whose rec buffers are read and written
 * \param x               x position of the block in pixels (presumably frame/tile coordinates - TODO confirm)
 * \param y               y position of the block in pixels (presumably frame/tile coordinates - TODO confirm)
 * \param width           width and height of the block in luma pixels
 * \param predict_luma    copy the luma block
 * \param predict_chroma  copy the chroma blocks
 * \param i_pu            not used by this function
 */
static void ibc_recon_cu(const encoder_state_t * const state,
|
||||||
|
lcu_t *lcu,
|
||||||
|
int32_t x,
|
||||||
|
int32_t y,
|
||||||
|
int32_t width,
|
||||||
|
bool predict_luma,
|
||||||
|
bool predict_chroma,
|
||||||
|
int i_pu)
|
||||||
|
{
|
||||||
|
const int x_scu = SUB_SCU(x);
|
||||||
|
const int y_scu = SUB_SCU(y);
|
||||||
|
// Destination offsets of the block inside lcu->rec (luma and chroma)
uint32_t offset = x_scu + y_scu * LCU_WIDTH;
|
||||||
|
uint32_t offset_c = x_scu / 2 + y_scu / 2 * LCU_WIDTH_C;
|
||||||
|
cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, x_scu, y_scu);
|
||||||
|
|
||||||
|
// Block vector in whole pixels
int32_t mv_x = cu->inter.mv[0][0] >> INTERNAL_MV_PREC;
|
||||||
|
int32_t mv_y = cu->inter.mv[0][1] >> INTERNAL_MV_PREC;
|
||||||
|
// One IBC buffer per LCU row
uint32_t ibc_row = y / LCU_WIDTH;
|
||||||
|
|
||||||
|
// Position of the reference block inside the IBC buffer. Once the LCU no
// longer fits in the buffer, the current LCU's left edge is treated as being
// at column IBC_BUFFER_WIDTH of the buffer.
int32_t buffer_x = ((x - x_scu) + LCU_WIDTH <= IBC_BUFFER_WIDTH ?
|
||||||
|
x :
|
||||||
|
x - (((x - x_scu)) - IBC_BUFFER_WIDTH)) + mv_x;
|
||||||
|
int32_t buffer_y = y_scu + mv_y;
|
||||||
|
|
||||||
|
// The reference block must start at least `width` pixels to the left of or
// above the current block, and the block position must be non-negative
assert((-mv_x >= width || -mv_y >= width) && x >= 0 && y >= 0);
|
||||||
|
|
||||||
|
// Reference block ends at or before the left edge of this LCU: copy everything from the IBC buffer
if (mv_x + x_scu + width <= 0) {
|
||||||
|
if(predict_luma) uvg_pixels_blit(&state->tile->frame->ibc_buffer_y[ibc_row][buffer_y * IBC_BUFFER_WIDTH + buffer_x], lcu->rec.y + offset, width, width, IBC_BUFFER_WIDTH, LCU_WIDTH);
|
||||||
|
if (predict_chroma) {
|
||||||
|
uvg_pixels_blit(&state->tile->frame->ibc_buffer_u[ibc_row][(buffer_y / 2) * IBC_BUFFER_WIDTH_C + (buffer_x / 2)], lcu->rec.u + offset_c, width / 2, width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C);
|
||||||
|
uvg_pixels_blit(&state->tile->frame->ibc_buffer_v[ibc_row][(buffer_y / 2) * IBC_BUFFER_WIDTH_C + (buffer_x / 2)], lcu->rec.v + offset_c, width / 2, width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C);
|
||||||
|
}
|
||||||
|
} else if (mv_x + x_scu + width >= width) { // Completely in current LCU (same as mv_x + x_scu >= 0)
|
||||||
|
if(predict_luma) uvg_pixels_blit(&lcu->rec.y[(y_scu + mv_y) * LCU_WIDTH + x_scu + mv_x], lcu->rec.y + offset, width, width, LCU_WIDTH, LCU_WIDTH);
|
||||||
|
if (predict_chroma) {
|
||||||
|
uvg_pixels_blit(&lcu->rec.u[((y_scu+mv_y) / 2) * LCU_WIDTH_C + (x_scu + mv_x) / 2], lcu->rec.u + offset_c, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C);
|
||||||
|
uvg_pixels_blit(&lcu->rec.v[((y_scu+mv_y) / 2) * LCU_WIDTH_C + (x_scu + mv_x) / 2], lcu->rec.v + offset_c, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C);
|
||||||
|
}
|
||||||
|
} else { // Partly in the IBC buffer and partly in the current LCU rec
|
||||||
|
|
||||||
|
// Columns left of the LCU edge come from the IBC buffer, the rest from lcu->rec
uint32_t width_buffer = -(mv_x + x_scu);
|
||||||
|
uint32_t width_lcu = width - width_buffer;
|
||||||
|
if(predict_luma) uvg_pixels_blit(&state->tile->frame->ibc_buffer_y[ibc_row][buffer_y * IBC_BUFFER_WIDTH + buffer_x], lcu->rec.y + offset, width_buffer, width, IBC_BUFFER_WIDTH, LCU_WIDTH);
|
||||||
|
if (predict_chroma) {
|
||||||
|
// Chroma column count is rounded up when width_buffer is odd
uvg_pixels_blit(&state->tile->frame->ibc_buffer_u[ibc_row][(buffer_y / 2) * IBC_BUFFER_WIDTH_C + (buffer_x / 2)], lcu->rec.u + offset_c, width_buffer / 2 + (width_buffer&1), width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C);
|
||||||
|
uvg_pixels_blit(&state->tile->frame->ibc_buffer_v[ibc_row][(buffer_y / 2) * IBC_BUFFER_WIDTH_C + (buffer_x / 2)], lcu->rec.v + offset_c, width_buffer / 2 + (width_buffer&1), width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Advance the destination past the columns already copied from the buffer
offset += width_buffer;
|
||||||
|
offset_c += width_buffer/2 + (width_buffer&1);
|
||||||
|
|
||||||
|
if(predict_luma) uvg_pixels_blit(&lcu->rec.y[(y_scu + mv_y) * LCU_WIDTH + x_scu + mv_x + width_buffer], lcu->rec.y + offset, width_lcu, width, LCU_WIDTH, LCU_WIDTH);
|
||||||
|
// Skip the chroma copy when no whole chroma column remains
if (predict_chroma && (width_lcu / 2)) {
|
||||||
|
uvg_pixels_blit(&lcu->rec.u[((y_scu+mv_y) / 2) * LCU_WIDTH_C + (x_scu + mv_x + width_buffer) / 2], lcu->rec.u + offset_c, width_lcu / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C);
|
||||||
|
uvg_pixels_blit(&lcu->rec.v[((y_scu+mv_y) / 2) * LCU_WIDTH_C + (x_scu + mv_x + width_buffer) / 2], lcu->rec.v + offset_c, width_lcu / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Predict a single PU.
|
* Predict a single PU.
|
||||||
*
|
*
|
||||||
|
@ -626,49 +687,56 @@ void uvg_inter_pred_pu(const encoder_state_t * const state,
|
||||||
const int pu_h = PU_GET_H(cu->part_size, width, i_pu);
|
const int pu_h = PU_GET_H(cu->part_size, width, i_pu);
|
||||||
cu_info_t *pu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(pu_x), SUB_SCU(pu_y));
|
cu_info_t *pu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(pu_x), SUB_SCU(pu_y));
|
||||||
|
|
||||||
if (pu->inter.mv_dir == 3) {
|
if (cu->type == CU_IBC) {
|
||||||
const uvg_picture *const refs[2] = {
|
ibc_recon_cu(state, lcu, x, y, width, predict_luma, predict_chroma, i_pu);
|
||||||
state->frame->ref->images[
|
} else {
|
||||||
state->frame->ref_LX[0][
|
|
||||||
pu->inter.mv_ref[0]]],
|
if (pu->inter.mv_dir == 3) {
|
||||||
state->frame->ref->images[
|
const uvg_picture * const refs[2] = {
|
||||||
state->frame->ref_LX[1][
|
state->frame->ref->images[state->frame->ref_LX[0][pu->inter.mv_ref[0]]],
|
||||||
pu->inter.mv_ref[1]]],
|
state->frame->ref->images[state->frame->ref_LX[1][pu->inter.mv_ref[1]]],
|
||||||
};
|
};
|
||||||
uvg_inter_recon_bipred(state,
|
uvg_inter_recon_bipred(
|
||||||
refs[0], refs[1],
|
state,
|
||||||
pu_x, pu_y,
|
refs[0],
|
||||||
pu_w, pu_h,
|
refs[1],
|
||||||
pu->inter.mv,
|
pu_x,
|
||||||
lcu,
|
pu_y,
|
||||||
predict_luma, predict_chroma);
|
pu_w,
|
||||||
|
pu_h,
|
||||||
|
pu->inter.mv,
|
||||||
|
lcu,
|
||||||
|
predict_luma,
|
||||||
|
predict_chroma);
|
||||||
|
} else {
|
||||||
|
const int mv_idx = pu->inter.mv_dir - 1;
|
||||||
|
const uvg_picture * const ref =
|
||||||
|
state->frame->ref->images[state->frame->ref_LX[mv_idx][pu->inter.mv_ref[mv_idx]]];
|
||||||
|
|
||||||
|
const unsigned offset_luma = SUB_SCU(pu_y) * LCU_WIDTH + SUB_SCU(pu_x);
|
||||||
|
const unsigned offset_chroma =
|
||||||
|
SUB_SCU(pu_y) / 2 * LCU_WIDTH_C + SUB_SCU(pu_x) / 2;
|
||||||
|
yuv_t lcu_adapter;
|
||||||
|
lcu_adapter.size = pu_w * pu_h;
|
||||||
|
lcu_adapter.y = lcu->rec.y + offset_luma,
|
||||||
|
lcu_adapter.u = lcu->rec.u + offset_chroma,
|
||||||
|
lcu_adapter.v = lcu->rec.v + offset_chroma,
|
||||||
|
|
||||||
|
inter_recon_unipred(
|
||||||
|
state,
|
||||||
|
ref,
|
||||||
|
pu_x,
|
||||||
|
pu_y,
|
||||||
|
pu_w,
|
||||||
|
pu_h,
|
||||||
|
LCU_WIDTH,
|
||||||
|
pu->inter.mv[mv_idx],
|
||||||
|
&lcu_adapter,
|
||||||
|
NULL,
|
||||||
|
predict_luma,
|
||||||
|
predict_chroma);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
const int mv_idx = pu->inter.mv_dir - 1;
|
|
||||||
const uvg_picture *const ref =
|
|
||||||
state->frame->ref->images[
|
|
||||||
state->frame->ref_LX[mv_idx][
|
|
||||||
pu->inter.mv_ref[mv_idx]]];
|
|
||||||
|
|
||||||
const unsigned offset_luma = SUB_SCU(pu_y) * LCU_WIDTH + SUB_SCU(pu_x);
|
|
||||||
const unsigned offset_chroma = SUB_SCU(pu_y) / 2 * LCU_WIDTH_C + SUB_SCU(pu_x) / 2;
|
|
||||||
yuv_t lcu_adapter;
|
|
||||||
lcu_adapter.size = pu_w * pu_h;
|
|
||||||
lcu_adapter.y = lcu->rec.y + offset_luma,
|
|
||||||
lcu_adapter.u = lcu->rec.u + offset_chroma,
|
|
||||||
lcu_adapter.v = lcu->rec.v + offset_chroma,
|
|
||||||
|
|
||||||
inter_recon_unipred(state,
|
|
||||||
ref,
|
|
||||||
pu_x, pu_y,
|
|
||||||
pu_w, pu_h,
|
|
||||||
LCU_WIDTH,
|
|
||||||
pu->inter.mv[mv_idx],
|
|
||||||
&lcu_adapter,
|
|
||||||
NULL,
|
|
||||||
predict_luma, predict_chroma);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (predict_chroma && state->encoder_control->cfg.jccr) {
|
if (predict_chroma && state->encoder_control->cfg.jccr) {
|
||||||
const int offset = x_scu / 2 + y_scu / 2 * LCU_WIDTH_C;
|
const int offset = x_scu / 2 + y_scu / 2 * LCU_WIDTH_C;
|
||||||
uvg_pixels_blit(lcu->rec.u + offset, lcu->rec.joint_u + offset, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C);
|
uvg_pixels_blit(lcu->rec.u + offset, lcu->rec.joint_u + offset, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C);
|
||||||
|
@ -917,6 +985,259 @@ static void get_temporal_merge_candidates(const encoder_state_t * const state,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static INLINE mv_t get_scaled_mv(mv_t mv, int scale)
|
||||||
|
{
|
||||||
|
int32_t scaled = scale * mv;
|
||||||
|
return CLIP(-131072, 131071, (scaled + 127 + (scaled < 0)) >> 8);
|
||||||
|
}
|
||||||
|
|
||||||
|
#define MV_EXPONENT_BITCOUNT 4
|
||||||
|
#define MV_MANTISSA_BITCOUNT 6
|
||||||
|
#define MV_MANTISSA_UPPER_LIMIT ((1 << (MV_MANTISSA_BITCOUNT - 1)) - 1)
|
||||||
|
#define MV_MANTISSA_LIMIT (1 << (MV_MANTISSA_BITCOUNT - 1))
|
||||||
|
#define MV_EXPONENT_MASK ((1 << MV_EXPONENT_BITCOUNT) - 1)
|
||||||
|
|
||||||
|
static int convert_mv_fixed_to_float(int32_t val)
|
||||||
|
{
|
||||||
|
uint32_t sign = val >> 31;
|
||||||
|
int scale = uvg_math_floor_log2((val ^ sign) | MV_MANTISSA_UPPER_LIMIT) - (MV_MANTISSA_BITCOUNT - 1);
|
||||||
|
|
||||||
|
int exponent;
|
||||||
|
uint32_t mantissa;
|
||||||
|
if (scale >= 0)
|
||||||
|
{
|
||||||
|
int round = (1 << scale) >> 1;
|
||||||
|
int n = (val + round) >> scale;
|
||||||
|
exponent = scale + ((n ^ sign) >> (MV_MANTISSA_BITCOUNT - 1));
|
||||||
|
mantissa = (n & MV_MANTISSA_UPPER_LIMIT) | (sign << (MV_MANTISSA_BITCOUNT - 1));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
exponent = 0;
|
||||||
|
mantissa = val;
|
||||||
|
}
|
||||||
|
|
||||||
|
return exponent | (mantissa << MV_EXPONENT_BITCOUNT);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int convert_mv_float_to_fixed(int val)
|
||||||
|
{
|
||||||
|
int exponent = val & MV_EXPONENT_MASK;
|
||||||
|
uint32_t mantissa = val >> MV_EXPONENT_BITCOUNT;
|
||||||
|
return exponent == 0 ? mantissa : (mantissa ^ MV_MANTISSA_LIMIT) << (exponent - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int round_mv_comp(int x)
|
||||||
|
{
|
||||||
|
return convert_mv_float_to_fixed(convert_mv_fixed_to_float(x));
|
||||||
|
}
|
||||||
|
|
||||||
|
static void apply_mv_scaling_pocs(int32_t current_poc,
|
||||||
|
int32_t current_ref_poc,
|
||||||
|
int32_t neighbor_poc,
|
||||||
|
int32_t neighbor_ref_poc,
|
||||||
|
mv_t mv_cand[2])
|
||||||
|
{
|
||||||
|
int32_t diff_current = current_poc - current_ref_poc;
|
||||||
|
int32_t diff_neighbor = neighbor_poc - neighbor_ref_poc;
|
||||||
|
|
||||||
|
if (diff_current == diff_neighbor) return;
|
||||||
|
|
||||||
|
diff_current = CLIP(-128, 127, diff_current);
|
||||||
|
diff_neighbor = CLIP(-128, 127, diff_neighbor);
|
||||||
|
|
||||||
|
int scale = CLIP(-4096, 4095,
|
||||||
|
(diff_current * ((0x4000 + (abs(diff_neighbor) >> 1)) / diff_neighbor) + 32) >> 6);
|
||||||
|
|
||||||
|
mv_cand[0] = get_scaled_mv(mv_cand[0], scale);
|
||||||
|
mv_cand[1] = get_scaled_mv(mv_cand[1], scale);
|
||||||
|
}
|
||||||
|
|
||||||
|
static INLINE void apply_mv_scaling(const encoder_state_t *state,
|
||||||
|
const cu_info_t *current_cu,
|
||||||
|
const cu_info_t *neighbor_cu,
|
||||||
|
int8_t current_reflist,
|
||||||
|
int8_t neighbor_reflist,
|
||||||
|
mv_t mv_cand[2])
|
||||||
|
{
|
||||||
|
apply_mv_scaling_pocs(state->frame->poc,
|
||||||
|
state->frame->ref->pocs[
|
||||||
|
state->frame->ref_LX[current_reflist][
|
||||||
|
current_cu->inter.mv_ref[current_reflist]]],
|
||||||
|
state->frame->poc,
|
||||||
|
state->frame->ref->pocs[
|
||||||
|
state->frame->ref_LX[neighbor_reflist][
|
||||||
|
neighbor_cu->inter.mv_ref[neighbor_reflist]]],
|
||||||
|
mv_cand);
|
||||||
|
}
|
||||||
|
|
||||||
|
static INLINE bool add_mvp_candidate(const encoder_state_t *state,
|
||||||
|
const cu_info_t *cur_cu,
|
||||||
|
const cu_info_t *cand,
|
||||||
|
int8_t reflist,
|
||||||
|
bool scaling,
|
||||||
|
mv_t mv_cand_out[2])
|
||||||
|
{
|
||||||
|
if (!cand) return false;
|
||||||
|
|
||||||
|
assert(cand->inter.mv_dir != 0);
|
||||||
|
|
||||||
|
for (int i = 0; i < 2; i++) {
|
||||||
|
const int cand_list = i == 0 ? reflist : !reflist;
|
||||||
|
|
||||||
|
if ((cand->inter.mv_dir & (1 << cand_list)) == 0) continue;
|
||||||
|
|
||||||
|
if (scaling) {
|
||||||
|
mv_cand_out[0] = cand->inter.mv[cand_list][0];
|
||||||
|
mv_cand_out[1] = cand->inter.mv[cand_list][1];
|
||||||
|
apply_mv_scaling(state, cur_cu, cand, reflist, cand_list, mv_cand_out);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (state->frame->ref_LX[cand_list][cand->inter.mv_ref[cand_list]] ==
|
||||||
|
state->frame->ref_LX[reflist][cur_cu->inter.mv_ref[reflist]])
|
||||||
|
{
|
||||||
|
mv_cand_out[0] = cand->inter.mv[cand_list][0];
|
||||||
|
mv_cand_out[1] = cand->inter.mv[cand_list][1];
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static bool is_duplicate_candidate_ibc(const cu_info_t* cu1, const cu_info_t* cu2)
|
||||||
|
{
|
||||||
|
if (!cu2) return false;
|
||||||
|
|
||||||
|
if (cu1->inter.mv[0][0] != cu2->inter.mv[0][0] ||
|
||||||
|
cu1->inter.mv[0][1] != cu2->inter.mv[0][1]) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Get merge candidates for current block.
|
||||||
|
*
|
||||||
|
* Only the left (A1) and above (B1) neighbours are examined; remaining
|
||||||
|
* slots are filled from the IBC HMVP table of the current CTU row and
|
||||||
|
* finally padded with (0,0) vectors.
|
||||||
|
*
|
||||||
|
* \param x block x position in pixels
|
||||||
|
* \param y block y position in pixels
|
||||||
|
* \param width block width in pixels
|
||||||
|
* \param height block height in pixels
|
||||||
|
* \param state encoder state; supplies the IBC HMVP table of the tile frame
|
||||||
|
* \param cua CU array used to look up neighbours when lcu is NULL
|
||||||
|
* \param lcu current LCU, or NULL to read neighbours from cua
|
||||||
|
* \param mv_cand will be filled with IBC_MRG_MAX_NUM_CANDS candidate vectors
|
||||||
|
*/
|
||||||
|
static void get_ibc_merge_candidates(const encoder_state_t * const state,
|
||||||
|
const cu_info_t * const cur_cu,
|
||||||
|
lcu_t *lcu,
|
||||||
|
const cu_array_t *cua,
|
||||||
|
int32_t x,
|
||||||
|
int32_t y,
|
||||||
|
int32_t width,
|
||||||
|
int32_t height,
|
||||||
|
mv_t mv_cand[IBC_MRG_MAX_NUM_CANDS][2]
|
||||||
|
)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
Predictor block locations
|
||||||
|
____ _______
|
||||||
|
|B2|______|B1|B0|
|
||||||
|
| |
|
||||||
|
| Cur CU |
|
||||||
|
__| |
|
||||||
|
|A1|_________|
|
||||||
|
|A0|
|
||||||
|
*/
|
||||||
|
int32_t x_local = SUB_SCU(x); //!< coordinates from top-left of this LCU
|
||||||
|
int32_t y_local = SUB_SCU(y);
|
||||||
|
|
||||||
|
cu_info_t *a1 = NULL;
|
||||||
|
cu_info_t *b1 = NULL;
|
||||||
|
|
||||||
|
uint8_t candidates = 0;
|
||||||
|
|
||||||
|
// A1 availability testing
|
||||||
|
if (x != 0) {
|
||||||
|
a1 = lcu != NULL?LCU_GET_CU_AT_PX(lcu, x_local - 1, y_local + height - 1): uvg_cu_array_at_const(cua, x - 1, y + height - 1);
|
||||||
|
// Do not check a1->coded because the left block is always coded before
|
||||||
|
// the current one and the flag is not set when searching an SMP block.
|
||||||
|
if (a1->type == CU_IBC) {
|
||||||
|
inter_clear_cu_unused(a1);
|
||||||
|
mv_cand[candidates][0] = a1->inter.mv[0][0];
|
||||||
|
mv_cand[candidates][1] = a1->inter.mv[0][1];
|
||||||
|
candidates++;
|
||||||
|
} else {
|
||||||
|
a1 = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// B1 availability testing
|
||||||
|
if (y != 0) {
|
||||||
|
b1 = lcu != NULL?LCU_GET_CU_AT_PX(lcu, x_local + width - 1, y_local - 1): uvg_cu_array_at_const(cua, x + width - 1, y - 1);
|
||||||
|
// Do not check b1->coded because the block above is always coded
|
||||||
|
// before the current one and the flag is not set when searching an SMP
|
||||||
|
// block.
|
||||||
|
if (b1->type == CU_IBC) {
|
||||||
|
if(!is_duplicate_candidate_ibc(b1, a1)) {
|
||||||
|
inter_clear_cu_unused(b1);
|
||||||
|
mv_cand[candidates][0] = b1->inter.mv[0][0];
|
||||||
|
mv_cand[candidates][1] = b1->inter.mv[0][1];
|
||||||
|
candidates++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
b1 = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (candidates > 0)
|
||||||
|
uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[0][0], &mv_cand[0][1]);
|
||||||
|
if (candidates > 1)
|
||||||
|
uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[1][0], &mv_cand[1][1]);
|
||||||
|
|
||||||
|
if (candidates < IBC_MRG_MAX_NUM_CANDS)
|
||||||
|
{
|
||||||
|
const uint32_t ctu_row = (y >> LOG2_LCU_WIDTH);
|
||||||
|
const uint32_t ctu_row_mul_five = ctu_row * MAX_NUM_HMVP_CANDS;
|
||||||
|
int32_t num_cand = state->tile->frame->hmvp_size_ibc[ctu_row];
|
||||||
|
for (int i = 0; i < MIN(MAX_NUM_HMVP_CANDS,num_cand); i++) {
|
||||||
|
cu_info_t* cand = &state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five + i];
|
||||||
|
bool duplicate = false;
|
||||||
|
|
||||||
|
// Check that the HMVP candidate is not duplicate
|
||||||
|
if (is_duplicate_candidate_ibc(cand, a1)) {
|
||||||
|
duplicate = true;
|
||||||
|
} else if(is_duplicate_candidate_ibc(cand, b1)) {
|
||||||
|
duplicate = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// allow duplicates after the first hmvp lut item
|
||||||
|
if (!duplicate || i > 0) {
|
||||||
|
mv_cand[candidates][0] = cand->inter.mv[0][0];
|
||||||
|
mv_cand[candidates][1] = cand->inter.mv[0][1];
|
||||||
|
candidates++;
|
||||||
|
if (candidates == IBC_MRG_MAX_NUM_CANDS) return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fill with (0,0)
|
||||||
|
while (candidates < IBC_MRG_MAX_NUM_CANDS) {
|
||||||
|
mv_cand[candidates][0] = 0;
|
||||||
|
mv_cand[candidates][1] = 0;
|
||||||
|
candidates++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Get merge candidates for current block.
|
* \brief Get merge candidates for current block.
|
||||||
*
|
*
|
||||||
|
@ -1093,92 +1414,6 @@ static void get_spatial_merge_candidates_cua(const cu_array_t *cua,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static INLINE mv_t get_scaled_mv(mv_t mv, int scale)
|
|
||||||
{
|
|
||||||
int32_t scaled = scale * mv;
|
|
||||||
return CLIP(-131072, 131071, (scaled + 127 + (scaled < 0)) >> 8);
|
|
||||||
}
|
|
||||||
|
|
||||||
#define MV_EXPONENT_BITCOUNT 4
|
|
||||||
#define MV_MANTISSA_BITCOUNT 6
|
|
||||||
#define MV_MANTISSA_UPPER_LIMIT ((1 << (MV_MANTISSA_BITCOUNT - 1)) - 1)
|
|
||||||
#define MV_MANTISSA_LIMIT (1 << (MV_MANTISSA_BITCOUNT - 1))
|
|
||||||
#define MV_EXPONENT_MASK ((1 << MV_EXPONENT_BITCOUNT) - 1)
|
|
||||||
|
|
||||||
static int convert_mv_fixed_to_float(int32_t val)
|
|
||||||
{
|
|
||||||
uint32_t sign = val >> 31;
|
|
||||||
int scale = uvg_math_floor_log2((val ^ sign) | MV_MANTISSA_UPPER_LIMIT) - (MV_MANTISSA_BITCOUNT - 1);
|
|
||||||
|
|
||||||
int exponent;
|
|
||||||
uint32_t mantissa;
|
|
||||||
if (scale >= 0)
|
|
||||||
{
|
|
||||||
int round = (1 << scale) >> 1;
|
|
||||||
int n = (val + round) >> scale;
|
|
||||||
exponent = scale + ((n ^ sign) >> (MV_MANTISSA_BITCOUNT - 1));
|
|
||||||
mantissa = (n & MV_MANTISSA_UPPER_LIMIT) | (sign << (MV_MANTISSA_BITCOUNT - 1));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
exponent = 0;
|
|
||||||
mantissa = val;
|
|
||||||
}
|
|
||||||
|
|
||||||
return exponent | (mantissa << MV_EXPONENT_BITCOUNT);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int convert_mv_float_to_fixed(int val)
|
|
||||||
{
|
|
||||||
int exponent = val & MV_EXPONENT_MASK;
|
|
||||||
uint32_t mantissa = val >> MV_EXPONENT_BITCOUNT;
|
|
||||||
return exponent == 0 ? mantissa : (mantissa ^ MV_MANTISSA_LIMIT) << (exponent - 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int round_mv_comp(int x)
|
|
||||||
{
|
|
||||||
return convert_mv_float_to_fixed(convert_mv_fixed_to_float(x));
|
|
||||||
}
|
|
||||||
|
|
||||||
static void apply_mv_scaling_pocs(int32_t current_poc,
|
|
||||||
int32_t current_ref_poc,
|
|
||||||
int32_t neighbor_poc,
|
|
||||||
int32_t neighbor_ref_poc,
|
|
||||||
mv_t mv_cand[2])
|
|
||||||
{
|
|
||||||
int32_t diff_current = current_poc - current_ref_poc;
|
|
||||||
int32_t diff_neighbor = neighbor_poc - neighbor_ref_poc;
|
|
||||||
|
|
||||||
if (diff_current == diff_neighbor) return;
|
|
||||||
|
|
||||||
diff_current = CLIP(-128, 127, diff_current);
|
|
||||||
diff_neighbor = CLIP(-128, 127, diff_neighbor);
|
|
||||||
|
|
||||||
int scale = CLIP(-4096, 4095,
|
|
||||||
(diff_current * ((0x4000 + (abs(diff_neighbor) >> 1)) / diff_neighbor) + 32) >> 6);
|
|
||||||
|
|
||||||
mv_cand[0] = get_scaled_mv(mv_cand[0], scale);
|
|
||||||
mv_cand[1] = get_scaled_mv(mv_cand[1], scale);
|
|
||||||
}
|
|
||||||
|
|
||||||
static INLINE void apply_mv_scaling(const encoder_state_t *state,
|
|
||||||
const cu_info_t *current_cu,
|
|
||||||
const cu_info_t *neighbor_cu,
|
|
||||||
int8_t current_reflist,
|
|
||||||
int8_t neighbor_reflist,
|
|
||||||
mv_t mv_cand[2])
|
|
||||||
{
|
|
||||||
apply_mv_scaling_pocs(state->frame->poc,
|
|
||||||
state->frame->ref->pocs[
|
|
||||||
state->frame->ref_LX[current_reflist][
|
|
||||||
current_cu->inter.mv_ref[current_reflist]]],
|
|
||||||
state->frame->poc,
|
|
||||||
state->frame->ref->pocs[
|
|
||||||
state->frame->ref_LX[neighbor_reflist][
|
|
||||||
neighbor_cu->inter.mv_ref[neighbor_reflist]]],
|
|
||||||
mv_cand);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Try to add a temporal MVP or merge candidate.
|
* \brief Try to add a temporal MVP or merge candidate.
|
||||||
*
|
*
|
||||||
|
@ -1246,41 +1481,6 @@ static bool add_temporal_candidate(const encoder_state_t *state,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static INLINE bool add_mvp_candidate(const encoder_state_t *state,
|
|
||||||
const cu_info_t *cur_cu,
|
|
||||||
const cu_info_t *cand,
|
|
||||||
int8_t reflist,
|
|
||||||
bool scaling,
|
|
||||||
mv_t mv_cand_out[2])
|
|
||||||
{
|
|
||||||
if (!cand) return false;
|
|
||||||
|
|
||||||
assert(cand->inter.mv_dir != 0);
|
|
||||||
|
|
||||||
for (int i = 0; i < 2; i++) {
|
|
||||||
const int cand_list = i == 0 ? reflist : !reflist;
|
|
||||||
|
|
||||||
if ((cand->inter.mv_dir & (1 << cand_list)) == 0) continue;
|
|
||||||
|
|
||||||
if (scaling) {
|
|
||||||
mv_cand_out[0] = cand->inter.mv[cand_list][0];
|
|
||||||
mv_cand_out[1] = cand->inter.mv[cand_list][1];
|
|
||||||
apply_mv_scaling(state, cur_cu, cand, reflist, cand_list, mv_cand_out);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (state->frame->ref_LX[cand_list][cand->inter.mv_ref[cand_list]] ==
|
|
||||||
state->frame->ref_LX[reflist][cur_cu->inter.mv_ref[reflist]])
|
|
||||||
{
|
|
||||||
mv_cand_out[0] = cand->inter.mv[cand_list][0];
|
|
||||||
mv_cand_out[1] = cand->inter.mv[cand_list][1];
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Pick two mv candidates from the spatial and temporal candidates.
|
* \brief Pick two mv candidates from the spatial and temporal candidates.
|
||||||
*/
|
*/
|
||||||
|
@ -1407,14 +1607,20 @@ void uvg_inter_get_mv_cand(const encoder_state_t * const state,
|
||||||
{
|
{
|
||||||
merge_candidates_t merge_cand = { 0 };
|
merge_candidates_t merge_cand = { 0 };
|
||||||
const uint8_t parallel_merge_level = state->encoder_control->cfg.log2_parallel_merge_level;
|
const uint8_t parallel_merge_level = state->encoder_control->cfg.log2_parallel_merge_level;
|
||||||
get_spatial_merge_candidates(x, y, width, height,
|
if (cur_cu->type == CU_IBC) {
|
||||||
state->tile->frame->width,
|
mv_t ibc_mv_cand[IBC_MRG_MAX_NUM_CANDS][2];
|
||||||
state->tile->frame->height,
|
get_ibc_merge_candidates(state, cur_cu,lcu,NULL, x, y, width, height,ibc_mv_cand);
|
||||||
lcu,
|
memcpy(mv_cand[0], ibc_mv_cand[0], sizeof(mv_t) * 2);
|
||||||
&merge_cand, parallel_merge_level,state->encoder_control->cfg.wpp);
|
memcpy(mv_cand[1], ibc_mv_cand[1], sizeof(mv_t) * 2);
|
||||||
get_temporal_merge_candidates(state, x, y, width, height, 1, 0, &merge_cand);
|
} else {
|
||||||
get_mv_cand_from_candidates(state, x, y, width, height, &merge_cand, cur_cu, reflist, mv_cand);
|
get_spatial_merge_candidates(x, y, width, height,
|
||||||
|
state->tile->frame->width,
|
||||||
|
state->tile->frame->height,
|
||||||
|
lcu,
|
||||||
|
&merge_cand, parallel_merge_level,state->encoder_control->cfg.wpp);
|
||||||
|
get_temporal_merge_candidates(state, x, y, width, height, 1, 0, &merge_cand);
|
||||||
|
get_mv_cand_from_candidates(state, x, y, width, height, &merge_cand, cur_cu, reflist, mv_cand);
|
||||||
|
}
|
||||||
uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[0][0], &mv_cand[0][1]);
|
uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[0][0], &mv_cand[0][1]);
|
||||||
uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[1][0], &mv_cand[1][1]);
|
uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[1][0], &mv_cand[1][1]);
|
||||||
}
|
}
|
||||||
|
@ -1443,17 +1649,30 @@ void uvg_inter_get_mv_cand_cua(const encoder_state_t * const state,
|
||||||
merge_candidates_t merge_cand = { 0 };
|
merge_candidates_t merge_cand = { 0 };
|
||||||
|
|
||||||
const cu_array_t *cua = state->tile->frame->cu_array;
|
const cu_array_t *cua = state->tile->frame->cu_array;
|
||||||
get_spatial_merge_candidates_cua(cua,
|
if (cur_cu->type == CU_IBC) {
|
||||||
x, y, width, height,
|
mv_t ibc_mv_cand[IBC_MRG_MAX_NUM_CANDS][2];
|
||||||
state->tile->frame->width, state->tile->frame->height,
|
get_ibc_merge_candidates(state, cur_cu, NULL,cua,x, y, width, height,ibc_mv_cand);
|
||||||
&merge_cand, state->encoder_control->cfg.wpp);
|
memcpy(mv_cand[0], ibc_mv_cand[0], sizeof(mv_t) * 2);
|
||||||
get_temporal_merge_candidates(state, x, y, width, height, 1, 0, &merge_cand);
|
memcpy(mv_cand[1], ibc_mv_cand[1], sizeof(mv_t) * 2);
|
||||||
get_mv_cand_from_candidates(state, x, y, width, height, &merge_cand, cur_cu, reflist, mv_cand);
|
} else {
|
||||||
|
get_spatial_merge_candidates_cua(cua,
|
||||||
|
x, y, width, height,
|
||||||
|
state->tile->frame->width, state->tile->frame->height,
|
||||||
|
&merge_cand, state->encoder_control->cfg.wpp);
|
||||||
|
get_temporal_merge_candidates(state, x, y, width, height, 1, 0, &merge_cand);
|
||||||
|
get_mv_cand_from_candidates(state, x, y, width, height, &merge_cand, cur_cu, reflist, mv_cand);
|
||||||
|
}
|
||||||
uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[0][0], &mv_cand[0][1]);
|
uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[0][0], &mv_cand[0][1]);
|
||||||
uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[1][0], &mv_cand[1][1]);
|
uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[1][0], &mv_cand[1][1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Checks if two CUs have similar motion vectors. The function takes two CUs and compares their motion vectors.
|
||||||
|
* \param cu1 first CU
|
||||||
|
* \param cu2 second CU
|
||||||
|
* \return returns 0 if the two CUs have dissimilar motion vectors, and 1 if the motions are similar.
|
||||||
|
*/
|
||||||
|
|
||||||
static bool is_duplicate_candidate(const cu_info_t* cu1, const cu_info_t* cu2)
|
static bool is_duplicate_candidate(const cu_info_t* cu1, const cu_info_t* cu2)
|
||||||
{
|
{
|
||||||
if (!cu2) return false;
|
if (!cu2) return false;
|
||||||
|
@ -1472,6 +1691,16 @@ static bool is_duplicate_candidate(const cu_info_t* cu1, const cu_info_t* cu2)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds a merge candidate to the list of possible candidates, if it is not a duplicate.
|
||||||
|
*
|
||||||
|
* \param cand The candidate to be added.
|
||||||
|
* \param possible_duplicate1 The first possible duplicate candidate to check for duplication.
|
||||||
|
* \param possible_duplicate2 The second possible duplicate candidate to check for duplication.
|
||||||
|
* \param merge_cand_out The output parameter to store the merge candidate information.
|
||||||
|
*
|
||||||
|
* @return Returns true if the merge candidate was added successfully, false otherwise.
|
||||||
|
*/
|
||||||
static bool add_merge_candidate(const cu_info_t *cand,
|
static bool add_merge_candidate(const cu_info_t *cand,
|
||||||
const cu_info_t *possible_duplicate1,
|
const cu_info_t *possible_duplicate1,
|
||||||
const cu_info_t *possible_duplicate2,
|
const cu_info_t *possible_duplicate2,
|
||||||
|
@ -1503,14 +1732,23 @@ static void hmvp_shift_lut(cu_info_t* lut, int32_t size, int32_t start, int32_t
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool hmvp_push_lut_item(cu_info_t* lut, int32_t size, const cu_info_t* cu) {
|
static bool hmvp_push_lut_item(cu_info_t* lut, int32_t size, const cu_info_t* cu, bool ibc) {
|
||||||
|
|
||||||
int8_t duplicate = -1;
|
int8_t duplicate = -1;
|
||||||
|
|
||||||
for (int i = 0; i < size; i++) {
|
if (ibc) {
|
||||||
if (is_duplicate_candidate(cu, (const cu_info_t*)&lut[i])) {
|
for (int i = 0; i < size; i++) {
|
||||||
duplicate = i;
|
if (is_duplicate_candidate_ibc(cu, (const cu_info_t *)&lut[i])) {
|
||||||
break;
|
duplicate = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
if (is_duplicate_candidate(cu, (const cu_info_t *)&lut[i])) {
|
||||||
|
duplicate = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// If duplicate found, shift the whole lut up to the duplicate, otherwise to the end
|
// If duplicate found, shift the whole lut up to the duplicate, otherwise to the end
|
||||||
|
@ -1534,21 +1772,28 @@ static bool hmvp_push_lut_item(cu_info_t* lut, int32_t size, const cu_info_t* cu
|
||||||
void uvg_hmvp_add_mv(const encoder_state_t* const state, uint32_t pic_x, uint32_t pic_y, uint32_t block_width, uint32_t block_height, const cu_info_t* cu)
|
void uvg_hmvp_add_mv(const encoder_state_t* const state, uint32_t pic_x, uint32_t pic_y, uint32_t block_width, uint32_t block_height, const cu_info_t* cu)
|
||||||
{
|
{
|
||||||
//if (!cu.geoFlag && !cu.affine)
|
//if (!cu.geoFlag && !cu.affine)
|
||||||
if(cu->type == CU_INTER)
|
if(cu->type != CU_INTRA)
|
||||||
{
|
{
|
||||||
|
|
||||||
const uint8_t parallel_merge_level = state->encoder_control->cfg.log2_parallel_merge_level;
|
const uint8_t parallel_merge_level = state->encoder_control->cfg.log2_parallel_merge_level;
|
||||||
const uint32_t xBr = block_width + pic_x;
|
const uint32_t xBr = block_width + pic_x;
|
||||||
const uint32_t yBr = block_height + pic_y;
|
const uint32_t yBr = block_height + pic_y;
|
||||||
bool hmvp_possible = ((xBr >> parallel_merge_level) > (pic_x >> parallel_merge_level)) && ((yBr >> parallel_merge_level) > (pic_y >> parallel_merge_level));
|
bool hmvp_possible = ((xBr >> parallel_merge_level) > (pic_x >> parallel_merge_level)) && ((yBr >> parallel_merge_level) > (pic_y >> parallel_merge_level));
|
||||||
if (hmvp_possible) { // ToDo: check for IBC
|
if (hmvp_possible || cu->type == CU_IBC) {
|
||||||
const uint32_t ctu_row = (pic_y >> LOG2_LCU_WIDTH);
|
const uint32_t ctu_row = (pic_y >> LOG2_LCU_WIDTH);
|
||||||
const uint32_t ctu_row_mul_five = ctu_row * MAX_NUM_HMVP_CANDS;
|
const uint32_t ctu_row_mul_five = ctu_row * MAX_NUM_HMVP_CANDS;
|
||||||
|
|
||||||
|
|
||||||
bool add_row = hmvp_push_lut_item(&state->tile->frame->hmvp_lut[ctu_row_mul_five], state->tile->frame->hmvp_size[ctu_row], cu);
|
if (cu->type == CU_IBC) {
|
||||||
if(add_row && state->tile->frame->hmvp_size[ctu_row] < MAX_NUM_HMVP_CANDS) {
|
bool add_row = hmvp_push_lut_item(&state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five], state->tile->frame->hmvp_size_ibc[ctu_row], cu, true);
|
||||||
state->tile->frame->hmvp_size[ctu_row]++;
|
if(add_row && state->tile->frame->hmvp_size_ibc[ctu_row] < MAX_NUM_HMVP_CANDS) {
|
||||||
|
state->tile->frame->hmvp_size_ibc[ctu_row]++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
bool add_row = hmvp_push_lut_item(&state->tile->frame->hmvp_lut[ctu_row_mul_five], state->tile->frame->hmvp_size[ctu_row], cu, false);
|
||||||
|
if(add_row && state->tile->frame->hmvp_size[ctu_row] < MAX_NUM_HMVP_CANDS) {
|
||||||
|
state->tile->frame->hmvp_size[ctu_row]++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1652,6 +1897,19 @@ uint8_t uvg_inter_get_merge_cand(const encoder_state_t * const state,
|
||||||
const uint8_t parallel_merge_level = state->encoder_control->cfg.log2_parallel_merge_level;
|
const uint8_t parallel_merge_level = state->encoder_control->cfg.log2_parallel_merge_level;
|
||||||
merge_candidates_t merge_cand = { 0 };
|
merge_candidates_t merge_cand = { 0 };
|
||||||
const uint8_t max_num_cands = state->encoder_control->cfg.max_merge;
|
const uint8_t max_num_cands = state->encoder_control->cfg.max_merge;
|
||||||
|
|
||||||
|
cu_info_t *cur_cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y));
|
||||||
|
if(cur_cu->type == CU_IBC) {
|
||||||
|
mv_t ibc_mv_cand[IBC_MRG_MAX_NUM_CANDS][2];
|
||||||
|
get_ibc_merge_candidates(state, cur_cu,lcu,NULL, x, y, width, height,ibc_mv_cand);
|
||||||
|
for (int i = 0; i < IBC_MRG_MAX_NUM_CANDS; i++) {
|
||||||
|
mv_cand[i].dir = 1;
|
||||||
|
mv_cand[i].mv[0][0] = ibc_mv_cand[i][0];
|
||||||
|
mv_cand[i].mv[0][1] = ibc_mv_cand[i][1];
|
||||||
|
}
|
||||||
|
return IBC_MRG_MAX_NUM_CANDS;
|
||||||
|
}
|
||||||
|
|
||||||
get_spatial_merge_candidates(x, y, width, height,
|
get_spatial_merge_candidates(x, y, width, height,
|
||||||
state->tile->frame->width,
|
state->tile->frame->width,
|
||||||
state->tile->frame->height,
|
state->tile->frame->height,
|
||||||
|
@ -1721,7 +1979,6 @@ uint8_t uvg_inter_get_merge_cand(const encoder_state_t * const state,
|
||||||
|
|
||||||
for (int i = 0; i < num_cand; i++) {
|
for (int i = 0; i < num_cand; i++) {
|
||||||
const cu_info_t* hmvp_cand = &state->tile->frame->hmvp_lut[ctu_row_mul_five + i];
|
const cu_info_t* hmvp_cand = &state->tile->frame->hmvp_lut[ctu_row_mul_five + i];
|
||||||
// ToDo: Add IBC condition
|
|
||||||
if (i > 1 || ((!is_duplicate_candidate(hmvp_cand, a[1]))
|
if (i > 1 || ((!is_duplicate_candidate(hmvp_cand, a[1]))
|
||||||
&& (!is_duplicate_candidate(hmvp_cand, b[1]))) ) {
|
&& (!is_duplicate_candidate(hmvp_cand, b[1]))) ) {
|
||||||
mv_cand[candidates].mv[0][0] = state->tile->frame->hmvp_lut[ctu_row_mul_five + i].inter.mv[0][0];
|
mv_cand[candidates].mv[0][0] = state->tile->frame->hmvp_lut[ctu_row_mul_five + i].inter.mv[0][0];
|
||||||
|
|
103
src/rdo.c
103
src/rdo.c
|
@ -1773,6 +1773,109 @@ double uvg_get_mvd_coding_cost_cabac(const encoder_state_t* state,
|
||||||
return bits;
|
return bits;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** MVD cost calculation with CABAC
|
||||||
|
* \returns double: the bit cost scaled by state->lambda_sqrt; the raw bit count is written to *bitcost
|
||||||
|
* Calculates Motion Vector cost and related costs using CABAC coding
|
||||||
|
*/
|
||||||
|
double uvg_calc_ibc_mvd_cost_cabac(const encoder_state_t * state,
|
||||||
|
int x,
|
||||||
|
int y,
|
||||||
|
int mv_shift,
|
||||||
|
mv_t mv_cand[2][2],
|
||||||
|
inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
|
||||||
|
int16_t num_cand,
|
||||||
|
int32_t ref_idx,
|
||||||
|
double* bitcost)
|
||||||
|
{
|
||||||
|
cabac_data_t state_cabac_copy;
|
||||||
|
cabac_data_t* cabac;
|
||||||
|
uint32_t merge_idx;
|
||||||
|
vector2d_t mvd = { 0, 0 };
|
||||||
|
int8_t merged = 0;
|
||||||
|
int8_t cur_mv_cand = 0;
|
||||||
|
|
||||||
|
x *= 1 << mv_shift;
|
||||||
|
y *= 1 << mv_shift;
|
||||||
|
|
||||||
|
// Check every candidate to find a match
|
||||||
|
for (merge_idx = 0; merge_idx < (uint32_t)num_cand; merge_idx++) {
|
||||||
|
if (merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][0] == x &&
|
||||||
|
merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][1] == y)
|
||||||
|
{
|
||||||
|
merged = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store cabac state and contexts
|
||||||
|
memcpy(&state_cabac_copy, &state->search_cabac, sizeof(cabac_data_t));
|
||||||
|
|
||||||
|
// Clear bytes and bits and set mode to "count"
|
||||||
|
state_cabac_copy.only_count = 1;
|
||||||
|
|
||||||
|
cabac = &state_cabac_copy;
|
||||||
|
double bits = 0;
|
||||||
|
|
||||||
|
if (!merged) {
|
||||||
|
vector2d_t mvd1 = {
|
||||||
|
x - mv_cand[0][0],
|
||||||
|
y - mv_cand[0][1],
|
||||||
|
};
|
||||||
|
vector2d_t mvd2 = {
|
||||||
|
x - mv_cand[1][0],
|
||||||
|
y - mv_cand[1][1],
|
||||||
|
};
|
||||||
|
|
||||||
|
uvg_change_precision_vector2d(INTERNAL_MV_PREC, 2, &mvd1);
|
||||||
|
uvg_change_precision_vector2d(INTERNAL_MV_PREC, 2, &mvd2);
|
||||||
|
|
||||||
|
double cand1_cost = uvg_get_mvd_coding_cost_cabac(state, cabac, mvd1.x, mvd1.y);
|
||||||
|
double cand2_cost = uvg_get_mvd_coding_cost_cabac(state, cabac, mvd2.x, mvd2.y);
|
||||||
|
|
||||||
|
// Select candidate 1 if it has lower cost
|
||||||
|
if (cand2_cost < cand1_cost) {
|
||||||
|
cur_mv_cand = 1;
|
||||||
|
mvd = mvd2;
|
||||||
|
} else {
|
||||||
|
mvd = mvd1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cabac->cur_ctx = &(cabac->ctx.cu_merge_flag_ext_model);
|
||||||
|
|
||||||
|
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_merge_flag_ext_model), merged, bits, "MergeFlag");
|
||||||
|
num_cand = state->encoder_control->cfg.max_merge;
|
||||||
|
if (merged) {
|
||||||
|
if (num_cand > 1) {
|
||||||
|
int32_t ui;
|
||||||
|
for (ui = 0; ui < num_cand - 1; ui++) {
|
||||||
|
int32_t symbol = (ui != merge_idx);
|
||||||
|
if (ui == 0) {
|
||||||
|
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_merge_idx_ext_model), symbol, bits, "MergeIndex");
|
||||||
|
} else {
|
||||||
|
CABAC_BIN_EP(cabac, symbol, "MergeIndex");
|
||||||
|
bits += 1;
|
||||||
|
}
|
||||||
|
if (symbol == 0) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
|
||||||
|
// It is safe to drop const here because cabac->only_count is set.
|
||||||
|
uvg_encode_mvd((encoder_state_t*) state, cabac, mvd.x, mvd.y, &bits);
|
||||||
|
|
||||||
|
// Signal which candidate MV to use
|
||||||
|
cabac->cur_ctx = &(cabac->ctx.mvp_idx_model);
|
||||||
|
CABAC_BIN(cabac, cur_mv_cand, "mvp_flag");
|
||||||
|
}
|
||||||
|
|
||||||
|
*bitcost = bits;
|
||||||
|
|
||||||
|
// Store bitcost before restoring cabac
|
||||||
|
return *bitcost * state->lambda_sqrt;
|
||||||
|
}
|
||||||
|
|
||||||
/** MVD cost calculation with CABAC
|
/** MVD cost calculation with CABAC
|
||||||
* \returns int
|
* \returns int
|
||||||
* Calculates Motion Vector cost and related costs using CABAC coding
|
* Calculates Motion Vector cost and related costs using CABAC coding
|
||||||
|
|
|
@ -88,6 +88,7 @@ uint32_t uvg_get_coded_level(encoder_state_t * state, double* coded_cost, double
|
||||||
int32_t q_bits,double temp, int8_t last, int8_t type);
|
int32_t q_bits,double temp, int8_t last, int8_t type);
|
||||||
|
|
||||||
uvg_mvd_cost_func uvg_calc_mvd_cost_cabac;
|
uvg_mvd_cost_func uvg_calc_mvd_cost_cabac;
|
||||||
|
uvg_mvd_cost_func uvg_calc_ibc_mvd_cost_cabac;
|
||||||
|
|
||||||
double uvg_get_mvd_coding_cost_cabac(const encoder_state_t* state,
|
double uvg_get_mvd_coding_cost_cabac(const encoder_state_t* state,
|
||||||
const cabac_data_t* cabac,
|
const cabac_data_t* cabac,
|
||||||
|
|
64
src/search.c
64
src/search.c
|
@ -45,6 +45,7 @@
|
||||||
#include "rdo.h"
|
#include "rdo.h"
|
||||||
#include "search_inter.h"
|
#include "search_inter.h"
|
||||||
#include "search_intra.h"
|
#include "search_intra.h"
|
||||||
|
#include "search_ibc.h"
|
||||||
#include "threadqueue.h"
|
#include "threadqueue.h"
|
||||||
#include "transform.h"
|
#include "transform.h"
|
||||||
#include "videoframe.h"
|
#include "videoframe.h"
|
||||||
|
@ -179,7 +180,7 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void lcu_fill_inter(lcu_t *lcu, int x_local, int y_local, int cu_width)
|
static void lcu_fill_inter(lcu_t *lcu, int x_local, int y_local, int cu_width, uint8_t type)
|
||||||
{
|
{
|
||||||
const part_mode_t part_mode = LCU_GET_CU_AT_PX(lcu, x_local, y_local)->part_size;
|
const part_mode_t part_mode = LCU_GET_CU_AT_PX(lcu, x_local, y_local)->part_size;
|
||||||
const int num_pu = uvg_part_mode_num_parts[part_mode];
|
const int num_pu = uvg_part_mode_num_parts[part_mode];
|
||||||
|
@ -191,7 +192,7 @@ static void lcu_fill_inter(lcu_t *lcu, int x_local, int y_local, int cu_width)
|
||||||
const int height_pu = PU_GET_H(part_mode, cu_width, i);
|
const int height_pu = PU_GET_H(part_mode, cu_width, i);
|
||||||
|
|
||||||
cu_info_t *pu = LCU_GET_CU_AT_PX(lcu, x_pu, y_pu);
|
cu_info_t *pu = LCU_GET_CU_AT_PX(lcu, x_pu, y_pu);
|
||||||
pu->type = CU_INTER;
|
pu->type = type;
|
||||||
lcu_fill_cu_info(lcu, x_pu, y_pu, width_pu, height_pu, pu);
|
lcu_fill_cu_info(lcu, x_pu, y_pu, width_pu, height_pu, pu);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -306,7 +307,7 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state,
|
||||||
lcu_t *const lcu)
|
lcu_t *const lcu)
|
||||||
{
|
{
|
||||||
const int width = LCU_WIDTH >> depth;
|
const int width = LCU_WIDTH >> depth;
|
||||||
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0);
|
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0);
|
||||||
cabac_data_t* cabac = (cabac_data_t *)&state->search_cabac;
|
cabac_data_t* cabac = (cabac_data_t *)&state->search_cabac;
|
||||||
|
|
||||||
// cur_cu is used for TU parameters.
|
// cur_cu is used for TU parameters.
|
||||||
|
@ -380,7 +381,7 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
|
||||||
const vector2d_t lcu_px = { (x_px & ~7) / 2, (y_px & ~7) / 2 };
|
const vector2d_t lcu_px = { (x_px & ~7) / 2, (y_px & ~7) / 2 };
|
||||||
const int width = (depth < MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth;
|
const int width = (depth < MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth;
|
||||||
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
|
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
|
||||||
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0);
|
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0);
|
||||||
|
|
||||||
double tr_tree_bits = 0;
|
double tr_tree_bits = 0;
|
||||||
double coeff_bits = 0;
|
double coeff_bits = 0;
|
||||||
|
@ -477,7 +478,7 @@ static double cu_rd_cost_tr_split_accurate(
|
||||||
enum uvg_tree_type tree_type) {
|
enum uvg_tree_type tree_type) {
|
||||||
const int width = LCU_WIDTH >> depth;
|
const int width = LCU_WIDTH >> depth;
|
||||||
|
|
||||||
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0);
|
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0);
|
||||||
// cur_cu is used for TU parameters.
|
// cur_cu is used for TU parameters.
|
||||||
cu_info_t* const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
|
cu_info_t* const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
|
||||||
|
|
||||||
|
@ -499,7 +500,7 @@ static double cu_rd_cost_tr_split_accurate(
|
||||||
int cbf = cbf_is_set_any(pred_cu->cbf, depth);
|
int cbf = cbf_is_set_any(pred_cu->cbf, depth);
|
||||||
// Only need to signal coded block flag if not skipped or merged
|
// Only need to signal coded block flag if not skipped or merged
|
||||||
// skip = no coded residual, merge = coded residual
|
// skip = no coded residual, merge = coded residual
|
||||||
if (pred_cu->type == CU_INTER && (pred_cu->part_size != SIZE_2Nx2N || !pred_cu->merged)) {
|
if (pred_cu->type != CU_INTRA && (pred_cu->part_size != SIZE_2Nx2N || !pred_cu->merged)) {
|
||||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_qt_root_cbf_model), cbf, tr_tree_bits, "rqt_root_cbf");
|
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_qt_root_cbf_model), cbf, tr_tree_bits, "rqt_root_cbf");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -803,9 +804,12 @@ static double search_cu(
|
||||||
|
|
||||||
cu_info_t hmvp_lut[MAX_NUM_HMVP_CANDS];
|
cu_info_t hmvp_lut[MAX_NUM_HMVP_CANDS];
|
||||||
uint8_t hmvp_lut_size = state->tile->frame->hmvp_size[ctu_row];
|
uint8_t hmvp_lut_size = state->tile->frame->hmvp_size[ctu_row];
|
||||||
|
cu_info_t hmvp_lut_ibc[MAX_NUM_HMVP_CANDS];
|
||||||
|
uint8_t hmvp_lut_size_ibc = state->tile->frame->hmvp_size_ibc[ctu_row];
|
||||||
|
|
||||||
// Store original HMVP lut before search and restore after, since it's modified
|
// Store original HMVP lut before search and restore after, since it's modified
|
||||||
if (state->frame->slicetype != UVG_SLICE_I) memcpy(hmvp_lut, &state->tile->frame->hmvp_lut[ctu_row_mul_five], sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
|
if (state->frame->slicetype != UVG_SLICE_I) memcpy(hmvp_lut, &state->tile->frame->hmvp_lut[ctu_row_mul_five], sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
|
||||||
|
if(state->encoder_control->cfg.ibc) memcpy(hmvp_lut_ibc, &state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five], sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
int32_t min;
|
int32_t min;
|
||||||
|
@ -1006,6 +1010,34 @@ static double search_cu(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Simple IBC search
|
||||||
|
if (can_use_intra //&& state->frame->slicetype == UVG_SLICE_I
|
||||||
|
&& state->encoder_control->cfg.ibc
|
||||||
|
&& cost > 1000
|
||||||
|
&& cu_width > 4
|
||||||
|
&& (x >= cu_width || y >= cu_width)
|
||||||
|
&& !cur_cu->skipped) {
|
||||||
|
|
||||||
|
cu_info_t backup_cu = *cur_cu;
|
||||||
|
|
||||||
|
double mode_cost;
|
||||||
|
double mode_bitcost;
|
||||||
|
uvg_search_cu_ibc(state,
|
||||||
|
x, y,
|
||||||
|
depth,
|
||||||
|
lcu,
|
||||||
|
&mode_cost, &mode_bitcost);
|
||||||
|
if (mode_cost < cost) {
|
||||||
|
cost = mode_cost;
|
||||||
|
inter_bitcost = mode_bitcost;
|
||||||
|
cur_cu->type = CU_IBC;
|
||||||
|
cur_cu->inter.mv_dir = 1;
|
||||||
|
cur_cu->joint_cb_cr = 0;
|
||||||
|
} else {
|
||||||
|
*cur_cu = backup_cu;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Reconstruct best mode because we need the reconstructed pixels for
|
// Reconstruct best mode because we need the reconstructed pixels for
|
||||||
// mode search of adjacent CUs.
|
// mode search of adjacent CUs.
|
||||||
if (cur_cu->type == CU_INTRA) {
|
if (cur_cu->type == CU_INTRA) {
|
||||||
|
@ -1035,7 +1067,7 @@ static double search_cu(
|
||||||
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
|
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
|
||||||
|
|
||||||
|
|
||||||
} else if (cur_cu->type == CU_INTER) {
|
} else if (cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) {
|
||||||
|
|
||||||
if (!cur_cu->skipped) {
|
if (!cur_cu->skipped) {
|
||||||
|
|
||||||
|
@ -1081,12 +1113,12 @@ static double search_cu(
|
||||||
inter_bitcost += cur_cu->merge_idx;
|
inter_bitcost += cur_cu->merge_idx;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
lcu_fill_inter(lcu, x_local, y_local, cu_width);
|
lcu_fill_inter(lcu, x_local, y_local, cu_width, cur_cu->type);
|
||||||
lcu_fill_cbf(lcu, x_local, y_local, cu_width, cur_cu);
|
lcu_fill_cbf(lcu, x_local, y_local, cu_width, cur_cu);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cur_cu->type == CU_INTRA || cur_cu->type == CU_INTER) {
|
if (cur_cu->type == CU_INTRA || cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) {
|
||||||
double bits = 0;
|
double bits = 0;
|
||||||
cabac_data_t* cabac = &state->search_cabac;
|
cabac_data_t* cabac = &state->search_cabac;
|
||||||
cabac->update = 1;
|
cabac->update = 1;
|
||||||
|
@ -1290,6 +1322,13 @@ static double search_cu(
|
||||||
// Reset HMVP to the beginning of this CU level search and add this CU as the mvp
|
// Reset HMVP to the beginning of this CU level search and add this CU as the mvp
|
||||||
memcpy(&state->tile->frame->hmvp_lut[ctu_row_mul_five], hmvp_lut, sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
|
memcpy(&state->tile->frame->hmvp_lut[ctu_row_mul_five], hmvp_lut, sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
|
||||||
state->tile->frame->hmvp_size[ctu_row] = hmvp_lut_size;
|
state->tile->frame->hmvp_size[ctu_row] = hmvp_lut_size;
|
||||||
|
}
|
||||||
|
if (state->encoder_control->cfg.ibc) {
|
||||||
|
memcpy(&state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five], hmvp_lut_ibc, sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
|
||||||
|
state->tile->frame->hmvp_size_ibc[ctu_row] = hmvp_lut_size_ibc;
|
||||||
|
}
|
||||||
|
// Add candidate when in inter slice or ibc is enabled
|
||||||
|
if(state->frame->slicetype != UVG_SLICE_I || state->encoder_control->cfg.ibc) {
|
||||||
uvg_hmvp_add_mv(state, x, y, cu_width, cu_width, cur_cu);
|
uvg_hmvp_add_mv(state, x, y, cu_width, cu_width, cur_cu);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1312,6 +1351,13 @@ static double search_cu(
|
||||||
// Reset HMVP to the beginning of this CU level search and add this CU as the mvp
|
// Reset HMVP to the beginning of this CU level search and add this CU as the mvp
|
||||||
memcpy(&state->tile->frame->hmvp_lut[ctu_row_mul_five], hmvp_lut, sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
|
memcpy(&state->tile->frame->hmvp_lut[ctu_row_mul_five], hmvp_lut, sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
|
||||||
state->tile->frame->hmvp_size[ctu_row] = hmvp_lut_size;
|
state->tile->frame->hmvp_size[ctu_row] = hmvp_lut_size;
|
||||||
|
}
|
||||||
|
if (state->encoder_control->cfg.ibc) {
|
||||||
|
memcpy(&state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five], hmvp_lut_ibc, sizeof(cu_info_t) * MAX_NUM_HMVP_CANDS);
|
||||||
|
state->tile->frame->hmvp_size_ibc[ctu_row] = hmvp_lut_size_ibc;
|
||||||
|
}
|
||||||
|
// Add candidate when in inter slice or ibc is enabled
|
||||||
|
if(state->frame->slicetype != UVG_SLICE_I || state->encoder_control->cfg.ibc) {
|
||||||
uvg_hmvp_add_mv(state, x, y, cu_width, cu_width, cur_cu);
|
uvg_hmvp_add_mv(state, x, y, cu_width, cu_width, cur_cu);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
1389
src/search_ibc.c
Normal file
1389
src/search_ibc.c
Normal file
File diff suppressed because it is too large
Load diff
55
src/search_ibc.h
Normal file
55
src/search_ibc.h
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
/*****************************************************************************
|
||||||
|
* This file is part of uvg266 VVC encoder.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
* are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice, this
|
||||||
|
* list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright notice, this
|
||||||
|
* list of conditions and the following disclaimer in the documentation and/or
|
||||||
|
* other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from
|
||||||
|
* this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||||
|
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
* INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
|
||||||
|
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
* INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \ingroup Compression
|
||||||
|
* \file
|
||||||
|
* Intra block copy (IBC) prediction parameter search.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "cu.h"
|
||||||
|
#include "encoderstate.h"
|
||||||
|
#include "global.h" // IWYU pragma: keep
|
||||||
|
#include "inter.h"
|
||||||
|
#include "uvg266.h"
|
||||||
|
|
||||||
|
|
||||||
|
void uvg_search_cu_ibc(encoder_state_t * const state,
|
||||||
|
int x, int y, int depth,
|
||||||
|
lcu_t *lcu,
|
||||||
|
double *inter_cost,
|
||||||
|
double* inter_bitcost);
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -312,6 +312,55 @@ static void select_starting_point(inter_search_info_t *info,
|
||||||
check_mv_cost(info, extra_mv.x, extra_mv.y, best_cost, best_bits, best_mv);
|
check_mv_cost(info, extra_mv.x, extra_mv.y, best_cost, best_bits, best_mv);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (info->state->encoder_control->cfg.ibc & 2) {
|
||||||
|
int origin_x = info->origin.x;
|
||||||
|
int origin_y = info->origin.y;
|
||||||
|
|
||||||
|
int ibc_origin_x = origin_x / UVG_HASHMAP_BLOCKSIZE;
|
||||||
|
int ibc_origin_y = origin_y / UVG_HASHMAP_BLOCKSIZE;
|
||||||
|
|
||||||
|
int own_location = ((origin_x & 0xffff) << 16) | (origin_y & 0xffff);
|
||||||
|
|
||||||
|
uint32_t ibc_buffer_row = origin_y / LCU_WIDTH;
|
||||||
|
|
||||||
|
uint32_t crc = info->state->tile->frame->ibc_hashmap_pos_to_hash
|
||||||
|
[(origin_y / UVG_HASHMAP_BLOCKSIZE) *
|
||||||
|
info->state->tile->frame->ibc_hashmap_pos_to_hash_stride +
|
||||||
|
origin_x / UVG_HASHMAP_BLOCKSIZE];
|
||||||
|
|
||||||
|
uvg_hashmap_node_t *result = uvg_hashmap_search(
|
||||||
|
info->state->tile->frame->ibc_hashmap_row[ibc_buffer_row], crc);
|
||||||
|
|
||||||
|
while (result != NULL) {
|
||||||
|
if (result->key == crc && result->value != own_location) {
|
||||||
|
int pos_x = result->value >> 16;
|
||||||
|
int pos_y = result->value & 0xffff;
|
||||||
|
int mv_x = pos_x - origin_x;
|
||||||
|
int mv_y = pos_y - origin_y;
|
||||||
|
|
||||||
|
int ibc_pos_x = pos_x / UVG_HASHMAP_BLOCKSIZE;
|
||||||
|
int ibc_pos_y = pos_y / UVG_HASHMAP_BLOCKSIZE;
|
||||||
|
|
||||||
|
bool full_block = true;
|
||||||
|
for (int ibc_x = 0; ibc_x < info->width / UVG_HASHMAP_BLOCKSIZE; ibc_x++) {
|
||||||
|
for (int ibc_y = 0; ibc_y < info->height / UVG_HASHMAP_BLOCKSIZE; ibc_y++) {
|
||||||
|
uint32_t neighbor_crc = info->state->tile->frame->ibc_hashmap_pos_to_hash
|
||||||
|
[(ibc_pos_y+ibc_y) * info->state->tile->frame->ibc_hashmap_pos_to_hash_stride + ibc_pos_x + ibc_x];
|
||||||
|
uint32_t other_crc = info->state->tile->frame->ibc_hashmap_pos_to_hash
|
||||||
|
[(ibc_origin_y+ibc_y) * info->state->tile->frame->ibc_hashmap_pos_to_hash_stride + ibc_origin_x + ibc_x];
|
||||||
|
if (other_crc != neighbor_crc) {
|
||||||
|
full_block = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!full_block) break;
|
||||||
|
}
|
||||||
|
if (full_block) check_mv_cost(info, mv_x, mv_y, best_cost, best_bits, best_mv);
|
||||||
|
}
|
||||||
|
result = result->next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Go through candidates
|
// Go through candidates
|
||||||
for (int32_t i = 0; i < info->num_merge_cand; ++i) {
|
for (int32_t i = 0; i < info->num_merge_cand; ++i) {
|
||||||
if (info->merge_cand[i].dir == 3) continue;
|
if (info->merge_cand[i].dir == 3) continue;
|
||||||
|
|
|
@ -793,9 +793,83 @@ static void generate_residual_generic(const uvg_pixel* ref_in, const uvg_pixel*
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
INLINE static uint32_t uvg_crc32c_4_generic(uint32_t crc, const uvg_pixel *buf)
|
||||||
|
{
|
||||||
|
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[0]) & 0xFF];
|
||||||
|
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[1]) & 0xFF];
|
||||||
|
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[2]) & 0xFF];
|
||||||
|
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[3]) & 0xFF];
|
||||||
|
return crc;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
INLINE static uint32_t uvg_crc32c_8_generic(uint32_t crc, const uvg_pixel *buf)
|
||||||
|
{
|
||||||
|
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[0]) & 0xFF];
|
||||||
|
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[1]) & 0xFF];
|
||||||
|
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[2]) & 0xFF];
|
||||||
|
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[3]) & 0xFF];
|
||||||
|
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[4]) & 0xFF];
|
||||||
|
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[5]) & 0xFF];
|
||||||
|
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[6]) & 0xFF];
|
||||||
|
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[7]) & 0xFF];
|
||||||
|
return crc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * \brief Table-driven CRC over a 4x4 block, four elements per row.
 *
 * The CRC starts from 0xFFFFFFFF, is advanced row by row from top to bottom,
 * and is bit-inverted before being returned.
 *
 * \param buf         Pointer to the top-left element of the block.
 * \param pic_stride  Distance between vertically adjacent rows, in elements.
 * \returns           Hash of the block.
 */
static uint32_t uvg_crc32c_4x4_8bit_generic(const uvg_pixel *buf, uint32_t pic_stride)
{
  uint32_t crc = 0xFFFFFFFF;
  for (uint32_t row = 0; row < 4; ++row) {
    crc = uvg_crc32c_4_generic(crc, &buf[row * pic_stride]);
  }
  return ~crc;
}
|
||||||
|
|
||||||
|
static uint32_t uvg_crc32c_4x4_16bit_generic(const uvg_pixel *buf, uint32_t pic_stride)
|
||||||
|
{
|
||||||
|
uint32_t crc = 0xFFFFFFFF;
|
||||||
|
crc = uvg_crc32c_4_generic(crc, &buf[0 * pic_stride]);
|
||||||
|
crc = uvg_crc32c_4_generic(crc, &buf[0 * pic_stride] + 4);
|
||||||
|
|
||||||
|
crc = uvg_crc32c_4_generic(crc, &buf[1 * pic_stride]);
|
||||||
|
crc = uvg_crc32c_4_generic(crc, &buf[1 * pic_stride] + 4);
|
||||||
|
|
||||||
|
crc = uvg_crc32c_4_generic(crc, &buf[2 * pic_stride]);
|
||||||
|
crc = uvg_crc32c_4_generic(crc, &buf[2 * pic_stride] + 4);
|
||||||
|
|
||||||
|
crc = uvg_crc32c_4_generic(crc, &buf[3 * pic_stride]);
|
||||||
|
crc = uvg_crc32c_4_generic(crc, &buf[3 * pic_stride] + 4);
|
||||||
|
return crc ^ 0xFFFFFFFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * \brief Table-driven CRC over an 8x8 block, eight elements per row.
 *
 * The CRC starts from 0xFFFFFFFF, is advanced row by row from top to bottom,
 * and is bit-inverted before being returned.
 *
 * \param buf         Pointer to the top-left element of the block.
 * \param pic_stride  Distance between vertically adjacent rows, in elements.
 * \returns           Hash of the block.
 */
static uint32_t uvg_crc32c_8x8_8bit_generic(const uvg_pixel *buf, uint32_t pic_stride)
{
  uint32_t crc = 0xFFFFFFFF;
  for (uint32_t row = 0; row < 8; ++row) {
    crc = uvg_crc32c_8_generic(crc, &buf[row * pic_stride]);
  }
  return ~crc;
}
|
||||||
|
|
||||||
int uvg_strategy_register_picture_generic(void* opaque, uint8_t bitdepth)
|
int uvg_strategy_register_picture_generic(void* opaque, uint8_t bitdepth)
|
||||||
{
|
{
|
||||||
bool success = true;
|
bool success = true;
|
||||||
|
if (bitdepth == 8) {
|
||||||
|
success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "generic", 0, &uvg_crc32c_4x4_8bit_generic);
|
||||||
|
success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "generic", 0, &uvg_crc32c_8x8_8bit_generic);
|
||||||
|
} else {
|
||||||
|
success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "generic", 0, &uvg_crc32c_4x4_16bit_generic);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
success &= uvg_strategyselector_register(opaque, "reg_sad", "generic", 0, &reg_sad_generic);
|
success &= uvg_strategyselector_register(opaque, "reg_sad", "generic", 0, &reg_sad_generic);
|
||||||
|
|
||||||
|
|
95
src/strategies/sse42/picture-sse42.c
Normal file
95
src/strategies/sse42/picture-sse42.c
Normal file
|
@ -0,0 +1,95 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
* This file is part of uvg266 VVC encoder.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2023, Tampere University, ITU/ISO/IEC, project contributors
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
* are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice, this
|
||||||
|
* list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright notice, this
|
||||||
|
* list of conditions and the following disclaimer in the documentation and/or
|
||||||
|
* other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from
|
||||||
|
* this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||||
|
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
* INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
|
||||||
|
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
* INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
#include "global.h"
|
||||||
|
|
||||||
|
#if COMPILE_INTEL_SSE42
|
||||||
|
#include "uvg266.h"
|
||||||
|
|
||||||
|
#include "strategies/sse42/picture-sse42.h"
|
||||||
|
|
||||||
|
#include <immintrin.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include "strategyselector.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * \brief Hardware CRC over a 4x4 block, one 32-bit load per row.
 *
 * The CRC starts from 0xFFFFFFFF, is advanced with _mm_crc32_u32 on the
 * first four bytes of each of the four rows, and is inverted at the end.
 *
 * \param buf         Pointer to the top-left element of the block.
 * \param pic_stride  Distance between vertically adjacent rows, in elements.
 * \returns           Hash of the block.
 */
static uint32_t uvg_crc32c_4x4_8bit_sse42(const uvg_pixel *buf, uint32_t pic_stride)
{
  uint32_t crc = 0xFFFFFFFF;
  for (uint32_t row = 0; row < 4; ++row) {
    const uint32_t row_word = *((uint32_t *)&buf[row * pic_stride]);
    crc = _mm_crc32_u32(crc, row_word);
  }
  return ~crc;
}
|
||||||
|
|
||||||
|
/**
 * \brief Hardware CRC over a 4x4 block, one 64-bit load per row.
 *
 * The CRC starts from 0xFFFFFFFF, is advanced with _mm_crc32_u64 on the
 * first eight bytes of each of the four rows, and its low 32 bits are
 * inverted and returned.
 *
 * \param buf         Pointer to the top-left element of the block.
 * \param pic_stride  Distance between vertically adjacent rows, in elements.
 * \returns           Hash of the block.
 */
static uint32_t uvg_crc32c_4x4_16bit_sse42(const uvg_pixel *buf, uint32_t pic_stride)
{
  uint64_t crc = 0xFFFFFFFF;
  for (uint32_t row = 0; row < 4; ++row) {
    const uint64_t row_word = *((uint64_t *)&buf[row * pic_stride]);
    crc = _mm_crc32_u64(crc, row_word);
  }
  return ~(uint32_t)crc;
}
|
||||||
|
|
||||||
|
/**
 * \brief Hardware CRC over an 8x8 block, one 64-bit load per row.
 *
 * The CRC starts from 0xFFFFFFFF, is advanced with _mm_crc32_u64 on the
 * first eight bytes of each of the eight rows, and its low 32 bits are
 * inverted and returned.
 *
 * \param buf         Pointer to the top-left element of the block.
 * \param pic_stride  Distance between vertically adjacent rows, in elements.
 * \returns           Hash of the block.
 */
static uint32_t uvg_crc32c_8x8_8bit_sse42(const uvg_pixel *buf, uint32_t pic_stride)
{
  uint64_t crc = 0xFFFFFFFF;
  for (uint32_t row = 0; row < 8; ++row) {
    const uint64_t row_word = *((uint64_t *)&buf[row * pic_stride]);
    crc = _mm_crc32_u64(crc, row_word);
  }
  return ~(uint32_t)crc;
}
|
||||||
|
|
||||||
|
|
||||||
|
#endif //COMPILE_INTEL_SSE42
|
||||||
|
|
||||||
|
/**
 * \brief Register the SSE4.2 block hash strategies with the selector.
 *
 * When COMPILE_INTEL_SSE42 is not set nothing is registered and the call
 * reports success.
 *
 * \param opaque    Passed through unchanged to uvg_strategyselector_register.
 * \param bitdepth  8 registers the 4x4 and 8x8 8-bit hashes; any other value
 *                  registers only the 4x4 16-bit hash.
 * \returns         Non-zero if every registration succeeded.
 */
int uvg_strategy_register_picture_sse42(void* opaque, uint8_t bitdepth) {
  bool success = true;
#if COMPILE_INTEL_SSE42
  // NOTE(review): priority 0 is the same value the generic crc32c strategies
  // are registered with -- confirm the selector actually prefers these.
  if (bitdepth != 8) {
    success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "sse42", 0, &uvg_crc32c_4x4_16bit_sse42);
  } else {
    success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "sse42", 0, &uvg_crc32c_4x4_8bit_sse42);
    success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "sse42", 0, &uvg_crc32c_8x8_8bit_sse42);
  }
#endif
  return success;
}
|
45
src/strategies/sse42/picture-sse42.h
Normal file
45
src/strategies/sse42/picture-sse42.h
Normal file
|
@ -0,0 +1,45 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
/*****************************************************************************
|
||||||
|
* This file is part of uvg266 VVC encoder.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022, Tampere University, ITU/ISO/IEC, project contributors
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
* are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice, this
|
||||||
|
* list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright notice, this
|
||||||
|
* list of conditions and the following disclaimer in the documentation and/or
|
||||||
|
* other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from
|
||||||
|
* this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||||
|
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
* INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
|
||||||
|
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
* INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \ingroup Optimization
|
||||||
|
* \file
|
||||||
|
* Optimizations for SSE4.2.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "global.h" // IWYU pragma: keep
|
||||||
|
#include "uvg266.h"
|
||||||
|
|
||||||
|
|
||||||
|
int uvg_strategy_register_picture_sse42(void* opaque, uint8_t bitdepth);
|
|
@ -41,6 +41,8 @@
|
||||||
|
|
||||||
|
|
||||||
// Define function pointers.
|
// Define function pointers.
|
||||||
|
crc32c_4x4_func * uvg_crc32c_4x4 = 0;
|
||||||
|
crc32c_8x8_func * uvg_crc32c_8x8 = 0;
|
||||||
reg_sad_func * uvg_reg_sad = 0;
|
reg_sad_func * uvg_reg_sad = 0;
|
||||||
|
|
||||||
cost_pixel_nxn_func * uvg_sad_4x4 = 0;
|
cost_pixel_nxn_func * uvg_sad_4x4 = 0;
|
||||||
|
@ -83,6 +85,8 @@ pixel_var_func *uvg_pixel_var = 0;
|
||||||
generate_residual_func *uvg_generate_residual = 0;
|
generate_residual_func *uvg_generate_residual = 0;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth) {
|
int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth) {
|
||||||
bool success = true;
|
bool success = true;
|
||||||
|
|
||||||
|
@ -94,6 +98,9 @@ int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth) {
|
||||||
if (uvg_g_hardware_flags.intel_flags.sse41) {
|
if (uvg_g_hardware_flags.intel_flags.sse41) {
|
||||||
success &= uvg_strategy_register_picture_sse41(opaque, bitdepth);
|
success &= uvg_strategy_register_picture_sse41(opaque, bitdepth);
|
||||||
}
|
}
|
||||||
|
if (uvg_g_hardware_flags.intel_flags.sse42) {
|
||||||
|
success &= uvg_strategy_register_picture_sse42(opaque, bitdepth);
|
||||||
|
}
|
||||||
if (uvg_g_hardware_flags.intel_flags.avx2) {
|
if (uvg_g_hardware_flags.intel_flags.avx2) {
|
||||||
success &= uvg_strategy_register_picture_avx2(opaque, bitdepth);
|
success &= uvg_strategy_register_picture_avx2(opaque, bitdepth);
|
||||||
}
|
}
|
||||||
|
@ -206,3 +213,50 @@ cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Precomputed CRC32C (Castagnoli) lookup table for the reflected polynomial 0x82F63B78 (normal form 0x1EDC6F41)
|
||||||
|
const uint32_t uvg_crc_table[256] = {
|
||||||
|
0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c,
|
||||||
|
0x26a1e7e8, 0xd4ca64eb, 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b,
|
||||||
|
0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, 0x105ec76f, 0xe235446c,
|
||||||
|
0xf165b798, 0x030e349b, 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
|
||||||
|
0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 0x5d1d08bf, 0xaf768bbc,
|
||||||
|
0xbc267848, 0x4e4dfb4b, 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a,
|
||||||
|
0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, 0xaa64d611, 0x580f5512,
|
||||||
|
0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
|
||||||
|
0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad,
|
||||||
|
0x1642ae59, 0xe4292d5a, 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a,
|
||||||
|
0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, 0x417b1dbc, 0xb3109ebf,
|
||||||
|
0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
|
||||||
|
0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, 0xfe53516f,
|
||||||
|
0xed03a29b, 0x1f682198, 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927,
|
||||||
|
0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, 0xdbfc821c, 0x2997011f,
|
||||||
|
0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
|
||||||
|
0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e,
|
||||||
|
0x4767748a, 0xb50cf789, 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859,
|
||||||
|
0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, 0x7198540d, 0x83f3d70e,
|
||||||
|
0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
|
||||||
|
0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de,
|
||||||
|
0xdde0eb2a, 0x2f8b6829, 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c,
|
||||||
|
0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, 0x082f63b7, 0xfa44e0b4,
|
||||||
|
0xe9141340, 0x1b7f9043, 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
|
||||||
|
0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b,
|
||||||
|
0xb4091bff, 0x466298fc, 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c,
|
||||||
|
0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, 0xa24bb5a6, 0x502036a5,
|
||||||
|
0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
|
||||||
|
0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975,
|
||||||
|
0x0e330a81, 0xfc588982, 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d,
|
||||||
|
0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, 0x38cc2a06, 0xcaa7a905,
|
||||||
|
0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
|
||||||
|
0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 0x0417b1db, 0xf67c32d8,
|
||||||
|
0xe52cc12c, 0x1747422f, 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff,
|
||||||
|
0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, 0xd3d3e1ab, 0x21b862a8,
|
||||||
|
0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
|
||||||
|
0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78,
|
||||||
|
0x7fab5e8c, 0x8dc0dd8f, 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee,
|
||||||
|
0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, 0x69e9f0d5, 0x9b8273d6,
|
||||||
|
0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
|
||||||
|
0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, 0xc69f7b69,
|
||||||
|
0xd5cf889d, 0x27a40b9e, 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e,
|
||||||
|
0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,
|
||||||
|
};
|
|
@ -151,7 +151,16 @@ typedef double (pixel_var_func)(const uvg_pixel *buf, const uint32_t len);
|
||||||
|
|
||||||
typedef void (generate_residual_func)(const uvg_pixel* ref_in, const uvg_pixel* pred_in, int16_t* residual, int width, int ref_stride, int pred_stride);
|
typedef void (generate_residual_func)(const uvg_pixel* ref_in, const uvg_pixel* pred_in, int16_t* residual, int width, int ref_stride, int pred_stride);
|
||||||
|
|
||||||
|
|
||||||
|
extern const uint32_t uvg_crc_table[256];
|
||||||
|
|
||||||
|
typedef uint32_t(crc32c_4x4_func)(const uvg_pixel *buf, uint32_t pic_stride);
|
||||||
|
typedef uint32_t(crc32c_8x8_func)(const uvg_pixel *buf, uint32_t pic_stride);
|
||||||
|
|
||||||
// Declare function pointers.
|
// Declare function pointers.
|
||||||
|
extern crc32c_4x4_func * uvg_crc32c_4x4;
|
||||||
|
extern crc32c_8x8_func * uvg_crc32c_8x8;
|
||||||
|
|
||||||
extern reg_sad_func * uvg_reg_sad;
|
extern reg_sad_func * uvg_reg_sad;
|
||||||
|
|
||||||
extern cost_pixel_nxn_func * uvg_sad_4x4;
|
extern cost_pixel_nxn_func * uvg_sad_4x4;
|
||||||
|
@ -198,6 +207,8 @@ cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned n);
|
||||||
cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n);
|
cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n);
|
||||||
|
|
||||||
#define STRATEGIES_PICTURE_EXPORTS \
|
#define STRATEGIES_PICTURE_EXPORTS \
|
||||||
|
{"crc32c_4x4", (void**) &uvg_crc32c_4x4}, \
|
||||||
|
{"crc32c_8x8", (void **)&uvg_crc32c_8x8}, \
|
||||||
{"reg_sad", (void**) &uvg_reg_sad}, \
|
{"reg_sad", (void**) &uvg_reg_sad}, \
|
||||||
{"sad_4x4", (void**) &uvg_sad_4x4}, \
|
{"sad_4x4", (void**) &uvg_sad_4x4}, \
|
||||||
{"sad_8x8", (void**) &uvg_sad_8x8}, \
|
{"sad_8x8", (void**) &uvg_sad_8x8}, \
|
||||||
|
|
|
@ -543,6 +543,8 @@ typedef struct uvg_config
|
||||||
uint8_t dual_tree;
|
uint8_t dual_tree;
|
||||||
|
|
||||||
uint8_t intra_rough_search_levels;
|
uint8_t intra_rough_search_levels;
|
||||||
|
|
||||||
|
uint8_t ibc; /*!< \brief Intra Block Copy parameter */
|
||||||
} uvg_config;
|
} uvg_config;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -104,6 +104,8 @@ int uvg_videoframe_free(videoframe_t * const frame)
|
||||||
|
|
||||||
free(frame);
|
free(frame);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -41,6 +41,7 @@
|
||||||
#include "cu.h"
|
#include "cu.h"
|
||||||
#include "global.h" // IWYU pragma: keep
|
#include "global.h" // IWYU pragma: keep
|
||||||
#include "uvg266.h"
|
#include "uvg266.h"
|
||||||
|
#include "hashmap.h"
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -77,12 +78,22 @@ typedef struct videoframe
|
||||||
struct param_set_map* alf_param_set_map;
|
struct param_set_map* alf_param_set_map;
|
||||||
|
|
||||||
int32_t poc; //!< \brief Picture order count
|
int32_t poc; //!< \brief Picture order count
|
||||||
cu_info_t* hmvp_lut; //!< \brief Look-up table for HMVP, one for each LCU row
|
|
||||||
|
|
||||||
|
uvg_pixel **ibc_buffer_y; //!< \brief Intra Block Copy buffer (Y plane) for each LCU row
|
||||||
|
uvg_pixel **ibc_buffer_u; //!< \brief Intra Block Copy buffer (U plane) for each LCU row
|
||||||
|
uvg_pixel **ibc_buffer_v; //!< \brief Intra Block Copy buffer (V plane) for each LCU row
|
||||||
|
uvg_hashmap_t **ibc_hashmap_row; //!< \brief Hashmap for IBC hash search for each LCU row
|
||||||
|
uint32_t *ibc_hashmap_pos_to_hash; //!< \brief Hashmap reverse search for position to hash
|
||||||
|
uint32_t ibc_hashmap_pos_to_hash_stride; //!< \brief Hashmap position to hash stride
|
||||||
|
cu_info_t* hmvp_lut_ibc; //!< \brief Look-up table for HMVP in IBC, one for each LCU row
|
||||||
|
uint8_t* hmvp_size_ibc; //!< \brief HMVP IBC LUT size
|
||||||
|
|
||||||
|
cu_info_t* hmvp_lut; //!< \brief Look-up table for HMVP, one for each LCU row
|
||||||
uint8_t* hmvp_size; //!< \brief HMVP LUT size
|
uint8_t* hmvp_size; //!< \brief HMVP LUT size
|
||||||
bool source_lmcs_mapped; //!< \brief Indicate if source_lmcs is available and mapped to LMCS
|
bool source_lmcs_mapped; //!< \brief Indicate if source_lmcs is available and mapped to LMCS
|
||||||
bool lmcs_top_level; //!< \brief Indicate that in this level the LMCS images are allocated
|
bool lmcs_top_level; //!< \brief Indicate that in this level the LMCS images are allocated
|
||||||
bool rec_lmcs_mapped; //!< \brief Indicate if rec_lmcs is available and mapped to LMCS
|
bool rec_lmcs_mapped; //!< \brief Indicate if rec_lmcs is available and mapped to LMCS
|
||||||
|
|
||||||
} videoframe_t;
|
} videoframe_t;
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -15,3 +15,4 @@ valgrind_test $common_args --transform-skip --tr-skip-max-size=5
|
||||||
valgrind_test $common_args --vaq=8
|
valgrind_test $common_args --vaq=8
|
||||||
valgrind_test $common_args --vaq=8 --bitrate 350000
|
valgrind_test $common_args --vaq=8 --bitrate 350000
|
||||||
valgrind_test $common_args --vaq=8 --rc-algorithm oba --bitrate 350000
|
valgrind_test $common_args --vaq=8 --rc-algorithm oba --bitrate 350000
|
||||||
|
valgrind_test $common_args --ibc=1
|
Loading…
Reference in a new issue