mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-23 18:14:06 +00:00
Merge branch 'release-prep' into master
This commit is contained in:
commit
84580aebb0
|
@ -105,7 +105,7 @@ file(GLOB LIB_SOURCES RELATIVE ${PROJECT_SOURCE_DIR} "src/*.h" "src/*.c")
|
|||
list(REMOVE_ITEM LIB_SOURCES "src/encmain.c" "src/cli.c" "src/cli.h" "src/yuv_io.c" "src/yuv_io.h")
|
||||
|
||||
# Add also all the strategies
|
||||
file(GLOB_RECURSE LIB_SOURCES_STRATEGIES RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/*.c")
|
||||
file(GLOB_RECURSE LIB_SOURCES_STRATEGIES RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/*.h" "src/strategies/*.c")
|
||||
|
||||
# ToDo: do something with encode_coding_tree-avx2, currently not converted to VVC
|
||||
list(REMOVE_ITEM LIB_SOURCES_STRATEGIES "src/strategies/avx2/encode_coding_tree-avx2.c")
|
||||
|
@ -340,6 +340,9 @@ if(NOT DEFINED MSVC)
|
|||
if(NOT "test_external_symbols" IN_LIST XFAIL)
|
||||
add_test( NAME test_external_symbols COMMAND ${PROJECT_SOURCE_DIR}/tests/test_external_symbols.sh WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests)
|
||||
endif()
|
||||
if(NOT "test_mtt" IN_LIST XFAIL)
|
||||
add_test( NAME test_mtt COMMAND ${PROJECT_SOURCE_DIR}/tests/test_mtt.sh WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests)
|
||||
endif()
|
||||
if(NOT "test_intra" IN_LIST XFAIL)
|
||||
add_test( NAME test_intra COMMAND ${PROJECT_SOURCE_DIR}/tests/test_intra.sh WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests)
|
||||
endif()
|
||||
|
|
|
@ -77,6 +77,8 @@ typedef struct
|
|||
cabac_ctx_t mts_idx_model[4];
|
||||
cabac_ctx_t split_flag_model[9]; //!< \brief split flag context models
|
||||
cabac_ctx_t qt_split_flag_model[6]; //!< \brief qt split flag context models
|
||||
cabac_ctx_t mtt_vertical_model[5];
|
||||
cabac_ctx_t mtt_binary_model[4];
|
||||
cabac_ctx_t intra_luma_mpm_flag_model; //!< \brief intra mode context models
|
||||
cabac_ctx_t intra_subpart_model[2]; //!< \brief intra sub part context models
|
||||
cabac_ctx_t chroma_pred_model;
|
||||
|
|
100
src/cfg.c
100
src/cfg.c
|
@ -80,7 +80,6 @@ int uvg_config_init(uvg_config *cfg)
|
|||
cfg->trskip_max_size = 2; //Default to 4x4
|
||||
cfg->mts = 0;
|
||||
cfg->mts_implicit = 0;
|
||||
cfg->tr_depth_intra = 0;
|
||||
cfg->ime_algorithm = 0; /* hexbs */
|
||||
cfg->fme_level = 4;
|
||||
cfg->source_scan_type = 0; /* progressive */
|
||||
|
@ -207,6 +206,8 @@ int uvg_config_init(uvg_config *cfg)
|
|||
|
||||
cfg->lfnst = false;
|
||||
|
||||
cfg->isp = false;
|
||||
|
||||
parse_qp_map(cfg, 0);
|
||||
|
||||
cfg->jccr = 0;
|
||||
|
@ -221,10 +222,27 @@ int uvg_config_init(uvg_config *cfg)
|
|||
cfg->cabac_debug_file_name = NULL;
|
||||
|
||||
cfg->dual_tree = 0;
|
||||
|
||||
cfg->min_qt_size[0] = 4;
|
||||
cfg->min_qt_size[1] = 4;
|
||||
cfg->min_qt_size[2] = 4;
|
||||
|
||||
cfg->max_btt_depth[0] = 0;
|
||||
cfg->max_btt_depth[1] = 0;
|
||||
cfg->max_btt_depth[2] = 0;
|
||||
|
||||
cfg->max_tt_size[0] = 64;
|
||||
cfg->max_bt_size[0] = 64;
|
||||
cfg->max_tt_size[1] = 64;
|
||||
cfg->max_bt_size[1] = 64;
|
||||
cfg->max_tt_size[2] = 64;
|
||||
cfg->max_bt_size[2] = 64;
|
||||
|
||||
cfg->intra_rough_search_levels = 2;
|
||||
|
||||
cfg->ibc = 0;
|
||||
|
||||
cfg->dep_quant = 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -333,7 +351,7 @@ static int parse_tiles_specification(const char* const arg, int32_t * const ntil
|
|||
|
||||
return 1;
|
||||
}
|
||||
/*
|
||||
|
||||
static int parse_uint8(const char *numstr,uint8_t* number,int min, int max)
|
||||
{
|
||||
char *tail;
|
||||
|
@ -349,7 +367,7 @@ static int parse_uint8(const char *numstr,uint8_t* number,int min, int max)
|
|||
return 1;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
static int parse_int8(const char *numstr,int8_t* number,int min, int max)
|
||||
{
|
||||
char *tail;
|
||||
|
@ -365,7 +383,7 @@ static int parse_int8(const char *numstr,int8_t* number,int min, int max)
|
|||
return 1;
|
||||
}
|
||||
}
|
||||
/*
|
||||
|
||||
static int parse_array(const char *array, uint8_t *coeff_key, int size,
|
||||
int min, int max)
|
||||
{
|
||||
|
@ -389,15 +407,15 @@ static int parse_array(const char *array, uint8_t *coeff_key, int size,
|
|||
free(key);
|
||||
return 0;
|
||||
}
|
||||
else if (i<size){
|
||||
fprintf(stderr, "parsing failed : too few members.\n");
|
||||
//else if (i<size){
|
||||
// fprintf(stderr, "parsing failed : too few members.\n");
|
||||
// free(key);
|
||||
// return 0;
|
||||
//}
|
||||
free(key);
|
||||
return 0;
|
||||
return i;
|
||||
}
|
||||
free(key);
|
||||
return 1;
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
static int parse_qp_scale_array(const char *array, int8_t *out)
|
||||
{
|
||||
|
@ -928,8 +946,6 @@ int uvg_config_parse(uvg_config *cfg, const char *name, const char *value)
|
|||
cfg->mts = mts_type;
|
||||
cfg->mts_implicit = (mts_type == UVG_MTS_IMPLICIT);
|
||||
}
|
||||
else if OPT("tr-depth-intra")
|
||||
cfg->tr_depth_intra = atoi(value);
|
||||
else if OPT("me") {
|
||||
int8_t ime_algorithm = 0;
|
||||
if (!parse_enum(value, me_names, &ime_algorithm)) return 0;
|
||||
|
@ -1454,6 +1470,9 @@ int uvg_config_parse(uvg_config *cfg, const char *name, const char *value)
|
|||
else if OPT("lfnst") {
|
||||
cfg->lfnst = atobool(value);
|
||||
}
|
||||
else if OPT("isp") {
|
||||
cfg->isp = atobool(value);
|
||||
}
|
||||
else if OPT("jccr") {
|
||||
cfg->jccr = (bool)atobool(value);
|
||||
}
|
||||
|
@ -1479,6 +1498,49 @@ int uvg_config_parse(uvg_config *cfg, const char *name, const char *value)
|
|||
else if OPT("dual-tree") {
|
||||
cfg->dual_tree = atobool(value);
|
||||
}
|
||||
else if OPT("mtt-depth-intra") {
|
||||
cfg->max_btt_depth[0] = atoi(value);
|
||||
}
|
||||
else if OPT("mtt-depth-intra-chroma") {
|
||||
cfg->max_btt_depth[2] = atoi(value);
|
||||
}
|
||||
else if OPT("mtt-depth-inter") {
|
||||
cfg->max_btt_depth[1] = atoi(value);
|
||||
}
|
||||
else if OPT("max-bt-size") {
|
||||
uint8_t sizes[3];
|
||||
const int got = parse_array(value, sizes, 3, 0, 128);
|
||||
if (got == 1) {
|
||||
cfg->max_bt_size[0] = sizes[0];
|
||||
cfg->max_bt_size[1] = sizes[0];
|
||||
cfg->max_bt_size[2] = sizes[0];
|
||||
}
|
||||
else if (got == 3) {
|
||||
cfg->max_bt_size[0] = sizes[0];
|
||||
cfg->max_bt_size[1] = sizes[1];
|
||||
cfg->max_bt_size[2] = sizes[2];
|
||||
} else {
|
||||
fprintf(stderr, "Incorrect amount of values provided for max-bt-size\n");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
else if OPT("max-tt-size") {
|
||||
uint8_t sizes[3];
|
||||
const int got = parse_array(value, sizes, 3, 0, 128);
|
||||
if (got == 1) {
|
||||
cfg->max_tt_size[0] = sizes[0];
|
||||
cfg->max_tt_size[1] = sizes[0];
|
||||
cfg->max_tt_size[2] = sizes[0];
|
||||
}
|
||||
else if (got == 3) {
|
||||
cfg->max_tt_size[0] = sizes[0];
|
||||
cfg->max_tt_size[1] = sizes[1];
|
||||
cfg->max_tt_size[2] = sizes[2];
|
||||
} else {
|
||||
fprintf(stderr, "Incorrect amount of values provided for max-tt-size\n");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
else if OPT("intra-rough-granularity") {
|
||||
cfg->intra_rough_search_levels = atoi(value);
|
||||
}
|
||||
|
@ -1489,7 +1551,11 @@ int uvg_config_parse(uvg_config *cfg, const char *name, const char *value)
|
|||
return 0;
|
||||
}
|
||||
cfg->ibc = (uint8_t)ibc_value;
|
||||
} else {
|
||||
}
|
||||
else if OPT("dep-quant") {
|
||||
cfg->dep_quant = (bool)atobool(value);
|
||||
}
|
||||
else {
|
||||
return 0;
|
||||
}
|
||||
#undef OPT
|
||||
|
@ -1681,12 +1747,6 @@ int uvg_config_validate(const uvg_config *const cfg)
|
|||
error = 1;
|
||||
}
|
||||
|
||||
if (cfg->tr_depth_intra < 0 || cfg->tr_depth_intra > 4) {
|
||||
// range is 0 .. CtbLog2SizeY - Log2MinTrafoSize
|
||||
fprintf(stderr, "Input error: --tr-depth-intra is out of range [0..4]\n");
|
||||
error = 1;
|
||||
}
|
||||
|
||||
if (cfg->fme_level != 0 && cfg->fme_level > 4) {
|
||||
fprintf(stderr, "Input error: invalid --subme parameter (must be in range 0-4)\n");
|
||||
error = 1;
|
||||
|
|
39
src/cli.c
39
src/cli.c
|
@ -76,7 +76,6 @@ static const struct option long_options[] = {
|
|||
{ "tr-skip-max-size", required_argument, NULL, 0 },
|
||||
{ "mts", required_argument, NULL, 0 },
|
||||
{ "no-mts", no_argument, NULL, 0 },
|
||||
{ "tr-depth-intra", required_argument, NULL, 0 },
|
||||
{ "me", required_argument, NULL, 0 },
|
||||
{ "subme", required_argument, NULL, 0 },
|
||||
{ "source-scan-type", required_argument, NULL, 0 },
|
||||
|
@ -178,6 +177,8 @@ static const struct option long_options[] = {
|
|||
{ "no-mip", no_argument, NULL, 0 },
|
||||
{ "lfnst", no_argument, NULL, 0 },
|
||||
{ "no-lfnst", no_argument, NULL, 0 },
|
||||
{ "isp", no_argument, NULL, 0 },
|
||||
{ "no-isp", no_argument, NULL, 0 },
|
||||
{ "jccr", no_argument, NULL, 0 },
|
||||
{ "no-jccr", no_argument, NULL, 0 },
|
||||
{ "amvr", no_argument, NULL, 0 },
|
||||
|
@ -191,8 +192,15 @@ static const struct option long_options[] = {
|
|||
{ "dual-tree", no_argument, NULL, 0 },
|
||||
{ "no-dual-tree", no_argument, NULL, 0 },
|
||||
{ "cabac-debug-file", required_argument, NULL, 0 },
|
||||
{ "mtt-depth-intra", required_argument, NULL, 0 },
|
||||
{ "mtt-depth-inter", required_argument, NULL, 0 },
|
||||
{ "mtt-depth-intra-chroma", required_argument, NULL, 0 },
|
||||
{ "max-bt-size", required_argument, NULL, 0 },
|
||||
{ "max-tt-size", required_argument, NULL, 0 },
|
||||
{ "intra-rough-granularity",required_argument, NULL, 0 },
|
||||
{ "ibc", required_argument, NULL, 0 },
|
||||
{ "dep-quant", no_argument, NULL, 0 },
|
||||
{ "no-dep-quant", no_argument, NULL, 0 },
|
||||
{0, 0, 0, 0}
|
||||
};
|
||||
|
||||
|
@ -571,6 +579,7 @@ void print_help(void)
|
|||
" - full: Full ALF\n"
|
||||
" --(no-)rdoq : Rate-distortion optimized quantization [enabled]\n"
|
||||
" --(no-)rdoq-skip : Skip RDOQ for 4x4 blocks. [disabled]\n"
|
||||
" --(no-)dep-quant : Use dependent quantization. [disabled]\n"
|
||||
" --(no-)signhide : Sign hiding [disabled]\n"
|
||||
" --rd <integer> : Intra mode search complexity [0]\n"
|
||||
" - 0: Skip intra if inter is good enough.\n"
|
||||
|
@ -602,14 +611,14 @@ void print_help(void)
|
|||
" - 2: + 1/2-pixel diagonal\n"
|
||||
" - 3: + 1/4-pixel horizontal and vertical\n"
|
||||
" - 4: + 1/4-pixel diagonal\n"
|
||||
" --pu-depth-inter <int>-<int> : Inter prediction units sizes [0-3]\n"
|
||||
" - 0, 1, 2, 3: from 64x64 to 8x8\n"
|
||||
" --pu-depth-inter <int>-<int> : Maximum and minimum split depths where\n"
|
||||
" inter search is performed 0..8. [0-3]\n"
|
||||
" - Accepts a list of values separated by ','\n"
|
||||
" for setting separate depths per GOP layer\n"
|
||||
" (values can be omitted to use the first\n"
|
||||
" value for the respective layer).\n"
|
||||
" --pu-depth-intra <int>-<int> : Intra prediction units sizes [1-4]\n"
|
||||
" - 0, 1, 2, 3, 4: from 64x64 to 4x4\n"
|
||||
" --pu-depth-intra <int>-<int> : Maximum and minimum split depths where\n"
|
||||
" intra search is performed 0..8. [1-4]\n"
|
||||
" - Accepts a list of values separated by ','\n"
|
||||
" for setting separate depths per GOP layer\n"
|
||||
" (values can be omitted to use the first\n"
|
||||
|
@ -617,6 +626,22 @@ void print_help(void)
|
|||
" --ml-pu-depth-intra : Predict the pu-depth-intra using machine\n"
|
||||
" learning trees, overrides the\n"
|
||||
" --pu-depth-intra parameter. [disabled]\n"
|
||||
" --mtt-depth-intra : Depth of mtt for intra slices 0..3.[0]\n"
|
||||
" --mtt-depth-intra-chroma : Depth of mtt for chroma dual tree in\n"
|
||||
" intra slices 0..3.[0]\n"
|
||||
" --mtt-depth-inter : Depth of mtt for inter slices 0..3.[0]\n"
|
||||
" All MTTs are currently experimental and\n"
|
||||
" require disabling some avx2 optimizations.\n"
|
||||
" --max-bt-size : maximum size for a CU resulting from\n"
|
||||
" a bt split. A singular value shared for all\n"
|
||||
" or a list of three values for the different\n"
|
||||
" slices types (intra, inter, intra-chroma)\n"
|
||||
" can be provided. [64, 64, 32]\n"
|
||||
" --max-tt-size : maximum size for a CU resulting from\n"
|
||||
" a tt split. A singular value shared for all\n"
|
||||
" or a list of three values for the different\n"
|
||||
" slices types (intra, inter, intra-chroma)\n"
|
||||
" can be provided. [64, 64, 32]\n"
|
||||
" --intra-rough-granularity : How many levels are used for the\n"
|
||||
" logarithmic intra rough search. 0..4\n"
|
||||
" With 0 all of the modes are checked \n"
|
||||
|
@ -634,7 +659,6 @@ void print_help(void)
|
|||
" This is mostly for debugging and is not\n"
|
||||
" guaranteed to produce sensible bitstream or\n"
|
||||
" work at all. [disabled]\n"
|
||||
" --tr-depth-intra <int> : Transform split depth for intra blocks [0]\n"
|
||||
" --(no-)bipred : Bi-prediction [disabled]\n"
|
||||
" --cu-split-termination <string> : CU split search termination [zero]\n"
|
||||
" - off: Don't terminate early.\n"
|
||||
|
@ -671,6 +695,9 @@ void print_help(void)
|
|||
" --(no-)mip : Enable matrix weighted intra prediction.\n"
|
||||
" --(no-)lfnst : Enable low frequency non-separable transform.\n"
|
||||
" [disabled]\n"
|
||||
" --(no-)isp : Enable intra sub partitions. [disabled]\n"
|
||||
" Experimental, requires disabling some avx2\n"
|
||||
" optimizations.\n"
|
||||
" --mts <string> : Multiple Transform Selection [off].\n"
|
||||
" (Currently only implemented for intra\n"
|
||||
" and has effect only when rd >= 2)\n"
|
||||
|
|
|
@ -50,6 +50,21 @@ static const uint8_t INIT_QT_SPLIT_FLAG[4][6] = {
|
|||
{ 0, 8, 8, 12, 12, 8, },
|
||||
};
|
||||
|
||||
|
||||
/*
 * CABAC initialisation values for the five MTT "vertical split" contexts.
 * uvg_init_contexts() indexes rows 0..2 with its slice argument and passes
 * row 3 as the last argument of uvg_ctx_init() (presumably the adaptation
 * rate of each context - TODO confirm against uvg_ctx_init).
 */
static const uint8_t INIT_VERTICAL_SPLIT_FLAG[4][5] = {
  { 43, 42, 37, 42, 44, },
  { 43, 35, 37, 34, 52, },
  { 43, 42, 29, 27, 44, },
  {  9,  8,  9,  8,  5, },
};
|
||||
|
||||
/*
 * CABAC initialisation values for the four MTT "binary split" contexts.
 * uvg_init_contexts() indexes rows 0..2 with its slice argument and passes
 * row 3 as the last argument of uvg_ctx_init() (presumably the adaptation
 * rate of each context - TODO confirm against uvg_ctx_init).
 */
static const uint8_t INIT_BINARY_SPLIT_FLAG[4][4] = {
  { 28, 29, 28, 29, },
  { 43, 37, 21, 22, },
  { 36, 45, 36, 45, },
  { 12, 13, 12, 13, },
};
|
||||
|
||||
static const uint8_t INIT_SKIP_FLAG[4][3] = {
|
||||
{ 57, 60, 46, },
|
||||
{ 57, 59, 45, },
|
||||
|
@ -574,6 +589,11 @@ void uvg_init_contexts(encoder_state_t *state, int8_t QP, int8_t slice)
|
|||
uvg_ctx_init(&cabac->ctx.part_size_model[i], QP, INIT_PART_SIZE[slice][i], INIT_PART_SIZE[3][i]);
|
||||
uvg_ctx_init(&cabac->ctx.bdpcm_mode[i], QP, BDPCM_MODE_INIT[slice][i], BDPCM_MODE_INIT[3][i]);
|
||||
uvg_ctx_init(&cabac->ctx.qt_cbf_model_luma[i], QP, INIT_QT_CBF[slice][i], INIT_QT_CBF[3][i]);
|
||||
uvg_ctx_init(&cabac->ctx.mtt_binary_model[i], QP, INIT_BINARY_SPLIT_FLAG[slice][i], INIT_BINARY_SPLIT_FLAG[3][i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < 5; i++) {
|
||||
uvg_ctx_init(&cabac->ctx.mtt_vertical_model[i], QP, INIT_VERTICAL_SPLIT_FLAG[slice][i], INIT_VERTICAL_SPLIT_FLAG[3][i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < 6; i++) {
|
||||
|
@ -618,13 +638,14 @@ void uvg_context_copy(encoder_state_t * const target_state, const encoder_state_
|
|||
uint32_t uvg_context_get_sig_coeff_group( uint32_t *sig_coeff_group_flag,
|
||||
uint32_t pos_x,
|
||||
uint32_t pos_y,
|
||||
int32_t width)
|
||||
int32_t width,
|
||||
int32_t height)
|
||||
{
|
||||
uint32_t uiRight = 0;
|
||||
uint32_t uiLower = 0;
|
||||
uint32_t position = pos_y * width + pos_x;
|
||||
if (pos_x + 1 < (uint32_t)width) uiRight = sig_coeff_group_flag[position + 1];
|
||||
if (pos_y + 1 < (uint32_t)width) uiLower = sig_coeff_group_flag[position + width];
|
||||
if (pos_y + 1 < (uint32_t)height) uiLower = sig_coeff_group_flag[position + width];
|
||||
|
||||
return uiRight || uiLower;
|
||||
}
|
||||
|
@ -656,7 +677,7 @@ uint32_t uvg_context_get_sig_coeff_group_ts(uint32_t* sig_coeff_group_flag,
|
|||
* \returns context index for current scan position
|
||||
*/
|
||||
uint32_t uvg_context_get_sig_ctx_idx_abs(const coeff_t* coeff, uint32_t pos_x, uint32_t pos_y,
|
||||
uint32_t height, uint32_t width, int8_t type,
|
||||
uint32_t width, uint32_t height, int8_t color,
|
||||
int32_t* temp_diag, int32_t* temp_sum)
|
||||
{
|
||||
const coeff_t* data = coeff + pos_x + pos_y * width;
|
||||
|
@ -686,7 +707,7 @@ uint32_t uvg_context_get_sig_ctx_idx_abs(const coeff_t* coeff, uint32_t pos_x, u
|
|||
}
|
||||
#undef UPDATE
|
||||
int ctx_ofs = MIN((sum_abs+1)>>1, 3) + (diag < 2 ? 4 : 0);
|
||||
if (type == 0 /* Luma */)
|
||||
if (color == COLOR_Y)
|
||||
{
|
||||
ctx_ofs += diag < 5 ? 4 : 0;
|
||||
}
|
||||
|
@ -814,7 +835,7 @@ unsigned uvg_lrg1_ctx_id_abs_ts(const coeff_t* coeff, int32_t pos_x, int32_t pos
|
|||
* \returns context go rice parameter
|
||||
*/
|
||||
uint32_t uvg_abs_sum(const coeff_t* coeff, uint32_t pos_x, uint32_t pos_y,
|
||||
uint32_t height, uint32_t width, uint32_t baselevel)
|
||||
uint32_t width, uint32_t height, uint32_t baselevel)
|
||||
{
|
||||
#define UPDATE(x) sum+=abs(x)/*-(x?1:0)*/
|
||||
|
||||
|
@ -856,8 +877,8 @@ uint32_t uvg_abs_sum(const coeff_t* coeff, uint32_t pos_x, uint32_t pos_y,
|
|||
* \returns context go rice parameter
|
||||
*/
|
||||
uint32_t uvg_go_rice_par_abs(const coeff_t* coeff, uint32_t pos_x, uint32_t pos_y,
|
||||
uint32_t height, uint32_t width, uint32_t baselevel)
|
||||
uint32_t width, uint32_t height, uint32_t baselevel)
|
||||
{
|
||||
uint32_t check = uvg_abs_sum(coeff, pos_x, pos_y, height, width, baselevel);
|
||||
uint32_t check = uvg_abs_sum(coeff, pos_x, pos_y, width, height, baselevel);
|
||||
return g_go_rice_pars[check];
|
||||
}
|
|
@ -49,10 +49,10 @@ void uvg_init_contexts(encoder_state_t *state, int8_t QP, int8_t slice);
|
|||
|
||||
void uvg_context_copy(encoder_state_t * target_state, const encoder_state_t * source_state);
|
||||
|
||||
uint32_t uvg_context_get_sig_coeff_group( uint32_t *sig_coeff_group_flag,uint32_t pos_x, uint32_t pos_y,int32_t width);
|
||||
uint32_t uvg_context_get_sig_coeff_group( uint32_t *sig_coeff_group_flag,uint32_t pos_x, uint32_t pos_y,int32_t width, int32_t height);
|
||||
uint32_t uvg_context_get_sig_coeff_group_ts(uint32_t* sig_coeff_group_flag, uint32_t pos_x, uint32_t pos_y, int32_t width);
|
||||
uint32_t uvg_context_get_sig_ctx_idx_abs(const coeff_t* coeff, uint32_t pos_x, uint32_t pos_y,
|
||||
uint32_t height, uint32_t width, int8_t type,
|
||||
uint32_t width, uint32_t height, int8_t type,
|
||||
int32_t* temp_diag, int32_t* temp_sum);
|
||||
|
||||
uint32_t uvg_context_get_sig_ctx_idx_abs_ts(const coeff_t* coeff, int32_t pos_x, int32_t pos_y,
|
||||
|
@ -66,7 +66,7 @@ uint32_t uvg_abs_sum(const coeff_t* coeff, uint32_t pos_x, uint32_t pos_y,
|
|||
uint32_t height, uint32_t width, uint32_t baselevel);
|
||||
|
||||
uint32_t uvg_go_rice_par_abs(const coeff_t* coeff, uint32_t pos_x, uint32_t pos_y,
|
||||
uint32_t height, uint32_t width, uint32_t baselevel);
|
||||
uint32_t width, uint32_t height, uint32_t baselevel);
|
||||
|
||||
#define CNU 35
|
||||
#define DWS 8
|
||||
|
|
255
src/cu.c
255
src/cu.c
|
@ -34,6 +34,9 @@
|
|||
#include <stdlib.h>
|
||||
|
||||
#include "cu.h"
|
||||
|
||||
#include "alf.h"
|
||||
#include "encoderstate.h"
|
||||
#include "threads.h"
|
||||
|
||||
|
||||
|
@ -97,6 +100,42 @@ cu_info_t* uvg_cu_array_at(cu_array_t *cua, unsigned x_px, unsigned y_px)
|
|||
}
|
||||
|
||||
|
||||
/**
 * \brief Remap the coordinates of an ISP partition that is not 4-aligned.
 *
 * If both coordinates are already multiples of four, nothing is changed.
 * Otherwise the misaligned coordinate is moved back onto a multiple of four
 * and the other coordinate is advanced by four for every partition that was
 * skipped. Presumably this gives each narrow ISP partition its own 4x4 slot
 * in the CU array - TODO confirm against the callers.
 *
 * \param x    In/out: x coordinate of the partition.
 * \param y    In/out: y coordinate of the partition.
 * \param dim  Length of the block along the split direction; the value 8
 *             selects the two-sample-wide (8x2 / 2x8) partition layout.
 */
void uvg_get_isp_cu_arr_coords(int *x, int *y, int dim)
{
  const int remainder_y = *y % 4;
  const int remainder_x = *x % 4;

  // Nothing to do when both coordinates are already divisible by 4.
  if (remainder_y == 0 && remainder_x == 0) return;

  if (remainder_y != 0) {
    // Horizontal ISP split: y is the misaligned coordinate.
    if (remainder_y % 2 == 0 && dim == 8) {
      // 8x2 block
      *y -= 2;
      *x += 4;
    }
    else {
      // 16x1 block
      *y -= remainder_y;
      *x += remainder_y * 4;
    }
  }
  else {
    // Vertical ISP split: x is the misaligned coordinate.
    if (remainder_x % 2 == 0 && dim == 8) {
      // 2x8 block
      *y += 4;
      *x -= 2;
    }
    else {
      // 1x16 block
      *y += remainder_x * 4;
      *x -= remainder_x;
    }
  }
}
|
||||
|
||||
|
||||
const cu_info_t* uvg_cu_array_at_const(const cu_array_t *cua, unsigned x_px, unsigned y_px)
|
||||
{
|
||||
assert(x_px < cua->width);
|
||||
|
@ -237,10 +276,10 @@ cu_array_t * uvg_cu_array_copy_ref(cu_array_t* cua)
|
|||
* \param dst_y y-coordinate of the top edge of the copied area in dst
|
||||
* \param src source lcu
|
||||
*/
|
||||
void uvg_cu_array_copy_from_lcu(cu_array_t* dst, int dst_x, int dst_y, const lcu_t *src, enum uvg_tree_type tree_type)
|
||||
void uvg_cu_array_copy_from_lcu(cu_array_t* dst, int dst_x, int dst_y, const lcu_t *src)
|
||||
{
|
||||
const int dst_stride = dst->stride >> 2;
|
||||
const int width = tree_type != UVG_CHROMA_T ? LCU_WIDTH : LCU_WIDTH_C;
|
||||
const int width = LCU_WIDTH;
|
||||
for (int y = 0; y < width; y += SCU_WIDTH) {
|
||||
for (int x = 0; x < width; x += SCU_WIDTH) {
|
||||
const cu_info_t *from_cu = LCU_GET_CU_AT_PX(src, x, y);
|
||||
|
@ -251,3 +290,215 @@ void uvg_cu_array_copy_from_lcu(cu_array_t* dst, int dst_x, int dst_y, const lcu
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* \brief Constructs cu_loc_t based on given parameters. Calculates chroma dimensions automatically.
|
||||
*
|
||||
* \param loc Destination cu_loc.
|
||||
* \param x Block top left x coordinate.
|
||||
* \param y Block top left y coordinate.
|
||||
* \param width Block width.
|
||||
* \param height Block height.
|
||||
*/
|
||||
void uvg_cu_loc_ctor(cu_loc_t* loc, int x, int y, int width, int height)
|
||||
{
|
||||
assert(x >= 0 && y >= 0 && width >= 0 && height >= 0 && "Cannot give negative coordinates or block dimensions.");
|
||||
assert(!(width > LCU_WIDTH || height > LCU_WIDTH) && "Luma CU dimension exceeds maximum (dim > LCU_WIDTH).");
|
||||
// This check is no longer valid. With non-square blocks and ISP enabled, even 1x16 and 16x1 (ISP needs at least 16 samples) blocks are valid
|
||||
//assert(!(width < 4 || height < 4) && "Luma CU dimension smaller than 4.");
|
||||
|
||||
loc->x = x;
|
||||
loc->y = y;
|
||||
loc->local_x = x % LCU_WIDTH;
|
||||
loc->local_y = y % LCU_WIDTH;
|
||||
loc->width = width;
|
||||
loc->height = height;
|
||||
// TODO: when MTT is implemented, chroma dimensions can be minimum 2.
|
||||
// Chroma width is half of luma width, when not at maximum depth.
|
||||
loc->chroma_width = width >> 1;
|
||||
loc->chroma_height = height >> 1;
|
||||
}
|
||||
|
||||
|
||||
int uvg_get_split_locs(
|
||||
const cu_loc_t* const origin,
|
||||
enum split_type split,
|
||||
cu_loc_t out[4],
|
||||
uint8_t* separate_chroma)
|
||||
{
|
||||
const int half_width = origin->width >> 1;
|
||||
const int half_height = origin->height >> 1;
|
||||
const int quarter_width = origin->width >> 2;
|
||||
const int quarter_height = origin->height >> 2;
|
||||
if (origin->width == 4 && separate_chroma) *separate_chroma = 1;
|
||||
|
||||
switch (split) {
|
||||
case NO_SPLIT:
|
||||
assert(0 && "trying to get split from no split");
|
||||
break;
|
||||
case QT_SPLIT:
|
||||
uvg_cu_loc_ctor(&out[0], origin->x, origin->y, half_width, half_height);
|
||||
uvg_cu_loc_ctor(&out[1], origin->x + half_width, origin->y, half_width, half_height);
|
||||
uvg_cu_loc_ctor(&out[2], origin->x, origin->y + half_height, half_width, half_height);
|
||||
uvg_cu_loc_ctor(&out[3], origin->x + half_width, origin->y + half_height, half_width, half_height);
|
||||
if (half_height == 4 && separate_chroma) *separate_chroma = 1;
|
||||
return 4;
|
||||
case BT_HOR_SPLIT:
|
||||
uvg_cu_loc_ctor(&out[0], origin->x, origin->y, origin->width, half_height);
|
||||
uvg_cu_loc_ctor(&out[1], origin->x, origin->y + half_height, origin->width, half_height);
|
||||
if (half_height * origin->width < 64 && separate_chroma) *separate_chroma = 1;
|
||||
return 2;
|
||||
case BT_VER_SPLIT:
|
||||
uvg_cu_loc_ctor(&out[0], origin->x, origin->y, half_width, origin->height);
|
||||
uvg_cu_loc_ctor(&out[1], origin->x + half_width, origin->y, half_width, origin->height);
|
||||
if ((half_width == 4 || half_width * origin->height < 64) && separate_chroma) *separate_chroma = 1;
|
||||
return 2;
|
||||
case TT_HOR_SPLIT:
|
||||
uvg_cu_loc_ctor(&out[0], origin->x, origin->y, origin->width, quarter_height);
|
||||
uvg_cu_loc_ctor(&out[1], origin->x, origin->y + quarter_height, origin->width, half_height);
|
||||
uvg_cu_loc_ctor(&out[2], origin->x, origin->y + quarter_height + half_height, origin->width, quarter_height);
|
||||
if (quarter_height * origin->width < 64 && separate_chroma) *separate_chroma = 1;
|
||||
return 3;
|
||||
case TT_VER_SPLIT:
|
||||
uvg_cu_loc_ctor(&out[0], origin->x, origin->y, quarter_width, origin->height);
|
||||
uvg_cu_loc_ctor(&out[1], origin->x + quarter_width, origin->y, half_width, origin->height);
|
||||
uvg_cu_loc_ctor(&out[2], origin->x + quarter_width + half_width, origin->y, quarter_width, origin->height);
|
||||
if ((quarter_width == 4 || quarter_width * origin->height < 64) && separate_chroma) *separate_chroma = 1;
|
||||
return 3;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int uvg_get_implicit_split(
|
||||
const encoder_state_t* const state,
|
||||
const cu_loc_t* const cu_loc,
|
||||
uint8_t max_mtt_depth)
|
||||
{
|
||||
bool right_ok = (state->tile->frame->width) >= cu_loc->x + cu_loc->width;
|
||||
bool bottom_ok = (state->tile->frame->height) >= cu_loc->y + cu_loc->height;
|
||||
|
||||
if (right_ok && bottom_ok) return NO_SPLIT;
|
||||
if (right_ok && max_mtt_depth != 0) return BT_HOR_SPLIT;
|
||||
if (bottom_ok && max_mtt_depth != 0) return BT_VER_SPLIT;
|
||||
return QT_SPLIT;
|
||||
}
|
||||
|
||||
|
||||
int uvg_get_possible_splits(const encoder_state_t * const state,
|
||||
const cu_loc_t * const cu_loc, split_tree_t split_tree, enum uvg_tree_type tree_type, bool splits[6])
|
||||
{
|
||||
const unsigned width = cu_loc->width;
|
||||
const unsigned height = cu_loc->height;
|
||||
const int slice_type = state->frame->is_irap ? (tree_type == UVG_CHROMA_T ? 2 : 0) : 1;
|
||||
|
||||
const unsigned max_btd =
|
||||
state->encoder_control->cfg.max_btt_depth[slice_type] + split_tree.implicit_mtt_depth;
|
||||
const unsigned max_bt_size = state->encoder_control->cfg.max_bt_size[slice_type];
|
||||
const unsigned min_bt_size = 1 << MIN_SIZE;
|
||||
const unsigned max_tt_size = state->encoder_control->cfg.max_tt_size[slice_type];
|
||||
const unsigned min_tt_size = 1 << MIN_SIZE;
|
||||
const unsigned min_qt_size = state->encoder_control->cfg.min_qt_size[slice_type];
|
||||
|
||||
const enum split_type implicitSplit = uvg_get_implicit_split(state, cu_loc, max_btd);
|
||||
|
||||
splits[NO_SPLIT] = splits[QT_SPLIT] = splits[BT_HOR_SPLIT] = splits[TT_HOR_SPLIT] = splits[BT_VER_SPLIT] = splits[TT_VER_SPLIT] = true;
|
||||
bool can_btt = split_tree.mtt_depth < max_btd;
|
||||
|
||||
const enum split_type last_split = GET_SPLITDATA(&split_tree, split_tree.current_depth - 1);
|
||||
const enum split_type parl_split = last_split == TT_HOR_SPLIT ? BT_HOR_SPLIT : BT_VER_SPLIT;
|
||||
|
||||
// don't allow QT-splitting below a BT split
|
||||
if (split_tree.current_depth != 0 && last_split != QT_SPLIT /* && !(width > 64 || height > 64)*/) splits[QT_SPLIT] = false;
|
||||
if (width <= min_qt_size) splits[QT_SPLIT] = false;
|
||||
|
||||
if (tree_type == UVG_CHROMA_T && width <= 8) splits[QT_SPLIT] = false;
|
||||
|
||||
if (implicitSplit != NO_SPLIT)
|
||||
{
|
||||
splits[NO_SPLIT] = splits[TT_HOR_SPLIT] = splits[TT_VER_SPLIT] = false;
|
||||
|
||||
splits[BT_HOR_SPLIT] = implicitSplit == BT_HOR_SPLIT && height <= max_bt_size;
|
||||
splits[BT_VER_SPLIT] = implicitSplit == BT_VER_SPLIT && width <= max_bt_size;
|
||||
if (tree_type == UVG_CHROMA_T && width <= 8) splits[BT_VER_SPLIT] = false;
|
||||
if (!splits[BT_HOR_SPLIT] && !splits[BT_VER_SPLIT] && !splits[QT_SPLIT]) splits[QT_SPLIT] = true;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if ((last_split == TT_HOR_SPLIT || last_split == TT_VER_SPLIT) && split_tree.part_index == 1)
|
||||
{
|
||||
splits[BT_HOR_SPLIT] = parl_split != BT_HOR_SPLIT;
|
||||
splits[BT_VER_SPLIT] = parl_split != BT_VER_SPLIT;
|
||||
}
|
||||
|
||||
if (can_btt && (width <= min_bt_size && height <= min_bt_size)
|
||||
&& ((width <= min_tt_size && height <= min_tt_size)))
|
||||
{
|
||||
can_btt = false;
|
||||
}
|
||||
if (can_btt && (width > max_bt_size || height > max_bt_size)
|
||||
&& ((width > max_tt_size || height > max_tt_size)))
|
||||
{
|
||||
can_btt = false;
|
||||
}
|
||||
|
||||
if (!can_btt)
|
||||
{
|
||||
splits[BT_HOR_SPLIT] = splits[TT_HOR_SPLIT] = splits[BT_VER_SPLIT] = splits[TT_VER_SPLIT] = false;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (width > max_bt_size || height > max_bt_size)
|
||||
{
|
||||
splits[BT_HOR_SPLIT] = splits[BT_VER_SPLIT] = false;
|
||||
}
|
||||
|
||||
// specific check for BT splits
|
||||
if (height <= min_bt_size) splits[BT_HOR_SPLIT] = false;
|
||||
if (width > 64 && height <= 64) splits[BT_HOR_SPLIT] = false;
|
||||
if (tree_type == UVG_CHROMA_T && width * height <= 64) splits[BT_HOR_SPLIT] = false;
|
||||
|
||||
if (width <= min_bt_size) splits[BT_VER_SPLIT] = false;
|
||||
if (width <= 64 && height > 64) splits[BT_VER_SPLIT] = false;
|
||||
if (tree_type == UVG_CHROMA_T && (width * height <= 64 || width <= 8)) splits[BT_VER_SPLIT] = false;
|
||||
|
||||
//if (modeType == MODE_TYPE_INTER && width * height == 32) splits[BT_VER_SPLIT] = splits[BT_HOR_SPLIT] = false;
|
||||
|
||||
if (height <= 2 * min_tt_size || height > max_tt_size || width > max_tt_size)
|
||||
splits[TT_HOR_SPLIT] = false;
|
||||
if (width > 64 || height > 64) splits[TT_HOR_SPLIT] = false;
|
||||
if (tree_type == UVG_CHROMA_T && width * height <= 64 * 2) splits[TT_HOR_SPLIT] = false;
|
||||
|
||||
if (width <= 2 * min_tt_size || width > max_tt_size || height > max_tt_size)
|
||||
splits[TT_VER_SPLIT] = false;
|
||||
if (width > 64 || height > 64) splits[TT_VER_SPLIT] = false;
|
||||
if (tree_type == UVG_CHROMA_T && (width * height <= 64 * 2 || width <= 16)) splits[TT_VER_SPLIT] = false;
|
||||
|
||||
//if (modeType == MODE_TYPE_INTER && width * height == 64) splits[TT_VER_SPLIT] = splits[TT_HOR_SPLIT] = false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int uvg_count_available_edge_cus(const cu_loc_t* const cu_loc, const lcu_t* const lcu, bool left)
|
||||
{
|
||||
if ((left && cu_loc->x == 0) || (!left && cu_loc->y == 0)) {
|
||||
return 0;
|
||||
}
|
||||
if (left && cu_loc->local_x == 0) return (LCU_WIDTH - cu_loc->local_y) / 4;
|
||||
if (!left && cu_loc->local_y == 0) return (cu_loc->width) / 2;
|
||||
|
||||
int amount = left ? cu_loc->height & ~3 : cu_loc->width & ~3;
|
||||
if(left) {
|
||||
const cu_info_t* cu = LCU_GET_CU_AT_PX(lcu, cu_loc->local_x, cu_loc->local_y);
|
||||
if (cu_loc->local_y == 0 && cu_loc->local_x == 32 && cu->log2_height == 6 && cu->log2_width == 6) return 8;
|
||||
while (cu_loc->local_y + amount < LCU_WIDTH && LCU_GET_CU_AT_PX(lcu, cu_loc->local_x - TR_MIN_WIDTH, cu_loc->local_y + amount)->type != CU_NOTSET) {
|
||||
amount += TR_MIN_WIDTH;
|
||||
}
|
||||
return MAX(amount / TR_MIN_WIDTH, cu_loc->height / TR_MIN_WIDTH);
|
||||
}
|
||||
while (cu_loc->local_x + amount < LCU_WIDTH && LCU_GET_CU_AT_PX(lcu, cu_loc->local_x + amount, cu_loc->local_y - TR_MIN_WIDTH)->type != CU_NOTSET) {
|
||||
amount += TR_MIN_WIDTH;
|
||||
}
|
||||
return MAX(amount / TR_MIN_WIDTH, cu_loc->width / TR_MIN_WIDTH);
|
||||
}
|
||||
|
|
167
src/cu.h
167
src/cu.h
|
@ -77,55 +77,6 @@ typedef enum {
|
|||
MTS_TR_NUM = 6,
|
||||
} mts_idx;
|
||||
|
||||
extern const uint8_t uvg_part_mode_num_parts[];
|
||||
extern const uint8_t uvg_part_mode_offsets[][4][2];
|
||||
extern const uint8_t uvg_part_mode_sizes[][4][2];
|
||||
|
||||
/**
|
||||
* \brief Get the x coordinate of a PU.
|
||||
*
|
||||
* \param part_mode partition mode of the containing CU
|
||||
* \param cu_width width of the containing CU
|
||||
* \param cu_x x coordinate of the containing CU
|
||||
* \param i number of the PU
|
||||
* \return location of the left edge of the PU
|
||||
*/
|
||||
#define PU_GET_X(part_mode, cu_width, cu_x, i) \
|
||||
((cu_x) + uvg_part_mode_offsets[(part_mode)][(i)][0] * (cu_width) / 4)
|
||||
|
||||
/**
|
||||
* \brief Get the y coordinate of a PU.
|
||||
*
|
||||
* \param part_mode partition mode of the containing CU
|
||||
* \param cu_width width of the containing CU
|
||||
* \param cu_y y coordinate of the containing CU
|
||||
* \param i number of the PU
|
||||
* \return location of the top edge of the PU
|
||||
*/
|
||||
#define PU_GET_Y(part_mode, cu_width, cu_y, i) \
|
||||
((cu_y) + uvg_part_mode_offsets[(part_mode)][(i)][1] * (cu_width) / 4)
|
||||
|
||||
/**
|
||||
* \brief Get the width of a PU.
|
||||
*
|
||||
* \param part_mode partition mode of the containing CU
|
||||
* \param cu_width width of the containing CU
|
||||
* \param i number of the PU
|
||||
* \return width of the PU
|
||||
*/
|
||||
#define PU_GET_W(part_mode, cu_width, i) \
|
||||
(uvg_part_mode_sizes[(part_mode)][(i)][0] * (cu_width) / 4)
|
||||
|
||||
/**
|
||||
* \brief Get the height of a PU.
|
||||
*
|
||||
* \param part_mode partition mode of the containing CU
|
||||
* \param cu_width width of the containing CU
|
||||
* \param i number of the PU
|
||||
* \return height of the PU
|
||||
*/
|
||||
#define PU_GET_H(part_mode, cu_width, i) \
|
||||
(uvg_part_mode_sizes[(part_mode)][(i)][1] * (cu_width) / 4)
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// TYPES
|
||||
|
@ -142,24 +93,53 @@ enum uvg_tree_type {
|
|||
UVG_CHROMA_T = 2
|
||||
};
|
||||
|
||||
enum split_type {
|
||||
NO_SPLIT = 0,
|
||||
QT_SPLIT = 1,
|
||||
BT_HOR_SPLIT = 2,
|
||||
BT_VER_SPLIT = 3,
|
||||
TT_HOR_SPLIT = 4,
|
||||
TT_VER_SPLIT = 5,
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
uint32_t split_tree;
|
||||
uint8_t current_depth;
|
||||
uint8_t mtt_depth;
|
||||
uint8_t implicit_mtt_depth;
|
||||
uint8_t part_index;
|
||||
} split_tree_t;
|
||||
|
||||
|
||||
// Split for each depth takes three bits like xxy where if either x bit is set
|
||||
// it is a MTT split, and if there are any MTT split QT split is not allowed
|
||||
// 0x6DB6DB6 is the bit pattern 110 repeated for each of the nine 3-bit depth
// slots of split_tree, i.e. it selects the two MTT bits of every depth.
// QT split is allowed only when none of those bits is set.
#define CAN_QT_SPLIT(x) (((x) & 0x6DB6DB6) == 0)
|
||||
|
||||
/**
|
||||
* \brief Struct for CU info
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint8_t type : 3; //!< \brief block type, one of cu_type_t values
|
||||
uint8_t depth : 3; //!< \brief depth / size of this block
|
||||
uint8_t part_size : 3; //!< \brief partition mode, one of part_mode_t values
|
||||
uint8_t tr_depth : 3; //!< \brief transform depth
|
||||
uint8_t skipped : 1; //!< \brief flag to indicate this block is skipped
|
||||
uint8_t merged : 1; //!< \brief flag to indicate this block is merged
|
||||
uint8_t merge_idx : 3; //!< \brief merge index
|
||||
uint8_t tr_skip : 3; //!< \brief transform skip flag
|
||||
uint8_t tr_idx : 3; //!< \brief transform index
|
||||
uint8_t joint_cb_cr : 3; //!< \brief joint chroma residual coding
|
||||
uint8_t joint_cb_cr : 2; //!< \brief joint chroma residual coding
|
||||
|
||||
uint8_t log2_width : 3;
|
||||
uint8_t log2_height : 3;
|
||||
|
||||
uint8_t log2_chroma_width : 3;
|
||||
uint8_t log2_chroma_height : 3;
|
||||
|
||||
uint16_t cbf;
|
||||
|
||||
uint8_t root_cbf;
|
||||
|
||||
uint32_t split_tree : 3 * 9;
|
||||
|
||||
/**
|
||||
* \brief QP used for the CU.
|
||||
*
|
||||
|
@ -172,12 +152,15 @@ typedef struct
|
|||
uint8_t violates_mts_coeff_constraint : 1;
|
||||
uint8_t mts_last_scan_pos : 1;
|
||||
|
||||
uint8_t violates_lfnst_constrained_luma : 1; // Two types, luma and chroma. Luma index is 0.
|
||||
uint8_t violates_lfnst_constrained_chroma : 1; // Two types, luma and chroma. Luma index is 0.
|
||||
uint8_t violates_lfnst_constrained_luma : 1;
|
||||
uint8_t violates_lfnst_constrained_chroma : 1;
|
||||
uint8_t lfnst_last_scan_pos : 1;
|
||||
uint8_t lfnst_idx : 2;
|
||||
uint8_t cr_lfnst_idx : 2;
|
||||
|
||||
uint8_t luma_deblocking : 2;
|
||||
uint8_t chroma_deblocking : 2;
|
||||
|
||||
union {
|
||||
struct {
|
||||
int8_t mode;
|
||||
|
@ -185,6 +168,9 @@ typedef struct
|
|||
uint8_t multi_ref_idx;
|
||||
int8_t mip_flag;
|
||||
int8_t mip_is_transposed;
|
||||
int8_t isp_mode;
|
||||
uint8_t isp_cbfs : 4;
|
||||
uint8_t isp_index : 2;
|
||||
} intra;
|
||||
struct {
|
||||
mv_t mv[2][2]; // \brief Motion vectors for L0 and L1
|
||||
|
@ -200,12 +186,25 @@ typedef struct
|
|||
typedef struct {
|
||||
int16_t x;
|
||||
int16_t y;
|
||||
uint8_t local_x;
|
||||
uint8_t local_y;
|
||||
int8_t width;
|
||||
int8_t height;
|
||||
int8_t chroma_width;
|
||||
int8_t chroma_height;
|
||||
} cu_loc_t;
|
||||
|
||||
void uvg_cu_loc_ctor(cu_loc_t *loc, int x, int y, int width, int height);
|
||||
typedef struct encoder_state_t encoder_state_t;
|
||||
|
||||
int uvg_get_split_locs(
|
||||
const cu_loc_t* const origin,
|
||||
enum split_type split,
|
||||
cu_loc_t out[4],
|
||||
uint8_t* separate_chroma);
|
||||
int uvg_get_possible_splits(const encoder_state_t* const state,
|
||||
const cu_loc_t* const cu_loc, split_tree_t split_tree, enum uvg_tree_type tree_type, bool splits[6]);
|
||||
|
||||
|
||||
#define CU_GET_MV_CAND(cu_info_ptr, reflist) \
|
||||
(((reflist) == 0) ? (cu_info_ptr)->inter.mv_cand0 : (cu_info_ptr)->inter.mv_cand1)
|
||||
|
@ -219,7 +218,7 @@ typedef struct {
|
|||
} \
|
||||
} while (0)
|
||||
|
||||
#define CHECKPOINT_CU(prefix_str, cu) CHECKPOINT(prefix_str " type=%d depth=%d part_size=%d tr_depth=%d coded=%d " \
|
||||
#define CHECKPOINT_CU(prefix_str, cu) CHECKPOINT(prefix_str " type=%d part_size=%d coded=%d " \
|
||||
"skipped=%d merged=%d merge_idx=%d cbf.y=%d cbf.u=%d cbf.v=%d " \
|
||||
"intra[0].cost=%u intra[0].bitcost=%u intra[0].mode=%d intra[0].mode_chroma=%d intra[0].tr_skip=%d " \
|
||||
"intra[1].cost=%u intra[1].bitcost=%u intra[1].mode=%d intra[1].mode_chroma=%d intra[1].tr_skip=%d " \
|
||||
|
@ -227,7 +226,7 @@ typedef struct {
|
|||
"intra[3].cost=%u intra[3].bitcost=%u intra[3].mode=%d intra[3].mode_chroma=%d intra[3].tr_skip=%d " \
|
||||
"inter.cost=%u inter.bitcost=%u inter.mv[0]=%d inter.mv[1]=%d inter.mvd[0]=%d inter.mvd[1]=%d " \
|
||||
"inter.mv_cand=%d inter.mv_ref=%d inter.mv_dir=%d inter.mode=%d" \
|
||||
, (cu).type, (cu).depth, (cu).part_size, (cu).tr_depth, (cu).coded, \
|
||||
, (cu).type, (cu).part_size, (cu).coded, \
|
||||
(cu).skipped, (cu).merged, (cu).merge_idx, (cu).cbf.y, (cu).cbf.u, (cu).cbf.v, \
|
||||
(cu).intra[0].cost, (cu).intra[0].bitcost, (cu).intra[0].mode, (cu).intra[0].mode_chroma, (cu).intra[0].tr_skip, \
|
||||
(cu).intra[1].cost, (cu).intra[1].bitcost, (cu).intra[1].mode, (cu).intra[1].mode_chroma, (cu).intra[1].tr_skip, \
|
||||
|
@ -246,6 +245,7 @@ typedef struct cu_array_t {
|
|||
} cu_array_t;
|
||||
|
||||
cu_info_t* uvg_cu_array_at(cu_array_t *cua, unsigned x_px, unsigned y_px);
|
||||
void uvg_get_isp_cu_arr_coords(int* x, int* y, int dim);
|
||||
const cu_info_t* uvg_cu_array_at_const(const cu_array_t *cua, unsigned x_px, unsigned y_px);
|
||||
|
||||
cu_array_t * uvg_cu_array_alloc(const int width, const int height);
|
||||
|
@ -382,8 +382,9 @@ typedef struct {
|
|||
cu_info_t cu[LCU_T_CU_WIDTH * LCU_T_CU_WIDTH + 1];
|
||||
} lcu_t;
|
||||
|
||||
void uvg_cu_array_copy_from_lcu(cu_array_t* dst, int dst_x, int dst_y, const lcu_t *src, enum uvg_tree_type
|
||||
tree_type);
|
||||
void uvg_cu_array_copy_from_lcu(cu_array_t* dst, int dst_x, int dst_y, const lcu_t *src);
|
||||
|
||||
int uvg_count_available_edge_cus(const cu_loc_t* const cu_loc, const lcu_t* const lcu, bool left);
|
||||
|
||||
/**
|
||||
* \brief Return pointer to the top right reference CU.
|
||||
|
@ -412,9 +413,11 @@ void uvg_cu_array_copy_from_lcu(cu_array_t* dst, int dst_x, int dst_y, const lcu
|
|||
*/
|
||||
static INLINE void copy_coeffs(const coeff_t *__restrict src,
|
||||
coeff_t *__restrict dest,
|
||||
size_t width)
|
||||
size_t width, size_t height, const int lcu_width)
|
||||
{
|
||||
memcpy(dest, src, width * width * sizeof(coeff_t));
|
||||
for (int j = 0; j < height; ++j) {
|
||||
memcpy(dest + j * lcu_width, src + j * lcu_width, width * sizeof(coeff_t));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -554,56 +557,52 @@ static INLINE unsigned xy_to_zorder(unsigned width, unsigned x, unsigned y)
|
|||
} while(0)
|
||||
|
||||
|
||||
#define NUM_CBF_DEPTHS 5
|
||||
static const uint16_t cbf_masks[NUM_CBF_DEPTHS] = { 0x1f, 0x0f, 0x07, 0x03, 0x1 };
|
||||
|
||||
/**
|
||||
* Check if CBF in a given level >= depth is true.
|
||||
*/
|
||||
static INLINE int cbf_is_set(uint16_t cbf, int depth, color_t plane)
|
||||
static INLINE int cbf_is_set(uint16_t cbf, color_t plane)
|
||||
{
|
||||
return (cbf & (cbf_masks[depth] << (NUM_CBF_DEPTHS * plane))) != 0;
|
||||
return (cbf & (1 << (plane))) != 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if CBF in a given level >= depth is true.
|
||||
*/
|
||||
static INLINE int cbf_is_set_any(uint16_t cbf, int depth)
|
||||
static INLINE int cbf_is_set_any(uint16_t cbf)
|
||||
{
|
||||
return cbf_is_set(cbf, depth, COLOR_Y) ||
|
||||
cbf_is_set(cbf, depth, COLOR_U) ||
|
||||
cbf_is_set(cbf, depth, COLOR_V);
|
||||
return cbf_is_set(cbf, COLOR_Y) ||
|
||||
cbf_is_set(cbf, COLOR_U) ||
|
||||
cbf_is_set(cbf, COLOR_V);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set CBF in a level to true.
|
||||
*/
|
||||
static INLINE void cbf_set(uint16_t *cbf, int depth, color_t plane)
|
||||
static INLINE void cbf_set(uint16_t *cbf, color_t plane)
|
||||
{
|
||||
// Set the CBF bit corresponding to the given plane.
|
||||
*cbf |= (0x10 >> depth) << (NUM_CBF_DEPTHS * plane);
|
||||
*cbf |= (1) << (plane);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set CBF in a level to true if it is set at a lower level in any of
|
||||
* the child_cbfs.
|
||||
*/
|
||||
static INLINE void cbf_set_conditionally(uint16_t *cbf, uint16_t child_cbfs[3], int depth, color_t plane)
|
||||
static INLINE void cbf_set_conditionally(uint16_t *cbf, uint16_t child_cbfs[3], color_t plane)
|
||||
{
|
||||
bool child_cbf_set = cbf_is_set(child_cbfs[0], depth + 1, plane) ||
|
||||
cbf_is_set(child_cbfs[1], depth + 1, plane) ||
|
||||
cbf_is_set(child_cbfs[2], depth + 1, plane);
|
||||
bool child_cbf_set = cbf_is_set(child_cbfs[0], plane) ||
|
||||
cbf_is_set(child_cbfs[1], plane) ||
|
||||
cbf_is_set(child_cbfs[2], plane);
|
||||
if (child_cbf_set) {
|
||||
cbf_set(cbf, depth, plane);
|
||||
cbf_set(cbf, plane);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set CBF in a levels <= depth to false.
|
||||
*/
|
||||
static INLINE void cbf_clear(uint16_t *cbf, int depth, color_t plane)
|
||||
static INLINE void cbf_clear(uint16_t *cbf, color_t plane)
|
||||
{
|
||||
*cbf &= ~(cbf_masks[depth] << (NUM_CBF_DEPTHS * plane));
|
||||
*cbf &= ~(1 << (plane));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -611,11 +610,11 @@ static INLINE void cbf_clear(uint16_t *cbf, int depth, color_t plane)
|
|||
*/
|
||||
static INLINE void cbf_copy(uint16_t *cbf, uint16_t src, color_t plane)
|
||||
{
|
||||
cbf_clear(cbf, 0, plane);
|
||||
*cbf |= src & (cbf_masks[0] << (NUM_CBF_DEPTHS * plane));
|
||||
cbf_clear(cbf, plane);
|
||||
*cbf |= src & (1 << plane);
|
||||
}
|
||||
|
||||
#define GET_SPLITDATA(CU,curDepth) ((CU)->depth > curDepth)
|
||||
#define SET_SPLITDATA(CU,flag) { (CU)->split=(flag); }
|
||||
#define GET_SPLITDATA(CU,curDepth) ((CU)->split_tree >> ((MAX((curDepth), 0) * 3)) & 7)
|
||||
#define PU_IS_TU(cu) ((cu)->log2_width <= TR_MAX_LOG2_SIZE && (cu)->log2_height <= TR_MAX_LOG2_SIZE)
|
||||
|
||||
#endif
|
||||
|
|
1139
src/dep_quant.c
Normal file
1139
src/dep_quant.c
Normal file
File diff suppressed because it is too large
Load diff
247
src/dep_quant.h
Normal file
247
src/dep_quant.h
Normal file
|
@ -0,0 +1,247 @@
|
|||
/*****************************************************************************
|
||||
* This file is part of uvg266 VVC encoder.
|
||||
*
|
||||
* Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright notice, this
|
||||
* list of conditions and the following disclaimer in the documentation and/or
|
||||
* other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
|
||||
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef DEP_QUANT_H_
|
||||
#define DEP_QUANT_H_
|
||||
|
||||
#include "cu.h"
|
||||
#include "global.h"
|
||||
|
||||
#define SM_NUM_CTX_SETS_SIG 3
|
||||
#define SM_NUM_CTX_SETS_GTX 2
|
||||
#define SM_MAX_NUM_SIG_SBB_CTX 2
|
||||
#define SM_MAX_NUM_SIG_CTX 12
|
||||
#define SM_MAX_NUM_GTX_CTX 21
|
||||
#define SCALE_BITS 15
|
||||
#define RICEMAX 32
|
||||
|
||||
typedef struct encoder_control_t encoder_control_t;
|
||||
|
||||
enum ScanPosType { SCAN_ISCSBB = 0, SCAN_SOCSBB = 1, SCAN_EOCSBB = 2 };
|
||||
|
||||
struct dep_quant_scan_info
|
||||
{
|
||||
uint8_t sig_ctx_offset[2];
|
||||
uint8_t gtx_ctx_offset[2];
|
||||
uint16_t cg_pos;
|
||||
uint16_t pos_y;
|
||||
uint16_t pos_x;
|
||||
uint8_t next_sbb_right;
|
||||
uint8_t next_sbb_below;
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int m_QShift;
|
||||
int64_t m_QAdd;
|
||||
int64_t m_QScale;
|
||||
int64_t m_maxQIdx;
|
||||
int64_t m_thresLast;
|
||||
int64_t m_thresSSbb;
|
||||
// distortion normalization
|
||||
int m_DistShift;
|
||||
int64_t m_DistAdd;
|
||||
int64_t m_DistStepAdd;
|
||||
int64_t m_DistOrgFact;
|
||||
bool needs_init;
|
||||
} quant_block;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int32_t m_lastBitsX[TR_MAX_WIDTH];
|
||||
int32_t m_lastBitsY[TR_MAX_WIDTH];
|
||||
uint32_t m_sigSbbFracBits[SM_MAX_NUM_SIG_SBB_CTX][2];
|
||||
uint32_t m_sigFracBits[SM_NUM_CTX_SETS_SIG][SM_MAX_NUM_SIG_CTX][2];
|
||||
int32_t m_gtxFracBits[SM_MAX_NUM_GTX_CTX][6];
|
||||
bool needs_init;
|
||||
} rate_estimator_t;
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint8_t num;
|
||||
uint8_t inPos[5];
|
||||
} NbInfoSbb;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint16_t maxDist;
|
||||
uint16_t num;
|
||||
uint16_t outPos[5];
|
||||
} NbInfoOut;
|
||||
|
||||
typedef struct {
|
||||
int32_t absLevel[4];
|
||||
int64_t deltaDist[4];
|
||||
} PQData;
|
||||
|
||||
typedef struct {
|
||||
int64_t ALIGNED(32) rdCost[8];
|
||||
int32_t ALIGNED(32) absLevel[8];
|
||||
int32_t ALIGNED(32) prevId[8];
|
||||
} Decision;
|
||||
|
||||
|
||||
typedef struct {
|
||||
uint8_t* sbbFlags;
|
||||
uint8_t* levels;
|
||||
} SbbCtx;
|
||||
|
||||
typedef struct {
|
||||
const NbInfoOut* m_nbInfo;
|
||||
uint32_t m_sbbFlagBits[2][2];
|
||||
SbbCtx m_allSbbCtx[2];
|
||||
int m_curr_sbb_ctx_offset;
|
||||
int m_prev_sbb_ctx_offset;
|
||||
uint8_t sbb_memory[8 * 1024];
|
||||
uint8_t level_memory[8 * TR_MAX_WIDTH * TR_MAX_WIDTH];
|
||||
int num_coeff;
|
||||
} common_context;
|
||||
|
||||
|
||||
typedef struct {
|
||||
int64_t m_rdCost;
|
||||
uint16_t m_absLevelsAndCtxInit[24]; // 16x8bit for abs levels + 16x16bit for ctx init id
|
||||
int8_t m_numSigSbb;
|
||||
int m_remRegBins;
|
||||
int8_t m_refSbbCtxId;
|
||||
uint32_t m_sbbFracBits[2];
|
||||
uint32_t m_sigFracBits[2];
|
||||
int32_t m_coeffFracBits[6];
|
||||
int8_t m_goRicePar;
|
||||
int8_t m_goRiceZero;
|
||||
int8_t m_stateId;
|
||||
uint32_t* m_sigFracBitsArray[12];
|
||||
int32_t* m_gtxFracBitsArray[21];
|
||||
common_context* m_commonCtx;
|
||||
|
||||
unsigned effWidth;
|
||||
unsigned effHeight;
|
||||
} depquant_state;
|
||||
typedef struct {
|
||||
int64_t ALIGNED(32) m_rdCost[12];
|
||||
uint8_t ALIGNED(32) m_absLevels[3][16 * 4];
|
||||
uint16_t ALIGNED(32) m_ctxInit[3][16 * 4];
|
||||
int8_t ALIGNED(16) m_numSigSbb[12];
|
||||
int ALIGNED(32) m_remRegBins[12];
|
||||
int8_t ALIGNED(16) m_refSbbCtxId[12];
|
||||
uint32_t ALIGNED(32) m_sbbFracBits[12][2];
|
||||
uint32_t ALIGNED(32) m_sigFracBits[12][2];
|
||||
int32_t ALIGNED(32) m_coeffFracBits[12][6];
|
||||
int8_t ALIGNED(16) m_goRicePar[12];
|
||||
int8_t ALIGNED(16) m_goRiceZero[12];
|
||||
int8_t ALIGNED(16) m_stateId[12];
|
||||
uint32_t ALIGNED(32) m_sigFracBitsArray[12][12][2];
|
||||
int32_t ALIGNED(32) m_gtxFracBitsArray[21][6];
|
||||
common_context* m_commonCtx;
|
||||
|
||||
unsigned effWidth;
|
||||
unsigned effHeight;
|
||||
|
||||
bool all_gte_four;
|
||||
bool all_lt_four;
|
||||
} all_depquant_states;
|
||||
|
||||
typedef struct {
|
||||
common_context m_common_context;
|
||||
all_depquant_states m_allStates;
|
||||
int m_curr_state_offset;
|
||||
int m_prev_state_offset;
|
||||
int m_skip_state_offset;
|
||||
depquant_state m_startState;
|
||||
quant_block* m_quant;
|
||||
Decision m_trellis[TR_MAX_WIDTH * TR_MAX_WIDTH];
|
||||
} context_store;
|
||||
|
||||
|
||||
int uvg_init_nb_info(encoder_control_t* encoder);
|
||||
void uvg_dealloc_nb_info(encoder_control_t* encoder);
|
||||
|
||||
|
||||
void uvg_dep_quant_dequant(
|
||||
const encoder_state_t* const state,
|
||||
const int block_type,
|
||||
const int width,
|
||||
const int height,
|
||||
const color_t compID,
|
||||
coeff_t* quant_coeff,
|
||||
coeff_t* coeff,
|
||||
bool enableScalingLists);
|
||||
|
||||
int uvg_dep_quant(
|
||||
const encoder_state_t* const state,
|
||||
const cu_info_t* const cur_tu,
|
||||
const int width,
|
||||
const int height,
|
||||
const coeff_t* srcCoeff,
|
||||
coeff_t* coeff_out,
|
||||
const color_t compID,
|
||||
enum uvg_tree_type tree_type,
|
||||
int* absSum,
|
||||
const bool enableScalingLists);
|
||||
|
||||
|
||||
void uvg_dep_quant_update_state(
|
||||
context_store* ctxs,
|
||||
int numIPos,
|
||||
const uint32_t scan_pos,
|
||||
const Decision* decisions,
|
||||
const uint32_t sigCtxOffsetNext,
|
||||
const uint32_t gtxCtxOffsetNext,
|
||||
const NbInfoSbb next_nb_info_ssb,
|
||||
const int baseLevel,
|
||||
const bool extRiceFlag,
|
||||
int decision_id);
|
||||
|
||||
|
||||
void uvg_dep_quant_update_state_eos(
|
||||
context_store* ctxs,
|
||||
const uint32_t scan_pos,
|
||||
const uint32_t cg_pos,
|
||||
const uint32_t sigCtxOffsetNext,
|
||||
const uint32_t gtxCtxOffsetNext,
|
||||
const uint32_t width_in_sbb,
|
||||
const uint32_t height_in_sbb,
|
||||
const uint32_t next_sbb_right,
|
||||
const uint32_t next_sbb_below,
|
||||
const Decision* decisions,
|
||||
int decision_id);
|
||||
|
||||
void uvg_dep_quant_check_rd_costs(
|
||||
const all_depquant_states* const state,
|
||||
const enum ScanPosType spt,
|
||||
const PQData* pqDataA,
|
||||
Decision* decisions,
|
||||
const int decisionA,
|
||||
const int decisionB,
|
||||
const int state_offset);
|
||||
#endif
|
File diff suppressed because it is too large
Load diff
|
@ -40,30 +40,29 @@
|
|||
#include "encoderstate.h"
|
||||
#include "global.h"
|
||||
|
||||
bool uvg_is_mts_allowed(const encoder_state_t* const state, cu_info_t* const pred_cu);
|
||||
bool uvg_is_mts_allowed(const encoder_state_t* const state, cu_info_t* const pred_cu, const cu_loc_t*
|
||||
const cu_loc);
|
||||
bool uvg_is_lfnst_allowed(
|
||||
const encoder_state_t* const state,
|
||||
const cu_info_t* const pred_cu,
|
||||
const int width,
|
||||
const int height,
|
||||
const int x,
|
||||
const int y,
|
||||
enum uvg_tree_type tree_type,
|
||||
const color_t color,
|
||||
const lcu_t* lcu);
|
||||
const cu_loc_t* const cu_loc, const lcu_t* const lcu);
|
||||
|
||||
void uvg_encode_coding_tree(
|
||||
encoder_state_t * const state,
|
||||
uint16_t x_ctb,
|
||||
uint16_t y_ctb,
|
||||
uint8_t depth,
|
||||
lcu_coeff_t *coeff,
|
||||
enum uvg_tree_type tree_type);
|
||||
enum uvg_tree_type tree_type,
|
||||
const cu_loc_t* const cu_loc,
|
||||
const cu_loc_t* const chroma_loc,
|
||||
split_tree_t split_tree,
|
||||
bool has_chroma);
|
||||
|
||||
void uvg_encode_ts_residual(encoder_state_t* const state,
|
||||
cabac_data_t* const cabac,
|
||||
const coeff_t* coeff,
|
||||
uint32_t width,
|
||||
uint32_t height,
|
||||
uint8_t type,
|
||||
int8_t scan_mode,
|
||||
double* bits);
|
||||
|
@ -77,41 +76,47 @@ void uvg_encode_mvd(encoder_state_t * const state,
|
|||
double uvg_mock_encode_coding_unit(
|
||||
encoder_state_t* const state,
|
||||
cabac_data_t* cabac,
|
||||
int x,
|
||||
int y,
|
||||
int depth,
|
||||
const cu_loc_t* const cu_loc,
|
||||
const cu_loc_t* const chroma_loc,
|
||||
lcu_t* lcu,
|
||||
cu_info_t* cur_cu,
|
||||
enum uvg_tree_type tree_type);
|
||||
enum uvg_tree_type tree_type,
|
||||
const split_tree_t split_tree);
|
||||
|
||||
int uvg_encode_inter_prediction_unit(encoder_state_t* const state,
|
||||
int uvg_encode_inter_prediction_unit(
|
||||
encoder_state_t* const state,
|
||||
cabac_data_t* const cabac,
|
||||
const cu_info_t* const cur_cu,
|
||||
int x, int y, int width, int height,
|
||||
int depth,
|
||||
lcu_t* lcu,
|
||||
double* bits_out,
|
||||
const cu_loc_t* const cu_loc);
|
||||
|
||||
void uvg_encode_intra_luma_coding_unit(
|
||||
const encoder_state_t* const state,
|
||||
cabac_data_t* const cabac,
|
||||
const cu_info_t* const cur_cu,
|
||||
const cu_loc_t* const cu_loc,
|
||||
const lcu_t* lcu,
|
||||
double* bits_out);
|
||||
|
||||
void uvg_encode_intra_luma_coding_unit(const encoder_state_t* const state,
|
||||
cabac_data_t* const cabac,
|
||||
const cu_info_t* const cur_cu,
|
||||
int x, int y, int depth, const lcu_t* lcu, double* bits_out);
|
||||
|
||||
|
||||
bool uvg_write_split_flag(
|
||||
uint8_t uvg_write_split_flag(
|
||||
const encoder_state_t* const state,
|
||||
cabac_data_t* cabac,
|
||||
const cu_info_t* left_cu,
|
||||
const cu_info_t* above_cu,
|
||||
uint8_t split_flag,
|
||||
int depth,
|
||||
int cu_width,
|
||||
int x,
|
||||
int y,
|
||||
const cu_loc_t* const cu_loc,
|
||||
split_tree_t,
|
||||
enum uvg_tree_type tree_type,
|
||||
bool* is_implicit_out,
|
||||
double* bits_out);
|
||||
|
||||
void uvg_encode_last_significant_xy(cabac_data_t * const cabac,
|
||||
uint8_t lastpos_x, uint8_t lastpos_y,
|
||||
uint8_t width, uint8_t height,
|
||||
uint8_t type, uint8_t scan, double* bits_out);
|
||||
|
||||
void uvg_get_sub_coeff(const coeff_t* dst, const coeff_t* const src,
|
||||
const int lcu_x, const int lcu_y,
|
||||
const int block_w, const int block_h,
|
||||
const int lcu_width);
|
||||
|
|
|
@ -320,6 +320,13 @@ encoder_control_t* uvg_encoder_control_init(const uvg_config *const cfg)
|
|||
encoder->scaling_list.use_default_list = 1;
|
||||
}
|
||||
|
||||
if(cfg->dep_quant) {
|
||||
if(!uvg_init_nb_info(encoder)) {
|
||||
fprintf(stderr, "Could not initialize nb info.\n");
|
||||
goto init_failed;
|
||||
}
|
||||
}
|
||||
|
||||
// ROI / delta QP
|
||||
if (cfg->roi.file_path) {
|
||||
const char *mode[2] = { "r", "rb" };
|
||||
|
@ -379,10 +386,6 @@ encoder_control_t* uvg_encoder_control_init(const uvg_config *const cfg)
|
|||
goto init_failed;
|
||||
}
|
||||
|
||||
// NOTE: When tr_depth_inter is equal to 0, the transform is still split
|
||||
// for SMP and AMP partition units.
|
||||
encoder->tr_depth_inter = 0;
|
||||
|
||||
//Tiles
|
||||
encoder->tiles_enable = encoder->cfg.tiles_width_count > 1 ||
|
||||
encoder->cfg.tiles_height_count > 1;
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
* Initialization of encoder_control_t.
|
||||
*/
|
||||
|
||||
#include "dep_quant.h"
|
||||
#include "global.h" // IWYU pragma: keep
|
||||
#include "uvg266.h"
|
||||
#include "scalinglist.h"
|
||||
|
@ -98,6 +99,10 @@ typedef struct encoder_control_t
|
|||
//scaling list
|
||||
scaling_list_t scaling_list;
|
||||
|
||||
NbInfoSbb* m_scanId2NbInfoSbbArray[7 + 1][7 + 1];
|
||||
NbInfoOut* m_scanId2NbInfoOutArray[7 + 1][7 + 1];
|
||||
struct dep_quant_scan_info* scan_info[7 + 1][7 + 1];
|
||||
|
||||
//spec: references to variables defined in Rec. ITU-T H.265 (04/2013)
|
||||
int8_t tiles_enable; /*!<spec: tiles_enabled */
|
||||
|
||||
|
@ -132,8 +137,6 @@ typedef struct encoder_control_t
|
|||
|
||||
FILE *roi_file;
|
||||
|
||||
int tr_depth_inter;
|
||||
|
||||
//! pic_parameter_set
|
||||
struct {
|
||||
uint8_t dependent_slice_segments_enabled_flag;
|
||||
|
|
|
@ -528,48 +528,31 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream,
|
|||
WRITE_UE(stream, MIN_SIZE-2, "log2_min_luma_coding_block_size_minus2"); // Min size 2^3 = 8x8
|
||||
// if(!no_partition_constraints_override_constraint_flag)
|
||||
WRITE_U(stream, 0, 1, "partition_constraints_override_enabled_flag");
|
||||
WRITE_UE(stream, 0, "sps_log2_diff_min_qt_min_cb_intra_slice_luma");
|
||||
WRITE_UE(stream, 0, "sps_max_mtt_hierarchy_depth_intra_slice_luma");
|
||||
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[0]] - MIN_SIZE, "sps_log2_diff_min_qt_min_cb_intra_slice_luma");
|
||||
WRITE_UE(stream, encoder->cfg.max_btt_depth[0], "sps_max_mtt_hierarchy_depth_intra_slice_luma");
|
||||
if (encoder->cfg.max_btt_depth[0]) {
|
||||
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.max_bt_size[0]] - uvg_g_convert_to_log2[encoder->cfg.min_qt_size[0]], "sps_log2_diff_max_bt_min_qt_intra_slice_luma");
|
||||
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.max_tt_size[0]] - uvg_g_convert_to_log2[encoder->cfg.min_qt_size[0]], "sps_log2_diff_max_tt_min_qt_intra_slice_luma");
|
||||
}
|
||||
|
||||
if (encoder->chroma_format != UVG_CSP_400)
|
||||
{
|
||||
WRITE_U(stream, encoder->cfg.dual_tree, 1, "qtbtt_dual_tree_intra_flag");
|
||||
}
|
||||
if (encoder->cfg.dual_tree) {
|
||||
WRITE_UE(stream, 0, "sps_log2_diff_min_qt_min_cb_intra_slice_chroma");
|
||||
WRITE_UE(stream, 0, "sps_max_mtt_hierarchy_depth_intra_slice_chroma");
|
||||
if (0 /*sps_max_mtt_hierarchy_depth_intra_slice_chroma != 0*/) {
|
||||
WRITE_UE(stream, 0, "sps_log2_diff_max_bt_min_qt_intra_slice_chroma");
|
||||
WRITE_UE(stream, 0, "sps_log2_diff_max_tt_min_qt_intra_slice_chroma");
|
||||
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[2]] - MIN_SIZE, "sps_log2_diff_min_qt_min_cb_intra_slice_chroma");
|
||||
WRITE_UE(stream, encoder->cfg.max_btt_depth[2], "sps_max_mtt_hierarchy_depth_intra_slice_chroma");
|
||||
if (encoder->cfg.max_btt_depth[2]) {
|
||||
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.max_bt_size[2]] - uvg_g_convert_to_log2[encoder->cfg.min_qt_size[2]], "sps_log2_diff_max_bt_min_qt_intra_slice_chroma");
|
||||
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.max_tt_size[2]] - uvg_g_convert_to_log2[encoder->cfg.min_qt_size[2]], "sps_log2_diff_max_tt_min_qt_intra_slice_chroma");
|
||||
}
|
||||
}
|
||||
WRITE_UE(stream, 0, "sps_log2_diff_min_qt_min_cb_inter_slice");
|
||||
WRITE_UE(stream, 0, "sps_max_mtt_hierarchy_depth_inter_slice");
|
||||
|
||||
|
||||
#if 0 // mtt depth intra
|
||||
if (max_mtt_depth_intra != 0) {
|
||||
WRITE_UE(stream, 0, "sps_log2_diff_max_bt_min_qt_intra_tile_group_luma");
|
||||
WRITE_UE(stream, 0, "sps_log2_diff_max_tt_min_qt_intra_tile_group_luma");
|
||||
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.min_qt_size[1]] - MIN_SIZE, "sps_log2_diff_min_qt_min_cb_inter_slice");
|
||||
WRITE_UE(stream, encoder->cfg.max_btt_depth[1], "sps_max_mtt_hierarchy_depth_inter_slice");
|
||||
if (encoder->cfg.max_btt_depth[1] != 0) {
|
||||
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.max_bt_size[1]] - uvg_g_convert_to_log2[encoder->cfg.min_qt_size[1]], "sps_log2_diff_max_bt_min_qt_inter_tile_group");
|
||||
WRITE_UE(stream, uvg_g_convert_to_log2[encoder->cfg.max_tt_size[1]] - uvg_g_convert_to_log2[encoder->cfg.min_qt_size[1]], "sps_log2_diff_max_tt_min_qt_inter_tile_group");
|
||||
}
|
||||
#endif
|
||||
#if 0 // mtt depth inter
|
||||
if (max_mtt_depth_inter != 0) {
|
||||
WRITE_UE(stream, 0, "sps_log2_diff_max_bt_min_qt_inter_tile_group");
|
||||
WRITE_UE(stream, 0, "sps_log2_diff_max_tt_min_qt_inter_tile_group");
|
||||
}
|
||||
#endif
|
||||
#if 0 // Dual Tree
|
||||
if (encoder->cfg.dual_i_tree) {
|
||||
WRITE_UE(stream, 0, "sps_log2_diff_min_qt_min_cb_intra_tile_group_chroma");
|
||||
WRITE_UE(stream, 0, "sps_max_mtt_hierarchy_depth_intra_tile_group_chroma");
|
||||
|
||||
if (max_mtt_depth_intra != 0) {
|
||||
WRITE_UE(stream, 0, "sps_log2_diff_max_bt_min_qt_intra_tile_group_chroma");
|
||||
WRITE_UE(stream, 0, "sps_log2_diff_max_tt_min_qt_intra_tile_group_chroma");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (LCU_WIDTH > 32)
|
||||
WRITE_U(stream, (TR_MAX_LOG2_SIZE - 5) ? 1 : 0, 1, "sps_max_luma_transform_size_64_flag");
|
||||
|
@ -665,7 +648,7 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream,
|
|||
|
||||
WRITE_UE(stream, encoder->cfg.log2_parallel_merge_level-2, "log2_parallel_merge_level_minus2");
|
||||
|
||||
WRITE_U(stream, 0, 1, "sps_isp_enabled_flag");
|
||||
WRITE_U(stream, encoder->cfg.isp, 1, "sps_isp_enabled_flag");
|
||||
|
||||
if (state->encoder_control->cfg.mrl) {
|
||||
WRITE_U(stream, 1, 1, "sps_mrl_enabled_flag");
|
||||
|
@ -706,7 +689,7 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream,
|
|||
|
||||
WRITE_U(stream, 0, 1, "scaling_list_enabled_flag");
|
||||
|
||||
WRITE_U(stream, 0, 1, "pic_dep_quant_enabled_flag");
|
||||
WRITE_U(stream, encoder->cfg.dep_quant, 1, "pic_dep_quant_enabled_flag");
|
||||
|
||||
WRITE_U(stream, encoder->cfg.signhide_enable, 1, "pic_sign_data_hiding_enabled_flag");
|
||||
|
||||
|
@ -1142,7 +1125,7 @@ static void uvg_encoder_state_write_bitstream_picture_header(
|
|||
WRITE_U(stream, 0, 1, "ph_mvd_l1_zero_flag");
|
||||
}
|
||||
|
||||
if (encoder->cfg.jccr) {
|
||||
if (encoder->cfg.jccr && encoder->chroma_format != UVG_CSP_400) {
|
||||
WRITE_U(stream, state->frame->jccr_sign, 1, "ph_joint_cbcr_sign_flag");
|
||||
}
|
||||
// END PICTURE HEADER
|
||||
|
@ -1375,11 +1358,14 @@ void uvg_encoder_state_write_bitstream_slice_header(
|
|||
}
|
||||
|
||||
// ToDo: depquant
|
||||
if (encoder->cfg.dep_quant) {
|
||||
WRITE_U(stream, 1, 1, "sh_dep_quant_used_flag");
|
||||
}
|
||||
|
||||
if (state->encoder_control->cfg.signhide_enable) {
|
||||
if (state->encoder_control->cfg.signhide_enable && !encoder->cfg.dep_quant) {
|
||||
WRITE_U(stream, 1, 1, "sh_sign_data_hiding_used_flag");
|
||||
}
|
||||
if (state->encoder_control->cfg.trskip_enable && !state->encoder_control->cfg.signhide_enable /* && !cfg.dep_quant*/)
|
||||
if (state->encoder_control->cfg.trskip_enable && !state->encoder_control->cfg.signhide_enable && !encoder->cfg.dep_quant)
|
||||
{
|
||||
// TODO: find out what this is actually about and parametrize it
|
||||
WRITE_U(stream, 0, 1, "sh_ts_residual_coding_disabled_flag");
|
||||
|
|
|
@ -627,43 +627,52 @@ static void encode_sao(encoder_state_t * const state,
|
|||
* \param prev_qp -1 if QP delta has not been coded in current QG,
|
||||
* otherwise the QP of the current QG
|
||||
*/
|
||||
static void set_cu_qps(encoder_state_t *state, int x, int y, int depth, int *last_qp, int *prev_qp)
|
||||
static void set_cu_qps(encoder_state_t *state, const cu_loc_t* const cu_loc, int *last_qp, int *prev_qp, const
|
||||
int depth)
|
||||
{
|
||||
|
||||
// Stop recursion if the CU is completely outside the frame.
|
||||
if (x >= state->tile->frame->width || y >= state->tile->frame->height) return;
|
||||
if (cu_loc->x >= state->tile->frame->width || cu_loc->y >= state->tile->frame->height) return;
|
||||
|
||||
cu_info_t *cu = uvg_cu_array_at(state->tile->frame->cu_array, x, y);
|
||||
const int cu_width = LCU_WIDTH >> depth;
|
||||
cu_info_t *cu = uvg_cu_array_at(state->tile->frame->cu_array, cu_loc->x, cu_loc->y);
|
||||
const int width = 1 << cu->log2_width;
|
||||
|
||||
if (depth <= state->frame->max_qp_delta_depth) {
|
||||
*prev_qp = -1;
|
||||
}
|
||||
|
||||
if (cu->depth > depth) {
|
||||
if (cu_loc->width > width) {
|
||||
// Recursively process sub-CUs.
|
||||
const int d = cu_width >> 1;
|
||||
set_cu_qps(state, x, y, depth + 1, last_qp, prev_qp);
|
||||
set_cu_qps(state, x + d, y, depth + 1, last_qp, prev_qp);
|
||||
set_cu_qps(state, x, y + d, depth + 1, last_qp, prev_qp);
|
||||
set_cu_qps(state, x + d, y + d, depth + 1, last_qp, prev_qp);
|
||||
const int half_width = cu_loc->width >> 1;
|
||||
const int half_height = cu_loc->height >> 1;
|
||||
cu_loc_t split_cu_loc;
|
||||
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y, half_width, half_height);
|
||||
set_cu_qps(state, &split_cu_loc, last_qp, prev_qp, depth + 1);
|
||||
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y, half_width, half_height);
|
||||
set_cu_qps(state, &split_cu_loc, last_qp, prev_qp, depth + 1);
|
||||
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y + half_height, half_width, half_height);
|
||||
set_cu_qps(state, &split_cu_loc, last_qp, prev_qp, depth + 1);
|
||||
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y + half_height, half_width, half_height);
|
||||
set_cu_qps(state, &split_cu_loc, last_qp, prev_qp, depth + 1);
|
||||
|
||||
} else {
|
||||
bool cbf_found = *prev_qp >= 0;
|
||||
|
||||
if (cu->tr_depth > depth) {
|
||||
int y_limit = cu_loc->y + cu_loc->height;
|
||||
int x_limit = cu_loc->x + cu_loc->width;
|
||||
if (cu_loc->width > TR_MAX_WIDTH || cu_loc->height > TR_MAX_WIDTH) {
|
||||
// The CU is split into smaller transform units. Check whether coded
|
||||
// block flag is set for any of the TUs.
|
||||
const int tu_width = LCU_WIDTH >> cu->tr_depth;
|
||||
for (int y_scu = y; !cbf_found && y_scu < y + cu_width; y_scu += tu_width) {
|
||||
for (int x_scu = x; !cbf_found && x_scu < x + cu_width; x_scu += tu_width) {
|
||||
const int tu_width = MIN(TR_MAX_WIDTH, 1 << cu->log2_width);
|
||||
for (int y_scu = cu_loc->y; !cbf_found && y_scu < y_limit; y_scu += tu_width) {
|
||||
for (int x_scu = cu_loc->x; !cbf_found && x_scu < x_limit; x_scu += tu_width) {
|
||||
cu_info_t *tu = uvg_cu_array_at(state->tile->frame->cu_array, x_scu, y_scu);
|
||||
if (cbf_is_set_any(tu->cbf, cu->depth)) {
|
||||
if (cbf_is_set_any(tu->cbf)) {
|
||||
cbf_found = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (cbf_is_set_any(cu->cbf, cu->depth)) {
|
||||
} else if (cbf_is_set_any(cu->cbf)) {
|
||||
cbf_found = true;
|
||||
}
|
||||
|
||||
|
@ -671,18 +680,18 @@ static void set_cu_qps(encoder_state_t *state, int x, int y, int depth, int *las
|
|||
if (cbf_found) {
|
||||
*prev_qp = qp = cu->qp;
|
||||
} else {
|
||||
qp = uvg_get_cu_ref_qp(state, x, y, *last_qp);
|
||||
qp = uvg_get_cu_ref_qp(state, cu_loc->x, cu_loc->y, *last_qp);
|
||||
}
|
||||
|
||||
// Set the correct QP for all state->tile->frame->cu_array elements in
|
||||
// the area covered by the CU.
|
||||
for (int y_scu = y; y_scu < y + cu_width; y_scu += SCU_WIDTH) {
|
||||
for (int x_scu = x; x_scu < x + cu_width; x_scu += SCU_WIDTH) {
|
||||
for (int y_scu = cu_loc->y; y_scu < y_limit; y_scu += SCU_WIDTH) {
|
||||
for (int x_scu = cu_loc->x; x_scu < x_limit; x_scu += SCU_WIDTH) {
|
||||
uvg_cu_array_at(state->tile->frame->cu_array, x_scu, y_scu)->qp = qp;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_last_cu_in_qg(state, x, y, depth)) {
|
||||
if (is_last_cu_in_qg(state, cu_loc)) {
|
||||
*last_qp = cu->qp;
|
||||
}
|
||||
}
|
||||
|
@ -812,7 +821,9 @@ static void encoder_state_worker_encode_lcu_search(void * opaque)
|
|||
if (state->frame->max_qp_delta_depth >= 0) {
|
||||
int last_qp = state->last_qp;
|
||||
int prev_qp = -1;
|
||||
set_cu_qps(state, lcu->position_px.x, lcu->position_px.y, 0, &last_qp, &prev_qp);
|
||||
cu_loc_t cu_loc;
|
||||
uvg_cu_loc_ctor(&cu_loc, lcu->position_px.x, lcu->position_px.y, LCU_WIDTH, LCU_WIDTH);
|
||||
set_cu_qps(state, &cu_loc, &last_qp, &prev_qp, 0);
|
||||
}
|
||||
|
||||
if (state->tile->frame->lmcs_aps->m_sliceReshapeInfo.sliceReshaperEnableFlag) {
|
||||
|
@ -870,10 +881,16 @@ static void encoder_state_worker_encode_lcu_bitstream(void * opaque)
|
|||
|
||||
enum uvg_tree_type tree_type = state->frame->slicetype == UVG_SLICE_I && state->encoder_control->cfg.dual_tree ? UVG_LUMA_T : UVG_BOTH_T;
|
||||
//Encode coding tree
|
||||
uvg_encode_coding_tree(state, lcu->position.x * LCU_WIDTH, lcu->position.y * LCU_WIDTH, 0, lcu->coeff, tree_type);
|
||||
cu_loc_t start;
|
||||
uvg_cu_loc_ctor(&start, lcu->position.x * LCU_WIDTH, lcu->position.y * LCU_WIDTH, LCU_WIDTH, LCU_WIDTH);
|
||||
split_tree_t split_tree = { 0, 0, 0, 0, 0 };
|
||||
|
||||
uvg_encode_coding_tree(state, lcu->coeff, tree_type, &start, &start, split_tree, true);
|
||||
|
||||
if(tree_type == UVG_LUMA_T && state->encoder_control->chroma_format != UVG_CSP_400) {
|
||||
uvg_encode_coding_tree(state, lcu->position.x * LCU_WIDTH_C, lcu->position.y * LCU_WIDTH_C, 0, lcu->coeff, UVG_CHROMA_T);
|
||||
uvg_cu_loc_ctor(&start, lcu->position.x * LCU_WIDTH, lcu->position.y * LCU_WIDTH, LCU_WIDTH, LCU_WIDTH);
|
||||
cu_loc_t chroma_tree_loc = start;
|
||||
uvg_encode_coding_tree(state, lcu->coeff, UVG_CHROMA_T, &start, &chroma_tree_loc, split_tree, true);
|
||||
}
|
||||
|
||||
if (!state->cabac.only_count) {
|
||||
|
@ -1152,6 +1169,12 @@ static void encoder_state_encode_leaf(encoder_state_t * const state)
|
|||
uvg_threadqueue_submit(state->encoder_control->threadqueue, job[0]);
|
||||
|
||||
uvg_threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], state->tile->wf_recon_jobs[lcu->id]);
|
||||
#ifdef UVG_DEBUG_PRINT_CABAC
|
||||
// Ensures that the ctus are encoded in raster scan order
|
||||
if(i >= state->tile->frame->width_in_lcu) {
|
||||
uvg_threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], state->tile->wf_recon_jobs[(lcu->id / state->tile->frame->width_in_lcu - 1) * state->tile->frame->width_in_lcu]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
uvg_threadqueue_submit(state->encoder_control->threadqueue, state->tile->wf_jobs[lcu->id]);
|
||||
|
@ -1281,13 +1304,13 @@ static void encoder_state_encode(encoder_state_t * const main_state) {
|
|||
sub_state->tile->frame->width_in_lcu * LCU_WIDTH,
|
||||
sub_state->tile->frame->height_in_lcu * LCU_WIDTH
|
||||
);
|
||||
if(main_state->encoder_control->cfg.dual_tree){
|
||||
if(main_state->encoder_control->cfg.dual_tree && main_state->frame->is_irap){
|
||||
sub_state->tile->frame->chroma_cu_array = uvg_cu_subarray(
|
||||
main_state->tile->frame->chroma_cu_array,
|
||||
offset_x / 2,
|
||||
offset_y / 2,
|
||||
sub_state->tile->frame->width_in_lcu * LCU_WIDTH_C,
|
||||
sub_state->tile->frame->height_in_lcu * LCU_WIDTH_C
|
||||
offset_x,
|
||||
offset_y,
|
||||
sub_state->tile->frame->width_in_lcu * LCU_WIDTH,
|
||||
sub_state->tile->frame->height_in_lcu * LCU_WIDTH
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -1926,10 +1949,9 @@ static void encoder_state_init_new_frame(encoder_state_t * const state, uvg_pict
|
|||
|
||||
if (cfg->dual_tree && state->encoder_control->chroma_format != UVG_CSP_400 && state->frame->is_irap) {
|
||||
assert(state->tile->frame->chroma_cu_array == NULL);
|
||||
state->tile->frame->chroma_cu_array = uvg_cu_array_chroma_alloc(
|
||||
state->tile->frame->width / 2,
|
||||
state->tile->frame->height / 2,
|
||||
state->encoder_control->chroma_format
|
||||
state->tile->frame->chroma_cu_array = uvg_cu_array_alloc(
|
||||
state->tile->frame->width,
|
||||
state->tile->frame->height
|
||||
);
|
||||
}
|
||||
// Set pictype.
|
||||
|
@ -2029,9 +2051,9 @@ static void _encode_one_frame_add_bitstream_deps(const encoder_state_t * const s
|
|||
void uvg_encode_one_frame(encoder_state_t * const state, uvg_picture* frame)
|
||||
{
|
||||
#if UVG_DEBUG_PRINT_CABAC == 1
|
||||
uvg_cabac_bins_count = 0;
|
||||
// uvg_cabac_bins_count = 0;
|
||||
if (state->frame->num == 0) uvg_cabac_bins_verbose = true;
|
||||
else uvg_cabac_bins_verbose = false;
|
||||
// else uvg_cabac_bins_verbose = false;
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -2193,11 +2215,12 @@ int uvg_get_cu_ref_qp(const encoder_state_t *state, int x, int y, int last_qp)
|
|||
{
|
||||
const cu_array_t *cua = state->tile->frame->cu_array;
|
||||
// Quantization group width
|
||||
const int qg_width = LCU_WIDTH >> MIN(state->frame->max_qp_delta_depth, uvg_cu_array_at_const(cua, x, y)->depth);
|
||||
const int qg_width = 1 << MAX(6 - state->frame->max_qp_delta_depth, uvg_cu_array_at_const(cua, x, y)->log2_width);
|
||||
const int qg_height = 1 << MAX(6 - state->frame->max_qp_delta_depth, uvg_cu_array_at_const(cua, x, y)->log2_height);
|
||||
|
||||
// Coordinates of the top-left corner of the quantization group
|
||||
const int x_qg = x & ~(qg_width - 1);
|
||||
const int y_qg = y & ~(qg_width - 1);
|
||||
const int y_qg = y & ~(qg_height - 1);
|
||||
if(x_qg == 0 && y_qg > 0 && y_qg % LCU_WIDTH == 0) {
|
||||
return uvg_cu_array_at_const(cua, x_qg, y_qg - 1)->qp;
|
||||
}
|
||||
|
|
|
@ -332,6 +332,7 @@ typedef struct encoder_state_t {
|
|||
int8_t qp;
|
||||
|
||||
double c_lambda;
|
||||
double chroma_weights[4];
|
||||
|
||||
/**
|
||||
* \brief Whether a QP delta value must be coded for the current LCU.
|
||||
|
@ -359,7 +360,15 @@ typedef struct encoder_state_t {
|
|||
//Constraint structure
|
||||
void * constraint;
|
||||
|
||||
// Since lfnst needs the collocated luma intra mode for
|
||||
// dual tree if the chroma mode is cclm mode and getting all of
|
||||
// the information that would be necessary to get the collocated
|
||||
// luma mode in the lfnst functions, instead store the current
|
||||
// collocated luma mode in the state.
|
||||
int8_t collocated_luma_mode;
|
||||
|
||||
quant_block quant_blocks[3]; // luma, ISP, chroma
|
||||
rate_estimator_t rate_estimator[4]; // luma, cb, cr, isp
|
||||
} encoder_state_t;
|
||||
|
||||
void uvg_encode_one_frame(encoder_state_t * const state, uvg_picture* frame);
|
||||
|
@ -401,14 +410,13 @@ static INLINE bool encoder_state_must_write_vps(const encoder_state_t *state)
|
|||
* \param depth depth in the CU tree
|
||||
* \return true, if it's the last CU in its QG, otherwise false
|
||||
*/
|
||||
static INLINE bool is_last_cu_in_qg(const encoder_state_t *state, int x, int y, int depth)
|
||||
static INLINE bool is_last_cu_in_qg(const encoder_state_t *state, const cu_loc_t* const cu_loc)
|
||||
{
|
||||
if (state->frame->max_qp_delta_depth < 0) return false;
|
||||
|
||||
const int cu_width = LCU_WIDTH >> depth;
|
||||
const int qg_width = LCU_WIDTH >> state->frame->max_qp_delta_depth;
|
||||
const int right = x + cu_width;
|
||||
const int bottom = y + cu_width;
|
||||
const int right = cu_loc->x + cu_loc->width;
|
||||
const int bottom = cu_loc->y + cu_loc->height;
|
||||
return (right % qg_width == 0 || right >= state->tile->frame->width) &&
|
||||
(bottom % qg_width == 0 || bottom >= state->tile->frame->height);
|
||||
}
|
||||
|
|
174
src/filter.c
174
src/filter.c
|
@ -36,6 +36,7 @@
|
|||
|
||||
#include "cu.h"
|
||||
#include "encoder.h"
|
||||
#include "intra.h"
|
||||
#include "uvg266.h"
|
||||
#include "transform.h"
|
||||
#include "videoframe.h"
|
||||
|
@ -269,19 +270,19 @@ static bool is_tu_boundary(
|
|||
int32_t x,
|
||||
int32_t y,
|
||||
edge_dir dir,
|
||||
color_t color,
|
||||
enum uvg_tree_type tree_type)
|
||||
{
|
||||
x >>= tree_type == UVG_CHROMA_T;
|
||||
y >>= tree_type == UVG_CHROMA_T;
|
||||
// if (x & 3 || y & 3) return false;
|
||||
const cu_info_t *const scu =
|
||||
uvg_cu_array_at_const(tree_type != UVG_CHROMA_T ? state->tile->frame->cu_array : state->tile->frame->chroma_cu_array, x, y);
|
||||
const int tu_width = LCU_WIDTH >> (scu->tr_depth + (tree_type == UVG_CHROMA_T));
|
||||
|
||||
if (dir == EDGE_HOR) {
|
||||
return (y & (tu_width - 1)) == 0;
|
||||
return color == COLOR_Y ? scu->luma_deblocking & EDGE_HOR :
|
||||
scu->chroma_deblocking & EDGE_HOR;
|
||||
} else {
|
||||
return (x & (tu_width - 1)) == 0;
|
||||
return color == COLOR_Y ? scu->luma_deblocking & EDGE_VER :
|
||||
scu->chroma_deblocking & EDGE_VER;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -306,32 +307,6 @@ static bool is_pu_boundary(const encoder_state_t *const state,
|
|||
it for now, in case some other tool requires it.
|
||||
*/
|
||||
return false;
|
||||
//const cu_info_t *const scu =
|
||||
// uvg_cu_array_at_const(state->tile->frame->cu_array, x, y);
|
||||
//// Get the containing CU.
|
||||
//const int32_t cu_width = LCU_WIDTH >> scu->depth;
|
||||
//const int32_t x_cu = x & ~(cu_width - 1);
|
||||
//const int32_t y_cu = y & ~(cu_width - 1);
|
||||
//const cu_info_t *const cu =
|
||||
// uvg_cu_array_at_const(state->tile->frame->cu_array, x_cu, y_cu);
|
||||
|
||||
//const int num_pu = uvg_part_mode_num_parts[cu->part_size];
|
||||
//for (int i = 0; i < num_pu; i++) {
|
||||
// if (dir == EDGE_HOR) {
|
||||
// int y_pu = PU_GET_Y(cu->part_size, cu_width, y_cu, i);
|
||||
// if (y_pu == y) {
|
||||
// return true;
|
||||
// }
|
||||
|
||||
// } else {
|
||||
// int x_pu = PU_GET_X(cu->part_size, cu_width, x_cu, i);
|
||||
// if (x_pu == x) {
|
||||
// return true;
|
||||
// }
|
||||
// }
|
||||
//}
|
||||
|
||||
//return false;
|
||||
}
|
||||
|
||||
|
||||
|
@ -346,9 +321,9 @@ static bool is_pu_boundary(const encoder_state_t *const state,
|
|||
static bool is_on_8x8_grid(int x, int y, edge_dir dir)
|
||||
{
|
||||
if (dir == EDGE_HOR) {
|
||||
return (y & 7) == 0 && (x & 2) == 0;
|
||||
return (y & 7) == 0;
|
||||
} else {
|
||||
return (x & 7) == 0 && (y & 2) == 0;
|
||||
return (x & 7) == 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -628,10 +603,10 @@ static INLINE void get_max_filter_length(uint8_t *filt_len_P, uint8_t *filt_len_
|
|||
bool transform_edge_4x4[2] = { false, false };
|
||||
bool transform_edge_8x8[2] = { false, false };
|
||||
|
||||
if (pos >= 4) transform_edge_4x4[0] = is_tu_boundary(state, x - x_mul * 4, y - y_mul * 4, dir, tree_type);
|
||||
if (pos >= 8) transform_edge_8x8[0] = is_tu_boundary(state, x - x_mul * 8, y - y_mul * 8, dir, tree_type);
|
||||
if (pos + 4 < len) transform_edge_4x4[1] = is_tu_boundary(state, x + x_mul * 4, y + y_mul * 4, dir, tree_type);
|
||||
if (pos + 8 < len) transform_edge_8x8[1] = is_tu_boundary(state, x + x_mul * 8, y + y_mul * 8, dir, tree_type);
|
||||
if (pos >= 4) transform_edge_4x4[0] = is_tu_boundary(state, x - x_mul * 4, y - y_mul * 4, dir, comp, tree_type);
|
||||
if (pos >= 8) transform_edge_8x8[0] = is_tu_boundary(state, x - x_mul * 8, y - y_mul * 8, dir, comp, tree_type);
|
||||
if (pos + 4 < len) transform_edge_4x4[1] = is_tu_boundary(state, x + x_mul * 4, y + y_mul * 4, dir, comp, tree_type);
|
||||
if (pos + 8 < len) transform_edge_8x8[1] = is_tu_boundary(state, x + x_mul * 8, y + y_mul * 8, dir, comp, tree_type);
|
||||
|
||||
if (comp == COLOR_Y) {
|
||||
if (tu_size_P_side <= 4 || tu_size_Q_side <= 4){
|
||||
|
@ -756,8 +731,8 @@ static void filter_deblock_edge_luma(encoder_state_t * const state,
|
|||
cu_q = uvg_cu_array_at(frame->cu_array, x_coord, y);
|
||||
}
|
||||
|
||||
bool nonzero_coeffs = cbf_is_set(cu_q->cbf, cu_q->tr_depth, COLOR_Y)
|
||||
|| cbf_is_set(cu_p->cbf, cu_p->tr_depth, COLOR_Y);
|
||||
bool nonzero_coeffs = cbf_is_set(cu_q->cbf, COLOR_Y)
|
||||
|| cbf_is_set(cu_p->cbf, COLOR_Y);
|
||||
|
||||
// Filter strength
|
||||
strength = 0;
|
||||
|
@ -766,7 +741,6 @@ static void filter_deblock_edge_luma(encoder_state_t * const state,
|
|||
}
|
||||
else if (tu_boundary && nonzero_coeffs) {
|
||||
// Non-zero residual/coeffs and transform boundary
|
||||
// Neither CU is intra so tr_depth <= MAX_DEPTH.
|
||||
strength = 1;
|
||||
}
|
||||
else if(cu_p->inter.mv_dir == 3 || cu_q->inter.mv_dir == 3 || state->frame->slicetype == UVG_SLICE_B) { // B-slice related checks. TODO: Need to account for cu_p being in another slice?
|
||||
|
@ -854,18 +828,50 @@ static void filter_deblock_edge_luma(encoder_state_t * const state,
|
|||
bool is_side_Q_large = false;
|
||||
uint8_t max_filter_length_P = 0;
|
||||
uint8_t max_filter_length_Q = 0;
|
||||
const int cu_size = LCU_WIDTH >> cu_q->depth;
|
||||
const int pu_part_idx = (y + PU_GET_H(cu_q->part_size, cu_size, 0) <= y_coord ?
|
||||
1 + (uvg_part_mode_num_parts[cu_q->part_size] >> 2) : 0)
|
||||
+ (x + PU_GET_W(cu_q->part_size, cu_size, 0) <= x_coord ? 1 : 0);
|
||||
const int pu_size = dir == EDGE_HOR ? PU_GET_H(cu_q->part_size, cu_size, pu_part_idx)
|
||||
: PU_GET_W(cu_q->part_size, cu_size, pu_part_idx);
|
||||
const int pu_pos = dir == EDGE_HOR ? y_coord - PU_GET_Y(cu_q->part_size, cu_size, 0, pu_part_idx)
|
||||
: x_coord - PU_GET_X(cu_q->part_size, cu_size, 0, pu_part_idx);
|
||||
|
||||
const int cu_width = 1 << cu_q->log2_width;
|
||||
const int cu_height = 1 << cu_q->log2_height;
|
||||
const int pu_size = dir == EDGE_HOR ? cu_height : cu_width;
|
||||
const int pu_pos = dir == EDGE_HOR ? y_coord : x_coord;
|
||||
int tu_size_q_side = 0;
|
||||
if (cu_q->type == CU_INTRA && cu_q->intra.isp_mode != ISP_MODE_NO_ISP) {
|
||||
if (cu_q->intra.isp_mode == ISP_MODE_VER && dir == EDGE_VER) {
|
||||
tu_size_q_side = MAX(4, cu_width >> 2);
|
||||
} else if (cu_q->intra.isp_mode == ISP_MODE_HOR && dir == EDGE_HOR) {
|
||||
tu_size_q_side = MAX(4, cu_height >> 2);
|
||||
} else {
|
||||
tu_size_q_side = dir == EDGE_HOR ?
|
||||
MIN(1 << cu_q->log2_height, TR_MAX_WIDTH) :
|
||||
MIN(1 << cu_q->log2_width, TR_MAX_WIDTH);
|
||||
}
|
||||
} else {
|
||||
tu_size_q_side = dir == EDGE_HOR ?
|
||||
MIN(1 << cu_q->log2_height, TR_MAX_WIDTH) :
|
||||
MIN(1 << cu_q->log2_width, TR_MAX_WIDTH);
|
||||
}
|
||||
|
||||
int tu_size_p_side = 0;
|
||||
if (cu_p->type == CU_INTRA && cu_p->intra.isp_mode != ISP_MODE_NO_ISP) {
|
||||
if (cu_p->intra.isp_mode == ISP_MODE_VER && dir == EDGE_VER) {
|
||||
tu_size_p_side = MAX(4, (1 << cu_p->log2_width) >> 2);
|
||||
} else if (cu_p->intra.isp_mode == ISP_MODE_HOR && dir == EDGE_HOR) {
|
||||
tu_size_p_side = MAX(4, (1 << cu_p->log2_height) >> 2);
|
||||
} else {
|
||||
tu_size_p_side = dir == EDGE_HOR ?
|
||||
MIN(1 << cu_p->log2_height, TR_MAX_WIDTH) :
|
||||
MIN(1 << cu_p->log2_width, TR_MAX_WIDTH);
|
||||
}
|
||||
} else {
|
||||
tu_size_p_side = dir == EDGE_HOR ?
|
||||
MIN(1 << cu_p->log2_height, TR_MAX_WIDTH) :
|
||||
MIN(1 << cu_p->log2_width, TR_MAX_WIDTH);
|
||||
|
||||
}
|
||||
|
||||
get_max_filter_length(&max_filter_length_P, &max_filter_length_Q, state, x_coord, y_coord,
|
||||
dir, tu_boundary,
|
||||
LCU_WIDTH >> cu_p->tr_depth,
|
||||
LCU_WIDTH >> cu_q->tr_depth,
|
||||
tu_size_p_side,
|
||||
tu_size_q_side,
|
||||
pu_pos, pu_size, cu_q->merged, COLOR_Y,
|
||||
UVG_LUMA_T);
|
||||
|
||||
|
@ -1073,41 +1079,44 @@ static void filter_deblock_edge_chroma(encoder_state_t * const state,
|
|||
// CUs on both sides of the edge
|
||||
cu_info_t *cu_p;
|
||||
cu_info_t *cu_q;
|
||||
int32_t x_coord = x << (tree_type != UVG_CHROMA_T);
|
||||
int32_t y_coord = y << (tree_type != UVG_CHROMA_T);
|
||||
int32_t x_coord = x << 1;
|
||||
int32_t y_coord = y << 1;
|
||||
cu_array_t* cua = tree_type != UVG_CHROMA_T ? frame->cu_array : frame->chroma_cu_array;
|
||||
if (dir == EDGE_VER) {
|
||||
y_coord = (y + min_chroma_length * blk_idx) << (tree_type != UVG_CHROMA_T);
|
||||
y_coord = (y + min_chroma_length * blk_idx) << (1);
|
||||
cu_p = uvg_cu_array_at(cua, x_coord - 1, y_coord);
|
||||
cu_q = uvg_cu_array_at(cua, x_coord , y_coord);
|
||||
|
||||
} else {
|
||||
x_coord = (x + min_chroma_length * blk_idx) << (tree_type != UVG_CHROMA_T);
|
||||
x_coord = (x + min_chroma_length * blk_idx) << (1);
|
||||
cu_p = uvg_cu_array_at(cua, x_coord, y_coord - 1);
|
||||
cu_q = uvg_cu_array_at(cua, x_coord, y_coord );
|
||||
}
|
||||
|
||||
const int cu_size = LCU_WIDTH >> (cu_q->depth + (tree_type == UVG_CHROMA_T));
|
||||
const int pu_part_idx = ((y << (tree_type != UVG_CHROMA_T)) + PU_GET_H(cu_q->part_size, cu_size, 0) <= y_coord ?
|
||||
1 + (uvg_part_mode_num_parts[cu_q->part_size] >> 2) : 0)
|
||||
+ ((x << (tree_type != UVG_CHROMA_T)) + PU_GET_W(cu_q->part_size, cu_size, 0) <= x_coord ? 1 : 0);
|
||||
const int pu_size = dir == EDGE_HOR ? PU_GET_H(cu_q->part_size, cu_size, pu_part_idx)
|
||||
: PU_GET_W(cu_q->part_size, cu_size, pu_part_idx);
|
||||
const int pu_pos = dir == EDGE_HOR ? y_coord - PU_GET_Y(cu_q->part_size, cu_size, 0, pu_part_idx)
|
||||
: x_coord - PU_GET_X(cu_q->part_size, cu_size, 0, pu_part_idx);
|
||||
uint8_t max_filter_length_P = 0;
|
||||
uint8_t max_filter_length_Q = 0;
|
||||
|
||||
const int tu_p_size = LCU_WIDTH >> (cu_p->tr_depth + (chroma_shift));
|
||||
const int tu_q_size = LCU_WIDTH >> (cu_q->tr_depth + (chroma_shift));
|
||||
const int cu_width = 1 << (cu_q->log2_chroma_width );
|
||||
const int cu_height = 1 << (cu_q->log2_chroma_height);
|
||||
const int pu_size = dir == EDGE_HOR ? cu_height : cu_width;
|
||||
const int pu_pos = dir == EDGE_HOR ? y_coord : x_coord;
|
||||
|
||||
|
||||
const int tu_size_p_side = dir == EDGE_HOR ?
|
||||
MIN(1 << (cu_p->log2_chroma_height), TR_MAX_WIDTH) :
|
||||
MIN(1 << (cu_p->log2_chroma_width), TR_MAX_WIDTH);
|
||||
const int tu_size_q_side = dir == EDGE_HOR ?
|
||||
MIN(1 << (cu_q->log2_chroma_height ), TR_MAX_WIDTH) :
|
||||
MIN(1 << (cu_q->log2_chroma_width ), TR_MAX_WIDTH);
|
||||
|
||||
get_max_filter_length(&max_filter_length_P, &max_filter_length_Q, state, x_coord, y_coord,
|
||||
dir, tu_boundary, tu_p_size, tu_q_size,
|
||||
dir, tu_boundary, tu_size_p_side, tu_size_q_side,
|
||||
pu_pos, pu_size, cu_q->merged, COLOR_U,
|
||||
tree_type);
|
||||
|
||||
|
||||
const bool large_boundary = (max_filter_length_P >= 3 && max_filter_length_Q >= 3);
|
||||
const bool is_chroma_hor_CTB_boundary = (dir == EDGE_HOR && y_coord % (LCU_WIDTH >> (tree_type == UVG_CHROMA_T)) == 0);
|
||||
const bool is_chroma_hor_CTB_boundary = (dir == EDGE_HOR && y_coord % LCU_WIDTH == 0);
|
||||
uint8_t c_strength[2] = { 0, 0 };
|
||||
|
||||
|
||||
|
@ -1116,10 +1125,10 @@ static void filter_deblock_edge_chroma(encoder_state_t * const state,
|
|||
c_strength[1] = 2;
|
||||
}
|
||||
else if (tu_boundary){ //TODO: Add ciip/IBC related stuff
|
||||
bool nonzero_coeffs_U = cbf_is_set(cu_q->cbf, cu_q->tr_depth, COLOR_U)
|
||||
|| cbf_is_set(cu_p->cbf, cu_p->tr_depth, COLOR_U);
|
||||
bool nonzero_coeffs_V = cbf_is_set(cu_q->cbf, cu_q->tr_depth, COLOR_V)
|
||||
|| cbf_is_set(cu_p->cbf, cu_p->tr_depth, COLOR_V);
|
||||
bool nonzero_coeffs_U = cbf_is_set(cu_q->cbf, COLOR_U)
|
||||
|| cbf_is_set(cu_p->cbf, COLOR_U);
|
||||
bool nonzero_coeffs_V = cbf_is_set(cu_q->cbf, COLOR_V)
|
||||
|| cbf_is_set(cu_p->cbf, COLOR_V);
|
||||
c_strength[0] = nonzero_coeffs_U ? 1 : 0;
|
||||
c_strength[1] = nonzero_coeffs_V ? 1 : 0;
|
||||
}
|
||||
|
@ -1238,10 +1247,11 @@ static void filter_deblock_unit(
|
|||
const int32_t x_c = x >> 1;
|
||||
const int32_t y_c = y >> 1;
|
||||
if (state->encoder_control->chroma_format != UVG_CSP_400 &&
|
||||
(is_on_8x8_grid(x_c, y_c, dir && (x_c + 4) % 32)
|
||||
|| (x == state->tile->frame->width - 8 && dir == 1 && y_c % 8 == 0))
|
||||
is_tu_boundary(state, x, y, dir, COLOR_UV, tree_type)
|
||||
&& (is_on_8x8_grid(x_c, y_c, dir == EDGE_HOR && (x_c + 4) % 32 ? EDGE_HOR : EDGE_VER)
|
||||
|| (x == state->tile->frame->width - 8 && dir == EDGE_HOR && y_c % 8 == 0))
|
||||
&& tree_type != UVG_LUMA_T) {
|
||||
filter_deblock_edge_chroma(state, x_c, y_c, length, dir, tu_boundary, tree_type);
|
||||
filter_deblock_edge_chroma(state, x_c, y_c, 2, dir, tu_boundary, tree_type);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1271,11 +1281,11 @@ static void filter_deblock_lcu_inside(encoder_state_t * const state,
|
|||
|
||||
for (int edge_y = y; edge_y < end_y; edge_y += 4) {
|
||||
for (int edge_x = x; edge_x < end_x; edge_x += 4) {
|
||||
bool tu_boundary = is_tu_boundary(state, edge_x, edge_y, dir, luma_tree);
|
||||
bool tu_boundary = is_tu_boundary(state, edge_x, edge_y, dir, COLOR_Y, luma_tree);
|
||||
if (tu_boundary || is_pu_boundary(state, edge_x, edge_y, dir)) {
|
||||
filter_deblock_unit(state, edge_x, edge_y, 4, 4, dir, tu_boundary, edge_x < x, luma_tree);
|
||||
}
|
||||
if(chroma_tree == UVG_CHROMA_T && is_tu_boundary(state, edge_x, edge_y, dir, chroma_tree)) {
|
||||
if(chroma_tree == UVG_CHROMA_T && is_tu_boundary(state, edge_x, edge_y, dir, COLOR_UV, chroma_tree)) {
|
||||
filter_deblock_unit(state, edge_x, edge_y, 4, 4, dir, tu_boundary, edge_x < x, chroma_tree);
|
||||
}
|
||||
}
|
||||
|
@ -1302,7 +1312,7 @@ static void filter_deblock_lcu_rightmost(encoder_state_t * const state,
|
|||
for (int x = x_px - 8; x < x_px; x += 4) {
|
||||
for (int y = y_px; y < end; y += 4) {
|
||||
// The top edge of the whole frame is not filtered.
|
||||
bool tu_boundary = is_tu_boundary(state, x, y, EDGE_HOR, luma_tree);
|
||||
bool tu_boundary = is_tu_boundary(state, x, y, EDGE_HOR, COLOR_Y, luma_tree);
|
||||
if (y > 0 && (tu_boundary || is_pu_boundary(state, x, y, EDGE_HOR))) {
|
||||
filter_deblock_edge_luma(state, x, y, 4, EDGE_HOR, tu_boundary);
|
||||
}
|
||||
|
@ -1313,13 +1323,15 @@ static void filter_deblock_lcu_rightmost(encoder_state_t * const state,
|
|||
if (state->encoder_control->chroma_format != UVG_CSP_400) {
|
||||
const int x_px_c = x_px >> 1;
|
||||
const int y_px_c = y_px >> 1;
|
||||
const int x_c = x_px_c - 4;
|
||||
const int end_c = MIN(y_px_c + LCU_WIDTH_C, state->tile->frame->height >> 1);
|
||||
for (int y_c = y_px_c; y_c < end_c; y_c += 8) {
|
||||
int x_c = x_px_c - 4;
|
||||
const int end_c_y = MIN(y_px_c + LCU_WIDTH_C, state->tile->frame->height >> 1);
|
||||
for(; x_c < x_px_c; x_c += 2) {
|
||||
for (int y_c = y_px_c; y_c < end_c_y; y_c += 8) {
|
||||
// The top edge of the whole frame is not filtered.
|
||||
bool tu_boundary = is_tu_boundary(state, x_c << 1, y_c << 1, EDGE_HOR, chroma_tree);
|
||||
bool tu_boundary = is_tu_boundary(state, x_c << 1, y_c << 1, EDGE_HOR, COLOR_UV, chroma_tree);
|
||||
if (y_c > 0 && (tu_boundary || is_pu_boundary(state, x_c << 1, y_c << 1, EDGE_HOR))) {
|
||||
filter_deblock_edge_chroma(state, x_c , y_c, 4, EDGE_HOR, tu_boundary, chroma_tree);
|
||||
filter_deblock_edge_chroma(state, x_c , y_c, 2, EDGE_HOR, tu_boundary, chroma_tree);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -46,8 +46,8 @@
|
|||
* \brief Edge direction.
|
||||
*/
|
||||
typedef enum edge_dir {
|
||||
EDGE_VER = 0, // vertical
|
||||
EDGE_HOR = 1, // horizontal
|
||||
EDGE_VER = 1, // vertical
|
||||
EDGE_HOR = 2, // horizontal
|
||||
} edge_dir;
|
||||
|
||||
|
||||
|
|
|
@ -145,11 +145,11 @@ typedef int32_t mv_t;
|
|||
|
||||
#define INTERNAL_MV_PREC 4 // Internal motion vector precision, 4 = 1/16 pel
|
||||
|
||||
//! Limits for prediction block sizes. 0 = 64x64, 4 = 4x4.
|
||||
//! Limits for prediction block sizes.
|
||||
#define PU_DEPTH_INTER_MIN 0
|
||||
#define PU_DEPTH_INTER_MAX 3
|
||||
#define PU_DEPTH_INTER_MAX 8
|
||||
#define PU_DEPTH_INTRA_MIN 0
|
||||
#define PU_DEPTH_INTRA_MAX 4
|
||||
#define PU_DEPTH_INTRA_MAX 8
|
||||
|
||||
//! Maximum number of layers in GOP structure (for allocating structures)
|
||||
#define MAX_GOP_LAYERS 6
|
||||
|
@ -273,7 +273,6 @@ typedef int32_t mv_t;
|
|||
#define CLIP_TO_PIXEL(value) CLIP(0, PIXEL_MAX, (value))
|
||||
#define CLIP_TO_QP(value) CLIP(0, 51, (value))
|
||||
#define SWAP(a,b,swaptype) { swaptype tempval; tempval = a; a = b; b = tempval; }
|
||||
#define CU_WIDTH_FROM_DEPTH(depth) (LCU_WIDTH >> depth)
|
||||
#define WITHIN(val, min_val, max_val) ((min_val) <= (val) && (val) <= (max_val))
|
||||
#define CEILDIV(x,y) (((x) + (y) - 1) / (y))
|
||||
|
||||
|
|
279
src/inter.c
279
src/inter.c
|
@ -375,23 +375,26 @@ static void inter_cp_with_ext_border(const uvg_pixel *ref_buf, int ref_stride,
|
|||
* \param predict_luma Enable or disable luma prediction for this call.
|
||||
* \param predict_chroma Enable or disable chroma prediction for this call.
|
||||
*/
|
||||
static unsigned inter_recon_unipred(const encoder_state_t * const state,
|
||||
static unsigned inter_recon_unipred(
|
||||
const encoder_state_t * const state,
|
||||
const uvg_picture * const ref,
|
||||
int32_t pu_x,
|
||||
int32_t pu_y,
|
||||
int32_t pu_w,
|
||||
int32_t pu_h,
|
||||
int32_t out_stride_luma,
|
||||
const mv_t mv_param[2],
|
||||
yuv_t *yuv_px,
|
||||
yuv_im_t *yuv_im,
|
||||
bool predict_luma,
|
||||
bool predict_chroma)
|
||||
bool predict_chroma,
|
||||
const cu_loc_t* const cu_loc)
|
||||
{
|
||||
vector2d_t int_mv = { mv_param[0], mv_param[1] };
|
||||
|
||||
uvg_change_precision_vector2d(INTERNAL_MV_PREC, 0, &int_mv);
|
||||
|
||||
const int pu_x = cu_loc->x;
|
||||
const int pu_y = cu_loc->y;
|
||||
const int pu_w = cu_loc->width;
|
||||
const int pu_h = cu_loc->height;
|
||||
|
||||
const vector2d_t int_mv_in_frame = {
|
||||
int_mv.x + pu_x + state->tile->offset_x,
|
||||
int_mv.y + pu_y + state->tile->offset_y
|
||||
|
@ -507,17 +510,15 @@ static unsigned inter_recon_unipred(const encoder_state_t * const state,
|
|||
* \param predict_luma Enable or disable luma prediction for this call.
|
||||
* \param predict_chroma Enable or disable chroma prediction for this call.
|
||||
*/
|
||||
void uvg_inter_recon_bipred(const encoder_state_t *const state,
|
||||
void uvg_inter_recon_bipred(
|
||||
const encoder_state_t *const state,
|
||||
const uvg_picture *ref1,
|
||||
const uvg_picture *ref2,
|
||||
int32_t pu_x,
|
||||
int32_t pu_y,
|
||||
int32_t pu_w,
|
||||
int32_t pu_h,
|
||||
mv_t mv_param[2][2],
|
||||
lcu_t *lcu,
|
||||
bool predict_luma,
|
||||
bool predict_chroma)
|
||||
bool predict_chroma,
|
||||
const cu_loc_t* const cu_loc)
|
||||
{
|
||||
// Allocate maximum size arrays for interpolated and copied samples
|
||||
ALIGNED(64) uvg_pixel px_buf_L0[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
|
||||
|
@ -525,6 +526,11 @@ void uvg_inter_recon_bipred(const encoder_state_t *const state,
|
|||
ALIGNED(64) uvg_pixel_im im_buf_L0[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
|
||||
ALIGNED(64) uvg_pixel_im im_buf_L1[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
|
||||
|
||||
const int pu_x = cu_loc->x;
|
||||
const int pu_y = cu_loc->y;
|
||||
const int pu_w = cu_loc->width;
|
||||
const int pu_h = cu_loc->height;
|
||||
|
||||
yuv_t px_L0;
|
||||
px_L0.size = pu_w * pu_h;
|
||||
px_L0.y = &px_buf_L0[0];
|
||||
|
@ -551,10 +557,10 @@ void uvg_inter_recon_bipred(const encoder_state_t *const state,
|
|||
|
||||
// Sample blocks from both reference picture lists.
|
||||
// Flags state if the outputs were written to high-precision / interpolated sample buffers.
|
||||
unsigned im_flags_L0 = inter_recon_unipred(state, ref1, pu_x, pu_y, pu_w, pu_h, pu_w, mv_param[0],
|
||||
&px_L0, &im_L0, predict_luma, predict_chroma);
|
||||
unsigned im_flags_L1 = inter_recon_unipred(state, ref2, pu_x, pu_y, pu_w, pu_h, pu_w, mv_param[1],
|
||||
&px_L1, &im_L1, predict_luma, predict_chroma);
|
||||
unsigned im_flags_L0 = inter_recon_unipred(state, ref1, pu_w, mv_param[0], &px_L0, &im_L0, predict_luma, predict_chroma,
|
||||
cu_loc);
|
||||
unsigned im_flags_L1 = inter_recon_unipred(state, ref2, pu_w, mv_param[1], &px_L1, &im_L1, predict_luma, predict_chroma,
|
||||
cu_loc);
|
||||
|
||||
// After reconstruction, merge the predictors by taking an average of each pixel
|
||||
uvg_bipred_average(lcu, &px_L0, &px_L1, &im_L0, &im_L1,
|
||||
|
@ -578,19 +584,14 @@ void uvg_inter_recon_bipred(const encoder_state_t *const state,
|
|||
* \param predict_luma Enable or disable luma prediction for this call.
|
||||
* \param predict_chroma Enable or disable chroma prediction for this call.
|
||||
*/
|
||||
void uvg_inter_recon_cu(const encoder_state_t * const state,
|
||||
void uvg_inter_recon_cu(
|
||||
const encoder_state_t * const state,
|
||||
lcu_t *lcu,
|
||||
int32_t x,
|
||||
int32_t y,
|
||||
int32_t width,
|
||||
bool predict_luma,
|
||||
bool predict_chroma)
|
||||
bool predict_chroma,
|
||||
const cu_loc_t* const cu_loc)
|
||||
{
|
||||
cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y));
|
||||
const int num_pu = uvg_part_mode_num_parts[cu->part_size];
|
||||
for (int i = 0; i < num_pu; ++i) {
|
||||
uvg_inter_pred_pu(state, lcu, x, y, width, predict_luma, predict_chroma, i);
|
||||
}
|
||||
uvg_inter_pred_pu(state, lcu, predict_luma, predict_chroma, cu_loc);
|
||||
}
|
||||
|
||||
static void ibc_recon_cu(const encoder_state_t * const state,
|
||||
|
@ -599,8 +600,7 @@ static void ibc_recon_cu(const encoder_state_t * const state,
|
|||
int32_t y,
|
||||
int32_t width,
|
||||
bool predict_luma,
|
||||
bool predict_chroma,
|
||||
int i_pu)
|
||||
bool predict_chroma)
|
||||
{
|
||||
const int x_scu = SUB_SCU(x);
|
||||
const int y_scu = SUB_SCU(y);
|
||||
|
@ -668,79 +668,63 @@ static void ibc_recon_cu(const encoder_state_t * const state,
|
|||
* \param predict_chroma Enable or disable chroma prediction for this call.
|
||||
* \param cu_loc          Location and dimensions of the block to predict.
|
||||
*/
|
||||
void uvg_inter_pred_pu(const encoder_state_t * const state,
|
||||
void uvg_inter_pred_pu(
|
||||
const encoder_state_t * const state,
|
||||
lcu_t *lcu,
|
||||
int32_t x,
|
||||
int32_t y,
|
||||
int32_t width,
|
||||
bool predict_luma,
|
||||
bool predict_chroma,
|
||||
int i_pu)
|
||||
const cu_loc_t* const cu_loc)
|
||||
|
||||
{
|
||||
const int x_scu = SUB_SCU(x);
|
||||
const int y_scu = SUB_SCU(y);
|
||||
cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, x_scu, y_scu);
|
||||
const int pu_x = PU_GET_X(cu->part_size, width, x, i_pu);
|
||||
const int pu_y = PU_GET_Y(cu->part_size, width, y, i_pu);
|
||||
const int pu_w = PU_GET_W(cu->part_size, width, i_pu);
|
||||
const int pu_h = PU_GET_H(cu->part_size, width, i_pu);
|
||||
cu_info_t *pu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(pu_x), SUB_SCU(pu_y));
|
||||
|
||||
if (cu->type == CU_IBC) {
|
||||
ibc_recon_cu(state, lcu, x, y, width, predict_luma, predict_chroma, i_pu);
|
||||
} else {
|
||||
const int x_scu = SUB_SCU(cu_loc->x);
|
||||
const int y_scu = SUB_SCU(cu_loc->y);
|
||||
cu_info_t *pu = LCU_GET_CU_AT_PX(lcu, x_scu, y_scu);
|
||||
|
||||
if (pu->inter.mv_dir == 3) {
|
||||
const uvg_picture *const refs[2] = {
|
||||
state->frame->ref->images[state->frame->ref_LX[0][pu->inter.mv_ref[0]]],
|
||||
state->frame->ref->images[state->frame->ref_LX[1][pu->inter.mv_ref[1]]],
|
||||
state->frame->ref->images[
|
||||
state->frame->ref_LX[0][
|
||||
pu->inter.mv_ref[0]]],
|
||||
state->frame->ref->images[
|
||||
state->frame->ref_LX[1][
|
||||
pu->inter.mv_ref[1]]],
|
||||
};
|
||||
uvg_inter_recon_bipred(
|
||||
state,
|
||||
refs[0],
|
||||
refs[1],
|
||||
pu_x,
|
||||
pu_y,
|
||||
pu_w,
|
||||
pu_h,
|
||||
pu->inter.mv,
|
||||
lcu,
|
||||
predict_luma,
|
||||
predict_chroma);
|
||||
uvg_inter_recon_bipred(state,
|
||||
refs[0], refs[1],
|
||||
pu->inter.mv, lcu,
|
||||
predict_luma, predict_chroma,
|
||||
cu_loc);
|
||||
}
|
||||
else if (pu->type == CU_IBC) {
|
||||
ibc_recon_cu(state, lcu, cu_loc->x, cu_loc->y, cu_loc->width, predict_luma, predict_chroma);
|
||||
} else{
|
||||
const int mv_idx = pu->inter.mv_dir - 1;
|
||||
const uvg_picture *const ref =
|
||||
state->frame->ref->images[state->frame->ref_LX[mv_idx][pu->inter.mv_ref[mv_idx]]];
|
||||
state->frame->ref->images[
|
||||
state->frame->ref_LX[mv_idx][
|
||||
pu->inter.mv_ref[mv_idx]]];
|
||||
|
||||
const unsigned offset_luma = SUB_SCU(pu_y) * LCU_WIDTH + SUB_SCU(pu_x);
|
||||
const unsigned offset_chroma =
|
||||
SUB_SCU(pu_y) / 2 * LCU_WIDTH_C + SUB_SCU(pu_x) / 2;
|
||||
const unsigned offset_luma = SUB_SCU(cu_loc->y) * LCU_WIDTH + SUB_SCU(cu_loc->x);
|
||||
const unsigned offset_chroma = SUB_SCU(cu_loc->y) / 2 * LCU_WIDTH_C + SUB_SCU(cu_loc->x) / 2;
|
||||
yuv_t lcu_adapter;
|
||||
lcu_adapter.size = pu_w * pu_h;
|
||||
lcu_adapter.y = lcu->rec.y + offset_luma,
|
||||
lcu_adapter.u = lcu->rec.u + offset_chroma,
|
||||
lcu_adapter.v = lcu->rec.v + offset_chroma,
|
||||
lcu_adapter.size = cu_loc->width * cu_loc->height;
|
||||
lcu_adapter.y = lcu->rec.y + offset_luma;
|
||||
lcu_adapter.u = lcu->rec.u + offset_chroma;
|
||||
lcu_adapter.v = lcu->rec.v + offset_chroma;
|
||||
|
||||
inter_recon_unipred(
|
||||
state,
|
||||
inter_recon_unipred(state,
|
||||
ref,
|
||||
pu_x,
|
||||
pu_y,
|
||||
pu_w,
|
||||
pu_h,
|
||||
LCU_WIDTH,
|
||||
pu->inter.mv[mv_idx],
|
||||
LCU_WIDTH, pu->inter.mv[mv_idx],
|
||||
&lcu_adapter,
|
||||
NULL,
|
||||
predict_luma,
|
||||
predict_chroma);
|
||||
}
|
||||
predict_chroma,
|
||||
cu_loc);
|
||||
}
|
||||
if (predict_chroma && state->encoder_control->cfg.jccr) {
|
||||
const int offset = x_scu / 2 + y_scu / 2 * LCU_WIDTH_C;
|
||||
uvg_pixels_blit(lcu->rec.u + offset, lcu->rec.joint_u + offset, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C);
|
||||
uvg_pixels_blit(lcu->rec.v + offset, lcu->rec.joint_v + offset, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C);
|
||||
uvg_pixels_blit(lcu->rec.u + offset, lcu->rec.joint_u + offset, cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C, LCU_WIDTH_C);
|
||||
uvg_pixels_blit(lcu->rec.v + offset, lcu->rec.joint_v + offset, cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C, LCU_WIDTH_C);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -915,11 +899,9 @@ static bool is_b0_cand_coded(int x, int y, int width, int height)
|
|||
* \param ref_idx index in the reference list
|
||||
* \param cand_out will be filled with C0 and C1 candidates
|
||||
*/
|
||||
static void get_temporal_merge_candidates(const encoder_state_t * const state,
|
||||
int32_t x,
|
||||
int32_t y,
|
||||
int32_t width,
|
||||
int32_t height,
|
||||
static void get_temporal_merge_candidates(
|
||||
const encoder_state_t * const state,
|
||||
const cu_loc_t* const cu_loc,
|
||||
uint8_t ref_list,
|
||||
uint8_t ref_idx,
|
||||
merge_candidates_t *cand_out)
|
||||
|
@ -951,8 +933,8 @@ static void get_temporal_merge_candidates(const encoder_state_t * const state,
|
|||
cu_array_t *ref_cu_array = state->frame->ref->cu_arrays[colocated_ref];
|
||||
int cu_per_width = ref_cu_array->width / SCU_WIDTH;
|
||||
|
||||
int32_t xColBr = x + width;
|
||||
int32_t yColBr = y + height;
|
||||
int32_t xColBr = cu_loc->x + cu_loc->width;
|
||||
int32_t yColBr = cu_loc->y + cu_loc->height;
|
||||
|
||||
// C0 must be available
|
||||
if (xColBr < state->encoder_control->in.width &&
|
||||
|
@ -972,8 +954,8 @@ static void get_temporal_merge_candidates(const encoder_state_t * const state,
|
|||
}
|
||||
}
|
||||
}
|
||||
int32_t xColCtr = x + (width / 2);
|
||||
int32_t yColCtr = y + (height / 2);
|
||||
int32_t xColCtr = cu_loc->x + (cu_loc->width / 2);
|
||||
int32_t yColCtr = cu_loc->y + (cu_loc->height / 2);
|
||||
|
||||
// C1 must be inside the LCU, in the center position of current CU
|
||||
if (xColCtr < state->encoder_control->in.width && yColCtr < state->encoder_control->in.height) {
|
||||
|
@ -1254,10 +1236,7 @@ static void get_ibc_merge_candidates(const encoder_state_t * const state,
|
|||
* \param lcu current LCU
|
||||
* \param cand_out will be filled with A and B candidates
|
||||
*/
|
||||
static void get_spatial_merge_candidates(int32_t x,
|
||||
int32_t y,
|
||||
int32_t width,
|
||||
int32_t height,
|
||||
static void get_spatial_merge_candidates(const cu_loc_t* const cu_loc,
|
||||
int32_t picture_width,
|
||||
int32_t picture_height,
|
||||
lcu_t *lcu,
|
||||
|
@ -1276,8 +1255,13 @@ static void get_spatial_merge_candidates(int32_t x,
|
|||
|A1|_________|
|
||||
|A0|
|
||||
*/
|
||||
int32_t x_local = SUB_SCU(x); //!< coordinates from top-left of this LCU
|
||||
int32_t y_local = SUB_SCU(y);
|
||||
const int32_t x_local = SUB_SCU(cu_loc->x); //!< coordinates from top-left of this LCU
|
||||
const int32_t y_local = SUB_SCU(cu_loc->y);
|
||||
|
||||
const int x = cu_loc->x;
|
||||
const int y = cu_loc->y;
|
||||
const int width = cu_loc->width;
|
||||
const int height = cu_loc->height;
|
||||
// A0 and A1 availability testing
|
||||
if (x != 0) {
|
||||
cu_info_t *a1 = LCU_GET_CU_AT_PX(lcu, x_local - 1, y_local + height - 1);
|
||||
|
@ -1350,15 +1334,13 @@ static void get_spatial_merge_candidates(int32_t x,
|
|||
* \param picture_height tile height in pixels
|
||||
* \param cand_out will be filled with A and B candidates
|
||||
*/
|
||||
static void get_spatial_merge_candidates_cua(const cu_array_t *cua,
|
||||
int32_t x,
|
||||
int32_t y,
|
||||
int32_t width,
|
||||
int32_t height,
|
||||
static void get_spatial_merge_candidates_cua(
|
||||
const cu_array_t *cua,
|
||||
int32_t picture_width,
|
||||
int32_t picture_height,
|
||||
merge_candidates_t *cand_out,
|
||||
bool wpp)
|
||||
bool wpp,
|
||||
const cu_loc_t* const cu_loc)
|
||||
{
|
||||
/*
|
||||
Predictor block locations
|
||||
|
@ -1370,8 +1352,12 @@ static void get_spatial_merge_candidates_cua(const cu_array_t *cua,
|
|||
|A1|_________|
|
||||
|A0|
|
||||
*/
|
||||
int32_t x_local = SUB_SCU(x); //!< coordinates from top-left of this LCU
|
||||
int32_t y_local = SUB_SCU(y);
|
||||
const int x = cu_loc->x;
|
||||
const int y = cu_loc->y;
|
||||
const int width = cu_loc->width;
|
||||
const int height = cu_loc->height;
|
||||
const int32_t x_local = SUB_SCU(x); //!< coordinates from top-left of this LCU
|
||||
const int32_t y_local = SUB_SCU(y);
|
||||
// A0 and A1 availability testing
|
||||
if (x != 0) {
|
||||
const cu_info_t *a1 = uvg_cu_array_at_const(cua, x - 1, y + height - 1);
|
||||
|
@ -1484,15 +1470,13 @@ static bool add_temporal_candidate(const encoder_state_t *state,
|
|||
/**
|
||||
* \brief Pick two mv candidates from the spatial and temporal candidates.
|
||||
*/
|
||||
static void get_mv_cand_from_candidates(const encoder_state_t * const state,
|
||||
int32_t x,
|
||||
int32_t y,
|
||||
int32_t width,
|
||||
int32_t height,
|
||||
static void get_mv_cand_from_candidates(
|
||||
const encoder_state_t * const state,
|
||||
const merge_candidates_t *merge_cand,
|
||||
const cu_info_t * const cur_cu,
|
||||
int8_t reflist,
|
||||
mv_t mv_cand[2][2])
|
||||
mv_t mv_cand[2][2],
|
||||
int ctu_row)
|
||||
{
|
||||
const cu_info_t *const *a = merge_cand->a;
|
||||
const cu_info_t *const *b = merge_cand->b;
|
||||
|
@ -1552,7 +1536,6 @@ static void get_mv_cand_from_candidates(const encoder_state_t * const state,
|
|||
|
||||
if (candidates < AMVP_MAX_NUM_CANDS)
|
||||
{
|
||||
const uint32_t ctu_row = (y >> LOG2_LCU_WIDTH);
|
||||
const uint32_t ctu_row_mul_five = ctu_row * MAX_NUM_HMVP_CANDS;
|
||||
int32_t num_cand = state->tile->frame->hmvp_size[ctu_row];
|
||||
for (int i = 0; i < MIN(/*MAX_NUM_HMVP_AVMPCANDS*/4,num_cand); i++) {
|
||||
|
@ -1595,32 +1578,30 @@ static void get_mv_cand_from_candidates(const encoder_state_t * const state,
|
|||
* \param lcu current LCU
|
||||
* \param reflist reflist index (either 0 or 1)
|
||||
*/
|
||||
void uvg_inter_get_mv_cand(const encoder_state_t * const state,
|
||||
int32_t x,
|
||||
int32_t y,
|
||||
int32_t width,
|
||||
int32_t height,
|
||||
void uvg_inter_get_mv_cand(
|
||||
const encoder_state_t * const state,
|
||||
mv_t mv_cand[2][2],
|
||||
const cu_info_t * const cur_cu,
|
||||
lcu_t *lcu,
|
||||
int8_t reflist)
|
||||
int8_t reflist,
|
||||
const cu_loc_t* const cu_loc)
|
||||
{
|
||||
merge_candidates_t merge_cand = { 0 };
|
||||
const uint8_t parallel_merge_level = state->encoder_control->cfg.log2_parallel_merge_level;
|
||||
if (cur_cu->type == CU_IBC) {
|
||||
mv_t ibc_mv_cand[IBC_MRG_MAX_NUM_CANDS][2];
|
||||
get_ibc_merge_candidates(state, cur_cu,lcu,NULL, x, y, width, height,ibc_mv_cand);
|
||||
get_ibc_merge_candidates(state, cur_cu,lcu,NULL, cu_loc->x, cu_loc->y, cu_loc->width, cu_loc->height,ibc_mv_cand);
|
||||
memcpy(mv_cand[0], ibc_mv_cand[0], sizeof(mv_t) * 2);
|
||||
memcpy(mv_cand[1], ibc_mv_cand[1], sizeof(mv_t) * 2);
|
||||
} else {
|
||||
get_spatial_merge_candidates(x, y, width, height,
|
||||
state->tile->frame->width,
|
||||
state->tile->frame->height,
|
||||
lcu,
|
||||
&merge_cand, parallel_merge_level,state->encoder_control->cfg.wpp);
|
||||
get_temporal_merge_candidates(state, x, y, width, height, 1, 0, &merge_cand);
|
||||
get_mv_cand_from_candidates(state, x, y, width, height, &merge_cand, cur_cu, reflist, mv_cand);
|
||||
get_spatial_merge_candidates(cu_loc, state->tile->frame->width, state->tile->frame->height, lcu,
|
||||
&merge_cand,
|
||||
parallel_merge_level,
|
||||
state->encoder_control->cfg.wpp);
|
||||
get_temporal_merge_candidates(state, cu_loc, 1, 0, &merge_cand);
|
||||
get_mv_cand_from_candidates(state, &merge_cand, cur_cu, reflist, mv_cand, cu_loc->y >> LOG2_LCU_WIDTH);
|
||||
}
|
||||
|
||||
uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[0][0], &mv_cand[0][1]);
|
||||
uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[1][0], &mv_cand[1][1]);
|
||||
}
|
||||
|
@ -1637,31 +1618,29 @@ void uvg_inter_get_mv_cand(const encoder_state_t * const state,
|
|||
* \param cur_cu current CU
|
||||
* \param reflist reflist index (either 0 or 1)
|
||||
*/
|
||||
void uvg_inter_get_mv_cand_cua(const encoder_state_t * const state,
|
||||
int32_t x,
|
||||
int32_t y,
|
||||
int32_t width,
|
||||
int32_t height,
|
||||
void uvg_inter_get_mv_cand_cua(
|
||||
const encoder_state_t * const state,
|
||||
mv_t mv_cand[2][2],
|
||||
const cu_info_t* cur_cu,
|
||||
int8_t reflist)
|
||||
int8_t reflist,
|
||||
const cu_loc_t* const cu_loc)
|
||||
{
|
||||
merge_candidates_t merge_cand = { 0 };
|
||||
|
||||
const cu_array_t *cua = state->tile->frame->cu_array;
|
||||
if (cur_cu->type == CU_IBC) {
|
||||
mv_t ibc_mv_cand[IBC_MRG_MAX_NUM_CANDS][2];
|
||||
get_ibc_merge_candidates(state, cur_cu, NULL,cua,x, y, width, height,ibc_mv_cand);
|
||||
get_ibc_merge_candidates(state, cur_cu, NULL,cua,cu_loc->x, cu_loc->y, cu_loc->width, cu_loc->height,ibc_mv_cand);
|
||||
memcpy(mv_cand[0], ibc_mv_cand[0], sizeof(mv_t) * 2);
|
||||
memcpy(mv_cand[1], ibc_mv_cand[1], sizeof(mv_t) * 2);
|
||||
} else {
|
||||
get_spatial_merge_candidates_cua(cua,
|
||||
x, y, width, height,
|
||||
state->tile->frame->width, state->tile->frame->height,
|
||||
&merge_cand, state->encoder_control->cfg.wpp);
|
||||
get_temporal_merge_candidates(state, x, y, width, height, 1, 0, &merge_cand);
|
||||
get_mv_cand_from_candidates(state, x, y, width, height, &merge_cand, cur_cu, reflist, mv_cand);
|
||||
state->tile->frame->width, state->tile->frame->height, &merge_cand, state->encoder_control->cfg.wpp,
|
||||
cu_loc);
|
||||
get_temporal_merge_candidates(state, cu_loc, 1, 0, &merge_cand);
|
||||
get_mv_cand_from_candidates(state, &merge_cand, cur_cu, reflist, mv_cand, cu_loc->y >> LOG2_LCU_WIDTH);
|
||||
}
|
||||
|
||||
uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[0][0], &mv_cand[0][1]);
|
||||
uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[1][0], &mv_cand[1][1]);
|
||||
}
|
||||
|
@ -1885,10 +1864,9 @@ void uvg_round_precision_vector2d(int src, int dst, vector2d_t* mv) {
|
|||
* \param lcu lcu containing the block
|
||||
* \return number of merge candidates
|
||||
*/
|
||||
uint8_t uvg_inter_get_merge_cand(const encoder_state_t * const state,
|
||||
int32_t x, int32_t y,
|
||||
int32_t width, int32_t height,
|
||||
bool use_a1, bool use_b1,
|
||||
uint8_t uvg_inter_get_merge_cand(
|
||||
const encoder_state_t * const state,
|
||||
const cu_loc_t* const cu_loc,
|
||||
inter_merge_cand_t mv_cand[MRG_MAX_NUM_CANDS],
|
||||
lcu_t *lcu)
|
||||
{
|
||||
|
@ -1897,11 +1875,12 @@ uint8_t uvg_inter_get_merge_cand(const encoder_state_t * const state,
|
|||
const uint8_t parallel_merge_level = state->encoder_control->cfg.log2_parallel_merge_level;
|
||||
merge_candidates_t merge_cand = { 0 };
|
||||
const uint8_t max_num_cands = state->encoder_control->cfg.max_merge;
|
||||
// Current CU
|
||||
cu_info_t *cur_cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(cu_loc->x), SUB_SCU(cu_loc->y));
|
||||
|
||||
cu_info_t *cur_cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y));
|
||||
if(cur_cu->type == CU_IBC) {
|
||||
mv_t ibc_mv_cand[IBC_MRG_MAX_NUM_CANDS][2];
|
||||
get_ibc_merge_candidates(state, cur_cu,lcu,NULL, x, y, width, height,ibc_mv_cand);
|
||||
get_ibc_merge_candidates(state, cur_cu,lcu,NULL, cu_loc->x, cu_loc->y, cu_loc->width, cu_loc->height,ibc_mv_cand);
|
||||
for (int i = 0; i < IBC_MRG_MAX_NUM_CANDS; i++) {
|
||||
mv_cand[i].dir = 1;
|
||||
mv_cand[i].mv[0][0] = ibc_mv_cand[i][0];
|
||||
|
@ -1909,18 +1888,16 @@ uint8_t uvg_inter_get_merge_cand(const encoder_state_t * const state,
|
|||
}
|
||||
return IBC_MRG_MAX_NUM_CANDS;
|
||||
}
|
||||
|
||||
get_spatial_merge_candidates(x, y, width, height,
|
||||
state->tile->frame->width,
|
||||
state->tile->frame->height,
|
||||
lcu,
|
||||
&merge_cand, parallel_merge_level, state->encoder_control->cfg.wpp);
|
||||
get_spatial_merge_candidates(cu_loc, state->tile->frame->width, state->tile->frame->height, lcu,
|
||||
&merge_cand,
|
||||
parallel_merge_level,
|
||||
state->encoder_control->cfg.wpp);
|
||||
|
||||
const cu_info_t **a = merge_cand.a;
|
||||
const cu_info_t **b = merge_cand.b;
|
||||
|
||||
if (!use_a1) a[1] = NULL;
|
||||
if (!use_b1) b[1] = NULL;
|
||||
const int x = cu_loc->x;
|
||||
const int y = cu_loc->y;
|
||||
|
||||
if (different_mer(x, y, x, y - 1, parallel_merge_level) && add_merge_candidate(b[1], NULL, NULL, &mv_cand[candidates])) candidates++;
|
||||
if (different_mer(x, y, x - 1, y, parallel_merge_level) && add_merge_candidate(a[1], b[1], NULL, &mv_cand[candidates])) candidates++;
|
||||
|
@ -1941,7 +1918,7 @@ uint8_t uvg_inter_get_merge_cand(const encoder_state_t * const state,
|
|||
for (int reflist = 0; reflist <= max_reflist; reflist++) {
|
||||
// Fetch temporal candidates for the current CU
|
||||
// ToDo: change collocated_from_l0_flag to allow L1 ref
|
||||
get_temporal_merge_candidates(state, x, y, width, height, 1, 0, &merge_cand);
|
||||
get_temporal_merge_candidates(state, cu_loc, 1, 0, &merge_cand);
|
||||
// TODO: enable L1 TMVP candidate
|
||||
// get_temporal_merge_candidates(state, x, y, width, height, 2, 0, &merge_cand);
|
||||
|
||||
|
@ -1973,7 +1950,7 @@ uint8_t uvg_inter_get_merge_cand(const encoder_state_t * const state,
|
|||
if (candidates == max_num_cands) return candidates;
|
||||
|
||||
if (candidates != max_num_cands - 1) {
|
||||
const uint32_t ctu_row = (y >> LOG2_LCU_WIDTH);
|
||||
const uint32_t ctu_row = (cu_loc->y >> LOG2_LCU_WIDTH);
|
||||
const uint32_t ctu_row_mul_five = ctu_row * MAX_NUM_HMVP_CANDS;
|
||||
int32_t num_cand = state->tile->frame->hmvp_size[ctu_row];
|
||||
|
||||
|
|
60
src/inter.h
60
src/inter.h
|
@ -58,61 +58,51 @@ void uvg_change_precision_vector2d(int src, int dst, vector2d_t* mv);
|
|||
void uvg_round_precision(int src, int dst, mv_t* hor, mv_t* ver);
|
||||
void uvg_round_precision_vector2d(int src, int dst, vector2d_t* mv);
|
||||
|
||||
void uvg_inter_recon_cu(const encoder_state_t * const state,
|
||||
void uvg_inter_recon_cu(
|
||||
const encoder_state_t * const state,
|
||||
lcu_t *lcu,
|
||||
int32_t x,
|
||||
int32_t y,
|
||||
int32_t width,
|
||||
bool predict_luma,
|
||||
bool predict_chroma);
|
||||
|
||||
void uvg_inter_pred_pu(const encoder_state_t * const state,
|
||||
lcu_t *lcu,
|
||||
int32_t x,
|
||||
int32_t y,
|
||||
int32_t width,
|
||||
bool predict_luma,
|
||||
bool predict_chroma,
|
||||
int i_pu);
|
||||
const cu_loc_t* const cu_loc);
|
||||
|
||||
void uvg_inter_pred_pu(
|
||||
const encoder_state_t * const state,
|
||||
lcu_t *lcu,
|
||||
bool predict_luma,
|
||||
bool predict_chroma,
|
||||
const cu_loc_t* const cu_loc);
|
||||
|
||||
void uvg_hmvp_add_mv(const encoder_state_t* const state, uint32_t pic_x, uint32_t pic_y, uint32_t block_width, uint32_t block_height, const cu_info_t* cu);
|
||||
|
||||
void uvg_inter_recon_bipred(const encoder_state_t * const state,
|
||||
void uvg_inter_recon_bipred(
|
||||
const encoder_state_t * const state,
|
||||
const uvg_picture * ref1,
|
||||
const uvg_picture * ref2,
|
||||
int32_t xpos,
|
||||
int32_t ypos,
|
||||
int32_t width,
|
||||
int32_t height,
|
||||
mv_t mv_param[2][2],
|
||||
lcu_t* lcu,
|
||||
bool predict_luma,
|
||||
bool predict_chroma);
|
||||
bool predict_chroma,
|
||||
const cu_loc_t* const cu_loc);
|
||||
|
||||
|
||||
void uvg_inter_get_mv_cand(const encoder_state_t * const state,
|
||||
int32_t x,
|
||||
int32_t y,
|
||||
int32_t width,
|
||||
int32_t height,
|
||||
void uvg_inter_get_mv_cand(
|
||||
const encoder_state_t * const state,
|
||||
mv_t mv_cand[2][2],
|
||||
const cu_info_t* cur_cu,
|
||||
lcu_t *lcu,
|
||||
int8_t reflist);
|
||||
int8_t reflist,
|
||||
const cu_loc_t* const cu_loc);
|
||||
|
||||
void uvg_inter_get_mv_cand_cua(const encoder_state_t * const state,
|
||||
int32_t x,
|
||||
int32_t y,
|
||||
int32_t width,
|
||||
int32_t height,
|
||||
void uvg_inter_get_mv_cand_cua(
|
||||
const encoder_state_t * const state,
|
||||
mv_t mv_cand[2][2],
|
||||
const cu_info_t* cur_cu,
|
||||
int8_t reflist);
|
||||
int8_t reflist,
|
||||
const cu_loc_t* const cu_loc);
|
||||
|
||||
uint8_t uvg_inter_get_merge_cand(const encoder_state_t * const state,
|
||||
int32_t x, int32_t y,
|
||||
int32_t width, int32_t height,
|
||||
bool use_a1, bool use_b1,
|
||||
uint8_t uvg_inter_get_merge_cand(
|
||||
const encoder_state_t * const state,
|
||||
const cu_loc_t* const cu_loc,
|
||||
inter_merge_cand_t mv_cand[MRG_MAX_NUM_CANDS],
|
||||
lcu_t *lcu);
|
||||
#endif
|
||||
|
|
767
src/intra.c
767
src/intra.c
File diff suppressed because it is too large
Load diff
58
src/intra.h
58
src/intra.h
|
@ -71,6 +71,7 @@ typedef struct {
|
|||
double coeff_bits;
|
||||
double distortion;
|
||||
double lfnst_costs[3];
|
||||
uint8_t best_isp_cbfs;
|
||||
} intra_search_data_t ;
|
||||
|
||||
|
||||
|
@ -107,7 +108,9 @@ int8_t uvg_intra_get_dir_luma_predictor(
|
|||
* \param multi_ref_idx Multi reference line index for the prediction block.
|
||||
*/
|
||||
void uvg_intra_build_reference(
|
||||
const int_fast8_t log2_width,
|
||||
const encoder_state_t* const state,
|
||||
const cu_loc_t* const pu_loc,
|
||||
const cu_loc_t* const cu_loc,
|
||||
const color_t color,
|
||||
const vector2d_t *const luma_px,
|
||||
const vector2d_t *const pic_px,
|
||||
|
@ -115,7 +118,8 @@ void uvg_intra_build_reference(
|
|||
uvg_intra_references *const refs,
|
||||
bool entropy_sync,
|
||||
uvg_pixel *extra_refs,
|
||||
uint8_t multi_ref_idx);
|
||||
uint8_t multi_ref_idx,
|
||||
const uint8_t isp_mode);
|
||||
|
||||
/**
|
||||
* \brief Generate intra predictions.
|
||||
|
@ -130,32 +134,60 @@ void uvg_intra_predict(
|
|||
const encoder_state_t* const state,
|
||||
uvg_intra_references* const refs,
|
||||
const cu_loc_t* const cu_loc,
|
||||
const cu_loc_t* const pu_loc,
|
||||
const color_t color,
|
||||
uvg_pixel* dst,
|
||||
const intra_search_data_t* data,
|
||||
const lcu_t* lcu,
|
||||
enum uvg_tree_type tree_type
|
||||
const lcu_t* lcu
|
||||
);
|
||||
|
||||
void uvg_intra_recon_cu(
|
||||
encoder_state_t* const state,
|
||||
int x,
|
||||
int y,
|
||||
int depth,
|
||||
intra_search_data_t* search_data,
|
||||
const cu_loc_t* cu_loc,
|
||||
cu_info_t *cur_cu,
|
||||
lcu_t *lcu,
|
||||
enum uvg_tree_type tree_type,
|
||||
bool recon_luma,
|
||||
bool recon_chroma);
|
||||
|
||||
const cu_info_t* uvg_get_co_located_luma_cu(
|
||||
int x,
|
||||
int y,
|
||||
int width,
|
||||
int height,
|
||||
double uvg_recon_and_estimate_cost_isp(encoder_state_t* const state,
|
||||
const cu_loc_t* const cu_loc,
|
||||
double cost_treshold,
|
||||
intra_search_data_t* const search_data,
|
||||
lcu_t* const lcu, bool* violates_lfnst);
|
||||
|
||||
int8_t uvg_get_co_located_luma_mode(
|
||||
const cu_loc_t* const chroma_loc,
|
||||
const cu_loc_t* const cu_loc,
|
||||
const cu_info_t* luma_cu,
|
||||
const lcu_t* const lcu,
|
||||
const cu_array_t* const cu_array,
|
||||
enum uvg_tree_type tree_type);
|
||||
bool uvg_cclm_is_allowed(const encoder_state_t* const state, const cu_loc_t* const luma_loc, cu_info_t const* const cur_cu, enum
|
||||
uvg_tree_type tree_type);
|
||||
|
||||
int uvg_get_mip_flag_context(int x, int y, int width, int height, const lcu_t* lcu, cu_array_t* const cu_a);
|
||||
uint8_t uvg_get_mip_flag_context(
|
||||
const cu_loc_t* const cu_loc,
|
||||
const lcu_t* lcu,
|
||||
cu_array_t* const cu_a);
|
||||
|
||||
int8_t uvg_wide_angle_correction(
|
||||
int_fast8_t mode,
|
||||
const int log2_width,
|
||||
const int log2_height,
|
||||
const bool account_for_dc_planar);
|
||||
|
||||
// ISP related defines
|
||||
#define NUM_ISP_MODES 3
|
||||
#define ISP_MODE_NO_ISP 0
|
||||
#define ISP_MODE_HOR 1
|
||||
#define ISP_MODE_VER 2
|
||||
#define SPLIT_TYPE_HOR 1
|
||||
#define SPLIT_TYPE_VER 2
|
||||
|
||||
int uvg_get_isp_split_dim(const int width, const int height, const int split_type, const bool is_transform_block);
|
||||
int uvg_get_isp_split_num(const int width, const int height, const int split_type, const bool is_transform_block);
|
||||
void uvg_get_isp_split_loc(cu_loc_t *loc, const int x, const int y, const int block_w, const int block_h, int split_idx, const int split_type, const bool is_transform_block);
|
||||
bool uvg_can_use_isp(const int width, const int height);
|
||||
bool uvg_can_use_isp_with_lfnst(const int width, const int height, const int isp_mode, const enum uvg_tree_type tree_type);
|
||||
|
|
|
@ -795,12 +795,20 @@ static double qp_to_lambda(encoder_state_t* const state, int qp)
|
|||
state->frame->QP + 2 + frame_allocation,
|
||||
est_qp);
|
||||
}
|
||||
if(state->encoder_control->cfg.dep_quant) {
|
||||
est_lambda *= pow(2, 0.25 / 3.0);
|
||||
}
|
||||
|
||||
state->lambda = est_lambda;
|
||||
state->lambda_sqrt = sqrt(est_lambda);
|
||||
state->qp = est_qp;
|
||||
int8_t chroma_qp = encoder->qp_map[0][est_qp];
|
||||
double tmpWeight = pow(2.0, (est_qp - chroma_qp) / 3.0);
|
||||
if (state->encoder_control->cfg.dep_quant)
|
||||
{
|
||||
tmpWeight *= (state->encoder_control->cfg.gop_len >= 8 ? pow(2.0, 0.1 / 3.0) : pow(2.0, 0.2 / 3.0)); // increase chroma weight for dependent quantization (in order to reduce bit rate shift from chroma to luma)
|
||||
}
|
||||
state->chroma_weights[1] = state->chroma_weights[2] = state->chroma_weights[3] = tmpWeight;
|
||||
state->c_lambda = est_lambda / tmpWeight;
|
||||
ctu->qp = est_qp;
|
||||
ctu->lambda = est_lambda;
|
||||
|
@ -820,7 +828,11 @@ static double qp_to_lambda(encoder_state_t* const state, int qp)
|
|||
// Since this value will be later combined with qp_pred, clip to half of that instead to be safe
|
||||
state->qp = CLIP(state->frame->QP + UVG_QP_DELTA_MIN / 2, state->frame->QP + UVG_QP_DELTA_MAX / 2, state->qp);
|
||||
state->qp = CLIP_TO_QP(state->qp);
|
||||
state->lambda = qp_to_lambda(state, state->qp);
|
||||
double to_lambda = qp_to_lambda(state, state->qp);
|
||||
if (state->encoder_control->cfg.dep_quant) {
|
||||
to_lambda *= pow(2, 0.25 / 3.0);
|
||||
}
|
||||
state->lambda = to_lambda;
|
||||
state->lambda_sqrt = sqrt(state->lambda);
|
||||
|
||||
ctu->adjust_lambda = state->lambda;
|
||||
|
@ -1103,7 +1115,12 @@ void uvg_set_lcu_lambda_and_qp(encoder_state_t * const state,
|
|||
pos.x = 0;
|
||||
}
|
||||
state->qp = CLIP_TO_QP(state->frame->QP + dqp);
|
||||
state->lambda = qp_to_lambda(state, state->qp);
|
||||
double to_lambda = qp_to_lambda(state, state->qp);
|
||||
|
||||
if (state->encoder_control->cfg.dep_quant) {
|
||||
to_lambda *= pow(2, 0.25 / 3.0);
|
||||
}
|
||||
state->lambda = to_lambda;
|
||||
state->lambda_sqrt = sqrt(state->lambda);
|
||||
}
|
||||
else if (ctrl->cfg.target_bitrate > 0) {
|
||||
|
@ -1138,6 +1155,9 @@ void uvg_set_lcu_lambda_and_qp(encoder_state_t * const state,
|
|||
state->frame->lambda * 1.5874010519681994,
|
||||
lambda);
|
||||
lambda = clip_lambda(lambda);
|
||||
if (state->encoder_control->cfg.dep_quant) {
|
||||
lambda *= pow(2, 0.25 / 3.0);
|
||||
}
|
||||
|
||||
state->lambda = lambda;
|
||||
state->lambda_sqrt = sqrt(lambda);
|
||||
|
@ -1145,8 +1165,13 @@ void uvg_set_lcu_lambda_and_qp(encoder_state_t * const state,
|
|||
|
||||
} else {
|
||||
state->qp = state->frame->QP;
|
||||
state->lambda = state->frame->lambda;
|
||||
state->lambda_sqrt = sqrt(state->frame->lambda);
|
||||
double lambda = state->frame->lambda;
|
||||
|
||||
if (state->encoder_control->cfg.dep_quant) {
|
||||
lambda *= pow(2, 0.25 / 3.0);
|
||||
}
|
||||
state->lambda = lambda;
|
||||
state->lambda_sqrt = sqrt(lambda);
|
||||
}
|
||||
|
||||
lcu->lambda = state->lambda;
|
||||
|
@ -1154,6 +1179,11 @@ void uvg_set_lcu_lambda_and_qp(encoder_state_t * const state,
|
|||
|
||||
int8_t chroma_qp = ctrl->qp_map[0][state->qp];
|
||||
double tmpWeight = pow(2.0, (state->qp - chroma_qp) / 3.0);
|
||||
if (state->encoder_control->cfg.dep_quant)
|
||||
{
|
||||
tmpWeight *= (state->encoder_control->cfg.gop_len >= 8 ? pow(2.0, 0.1 / 3.0) : pow(2.0, 0.2 / 3.0)); // increase chroma weight for dependent quantization (in order to reduce bit rate shift from chroma to luma)
|
||||
}
|
||||
state->chroma_weights[1] = state->chroma_weights[2] = state->chroma_weights[3] = tmpWeight;
|
||||
state->c_lambda = state->lambda / tmpWeight;
|
||||
|
||||
// Apply variance adaptive quantization
|
||||
|
@ -1170,10 +1200,34 @@ void uvg_set_lcu_lambda_and_qp(encoder_state_t * const state,
|
|||
// Since this value will be later combined with qp_pred, clip to half of that instead to be safe
|
||||
state->qp = CLIP(state->frame->QP + UVG_QP_DELTA_MIN / 2, state->frame->QP + UVG_QP_DELTA_MAX / 2, state->qp);
|
||||
state->qp = CLIP_TO_QP(state->qp);
|
||||
state->lambda = qp_to_lambda(state, state->qp);
|
||||
double to_lambda = qp_to_lambda(state, state->qp);
|
||||
if (state->encoder_control->cfg.dep_quant) {
|
||||
to_lambda *= pow(2, 0.25 / 3.0);
|
||||
}
|
||||
state->lambda = to_lambda;
|
||||
state->lambda_sqrt = sqrt(state->lambda);
|
||||
|
||||
lcu->adjust_lambda = state->lambda;
|
||||
lcu->adjust_qp = state->qp;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * \brief Derive the lambda used for chroma rate-distortion decisions.
 *
 * Starts from the luma lambda stored in the state, divides it by a chroma
 * weight computed from the difference between the current QP and the
 * chroma QP looked up in qp_map[0], and then applies the joint-CbCr
 * scaling factors.
 *
 * \param state      Encoder state; qp, lambda and the encoder config are read.
 * \param use_jccr   When true and the QP is above 18, lambda is scaled by 1.3.
 * \param jccr_mode  Modes 1 and 2 scale lambda by 0.8, mode 3 by 0.5;
 *                   any other value leaves lambda untouched.
 * \return           The chroma lambda.
 */
double uvg_calculate_chroma_lambda(encoder_state_t *state, bool use_jccr, int jccr_mode)
{
  const encoder_control_t * const ctrl = state->encoder_control;
  const int8_t chroma_qp = ctrl->qp_map[0][state->qp];

  // Weight grows by a factor of two for every three QP steps between luma and chroma.
  double chroma_weight = pow(2.0, (state->qp - chroma_qp) / 3.0);
  if (ctrl->cfg.dep_quant) {
    // Increase chroma weight for dependent quantization (in order to reduce
    // bit rate shift from chroma to luma).
    const double dq_exponent = ctrl->cfg.gop_len >= 8 ? 0.1 / 3.0 : 0.2 / 3.0;
    chroma_weight *= pow(2.0, dq_exponent);
  }

  double chroma_lambda = state->lambda / chroma_weight;

  if (use_jccr && state->qp > 18) {
    chroma_lambda *= 1.3;
  }

  switch (jccr_mode) {
    case 1:
    case 2:
      chroma_lambda *= 0.8;
      break;
    case 3:
      chroma_lambda *= 0.5;
      break;
    default:
      break;
  }

  return chroma_lambda;
}
|
|
@ -76,4 +76,6 @@ void uvg_set_ctu_qp_lambda(encoder_state_t * const state, vector2d_t pos);
|
|||
void uvg_update_after_picture(encoder_state_t * const state);
|
||||
void uvg_estimate_pic_lambda(encoder_state_t * const state);
|
||||
|
||||
double uvg_calculate_chroma_lambda(encoder_state_t *state, bool use_jccr, int jccr_mode);
|
||||
|
||||
#endif // RATE_CONTROL_H_
|
||||
|
|
272
src/rdo.c
272
src/rdo.c
|
@ -33,6 +33,7 @@
|
|||
#include "rdo.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <pthread.h>
|
||||
|
@ -52,7 +53,6 @@
|
|||
#include "strategies/strategies-quant.h"
|
||||
|
||||
|
||||
#define QUANT_SHIFT 14
|
||||
#define SCAN_SET_SIZE 16
|
||||
#define LOG2_SCAN_SET_SIZE 4
|
||||
#define SBH_THRESHOLD 4
|
||||
|
@ -297,15 +297,20 @@ out:
|
|||
static INLINE double get_coeff_cabac_cost(
|
||||
const encoder_state_t * const state,
|
||||
const coeff_t *coeff,
|
||||
int32_t width,
|
||||
const cu_loc_t* const cu_loc,
|
||||
color_t color,
|
||||
int8_t scan_mode,
|
||||
int8_t tr_skip,
|
||||
cu_info_t* cur_tu)
|
||||
{
|
||||
const int width = cu_loc->width;
|
||||
const int height = cu_loc->height;
|
||||
const int sub_coeff_w = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
|
||||
const int sub_coeff_h = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
|
||||
|
||||
// Make sure there are coeffs present
|
||||
bool found = false;
|
||||
for (int i = 0; i < width*width; i++) {
|
||||
for (int i = 0; i < sub_coeff_w * sub_coeff_h; i++) {
|
||||
if (coeff[i] != 0) {
|
||||
found = 1;
|
||||
break;
|
||||
|
@ -330,7 +335,7 @@ static INLINE double get_coeff_cabac_cost(
|
|||
uvg_encode_coeff_nxn((encoder_state_t*) state,
|
||||
&cabac_copy,
|
||||
coeff,
|
||||
width,
|
||||
cu_loc,
|
||||
color,
|
||||
scan_mode,
|
||||
cur_tu,
|
||||
|
@ -341,6 +346,7 @@ static INLINE double get_coeff_cabac_cost(
|
|||
&cabac_copy,
|
||||
coeff,
|
||||
width,
|
||||
height,
|
||||
color,
|
||||
scan_mode,
|
||||
&bits);
|
||||
|
@ -391,14 +397,36 @@ double uvg_get_coeff_cost(
|
|||
const encoder_state_t * const state,
|
||||
const coeff_t *coeff,
|
||||
cu_info_t* cur_tu,
|
||||
int32_t width,
|
||||
const cu_loc_t* const cu_loc,
|
||||
color_t color,
|
||||
int8_t scan_mode,
|
||||
int8_t tr_skip)
|
||||
int8_t tr_skip,
|
||||
int coeff_order)
|
||||
{
|
||||
uint8_t save_cccs = state->encoder_control->cfg.fastrd_sampling_on;
|
||||
uint8_t check_accuracy = state->encoder_control->cfg.fastrd_accuracy_check_on;
|
||||
|
||||
const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
|
||||
const int height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
|
||||
int x_local = cu_loc->x % LCU_WIDTH;
|
||||
int y_local = cu_loc->y % LCU_WIDTH;
|
||||
const int sub_coeff_w = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
|
||||
const int sub_coeff_h = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
|
||||
const int lcu_width = color == COLOR_Y ? LCU_WIDTH : LCU_WIDTH_C;
|
||||
|
||||
|
||||
const coeff_t* coeff_ptr = NULL;
|
||||
coeff_t sub_coeff[TR_MAX_WIDTH * TR_MAX_WIDTH];
|
||||
|
||||
if (coeff_order == COEFF_ORDER_LINEAR) {
|
||||
coeff_ptr = coeff;
|
||||
}
|
||||
else {
|
||||
// Coeff order CU
|
||||
uvg_get_sub_coeff(sub_coeff, coeff, x_local, y_local, sub_coeff_w, sub_coeff_h, lcu_width);
|
||||
coeff_ptr = sub_coeff;
|
||||
}
|
||||
|
||||
if (state->qp < state->encoder_control->cfg.fast_residual_cost_limit &&
|
||||
state->qp < MAX_FAST_COEFF_COST_QP && !tr_skip) {
|
||||
// TODO: do we need to assert(0) out of the fast-estimation branch if we
|
||||
|
@ -409,17 +437,17 @@ double uvg_get_coeff_cost(
|
|||
return UINT32_MAX; // Hush little compiler don't you cry, not really gonna return anything after assert(0)
|
||||
} else {
|
||||
uint64_t weights = uvg_fast_coeff_get_weights(state);
|
||||
uint32_t fast_cost = uvg_fast_coeff_cost(coeff, width, weights);
|
||||
uint32_t fast_cost = uvg_fast_coeff_cost(coeff_ptr, width, height, weights);
|
||||
if (check_accuracy) {
|
||||
double ccc = get_coeff_cabac_cost(state, coeff, width, color, scan_mode, tr_skip, cur_tu);
|
||||
double ccc = get_coeff_cabac_cost(state, coeff_ptr, cu_loc, color, scan_mode, tr_skip, cur_tu);
|
||||
save_accuracy(state->qp, ccc, fast_cost);
|
||||
}
|
||||
return fast_cost;
|
||||
}
|
||||
} else {
|
||||
double ccc = get_coeff_cabac_cost(state, coeff, width, color, scan_mode, tr_skip, cur_tu);
|
||||
double ccc = get_coeff_cabac_cost(state, coeff_ptr, cu_loc, color, scan_mode, tr_skip, cur_tu);
|
||||
if (save_cccs) {
|
||||
save_ccc(state->qp, coeff, width * width, ccc);
|
||||
save_ccc(state->qp, coeff, width * height, ccc);
|
||||
}
|
||||
return ccc;
|
||||
}
|
||||
|
@ -684,12 +712,13 @@ void uvg_rdoq_sign_hiding(
|
|||
const int32_t last_pos,
|
||||
const coeff_t *const coeffs,
|
||||
coeff_t *const quant_coeffs,
|
||||
const int8_t color)
|
||||
const int8_t color,
|
||||
const bool need_sqrt_adjust)
|
||||
{
|
||||
const encoder_control_t * const ctrl = state->encoder_control;
|
||||
const double lambda = color ? state->c_lambda : state->lambda;
|
||||
|
||||
int inv_quant = uvg_g_inv_quant_scales[qp_scaled % 6];
|
||||
int inv_quant = uvg_g_inv_quant_scales[need_sqrt_adjust][qp_scaled % 6];
|
||||
// This somehow scales quant_delta into fractional bits. Instead of the bits
|
||||
// being multiplied by lambda, the residual is divided by it, or something
|
||||
// like that.
|
||||
|
@ -814,28 +843,28 @@ void uvg_rdoq_sign_hiding(
|
|||
}
|
||||
}
|
||||
|
||||
static unsigned templateAbsSum(const coeff_t* coeff, int baseLevel, uint32_t posX, uint32_t posY, uint32_t width, uint32_t height)
|
||||
static unsigned templateAbsSum(const coeff_t* coeff, int baseLevel, uint32_t posX, uint32_t posY, uint32_t width, uint32_t height, uint8_t mts_index)
|
||||
{
|
||||
const coeff_t* pData = coeff + posX + posY * width;
|
||||
coeff_t sum = 0;
|
||||
if (posX < width - 1)
|
||||
{
|
||||
sum += abs(pData[1]);
|
||||
sum += mts_index && posX + 1 >= 16 ? 0 : abs(pData[1]);
|
||||
if (posX < width - 2)
|
||||
{
|
||||
sum += abs(pData[2]);
|
||||
sum += mts_index && posX + 2 >= 16 ? 0 : abs(pData[2]);
|
||||
}
|
||||
if (posY < height - 1)
|
||||
{
|
||||
sum += abs(pData[width + 1]);
|
||||
sum += mts_index && (posY + 1 >= 16 || posX + 1 >= 16) ? 0 : abs(pData[width + 1]);
|
||||
}
|
||||
}
|
||||
if (posY < height - 1)
|
||||
{
|
||||
sum += abs(pData[width]);
|
||||
sum += mts_index && posY + 1 >= 16 ? 0 : abs(pData[width]);
|
||||
if (posY < height - 2)
|
||||
{
|
||||
sum += abs(pData[width << 1]);
|
||||
sum += mts_index && posY + 2 >= 16 ? 0 : abs(pData[width << 1]);
|
||||
}
|
||||
}
|
||||
return MAX(MIN(sum - 5 * baseLevel, 31), 0);
|
||||
|
@ -1141,7 +1170,8 @@ int uvg_ts_rdoq(encoder_state_t* const state, coeff_t* src_coeff, coeff_t* dest_
|
|||
const int max_log2_tr_dynamic_range = 15;
|
||||
uint32_t log2_tr_width = uvg_math_floor_log2(width);
|
||||
uint32_t log2_tr_height = uvg_math_floor_log2(height);
|
||||
const uint32_t log2_block_size = uvg_g_convert_to_bit[width] + 2;
|
||||
const uint32_t log2_block_width = uvg_g_convert_to_log2[width];
|
||||
const uint32_t log2_block_height = uvg_g_convert_to_log2[height];
|
||||
const uint32_t log2_cg_width = g_log2_sbb_size[log2_tr_width][log2_tr_height][0];
|
||||
const uint32_t log2_cg_height = g_log2_sbb_size[log2_tr_width][log2_tr_height][1];
|
||||
|
||||
|
@ -1166,15 +1196,18 @@ int uvg_ts_rdoq(encoder_state_t* const state, coeff_t* src_coeff, coeff_t* dest_
|
|||
|
||||
switch (cg_num) {
|
||||
case 1: FILL_ARRAY(sig_coeffgroup_flag, 0, 1); FILL_ARRAY(cost_coeffgroup_sig, 0, 1); break;
|
||||
case 2: FILL_ARRAY(sig_coeffgroup_flag, 0, 2); FILL_ARRAY(cost_coeffgroup_sig, 0, 2); break;
|
||||
case 4: FILL_ARRAY(sig_coeffgroup_flag, 0, 4); FILL_ARRAY(cost_coeffgroup_sig, 0, 4); break;
|
||||
case 8: FILL_ARRAY(sig_coeffgroup_flag, 0, 8); FILL_ARRAY(cost_coeffgroup_sig, 0, 8); break;
|
||||
case 16: FILL_ARRAY(sig_coeffgroup_flag, 0, 16); FILL_ARRAY(cost_coeffgroup_sig, 0, 16); break;
|
||||
case 32: FILL_ARRAY(sig_coeffgroup_flag, 0, 32); FILL_ARRAY(cost_coeffgroup_sig, 0, 32); break;
|
||||
case 64: FILL_ARRAY(sig_coeffgroup_flag, 0, 64); FILL_ARRAY(cost_coeffgroup_sig, 0, 64); break;
|
||||
default: assert(0 && "There should be 1, 4, 16 or 64 coefficient groups");
|
||||
}
|
||||
|
||||
const bool needs_sqrt2_scale = false; // from VTM: should always be false - transform-skipped blocks don't require sqrt(2) compensation.
|
||||
const int q_bits = QUANT_SHIFT + qp_scaled / 6 + (needs_sqrt2_scale ? -1 : 0); // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
|
||||
const int32_t quant_coeff = uvg_g_quant_scales[qp_scaled % 6];
|
||||
const int32_t quant_coeff = uvg_g_quant_scales[needs_sqrt2_scale][qp_scaled % 6];
|
||||
|
||||
const double error_scale = (double)(1 << CTX_FRAC_BITS) / quant_coeff / quant_coeff;
|
||||
|
||||
|
@ -1182,8 +1215,8 @@ int uvg_ts_rdoq(encoder_state_t* const state, coeff_t* src_coeff, coeff_t* dest_
|
|||
|
||||
const coeff_t entropy_coding_maximum = (1 << max_log2_tr_dynamic_range) - 1;
|
||||
|
||||
const uint32_t* scan = uvg_g_sig_last_scan[scan_mode][log2_block_size - 1];
|
||||
const uint32_t* scan_cg = g_sig_last_scan_cg[log2_block_size - 1][scan_mode];
|
||||
const uint32_t* const scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_mode, log2_block_width, log2_block_height);
|
||||
const uint32_t* const scan_cg = uvg_get_scan_order_table(SCAN_GROUP_UNGROUPED, scan_mode, log2_block_width, log2_block_height);
|
||||
|
||||
uint32_t coeff_levels[3];
|
||||
double coeff_level_error[4];
|
||||
|
@ -1221,8 +1254,8 @@ int uvg_ts_rdoq(encoder_state_t* const state, coeff_t* src_coeff, coeff_t* dest_
|
|||
scan_pos = (sbId << log2_cg_size) + scan_pos_in_sb;
|
||||
int last_pos_coded = sbSizeM1;
|
||||
uint32_t blkpos = scan[scan_pos];
|
||||
uint32_t pos_y = blkpos >> log2_block_size;
|
||||
uint32_t pos_x = blkpos - (pos_y << log2_block_size);
|
||||
uint32_t pos_y = blkpos >> log2_block_width;
|
||||
uint32_t pos_x = blkpos - (pos_y << log2_block_width);
|
||||
//===== quantization =====
|
||||
|
||||
// set coeff
|
||||
|
@ -1365,6 +1398,48 @@ int uvg_ts_rdoq(encoder_state_t* const state, coeff_t* src_coeff, coeff_t* dest_
|
|||
return abs_sum;
|
||||
}
|
||||
|
||||
|
||||
static uint32_t context_get_sig_ctx_idx_abs(const coeff_t* coeff, uint32_t pos_x, uint32_t pos_y,
|
||||
uint32_t width, uint32_t height, int8_t color,
|
||||
int32_t* temp_diag, int32_t* temp_sum, int8_t mts)
|
||||
{
|
||||
const coeff_t* data = coeff + pos_x + pos_y * width;
|
||||
const int diag = pos_x + pos_y;
|
||||
int num_pos = 0;
|
||||
int sum_abs = 0;
|
||||
#define UPDATE(x) {int a=abs(x);sum_abs+=MIN(4+(a&1),a);num_pos+=(a?1:0);}
|
||||
if (pos_x < width - 1)
|
||||
{
|
||||
UPDATE(mts && pos_x + 1 >= 16 ? 0 : data[1]);
|
||||
if (pos_x < width - 2)
|
||||
{
|
||||
UPDATE(mts && pos_x + 2 >= 16 ? 0 : data[2]);
|
||||
}
|
||||
if (pos_y < height - 1)
|
||||
{
|
||||
UPDATE(mts && (pos_y + 1 >= 16 || pos_x + 1 >= 16) ? 0 : data[width + 1]);
|
||||
}
|
||||
}
|
||||
if (pos_y < height - 1)
|
||||
{
|
||||
UPDATE(mts && pos_x + 1 >= 16 ? 0 : data[width]);
|
||||
if (pos_y < height - 2)
|
||||
{
|
||||
UPDATE(mts && pos_x + 2 >= 16 ? 0 : data[width << 1]);
|
||||
}
|
||||
}
|
||||
#undef UPDATE
|
||||
int ctx_ofs = MIN((sum_abs + 1) >> 1, 3) + (diag < 2 ? 4 : 0);
|
||||
if (color == COLOR_Y)
|
||||
{
|
||||
ctx_ofs += diag < 5 ? 4 : 0;
|
||||
}
|
||||
|
||||
*temp_diag = diag;
|
||||
*temp_sum = sum_abs - num_pos;
|
||||
return ctx_ofs;
|
||||
}
|
||||
|
||||
/** RDOQ with CABAC
|
||||
* \returns void
|
||||
* Rate distortion optimized quantization for entropy
|
||||
|
@ -1377,31 +1452,35 @@ void uvg_rdoq(
|
|||
coeff_t *dest_coeff,
|
||||
int32_t width,
|
||||
int32_t height,
|
||||
int8_t type,
|
||||
int8_t color,
|
||||
int8_t scan_mode,
|
||||
int8_t block_type,
|
||||
int8_t tr_depth,
|
||||
uint16_t cbf,
|
||||
uint8_t lfnst_idx)
|
||||
uint8_t lfnst_idx, uint8_t mts_idx)
|
||||
{
|
||||
const encoder_control_t * const encoder = state->encoder_control;
|
||||
cabac_data_t * const cabac = &state->cabac;
|
||||
uint32_t log2_tr_width = uvg_math_floor_log2( height );
|
||||
uint32_t log2_tr_height = uvg_math_floor_log2( width );
|
||||
int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_height + log2_tr_width) >> 1); // Represents scaling through forward transform
|
||||
const uint32_t log2_block_width = uvg_g_convert_to_log2[width];
|
||||
const uint32_t log2_block_height = uvg_g_convert_to_log2[height];
|
||||
bool needs_block_size_trafo_scale = !false && ((log2_block_width + log2_block_height) % 2 == 1);
|
||||
needs_block_size_trafo_scale |= 0; // Non log2 block size
|
||||
|
||||
int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_block_width + log2_block_height) >> 1); // Represents scaling through forward transform
|
||||
uint16_t go_rice_param = 0;
|
||||
uint32_t reg_bins = (width * height * 28) >> 4;
|
||||
const uint32_t log2_block_size = uvg_g_convert_to_bit[ width ] + 2;
|
||||
int32_t scalinglist_type= (block_type == CU_INTRA ? 0 : 3) + type;
|
||||
|
||||
int32_t qp_scaled = uvg_get_scaled_qp(type, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]);
|
||||
int32_t scalinglist_type= (block_type == CU_INTRA ? 0 : 3) + color;
|
||||
|
||||
int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift;
|
||||
int32_t qp_scaled = uvg_get_scaled_qp(color, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]);
|
||||
|
||||
const double lambda = type ? state->c_lambda : state->lambda;
|
||||
int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift - needs_block_size_trafo_scale;
|
||||
|
||||
const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled%6];
|
||||
const double *err_scale = encoder->scaling_list.error_scale[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled%6];
|
||||
const double lambda = color ? state->c_lambda : state->lambda;
|
||||
const int32_t default_quant_coeff = uvg_g_quant_scales[needs_block_size_trafo_scale][qp_scaled % 6];
|
||||
const bool use_scaling_list = state->encoder_control->cfg.scaling_list != UVG_SCALING_LIST_OFF;
|
||||
|
||||
const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_block_width][log2_block_height][scalinglist_type][qp_scaled%6];
|
||||
const double *err_scale = encoder->scaling_list.error_scale[log2_block_width][log2_block_height][scalinglist_type][qp_scaled%6];
|
||||
|
||||
double block_uncoded_cost = 0;
|
||||
|
||||
|
@ -1415,14 +1494,19 @@ void uvg_rdoq(
|
|||
|
||||
memset(dest_coeff, 0, sizeof(coeff_t) * width * height);
|
||||
|
||||
const uint32_t log2_cg_size = uvg_g_log2_sbb_size[log2_block_size][log2_block_size][0] + uvg_g_log2_sbb_size[log2_block_size][log2_block_size][1];
|
||||
const uint32_t log2_cg_size = uvg_g_log2_sbb_size[log2_block_width][log2_block_height][0] + uvg_g_log2_sbb_size[log2_block_width][log2_block_height][1];
|
||||
const uint32_t log2_cg_width = uvg_g_log2_sbb_size[log2_block_width][log2_block_height][0];
|
||||
const uint32_t log2_cg_height = uvg_g_log2_sbb_size[log2_block_width][log2_block_height][1];
|
||||
|
||||
const uint32_t cg_width = (MIN((uint8_t)32, width) >> (log2_cg_size / 2));
|
||||
const uint32_t cg_width = (MIN((uint8_t)TR_MAX_WIDTH, width) >> log2_cg_width);
|
||||
const uint32_t cg_height = (MIN((uint8_t)TR_MAX_WIDTH, height) >> log2_cg_height);
|
||||
|
||||
const uint32_t * const scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_mode, log2_block_width, log2_block_height);
|
||||
const uint32_t * const scan_cg = uvg_get_scan_order_table(SCAN_GROUP_UNGROUPED, scan_mode, log2_block_width, log2_block_height);
|
||||
|
||||
const uint32_t *scan_cg = g_sig_last_scan_cg[log2_block_size - 1][scan_mode];
|
||||
const uint32_t cg_size = 16;
|
||||
const int32_t shift = 4 >> 1;
|
||||
const uint32_t num_blk_side = width >> shift;
|
||||
const uint32_t num_blk_side = MAX(width >> shift, 1);
|
||||
double cost_coeffgroup_sig[ 64 ];
|
||||
uint32_t sig_coeffgroup_flag[ 64 ];
|
||||
|
||||
|
@ -1431,26 +1515,34 @@ void uvg_rdoq(
|
|||
int32_t temp_diag = -1;
|
||||
int32_t temp_sum = -1;
|
||||
|
||||
const uint32_t *scan = uvg_g_sig_last_scan[ scan_mode ][ log2_block_size - 1 ];
|
||||
|
||||
int32_t cg_last_scanpos = -1;
|
||||
int32_t last_scanpos = -1;
|
||||
|
||||
uint32_t cg_num = width * height >> 4;
|
||||
uint32_t cg_num = lfnst_idx > 0 ? 1 : width * height >> 4;
|
||||
|
||||
double dTransShift = (double)transform_shift + (needs_block_size_trafo_scale ? -0.5 : 0.0);
|
||||
// Compensate for scaling of bitcount in Lagrange cost function
|
||||
double scale = CTX_FRAC_ONE_BIT;
|
||||
// Compensate for scaling through forward transform
|
||||
scale = scale * pow(2.0, -2.0 * dTransShift);
|
||||
const double default_error_scale = scale / default_quant_coeff / default_quant_coeff;
|
||||
|
||||
// Explicitly tell the only possible numbers of elements to be zeroed.
|
||||
// Hope the compiler is able to utilize this information.
|
||||
switch (cg_num) {
|
||||
case 1: FILL_ARRAY(sig_coeffgroup_flag, 0, 1); break;
|
||||
case 2: FILL_ARRAY(sig_coeffgroup_flag, 0, 2); break;
|
||||
case 4: FILL_ARRAY(sig_coeffgroup_flag, 0, 4); break;
|
||||
case 8: FILL_ARRAY(sig_coeffgroup_flag, 0, 8); break;
|
||||
case 16: FILL_ARRAY(sig_coeffgroup_flag, 0, 16); break;
|
||||
case 32: FILL_ARRAY(sig_coeffgroup_flag, 0, 32); break;
|
||||
case 64: FILL_ARRAY(sig_coeffgroup_flag, 0, 64); break;
|
||||
default: assert(0 && "There should be 1, 4, 16 or 64 coefficient groups");
|
||||
default: assert(0 && "There should be 1, 2, 4, 8, 16, 32 or 64 coefficient groups");
|
||||
}
|
||||
|
||||
cabac_ctx_t *base_coeff_group_ctx = &(cabac->ctx.sig_coeff_group_model[type ? 2 : 0]);
|
||||
cabac_ctx_t *baseCtx = (type == 0) ? &(cabac->ctx.cu_sig_model_luma[0][0]) : &(cabac->ctx.cu_sig_model_chroma[0][0]);
|
||||
cabac_ctx_t* base_gt1_ctx = (type == 0) ? &(cabac->ctx.cu_gtx_flag_model_luma[1][0]) : &(cabac->ctx.cu_gtx_flag_model_chroma[1][0]);
|
||||
cabac_ctx_t *base_coeff_group_ctx = &(cabac->ctx.sig_coeff_group_model[color ? 2 : 0]);
|
||||
cabac_ctx_t *baseCtx = (color == 0) ? &(cabac->ctx.cu_sig_model_luma[0][0]) : &(cabac->ctx.cu_sig_model_chroma[0][0]);
|
||||
cabac_ctx_t* base_gt1_ctx = (color == 0) ? &(cabac->ctx.cu_gtx_flag_model_luma[1][0]) : &(cabac->ctx.cu_gtx_flag_model_chroma[1][0]);
|
||||
|
||||
struct {
|
||||
double coded_level_and_dist;
|
||||
|
@ -1463,21 +1555,26 @@ void uvg_rdoq(
|
|||
//Find last cg and last scanpos
|
||||
const int max_lfnst_pos = ((height == 4 && width == 4) || (height == 8 && width == 8)) ? 7 : 15;
|
||||
int32_t cg_scanpos;
|
||||
uint32_t max_scan_group_size = lfnst_idx > 0 ? max_lfnst_pos : cg_size - 1;
|
||||
for (cg_scanpos = (cg_num - 1); cg_scanpos >= 0; cg_scanpos--)
|
||||
{
|
||||
for (int32_t scanpos_in_cg = (cg_size - 1); scanpos_in_cg >= 0; scanpos_in_cg--)
|
||||
uint32_t cg_blkpos = scan_cg[cg_scanpos];
|
||||
uint32_t cg_pos_y = cg_blkpos / num_blk_side;
|
||||
uint32_t cg_pos_x = cg_blkpos - (cg_pos_y * num_blk_side);
|
||||
if (mts_idx != 0 && (cg_pos_y >= 4 || cg_pos_x >= 4)) continue;
|
||||
for (int32_t scanpos_in_cg = max_scan_group_size; scanpos_in_cg >= 0; scanpos_in_cg--)
|
||||
{
|
||||
int32_t scanpos = cg_scanpos*cg_size + scanpos_in_cg;
|
||||
if (lfnst_idx > 0 && scanpos > max_lfnst_pos) break;
|
||||
|
||||
uint32_t blkpos = scan[scanpos];
|
||||
int32_t q = quant_coeff[blkpos];
|
||||
int32_t q = use_scaling_list ? quant_coeff[blkpos] : default_quant_coeff;
|
||||
int32_t level_double = coef[blkpos];
|
||||
level_double = MIN(abs(level_double) * q, MAX_INT - (1 << (q_bits - 1)));
|
||||
uint32_t max_abs_level = (level_double + (1 << (q_bits - 1))) >> q_bits;
|
||||
|
||||
double err = (double)level_double;
|
||||
|
||||
cost_coeff0[scanpos] = err * err * err_scale[blkpos];
|
||||
cost_coeff0[scanpos] = err * err * (use_scaling_list ? err_scale[blkpos] : default_error_scale);
|
||||
|
||||
dest_coeff[blkpos] = max_abs_level;
|
||||
if (max_abs_level > 0) {
|
||||
|
@ -1507,43 +1604,45 @@ void uvg_rdoq(
|
|||
uint32_t cg_pos_x = cg_blkpos - (cg_pos_y * num_blk_side);
|
||||
|
||||
FILL(rd_stats, 0);
|
||||
for (int32_t scanpos_in_cg = cg_size - 1; scanpos_in_cg >= 0; scanpos_in_cg--) {
|
||||
if (mts_idx != 0 && (cg_pos_y >= 4 || cg_pos_x >= 4)) continue;
|
||||
for (int32_t scanpos_in_cg = max_scan_group_size; scanpos_in_cg >= 0; scanpos_in_cg--) {
|
||||
int32_t scanpos = cg_scanpos*cg_size + scanpos_in_cg;
|
||||
if (scanpos > last_scanpos) {
|
||||
continue;
|
||||
}
|
||||
uint32_t blkpos = scan[scanpos];
|
||||
int32_t q = quant_coeff[blkpos];
|
||||
double temp = err_scale[blkpos];
|
||||
int32_t q = use_scaling_list ? quant_coeff[blkpos] : default_quant_coeff;
|
||||
double temp = (use_scaling_list ? err_scale[blkpos] : default_error_scale);
|
||||
int32_t level_double = coef[blkpos];
|
||||
level_double = MIN(abs(level_double) * q , MAX_INT - (1 << (q_bits - 1)));
|
||||
uint32_t max_abs_level = (level_double + (1 << (q_bits - 1))) >> q_bits;
|
||||
dest_coeff[blkpos] = max_abs_level;
|
||||
double err = (double)level_double;
|
||||
|
||||
cost_coeff0[scanpos] = err * err * err_scale[blkpos];
|
||||
cost_coeff0[scanpos] = err * err * (use_scaling_list ? err_scale[blkpos] : default_error_scale);
|
||||
|
||||
block_uncoded_cost += cost_coeff0[ scanpos ];
|
||||
|
||||
if (last_scanpos >= 0) {
|
||||
|
||||
uint32_t pos_y = blkpos >> log2_block_size;
|
||||
uint32_t pos_x = blkpos - (pos_y << log2_block_size);
|
||||
uint32_t pos_y = blkpos >> log2_block_width;
|
||||
uint32_t pos_x = blkpos - (pos_y << log2_block_width);
|
||||
//===== coefficient level estimation =====
|
||||
int32_t level;
|
||||
|
||||
uint16_t ctx_sig = 0;
|
||||
if (scanpos != last_scanpos) {
|
||||
ctx_sig = uvg_context_get_sig_ctx_idx_abs(dest_coeff, pos_x, pos_y, width, height, type, &temp_diag, &temp_sum);
|
||||
// VVC document 9.3.4.2.8, context for sig_coeff_flag calculated here
|
||||
ctx_sig = context_get_sig_ctx_idx_abs(dest_coeff, pos_x, pos_y, width, height, color, &temp_diag, &temp_sum, mts_idx);
|
||||
}
|
||||
|
||||
if (temp_diag != -1) {
|
||||
ctx_set = (MIN(temp_sum, 4) + 1) + (!temp_diag ? ((type == 0) ? 15 : 5) : (type == 0) ? temp_diag < 3 ? 10 : (temp_diag < 10 ? 5 : 0) : 0);
|
||||
ctx_set = (MIN(temp_sum, 4) + 1) + (!temp_diag ? ((color == 0) ? 15 : 5) : (color == 0) ? temp_diag < 3 ? 10 : (temp_diag < 10 ? 5 : 0) : 0);
|
||||
}
|
||||
else ctx_set = 0;
|
||||
|
||||
if (reg_bins < 4) {
|
||||
int sumAll = templateAbsSum(dest_coeff, 0, pos_x, pos_y, width, height);
|
||||
int sumAll = templateAbsSum(dest_coeff, 0, pos_x, pos_y, width, height,mts_idx);
|
||||
go_rice_param = g_auiGoRiceParsCoeff[sumAll];
|
||||
}
|
||||
|
||||
|
@ -1554,12 +1653,12 @@ void uvg_rdoq(
|
|||
if (scanpos == last_scanpos) {
|
||||
level = uvg_get_coded_level(state, &cost_coeff[scanpos], &cost_coeff0[scanpos], &cost_sig[scanpos],
|
||||
level_double, max_abs_level, 0, gt1_ctx, gt2_ctx, par_ctx, go_rice_param,
|
||||
reg_bins, q_bits, temp, 1, type);
|
||||
reg_bins, q_bits, temp, 1, color);
|
||||
}
|
||||
else {
|
||||
level = uvg_get_coded_level(state, &cost_coeff[scanpos], &cost_coeff0[scanpos], &cost_sig[scanpos],
|
||||
level_double, max_abs_level, ctx_sig, gt1_ctx, gt2_ctx, par_ctx, go_rice_param,
|
||||
reg_bins, q_bits, temp, 0, type);
|
||||
reg_bins, q_bits, temp, 0, color);
|
||||
if (encoder->cfg.signhide_enable) {
|
||||
int greater_than_zero = CTX_ENTROPY_BITS(&baseCtx[ctx_sig], 1);
|
||||
int zero = CTX_ENTROPY_BITS(&baseCtx[ctx_sig], 0);
|
||||
|
@ -1572,14 +1671,14 @@ void uvg_rdoq(
|
|||
if (encoder->cfg.signhide_enable) {
|
||||
sh_rates.quant_delta[blkpos] = (level_double - level * (1 << q_bits)) >> (q_bits - 8);
|
||||
if (level > 0) {
|
||||
int32_t rate_now = uvg_get_ic_rate(state, level, gt1_ctx, gt2_ctx, par_ctx, go_rice_param, reg_bins, type, false);
|
||||
sh_rates.inc[blkpos] = uvg_get_ic_rate(state, level + 1, gt1_ctx, gt2_ctx, par_ctx, go_rice_param, reg_bins, type, false) - rate_now;
|
||||
sh_rates.dec[blkpos] = uvg_get_ic_rate(state, level - 1, gt1_ctx, gt2_ctx, par_ctx, go_rice_param, reg_bins, type, false) - rate_now;
|
||||
int32_t rate_now = uvg_get_ic_rate(state, level, gt1_ctx, gt2_ctx, par_ctx, go_rice_param, reg_bins, color, false);
|
||||
sh_rates.inc[blkpos] = uvg_get_ic_rate(state, level + 1, gt1_ctx, gt2_ctx, par_ctx, go_rice_param, reg_bins, color, false) - rate_now;
|
||||
sh_rates.dec[blkpos] = uvg_get_ic_rate(state, level - 1, gt1_ctx, gt2_ctx, par_ctx, go_rice_param, reg_bins, color, false) - rate_now;
|
||||
}
|
||||
else { // level == 0
|
||||
if (reg_bins < 4) {
|
||||
int32_t rate_now = uvg_get_ic_rate(state, level, gt1_ctx, gt2_ctx, par_ctx, go_rice_param, reg_bins, type, false);
|
||||
sh_rates.inc[blkpos] = uvg_get_ic_rate(state, level + 1, gt1_ctx, gt2_ctx, par_ctx, go_rice_param, reg_bins, type, false) - rate_now;
|
||||
int32_t rate_now = uvg_get_ic_rate(state, level, gt1_ctx, gt2_ctx, par_ctx, go_rice_param, reg_bins, color, false);
|
||||
sh_rates.inc[blkpos] = uvg_get_ic_rate(state, level + 1, gt1_ctx, gt2_ctx, par_ctx, go_rice_param, reg_bins, color, false) - rate_now;
|
||||
}
|
||||
else {
|
||||
sh_rates.inc[blkpos] = CTX_ENTROPY_BITS(&base_gt1_ctx[gt1_ctx], 0);
|
||||
|
@ -1595,7 +1694,7 @@ void uvg_rdoq(
|
|||
}
|
||||
else if (reg_bins >= 4) {
|
||||
reg_bins -= (level < 2 ? level : 3) + (scanpos != last_scanpos);
|
||||
int sumAll = templateAbsSum(coef, 4, pos_x, pos_y, width, height);
|
||||
int sumAll = templateAbsSum(coef, 4, pos_x, pos_y, width, height, mts_idx);
|
||||
go_rice_param = g_auiGoRiceParsCoeff[sumAll];
|
||||
}
|
||||
}
|
||||
|
@ -1620,7 +1719,7 @@ void uvg_rdoq(
|
|||
if( cg_scanpos ) {
|
||||
if (sig_coeffgroup_flag[cg_blkpos] == 0) {
|
||||
uint32_t ctx_sig = uvg_context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x,
|
||||
cg_pos_y, cg_width);
|
||||
cg_pos_y, cg_width, cg_height);
|
||||
cost_coeffgroup_sig[cg_scanpos] = lambda *CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
|
||||
base_cost += cost_coeffgroup_sig[cg_scanpos] - rd_stats.sig_cost;
|
||||
} else {
|
||||
|
@ -1636,7 +1735,7 @@ void uvg_rdoq(
|
|||
|
||||
// add SigCoeffGroupFlag cost to total cost
|
||||
ctx_sig = uvg_context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x,
|
||||
cg_pos_y, cg_width);
|
||||
cg_pos_y, cg_width, cg_height);
|
||||
|
||||
cost_coeffgroup_sig[cg_scanpos] = lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 1);
|
||||
base_cost += cost_coeffgroup_sig[cg_scanpos];
|
||||
|
@ -1656,7 +1755,7 @@ void uvg_rdoq(
|
|||
cost_coeffgroup_sig[cg_scanpos] = lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 0);
|
||||
|
||||
// reset coeffs to 0 in this block
|
||||
for (int32_t scanpos_in_cg = cg_size - 1; scanpos_in_cg >= 0; scanpos_in_cg--) {
|
||||
for (int32_t scanpos_in_cg = max_scan_group_size; scanpos_in_cg >= 0; scanpos_in_cg--) {
|
||||
int32_t scanpos = cg_scanpos*cg_size + scanpos_in_cg;
|
||||
uint32_t blkpos = scan[scanpos];
|
||||
if (dest_coeff[blkpos]){
|
||||
|
@ -1679,12 +1778,12 @@ void uvg_rdoq(
|
|||
int8_t found_last = 0;
|
||||
int32_t best_last_idx_p1 = 0;
|
||||
|
||||
if( block_type != CU_INTRA && !type ) {
|
||||
if( block_type != CU_INTRA && !color ) {
|
||||
best_cost = block_uncoded_cost + lambda * CTX_ENTROPY_BITS(&(cabac->ctx.cu_qt_root_cbf_model),0);
|
||||
base_cost += lambda * CTX_ENTROPY_BITS(&(cabac->ctx.cu_qt_root_cbf_model),1);
|
||||
} else {
|
||||
cabac_ctx_t* base_cbf_model = NULL;
|
||||
switch (type) {
|
||||
switch (color) {
|
||||
case COLOR_Y:
|
||||
base_cbf_model = cabac->ctx.qt_cbf_model_luma;
|
||||
break;
|
||||
|
@ -1697,25 +1796,26 @@ void uvg_rdoq(
|
|||
default:
|
||||
assert(0);
|
||||
}
|
||||
ctx_cbf = ( type != COLOR_V ? 0 : cbf_is_set(cbf, 5 - uvg_math_floor_log2(width), COLOR_U));
|
||||
// This cbf should work even with non-square blocks
|
||||
ctx_cbf = ( color != COLOR_V ? 0 : cbf_is_set(cbf, COLOR_U));
|
||||
best_cost = block_uncoded_cost + lambda * CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],0);
|
||||
base_cost += lambda * CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],1);
|
||||
}
|
||||
|
||||
calc_last_bits(state, width, height, type, last_x_bits, last_y_bits);
|
||||
calc_last_bits(state, width, height, color, last_x_bits, last_y_bits);
|
||||
for ( int32_t cg_scanpos = cg_last_scanpos; cg_scanpos >= 0; cg_scanpos--) {
|
||||
uint32_t cg_blkpos = scan_cg[cg_scanpos];
|
||||
base_cost -= cost_coeffgroup_sig[cg_scanpos];
|
||||
|
||||
if (sig_coeffgroup_flag[ cg_blkpos ]) {
|
||||
for ( int32_t scanpos_in_cg = cg_size - 1; scanpos_in_cg >= 0; scanpos_in_cg--) {
|
||||
for ( int32_t scanpos_in_cg = max_scan_group_size; scanpos_in_cg >= 0; scanpos_in_cg--) {
|
||||
int32_t scanpos = cg_scanpos*cg_size + scanpos_in_cg;
|
||||
if (scanpos > last_scanpos) continue;
|
||||
uint32_t blkpos = scan[scanpos];
|
||||
|
||||
if( dest_coeff[ blkpos ] ) {
|
||||
uint32_t pos_y = blkpos >> log2_block_size;
|
||||
uint32_t pos_x = blkpos - ( pos_y << log2_block_size );
|
||||
uint32_t pos_y = blkpos >> log2_block_width;
|
||||
uint32_t pos_x = blkpos - ( pos_y << log2_block_width );
|
||||
|
||||
double cost_last = get_rate_last(lambda, pos_x, pos_y, last_x_bits,last_y_bits );
|
||||
double totalCost = base_cost + cost_last - cost_sig[ scanpos ];
|
||||
|
@ -1739,19 +1839,31 @@ void uvg_rdoq(
|
|||
} // end for
|
||||
|
||||
uint32_t abs_sum = 0;
|
||||
if(!mts_idx || (width < 32 && height < 32)) {
|
||||
for ( int32_t scanpos = 0; scanpos < best_last_idx_p1; scanpos++) {
|
||||
int32_t blkPos = scan[scanpos];
|
||||
int32_t level = dest_coeff[blkPos];
|
||||
abs_sum += level;
|
||||
dest_coeff[blkPos] = (coeff_t)(( coef[blkPos] < 0 ) ? -level : level);
|
||||
}
|
||||
}
|
||||
else {
|
||||
for ( int32_t scanpos = 0; scanpos < best_last_idx_p1; scanpos++) {
|
||||
int32_t blkPos = scan[scanpos];
|
||||
int32_t blk_x = blkPos & (width - 1);
|
||||
int32_t blk_y = blkPos >> log2_block_width;
|
||||
int32_t level = blk_x >= 16 || blk_y >= 16 ? 0 : dest_coeff[blkPos];
|
||||
abs_sum += level;
|
||||
dest_coeff[blkPos] = (coeff_t)(( level < 0 ) ? -level : level);
|
||||
}
|
||||
}
|
||||
//===== clean uncoded coefficients =====
|
||||
for ( int32_t scanpos = best_last_idx_p1; scanpos <= last_scanpos; scanpos++) {
|
||||
dest_coeff[scan[scanpos]] = 0;
|
||||
}
|
||||
|
||||
if (encoder->cfg.signhide_enable && abs_sum >= 2) {
|
||||
uvg_rdoq_sign_hiding(state, qp_scaled, scan, &sh_rates, best_last_idx_p1, coef, dest_coeff, type);
|
||||
uvg_rdoq_sign_hiding(state, qp_scaled, scan, &sh_rates, best_last_idx_p1, coef, dest_coeff, color, needs_block_size_trafo_scale);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
10
src/rdo.h
10
src/rdo.h
|
@ -44,6 +44,8 @@
|
|||
#include "global.h" // IWYU pragma: keep
|
||||
#include "search_inter.h"
|
||||
|
||||
#define QUANT_SHIFT 14
|
||||
#define IQUANT_SHIFT 6
|
||||
|
||||
extern const uint32_t uvg_g_go_rice_range[5];
|
||||
extern const uint32_t uvg_g_go_rice_prefix_len[5];
|
||||
|
@ -60,9 +62,8 @@ void uvg_rdoq(
|
|||
int8_t type,
|
||||
int8_t scan_mode,
|
||||
int8_t block_type,
|
||||
int8_t tr_depth,
|
||||
uint16_t cbf,
|
||||
uint8_t lfnst_idx);
|
||||
uint8_t lfnst_idx, uint8_t mts_idx);
|
||||
|
||||
|
||||
int uvg_ts_rdoq(encoder_state_t* const state, coeff_t* src_coeff, coeff_t* dest_coeff, int32_t width,
|
||||
|
@ -73,10 +74,11 @@ double uvg_get_coeff_cost(
|
|||
const encoder_state_t * const state,
|
||||
const coeff_t *coeff,
|
||||
cu_info_t* cur_tu,
|
||||
int32_t width,
|
||||
const cu_loc_t* const cu_loc,
|
||||
color_t color,
|
||||
int8_t scan_mode,
|
||||
int8_t tr_skip);
|
||||
int8_t tr_skip,
|
||||
int coeff_order);
|
||||
|
||||
int32_t uvg_get_ic_rate(encoder_state_t *state, uint32_t abs_level, uint16_t ctx_num_gt1, uint16_t ctx_num_gt2, uint16_t ctx_num_par,
|
||||
uint16_t abs_go_rice, uint32_t reg_bins, int8_t type, int use_limited_prefix_length);
|
||||
|
|
|
@ -88,8 +88,14 @@ static const int32_t g_quant_inter_default_8x8[64] =
|
|||
24, 25, 28, 33, 41, 54, 71, 91
|
||||
};
|
||||
|
||||
const int16_t uvg_g_quant_scales[6] = {26214, 23302, 20560, 18396, 16384, 14564};
|
||||
const int16_t uvg_g_inv_quant_scales[6] = {40, 45, 51, 57, 64, 72};
|
||||
const int16_t uvg_g_quant_scales[2][6] = {
|
||||
{26214, 23302, 20560, 18396, 16384, 14564},
|
||||
{ 18396,16384,14564,13107,11651,10280 }
|
||||
};
|
||||
const int16_t uvg_g_inv_quant_scales[2][6] = {
|
||||
{40, 45, 51, 57, 64, 72},
|
||||
{ 57,64,72,80,90,102 }
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
|
@ -406,11 +412,11 @@ void uvg_scalinglist_set(scaling_list_t* const scaling_list, const int32_t* cons
|
|||
int32_t* quantcoeff = (int32_t*)scaling_list->quant_coeff[size_id_x][size_id_y][listId][qp];
|
||||
int32_t* dequantcoeff = (int32_t*)scaling_list->de_quant_coeff[size_id_x][size_id_y][listId][qp];
|
||||
|
||||
// Encoder list
|
||||
uvg_scalinglist_process_enc(coeff, quantcoeff, uvg_g_quant_scales[qp] << 4, height, width, ratio,
|
||||
// Encoder list TODO: the sqrt adjusted lists
|
||||
uvg_scalinglist_process_enc(coeff, quantcoeff, uvg_g_quant_scales[0][qp] << 4, height, width, ratio,
|
||||
MIN(8, g_scaling_list_size_x[size_id_x]), dc, !scaling_list->enable);
|
||||
// Decoder list
|
||||
scalinglist_process_dec(coeff, dequantcoeff, uvg_g_inv_quant_scales[qp], height, width, ratio,
|
||||
scalinglist_process_dec(coeff, dequantcoeff, uvg_g_inv_quant_scales[0][qp], height, width, ratio,
|
||||
MIN(8, g_scaling_list_size_x[size_id_x]), dc, !scaling_list->enable);
|
||||
|
||||
|
||||
|
|
1537
src/search.c
1537
src/search.c
File diff suppressed because it is too large
Load diff
20
src/search.h
20
src/search.h
|
@ -84,19 +84,17 @@ void uvg_sort_keys_by_cost(unit_stats_map_t *__restrict map);
|
|||
|
||||
void uvg_search_lcu(encoder_state_t *state, int x, int y, const yuv_t *hor_buf, const yuv_t *ver_buf, lcu_coeff_t *coeff);
|
||||
|
||||
double uvg_cu_rd_cost_luma(const encoder_state_t *const state,
|
||||
const int x_px, const int y_px, const int depth,
|
||||
double uvg_cu_rd_cost_luma(
|
||||
const encoder_state_t *const state,
|
||||
const cu_loc_t* const cu_loc,
|
||||
const cu_info_t *const pred_cu,
|
||||
lcu_t *const lcu);
|
||||
double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
|
||||
const int x_px, const int y_px, const int depth,
|
||||
lcu_t *const lcu,
|
||||
uint8_t isp_cbf);
|
||||
double uvg_cu_rd_cost_chroma(
|
||||
const encoder_state_t *const state,
|
||||
cu_info_t *const pred_cu,
|
||||
lcu_t *const lcu);
|
||||
lcu_t *const lcu,
|
||||
const cu_loc_t * const);
|
||||
|
||||
void uvg_lcu_fill_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, uint8_t tr_depth, enum uvg_tree_type
|
||||
tree_type);
|
||||
|
||||
void uvg_intra_recon_lcu_luma(encoder_state_t * const state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu);
|
||||
void uvg_intra_recon_lcu_chroma(encoder_state_t * const state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu);
|
||||
|
||||
#endif
|
||||
|
|
224
src/search_ibc.c
224
src/search_ibc.c
|
@ -75,7 +75,8 @@ typedef struct {
|
|||
* \brief Possible optimized SAD implementation for the width, leave as
|
||||
* NULL for arbitrary-width blocks
|
||||
*/
|
||||
optimized_sad_func_ptr_t optimized_sad;
|
||||
optimized_sad_func_ptr_t optimized_sad_y;
|
||||
optimized_sad_func_ptr_t optimized_sad_uv;
|
||||
|
||||
lcu_t *lcu;
|
||||
|
||||
|
@ -109,8 +110,10 @@ static INLINE bool fracmv_within_ibc_range(const ibc_search_info_t *info, int x,
|
|||
}
|
||||
|
||||
|
||||
static uint32_t calculate_ibc_cost_satd(const encoder_state_t *state, lcu_t* lcu, int32_t x, int32_t y, int32_t width, int32_t mv_x, int32_t mv_y)
|
||||
static uint32_t calculate_ibc_cost_satd(const encoder_state_t *state, lcu_t* lcu, const cu_loc_t* loc, int32_t mv_x, int32_t mv_y)
|
||||
{
|
||||
const uint32_t x = loc->x;
|
||||
const uint32_t y = loc->y;
|
||||
const int x_scu = SUB_SCU(x);
|
||||
const int y_scu = SUB_SCU(y);
|
||||
|
||||
|
@ -132,9 +135,11 @@ static uint32_t calculate_ibc_cost_satd(const encoder_state_t *state, lcu_t* lcu
|
|||
cur_cu->inter.mv[0][0] = mv_x * (1 << INTERNAL_MV_PREC);;
|
||||
cur_cu->inter.mv[0][1] = mv_y * (1 << INTERNAL_MV_PREC);;
|
||||
|
||||
uvg_inter_recon_cu(state, lcu, x, y, width, true, state->encoder_control->chroma_format != UVG_CSP_400);
|
||||
uvg_inter_recon_cu(state, lcu, true, state->encoder_control->chroma_format != UVG_CSP_400, loc);
|
||||
|
||||
*cur_cu = cu_backup;
|
||||
uint32_t width = loc->width;
|
||||
uint32_t height = loc->height;
|
||||
|
||||
cost = uvg_satd_any_size(width,
|
||||
width,
|
||||
|
@ -162,10 +167,15 @@ static uint32_t calculate_ibc_cost_satd(const encoder_state_t *state, lcu_t* lcu
|
|||
}
|
||||
|
||||
|
||||
static uint32_t calculate_ibc_cost_sad(const encoder_state_t *state, optimized_sad_func_ptr_t optimized_sad, lcu_t* lcu, int32_t x, int32_t y, int32_t width, int32_t mv_x, int32_t mv_y)
|
||||
static uint32_t calculate_ibc_cost_sad(ibc_search_info_t *info, const cu_loc_t* loc, int32_t mv_x, int32_t mv_y)
|
||||
{
|
||||
const uint32_t x = loc->x;
|
||||
const uint32_t y = loc->y;
|
||||
lcu_t *lcu = info->lcu;
|
||||
cu_info_t *cur_cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y));
|
||||
|
||||
const encoder_state_t* state = info->state;
|
||||
|
||||
cu_info_t cu_backup = *cur_cu;
|
||||
uint32_t cost = MAX_INT;
|
||||
|
||||
|
@ -173,6 +183,8 @@ static uint32_t calculate_ibc_cost_sad(const encoder_state_t *state, optimized_s
|
|||
const int y_scu = SUB_SCU(y);
|
||||
const uint32_t offset = x_scu + y_scu * LCU_WIDTH;
|
||||
const uint32_t offset_c = x_scu / 2 + y_scu / 2 * LCU_WIDTH_C;
|
||||
const uint32_t width = loc->width;
|
||||
const uint32_t height = loc->height;
|
||||
|
||||
cur_cu->type = CU_IBC;
|
||||
cur_cu->inter.mv_dir = 1;
|
||||
|
@ -183,23 +195,26 @@ static uint32_t calculate_ibc_cost_sad(const encoder_state_t *state, optimized_s
|
|||
cur_cu->inter.mv[0][0] = mv_x * (1 << INTERNAL_MV_PREC);;
|
||||
cur_cu->inter.mv[0][1] = mv_y * (1 << INTERNAL_MV_PREC);;
|
||||
|
||||
uvg_inter_recon_cu(state, lcu, x, y, width, true, state->encoder_control->chroma_format != UVG_CSP_400);
|
||||
uvg_inter_recon_cu(state, lcu, true, state->encoder_control->chroma_format != UVG_CSP_400, loc);
|
||||
|
||||
*cur_cu = cu_backup;
|
||||
|
||||
if (optimized_sad != NULL) {
|
||||
cost = optimized_sad(lcu->rec.y + offset, &state->tile->frame->source->y[y * state->tile->frame->source->stride + x], width, LCU_WIDTH, state->tile->frame->source->stride);
|
||||
if(state->encoder_control->chroma_format != UVG_CSP_400) {
|
||||
cost += optimized_sad(lcu->rec.u + offset_c, &state->tile->frame->source->u[(y / 2) * state->tile->frame->source->stride / 2 + x / 2], width / 2, LCU_WIDTH_C, state->tile->frame->source->stride / 2);
|
||||
cost += optimized_sad(lcu->rec.v + offset_c, &state->tile->frame->source->v[(y / 2) * state->tile->frame->source->stride / 2 + x / 2], width / 2, LCU_WIDTH_C, state->tile->frame->source->stride / 2);
|
||||
}
|
||||
if (info->optimized_sad_y != NULL) {
|
||||
cost = info->optimized_sad_y(lcu->rec.y + offset, &state->tile->frame->source->y[y * state->tile->frame->source->stride + x], width, LCU_WIDTH, state->tile->frame->source->stride);
|
||||
} else {
|
||||
cost = uvg_reg_sad(lcu->rec.y + offset, &state->tile->frame->source->y[y * state->tile->frame->source->stride + x], width,width, LCU_WIDTH, state->tile->frame->source->stride);
|
||||
if(state->encoder_control->chroma_format != UVG_CSP_400) {
|
||||
}
|
||||
|
||||
// ToDo: Enable chroma cost calculation
|
||||
/* if (state->encoder_control->chroma_format != UVG_CSP_400) {
|
||||
if (info->optimized_sad_uv != NULL) {
|
||||
cost += info->optimized_sad_uv(lcu->rec.u + offset_c, &state->tile->frame->source->u[(y / 2) * state->tile->frame->source->stride / 2 + x / 2], width / 2, LCU_WIDTH_C, state->tile->frame->source->stride / 2);
|
||||
cost += info->optimized_sad_uv(lcu->rec.v + offset_c, &state->tile->frame->source->v[(y / 2) * state->tile->frame->source->stride / 2 + x / 2], width / 2, LCU_WIDTH_C, state->tile->frame->source->stride / 2);
|
||||
} else {
|
||||
cost += uvg_reg_sad(lcu->rec.u + offset_c, &state->tile->frame->source->u[(y / 2) * state->tile->frame->source->stride / 2 + x / 2], width / 2, width / 2, LCU_WIDTH_C, state->tile->frame->source->stride / 2);
|
||||
cost += uvg_reg_sad(lcu->rec.v + offset_c, &state->tile->frame->source->v[(y / 2) * state->tile->frame->source->stride / 2 + x / 2], width / 2, width / 2, LCU_WIDTH_C, state->tile->frame->source->stride / 2);
|
||||
}
|
||||
}
|
||||
}*/
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
@ -235,8 +250,11 @@ static bool check_mv_cost(ibc_search_info_t *info,
|
|||
|
||||
double bitcost = 0;
|
||||
double cost = MAX_DOUBLE;
|
||||
cu_loc_t loc;
|
||||
uvg_cu_loc_ctor(&loc, info->origin.x, info->origin.y, info->width, info->height);
|
||||
|
||||
cost = calculate_ibc_cost_sad(info->state, info->optimized_sad, info->lcu, info->origin.x, info->origin.y, info->width, x, y);
|
||||
|
||||
cost = calculate_ibc_cost_sad(info, &loc, x, y);
|
||||
|
||||
if (cost >= *best_cost) return false;
|
||||
|
||||
|
@ -246,7 +264,7 @@ static bool check_mv_cost(ibc_search_info_t *info,
|
|||
info->mv_cand,
|
||||
NULL,
|
||||
0,
|
||||
NULL,
|
||||
0,
|
||||
&bitcost
|
||||
);
|
||||
|
||||
|
@ -782,39 +800,23 @@ static bool merge_candidate_in_list(inter_merge_cand_t *all_cands,
|
|||
* \param amvp Return searched AMVP PUs sorted by costs
|
||||
* \param merge Return searched Merge PUs sorted by costs
|
||||
*/
|
||||
static void search_pu_ibc(encoder_state_t * const state,
|
||||
int x_cu, int y_cu,
|
||||
int depth,
|
||||
part_mode_t part_mode,
|
||||
int i_pu,
|
||||
static void search_pu_ibc(
|
||||
encoder_state_t * const state,
|
||||
const cu_loc_t * const cu_loc,
|
||||
unit_stats_map_t *amvp,
|
||||
unit_stats_map_t *merge,
|
||||
ibc_search_info_t *info)
|
||||
{
|
||||
const uvg_config *cfg = &state->encoder_control->cfg;
|
||||
const videoframe_t * const frame = state->tile->frame;
|
||||
const int width_cu = LCU_WIDTH >> depth;
|
||||
const int x = PU_GET_X(part_mode, width_cu, x_cu, i_pu);
|
||||
const int y = PU_GET_Y(part_mode, width_cu, y_cu, i_pu);
|
||||
const int width = PU_GET_W(part_mode, width_cu, i_pu);
|
||||
const int height = PU_GET_H(part_mode, width_cu, i_pu);
|
||||
|
||||
// Merge candidate A1 may not be used for the second PU of Nx2N, nLx2N and
|
||||
// nRx2N partitions.
|
||||
const bool merge_a1 = i_pu == 0 || width >= height;
|
||||
// Merge candidate B1 may not be used for the second PU of 2NxN, 2NxnU and
|
||||
// 2NxnD partitions.
|
||||
const bool merge_b1 = i_pu == 0 || width <= height;
|
||||
|
||||
const int width_cu = cu_loc->width;
|
||||
const int height_cu= cu_loc->height;
|
||||
|
||||
lcu_t *lcu = info->lcu;
|
||||
const int x_local = SUB_SCU(x);
|
||||
const int y_local = SUB_SCU(y);
|
||||
const int x_local = SUB_SCU(cu_loc->x);
|
||||
const int y_local = SUB_SCU(cu_loc->y);
|
||||
cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, x_local, y_local);
|
||||
cur_pu->type = CU_IBC;
|
||||
cur_pu->part_size = part_mode;
|
||||
cur_pu->depth = depth;
|
||||
cur_pu->tr_depth = depth;
|
||||
cur_pu->qp = state->qp;
|
||||
cur_pu->inter.mv_dir = 1;
|
||||
|
||||
|
@ -825,20 +827,20 @@ static void search_pu_ibc(encoder_state_t * const state,
|
|||
|
||||
info->state = state;
|
||||
info->pic = frame->source;
|
||||
info->origin.x = x;
|
||||
info->origin.y = y;
|
||||
info->width = width;
|
||||
info->height = height;
|
||||
info->mvd_cost_func = cfg->mv_rdo ? uvg_calc_ibc_mvd_cost_cabac : calc_ibc_mvd_cost;
|
||||
info->optimized_sad = uvg_get_optimized_sad(width);
|
||||
info->origin.x = cu_loc->x;
|
||||
info->origin.y = cu_loc->y;
|
||||
info->width = width_cu;
|
||||
info->height = height_cu;
|
||||
info->mvd_cost_func =
|
||||
cfg->mv_rdo ? uvg_calc_ibc_mvd_cost_cabac : calc_ibc_mvd_cost;
|
||||
info->optimized_sad_y = uvg_get_optimized_sad(width_cu);
|
||||
info->optimized_sad_uv = uvg_get_optimized_sad(cu_loc->chroma_width);
|
||||
info->lcu = lcu;
|
||||
|
||||
// Search for merge mode candidates
|
||||
info->num_merge_cand = uvg_inter_get_merge_cand(
|
||||
state,
|
||||
x, y,
|
||||
width, height,
|
||||
merge_a1, merge_b1,
|
||||
cu_loc,
|
||||
info->merge_cand,
|
||||
lcu);
|
||||
|
||||
|
@ -853,7 +855,7 @@ static void search_pu_ibc(encoder_state_t * const state,
|
|||
#ifdef COMPLETE_PRED_MODE_BITS
|
||||
// Technically counting these bits would be correct, however counting
|
||||
// them universally degrades quality so this block is disabled by default
|
||||
const double no_skip_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_skip_flag_model[uvg_get_skip_context(x, y, lcu, NULL)], 0);
|
||||
const double no_skip_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_skip_flag_model[uvg_get_skip_context(cu_loc->x, cu_loc->y, lcu, NULL)], 0);
|
||||
#else
|
||||
const double no_skip_flag = 0;
|
||||
#endif
|
||||
|
@ -875,7 +877,7 @@ static void search_pu_ibc(encoder_state_t * const state,
|
|||
{
|
||||
continue;
|
||||
}
|
||||
uvg_inter_pred_pu(state, info->lcu, x_cu, y_cu, width_cu, true, false, i_pu);
|
||||
uvg_inter_pred_pu(state, info->lcu, true, false, cu_loc);
|
||||
merge->unit[merge->size] = *cur_pu;
|
||||
merge->unit[merge->size].type = CU_IBC;
|
||||
merge->unit[merge->size].merge_idx = merge_idx;
|
||||
|
@ -883,11 +885,11 @@ static void search_pu_ibc(encoder_state_t * const state,
|
|||
merge->unit[merge->size].skipped = false;
|
||||
|
||||
double bits = merge_flag_cost + merge_idx + CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.cu_merge_idx_ext_model), merge_idx != 0);
|
||||
if(state->encoder_control->cfg.rdo >= 2 && cur_pu->part_size == SIZE_2Nx2N) {
|
||||
uvg_cu_cost_inter_rd2(state, x, y, depth, &merge->unit[merge->size], lcu, &merge->cost[merge->size], &bits);
|
||||
if(state->encoder_control->cfg.rdo >= 2) {
|
||||
uvg_cu_cost_inter_rd2(state, &merge->unit[merge->size], lcu, &merge->cost[merge->size], &bits, cu_loc);
|
||||
}
|
||||
else {
|
||||
merge->cost[merge->size] = uvg_satd_any_size(width, height,
|
||||
merge->cost[merge->size] = uvg_satd_any_size(width_cu, height_cu,
|
||||
lcu->rec.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH,
|
||||
lcu->ref.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH);
|
||||
bits += no_skip_flag;
|
||||
|
@ -909,7 +911,7 @@ static void search_pu_ibc(encoder_state_t * const state,
|
|||
|
||||
// Early Skip Mode Decision
|
||||
bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400;
|
||||
if (cfg->early_skip && cur_pu->part_size == SIZE_2Nx2N) {
|
||||
if (cfg->early_skip) {
|
||||
for (int merge_key = 0; merge_key < num_rdo_cands; ++merge_key) {
|
||||
if(cfg->rdo >= 2 && merge->unit[merge->keys[merge_key]].skipped) {
|
||||
merge->size = 1;
|
||||
|
@ -919,6 +921,7 @@ static void search_pu_ibc(encoder_state_t * const state,
|
|||
merge->keys[0] = 0;
|
||||
}
|
||||
else if(cfg->rdo < 2) {
|
||||
const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
|
||||
// Reconstruct blocks with merge candidate.
|
||||
// Check luma CBF. Then, check chroma CBFs if luma CBF is not set
|
||||
// and chroma exists.
|
||||
|
@ -927,19 +930,18 @@ static void search_pu_ibc(encoder_state_t * const state,
|
|||
cur_pu->inter.mv_dir = info->merge_cand[merge_idx].dir;
|
||||
cur_pu->inter.mv[0][0] = info->merge_cand[merge_idx].mv[0][0];
|
||||
cur_pu->inter.mv[0][1] = info->merge_cand[merge_idx].mv[0][1];
|
||||
uvg_lcu_fill_trdepth(lcu, x, y, depth, MAX(1, depth), UVG_BOTH_T);
|
||||
uvg_inter_recon_cu(state, lcu, x, y, width, true, false);
|
||||
uvg_quantize_lcu_residual(state, true, false, false, x, y, depth, cur_pu, lcu, true, UVG_BOTH_T);
|
||||
uvg_inter_recon_cu(state, lcu, true, false, cu_loc);
|
||||
uvg_quantize_lcu_residual(state, true, false, false, cu_loc, cur_pu, lcu, true, UVG_BOTH_T);
|
||||
|
||||
if (cbf_is_set(cur_pu->cbf, depth, COLOR_Y)) {
|
||||
if (cbf_is_set(cur_pu->cbf, COLOR_Y)) {
|
||||
continue;
|
||||
}
|
||||
else if (has_chroma) {
|
||||
uvg_inter_recon_cu(state, lcu, x, y, width, false, has_chroma);
|
||||
uvg_inter_recon_cu(state, lcu, false, has_chroma, cu_loc);
|
||||
uvg_quantize_lcu_residual(state, false, has_chroma,
|
||||
false, /*we are only checking for lack of coeffs so no need to check jccr*/
|
||||
x, y, depth, cur_pu, lcu, true, UVG_BOTH_T);
|
||||
if (!cbf_is_set_any(cur_pu->cbf, depth)) {
|
||||
cu_loc, cur_pu, lcu, true, UVG_BOTH_T);
|
||||
if (!cbf_is_set_any(cur_pu->cbf)) {
|
||||
cur_pu->type = CU_IBC;
|
||||
cur_pu->merge_idx = merge_idx;
|
||||
cur_pu->skipped = true;
|
||||
|
@ -965,14 +967,11 @@ static void search_pu_ibc(encoder_state_t * const state,
|
|||
// Do the motion search
|
||||
|
||||
uvg_inter_get_mv_cand(info->state,
|
||||
info->origin.x,
|
||||
info->origin.y,
|
||||
info->width,
|
||||
info->height,
|
||||
info->mv_cand,
|
||||
cur_pu,
|
||||
lcu,
|
||||
NULL);
|
||||
0,
|
||||
cu_loc);
|
||||
|
||||
vector2d_t best_mv = { 0, 0 };
|
||||
|
||||
|
@ -1003,9 +1002,7 @@ static void search_pu_ibc(encoder_state_t * const state,
|
|||
best_cost = calculate_ibc_cost_satd(
|
||||
info->state,
|
||||
lcu,
|
||||
info->origin.x,
|
||||
info->origin.y,
|
||||
info->width,
|
||||
cu_loc,
|
||||
(best_mv.x >> INTERNAL_MV_PREC),
|
||||
(best_mv.y >> INTERNAL_MV_PREC));
|
||||
best_cost += best_bits * info->state->lambda;
|
||||
|
@ -1052,16 +1049,16 @@ static void search_pu_ibc(encoder_state_t * const state,
|
|||
};
|
||||
|
||||
|
||||
if (state->encoder_control->cfg.rdo >= 2 && cur_pu->part_size == SIZE_2Nx2N) {
|
||||
if (amvp[0].size) uvg_cu_cost_inter_rd2(state, x, y, depth, &amvp[0].unit[best_keys[0]], lcu, &amvp[0].cost[best_keys[0]], &amvp[0].bits[best_keys[0]]);
|
||||
if (state->encoder_control->cfg.rdo >= 2) {
|
||||
if (amvp[0].size) uvg_cu_cost_inter_rd2(state, &amvp[0].unit[best_keys[0]], lcu, &amvp[0].cost[best_keys[0]], &amvp[0].bits[best_keys[0]], cu_loc);
|
||||
}
|
||||
|
||||
|
||||
if(cfg->rdo < 2) {
|
||||
int predmode_ctx;
|
||||
|
||||
const int ibc_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.ibc_flag[0], 1) * 3;
|
||||
const int skip_contest = uvg_get_skip_context(x, y, lcu, NULL, &predmode_ctx);
|
||||
const float ibc_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.ibc_flag[0], 1);
|
||||
const int skip_contest = uvg_get_skip_context(cu_loc->x, cu_loc->y, lcu, NULL, &predmode_ctx);
|
||||
const double no_skip_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_skip_flag_model[skip_contest], 0);
|
||||
|
||||
const double pred_mode_bits = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_pred_mode_model[predmode_ctx], 0);
|
||||
|
@ -1077,33 +1074,29 @@ static void search_pu_ibc(encoder_state_t * const state,
|
|||
#include "threads.h"
|
||||
|
||||
static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
|
||||
int x, int y, int depth,
|
||||
const cu_loc_t* cu_loc,
|
||||
lcu_t* lcu,
|
||||
double* inter_cost,
|
||||
double* inter_bitcost)
|
||||
{
|
||||
const int x_cu = x;
|
||||
const int y_cu = y;
|
||||
const int x_cu = cu_loc->x;
|
||||
const int y_cu = cu_loc->y;
|
||||
const int part_mode = SIZE_2Nx2N;
|
||||
const uvg_config *cfg = &state->encoder_control->cfg;
|
||||
const videoframe_t * const frame = state->tile->frame;
|
||||
const int width_cu = LCU_WIDTH >> depth;
|
||||
const int width = PU_GET_W(part_mode, width_cu, 0);
|
||||
const int height = PU_GET_H(part_mode, width_cu, 0);
|
||||
const int width_cu = cu_loc->width;
|
||||
const int height_cu = cu_loc->height;
|
||||
|
||||
const bool merge_a1 = true;
|
||||
const bool merge_b1 = true;
|
||||
|
||||
ibc_search_info_t info;
|
||||
|
||||
const int x_local = SUB_SCU(x);
|
||||
const int y_local = SUB_SCU(y);
|
||||
const int x_local = SUB_SCU(x_cu);
|
||||
const int y_local = SUB_SCU(y_cu);
|
||||
cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, x_local, y_local);
|
||||
|
||||
cur_pu->type = CU_IBC;
|
||||
cur_pu->part_size = part_mode;
|
||||
cur_pu->depth = depth;
|
||||
cur_pu->tr_depth = depth;
|
||||
cur_pu->qp = state->qp;
|
||||
|
||||
// Default to candidate 0
|
||||
|
@ -1113,24 +1106,20 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
|
|||
|
||||
info.state = state;
|
||||
info.pic = frame->source;
|
||||
info.origin.x = x;
|
||||
info.origin.y = y;
|
||||
info.width = width;
|
||||
info.height = height;
|
||||
info.origin.x = cu_loc->x;
|
||||
info.origin.y = cu_loc->y;
|
||||
info.width = width_cu;
|
||||
info.height = height_cu;
|
||||
info.mvd_cost_func =
|
||||
cfg->mv_rdo ? uvg_calc_ibc_mvd_cost_cabac : calc_ibc_mvd_cost;
|
||||
info.optimized_sad = uvg_get_optimized_sad(width);
|
||||
info.optimized_sad_y = uvg_get_optimized_sad(width_cu);
|
||||
info.optimized_sad_uv = uvg_get_optimized_sad(cu_loc->chroma_width);
|
||||
info.lcu = lcu;
|
||||
|
||||
// Search for merge mode candidates
|
||||
info.num_merge_cand = uvg_inter_get_merge_cand(
|
||||
state,
|
||||
x,
|
||||
y,
|
||||
width,
|
||||
height,
|
||||
merge_a1,
|
||||
merge_b1,
|
||||
cu_loc,
|
||||
info.merge_cand,
|
||||
lcu);
|
||||
|
||||
|
@ -1145,17 +1134,12 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
|
|||
static int evaluations = 0;
|
||||
static int hits = 0;
|
||||
|
||||
|
||||
UVG_CLOCK_T hashmap_start_temp;
|
||||
UVG_CLOCK_T hashmap_end_temp;
|
||||
|
||||
|
||||
UVG_CLOCK_T hashmap_start_real_time;
|
||||
UVG_CLOCK_T hashmap_end_real_time;
|
||||
UVG_GET_TIME(&hashmap_start_real_time);
|
||||
|
||||
int xx = x;
|
||||
int yy = y;
|
||||
int xx = x_cu;
|
||||
int yy = y_cu;
|
||||
|
||||
int best_mv_x = INT_MAX>>2;
|
||||
int best_mv_y = INT_MAX>>2;
|
||||
|
@ -1185,12 +1169,12 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
|
|||
int pos_y = result->value & 0xffff;
|
||||
int mv_x = pos_x - xx;
|
||||
int mv_y = pos_y - yy;
|
||||
if (pos_x <= xx - width && pos_y <= yy - height) {
|
||||
if (pos_x <= xx - width_cu && pos_y <= yy - height_cu) {
|
||||
valid_mv = intmv_within_ibc_range(&info, mv_x, mv_y);
|
||||
if (valid_mv) {
|
||||
bool full_block = true; // Is the full block covered by the IBC?
|
||||
for (int offset_x = UVG_HASHMAP_BLOCKSIZE; offset_x < width; offset_x+=UVG_HASHMAP_BLOCKSIZE) {
|
||||
for (int offset_y = 0; offset_y < height; offset_y += UVG_HASHMAP_BLOCKSIZE) {
|
||||
for (int offset_x = UVG_HASHMAP_BLOCKSIZE; offset_x < width_cu; offset_x+=UVG_HASHMAP_BLOCKSIZE) {
|
||||
for (int offset_y = 0; offset_y < height_cu; offset_y += UVG_HASHMAP_BLOCKSIZE) {
|
||||
uint32_t crc_other_blocks = state->tile->frame->ibc_hashmap_pos_to_hash[
|
||||
((yy+offset_y) / UVG_HASHMAP_BLOCKSIZE)*state->tile->frame->ibc_hashmap_pos_to_hash_stride + (xx+offset_x) / UVG_HASHMAP_BLOCKSIZE];
|
||||
|
||||
|
@ -1211,7 +1195,7 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
|
|||
if (full_block) {
|
||||
double cost = ibc_cost, bits = ibc_bitcost;
|
||||
vector2d_t mv = { best_mv_x, best_mv_y};
|
||||
cost = calc_ibc_mvd_cost(state, mv_x, mv_y,INTERNAL_MV_PREC,info.mv_cand, info.merge_cand, info.num_merge_cand, NULL, &bits);
|
||||
cost = calc_ibc_mvd_cost(state, mv_x, mv_y,INTERNAL_MV_PREC,info.mv_cand, info.merge_cand, info.num_merge_cand, 0, &bits);
|
||||
//double cost = get_ibc_mvd_coding_cost(state, &state->cabac, mv_x,mv_y) * state->lambda_sqrt;
|
||||
//cost +=
|
||||
bool better_mv = cost < ibc_cost;
|
||||
|
@ -1220,7 +1204,7 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
|
|||
best_mv_y = mv_y;
|
||||
ibc_cost = cost;
|
||||
ibc_bitcost = bits;
|
||||
fprintf(stderr, "Found best IBC!! %dx%d %dx%d: %d,%d\r\n", x,y, width,width, mv_x, mv_y);
|
||||
fprintf(stderr, "Found best IBC!! %dx%d %dx%d: %d,%d\r\n", x_cu,y_cu, width_cu,height_cu, mv_x, mv_y);
|
||||
found_block = true;
|
||||
//break;
|
||||
}
|
||||
|
@ -1238,7 +1222,7 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
|
|||
//if (x > state->tile->frame->width-64 && y > state->tile->frame->height-64)
|
||||
//fprintf(stderr, "Hashmap time: %f (crc: %f, search: %f) Evaluations: %d Hits: %d, hashed in this block: %d\n", time_spent,crc_time, search_time, evaluations, hits,hashes_found);
|
||||
|
||||
if (!found_block) return;
|
||||
if (!found_block) return 0;
|
||||
|
||||
*inter_cost = ibc_cost;
|
||||
*inter_bitcost = ibc_bitcost;
|
||||
|
@ -1267,18 +1251,16 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
|
|||
cur_pu->skipped = merged;
|
||||
|
||||
|
||||
const int ibc_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.ibc_flag[0], 1);
|
||||
const float ibc_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.ibc_flag[0], 1);
|
||||
ibc_cost += ibc_flag * state->lambda;
|
||||
ibc_bitcost += ibc_flag;
|
||||
|
||||
uvg_inter_recon_cu(
|
||||
state,
|
||||
lcu,
|
||||
x,
|
||||
y,
|
||||
CU_WIDTH_FROM_DEPTH(depth),
|
||||
true,
|
||||
state->encoder_control->chroma_format != UVG_CSP_400);
|
||||
state->encoder_control->chroma_format != UVG_CSP_400,
|
||||
cu_loc);
|
||||
|
||||
if (*inter_cost < MAX_DOUBLE) {
|
||||
assert(fracmv_within_ibc_range(
|
||||
|
@ -1286,7 +1268,7 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
|
|||
cur_pu->inter.mv[0][0],
|
||||
cur_pu->inter.mv[0][1]));
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1305,17 +1287,18 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
|
|||
* \param inter_bitcost Return inter bitcost
|
||||
*/
|
||||
void uvg_search_cu_ibc(encoder_state_t * const state,
|
||||
int x, int y, int depth,
|
||||
const cu_loc_t * const cu_loc,
|
||||
lcu_t *lcu,
|
||||
double *inter_cost,
|
||||
double* inter_bitcost)
|
||||
{
|
||||
*inter_cost = MAX_DOUBLE;
|
||||
*inter_bitcost = MAX_INT;
|
||||
|
||||
// Quick hashmap search
|
||||
/* uvg_search_hash_cu_ibc(
|
||||
state,
|
||||
x, y, depth,
|
||||
cu_loc,
|
||||
lcu,
|
||||
inter_cost,
|
||||
inter_bitcost);
|
||||
|
@ -1330,8 +1313,7 @@ void uvg_search_cu_ibc(encoder_state_t * const state,
|
|||
info.lcu = lcu;
|
||||
|
||||
search_pu_ibc(state,
|
||||
x, y, depth,
|
||||
SIZE_2Nx2N, 0,
|
||||
cu_loc,
|
||||
amvp,
|
||||
&merge,
|
||||
&info);
|
||||
|
@ -1374,14 +1356,14 @@ void uvg_search_cu_ibc(encoder_state_t * const state,
|
|||
return;
|
||||
}
|
||||
|
||||
const int x_local = SUB_SCU(x);
|
||||
const int y_local = SUB_SCU(y);
|
||||
const int x_local = SUB_SCU(cu_loc->x);
|
||||
const int y_local = SUB_SCU(cu_loc->y);
|
||||
cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, x_local, y_local);
|
||||
*cur_pu = *best_inter_pu;
|
||||
cur_pu->type = CU_IBC;
|
||||
|
||||
uvg_inter_recon_cu(state, lcu, x, y, CU_WIDTH_FROM_DEPTH(depth),
|
||||
true, state->encoder_control->chroma_format != UVG_CSP_400);
|
||||
uvg_inter_recon_cu(state, lcu,
|
||||
true, state->encoder_control->chroma_format != UVG_CSP_400, cu_loc);
|
||||
|
||||
if (*inter_cost < MAX_DOUBLE) {
|
||||
assert(fracmv_within_ibc_range(&info, cur_pu->inter.mv[0][0], cur_pu->inter.mv[0][1]));
|
||||
|
|
|
@ -46,7 +46,7 @@
|
|||
|
||||
|
||||
void uvg_search_cu_ibc(encoder_state_t * const state,
|
||||
int x, int y, int depth,
|
||||
const cu_loc_t * const cu_loc,
|
||||
lcu_t *lcu,
|
||||
double *inter_cost,
|
||||
double* inter_bitcost);
|
||||
|
|
|
@ -1293,8 +1293,8 @@ static void apply_mv_scaling(int32_t current_poc,
|
|||
/**
|
||||
* \brief Perform inter search for a single reference frame.
|
||||
*/
|
||||
static void search_pu_inter_ref(inter_search_info_t *info,
|
||||
int depth,
|
||||
static void search_pu_inter_ref(
|
||||
inter_search_info_t *info,
|
||||
lcu_t *lcu,
|
||||
cu_info_t *cur_cu,
|
||||
unit_stats_map_t *amvp)
|
||||
|
@ -1327,15 +1327,15 @@ static void search_pu_inter_ref(inter_search_info_t *info,
|
|||
// Get MV candidates
|
||||
cur_cu->inter.mv_ref[ref_list] = ref_list_idx[ref_list];
|
||||
|
||||
cu_loc_t cu_loc;
|
||||
uvg_cu_loc_ctor(&cu_loc, info->origin.x, info->origin.y, info->width, info->height);
|
||||
|
||||
uvg_inter_get_mv_cand(info->state,
|
||||
info->origin.x,
|
||||
info->origin.y,
|
||||
info->width,
|
||||
info->height,
|
||||
info->mv_cand,
|
||||
cur_cu,
|
||||
lcu,
|
||||
ref_list);
|
||||
ref_list,
|
||||
&cu_loc);
|
||||
|
||||
vector2d_t best_mv = { 0, 0 };
|
||||
|
||||
|
@ -1498,11 +1498,13 @@ static void search_pu_inter_ref(inter_search_info_t *info,
|
|||
/**
|
||||
* \brief Search bipred modes for a PU.
|
||||
*/
|
||||
static void search_pu_inter_bipred(inter_search_info_t *info,
|
||||
int depth,
|
||||
static void search_pu_inter_bipred(
|
||||
inter_search_info_t *info,
|
||||
lcu_t *lcu,
|
||||
unit_stats_map_t *amvp_bipred)
|
||||
{
|
||||
cu_loc_t cu_loc;
|
||||
uvg_cu_loc_ctor(&cu_loc, info->origin.x, info->origin.y, info->width, info->height);
|
||||
const image_list_t *const ref = info->state->frame->ref;
|
||||
uint8_t (*ref_LX)[16] = info->state->frame->ref_LX;
|
||||
const videoframe_t * const frame = info->state->tile->frame;
|
||||
|
@ -1551,7 +1553,7 @@ static void search_pu_inter_bipred(inter_search_info_t *info,
|
|||
bipred_pu->skipped = false;
|
||||
|
||||
for (int reflist = 0; reflist < 2; reflist++) {
|
||||
uvg_inter_get_mv_cand(info->state, x, y, width, height, info->mv_cand, bipred_pu, lcu, reflist);
|
||||
uvg_inter_get_mv_cand(info->state, info->mv_cand, bipred_pu, lcu, reflist, &cu_loc);
|
||||
}
|
||||
|
||||
// Don't try merge candidates that don't satisfy mv constraints.
|
||||
|
@ -1564,13 +1566,11 @@ static void search_pu_inter_bipred(inter_search_info_t *info,
|
|||
uvg_inter_recon_bipred(info->state,
|
||||
ref->images[ref_LX[0][merge_cand[i].ref[0]]],
|
||||
ref->images[ref_LX[1][merge_cand[j].ref[1]]],
|
||||
x, y,
|
||||
width,
|
||||
height,
|
||||
mv,
|
||||
lcu,
|
||||
true,
|
||||
false);
|
||||
false,
|
||||
&cu_loc);
|
||||
|
||||
const uvg_pixel *rec = &lcu->rec.y[SUB_SCU(y) * LCU_WIDTH + SUB_SCU(x)];
|
||||
const uvg_pixel *src = &frame->source->y[x + y * frame->source->stride];
|
||||
|
@ -1666,11 +1666,9 @@ static bool merge_candidate_in_list(inter_merge_cand_t *all_cands,
|
|||
* \param amvp Return searched AMVP PUs sorted by costs
|
||||
* \param merge Return searched Merge PUs sorted by costs
|
||||
*/
|
||||
static void search_pu_inter(encoder_state_t * const state,
|
||||
int x_cu, int y_cu,
|
||||
int depth,
|
||||
part_mode_t part_mode,
|
||||
int i_pu,
|
||||
static void search_pu_inter(
|
||||
encoder_state_t * const state,
|
||||
const cu_loc_t* const cu_loc,
|
||||
lcu_t *lcu,
|
||||
unit_stats_map_t *amvp,
|
||||
unit_stats_map_t *merge,
|
||||
|
@ -1678,25 +1676,14 @@ static void search_pu_inter(encoder_state_t * const state,
|
|||
{
|
||||
const uvg_config *cfg = &state->encoder_control->cfg;
|
||||
const videoframe_t * const frame = state->tile->frame;
|
||||
const int width_cu = LCU_WIDTH >> depth;
|
||||
const int x = PU_GET_X(part_mode, width_cu, x_cu, i_pu);
|
||||
const int y = PU_GET_Y(part_mode, width_cu, y_cu, i_pu);
|
||||
const int width = PU_GET_W(part_mode, width_cu, i_pu);
|
||||
const int height = PU_GET_H(part_mode, width_cu, i_pu);
|
||||
const int width_cu = cu_loc->width;
|
||||
const int height_cu = cu_loc->height;
|
||||
|
||||
// Merge candidate A1 may not be used for the second PU of Nx2N, nLx2N and
|
||||
// nRx2N partitions.
|
||||
const bool merge_a1 = i_pu == 0 || width >= height;
|
||||
// Merge candidate B1 may not be used for the second PU of 2NxN, 2NxnU and
|
||||
// 2NxnD partitions.
|
||||
const bool merge_b1 = i_pu == 0 || width <= height;
|
||||
|
||||
const int x_local = SUB_SCU(x);
|
||||
const int y_local = SUB_SCU(y);
|
||||
const int x_local = SUB_SCU(cu_loc->x);
|
||||
const int y_local = SUB_SCU(cu_loc->y);
|
||||
cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, x_local, y_local);
|
||||
cur_pu->type = CU_NOTSET;
|
||||
cur_pu->part_size = part_mode;
|
||||
cur_pu->depth = depth;
|
||||
cur_pu->qp = state->qp;
|
||||
|
||||
// Default to candidate 0
|
||||
|
@ -1707,19 +1694,17 @@ static void search_pu_inter(encoder_state_t * const state,
|
|||
|
||||
info->state = state;
|
||||
info->pic = frame->source;
|
||||
info->origin.x = x;
|
||||
info->origin.y = y;
|
||||
info->width = width;
|
||||
info->height = height;
|
||||
info->origin.x = cu_loc->x;
|
||||
info->origin.y = cu_loc->y;
|
||||
info->width = width_cu;
|
||||
info->height = height_cu;
|
||||
info->mvd_cost_func = cfg->mv_rdo ? uvg_calc_mvd_cost_cabac : calc_mvd_cost;
|
||||
info->optimized_sad = uvg_get_optimized_sad(width);
|
||||
info->optimized_sad = uvg_get_optimized_sad(width_cu);
|
||||
|
||||
// Search for merge mode candidates
|
||||
info->num_merge_cand = uvg_inter_get_merge_cand(
|
||||
state,
|
||||
x, y,
|
||||
width, height,
|
||||
merge_a1, merge_b1,
|
||||
cu_loc,
|
||||
info->merge_cand,
|
||||
lcu
|
||||
);
|
||||
|
@ -1754,7 +1739,7 @@ static void search_pu_inter(encoder_state_t * const state,
|
|||
// If bipred is not enabled, do not try candidates with mv_dir == 3.
|
||||
// Bipred is also forbidden for 4x8 and 8x4 blocks by the standard.
|
||||
if (cur_pu->inter.mv_dir == 3 && !state->encoder_control->cfg.bipred) continue;
|
||||
if (cur_pu->inter.mv_dir == 3 && !(width + height > 12)) continue;
|
||||
if (cur_pu->inter.mv_dir == 3 && !(cu_loc->width + cu_loc->height > 12)) continue;
|
||||
|
||||
bool is_duplicate = merge_candidate_in_list(info->merge_cand, cur_cand, merge);
|
||||
|
||||
|
@ -1768,7 +1753,7 @@ static void search_pu_inter(encoder_state_t * const state,
|
|||
{
|
||||
continue;
|
||||
}
|
||||
uvg_inter_pred_pu(state, lcu, x_cu, y_cu, width_cu, true, false, i_pu);
|
||||
uvg_inter_pred_pu(state, lcu, true, false, cu_loc);
|
||||
merge->unit[merge->size] = *cur_pu;
|
||||
merge->unit[merge->size].type = CU_INTER;
|
||||
merge->unit[merge->size].merge_idx = merge_idx;
|
||||
|
@ -1776,11 +1761,11 @@ static void search_pu_inter(encoder_state_t * const state,
|
|||
merge->unit[merge->size].skipped = false;
|
||||
|
||||
double bits = merge_flag_cost + merge_idx + CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.cu_merge_idx_ext_model), merge_idx != 0);
|
||||
if(state->encoder_control->cfg.rdo >= 2 && cur_pu->part_size == SIZE_2Nx2N) {
|
||||
uvg_cu_cost_inter_rd2(state, x, y, depth, &merge->unit[merge->size], lcu, &merge->cost[merge->size], &bits);
|
||||
if(state->encoder_control->cfg.rdo >= 2) {
|
||||
uvg_cu_cost_inter_rd2(state, &merge->unit[merge->size], lcu, &merge->cost[merge->size], &bits, cu_loc);
|
||||
}
|
||||
else {
|
||||
merge->cost[merge->size] = uvg_satd_any_size(width, height,
|
||||
merge->cost[merge->size] = uvg_satd_any_size(cu_loc->width, cu_loc->height,
|
||||
lcu->rec.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH,
|
||||
lcu->ref.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH);
|
||||
bits += no_skip_flag;
|
||||
|
@ -1802,7 +1787,7 @@ static void search_pu_inter(encoder_state_t * const state,
|
|||
|
||||
// Early Skip Mode Decision
|
||||
bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400;
|
||||
if (cfg->early_skip && cur_pu->part_size == SIZE_2Nx2N) {
|
||||
if (cfg->early_skip) {
|
||||
for (int merge_key = 0; merge_key < num_rdo_cands; ++merge_key) {
|
||||
if(cfg->rdo >= 2 && merge->unit[merge->keys[merge_key]].skipped) {
|
||||
merge->size = 1;
|
||||
|
@ -1812,6 +1797,8 @@ static void search_pu_inter(encoder_state_t * const state,
|
|||
merge->keys[0] = 0;
|
||||
}
|
||||
else if(cfg->rdo < 2) {
|
||||
|
||||
const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
|
||||
// Reconstruct blocks with merge candidate.
|
||||
// Check luma CBF. Then, check chroma CBFs if luma CBF is not set
|
||||
// and chroma exists.
|
||||
|
@ -1824,22 +1811,22 @@ static void search_pu_inter(encoder_state_t * const state,
|
|||
cur_pu->inter.mv[0][1] = info->merge_cand[merge_idx].mv[0][1];
|
||||
cur_pu->inter.mv[1][0] = info->merge_cand[merge_idx].mv[1][0];
|
||||
cur_pu->inter.mv[1][1] = info->merge_cand[merge_idx].mv[1][1];
|
||||
uvg_lcu_fill_trdepth(lcu, x, y, depth, MAX(1, depth), UVG_BOTH_T);
|
||||
uvg_inter_recon_cu(state, lcu, x, y, width, true, false);
|
||||
uvg_quantize_lcu_residual(state, true, false, false, x, y, depth, cur_pu, lcu, true, UVG_BOTH_T);
|
||||
uvg_inter_recon_cu(state, lcu, true, false, cu_loc);
|
||||
|
||||
if (cbf_is_set(cur_pu->cbf, depth, COLOR_Y)) {
|
||||
uvg_quantize_lcu_residual(state, true, false, false, cu_loc, cur_pu, lcu, true, UVG_BOTH_T);
|
||||
|
||||
if (cbf_is_set(cur_pu->cbf, COLOR_Y)) {
|
||||
continue;
|
||||
}
|
||||
else if (has_chroma) {
|
||||
uvg_inter_recon_cu(state, lcu, x, y, width, false, has_chroma);
|
||||
uvg_inter_recon_cu(state, lcu, false, has_chroma, cu_loc);
|
||||
uvg_quantize_lcu_residual(state,
|
||||
false, has_chroma,
|
||||
false, /*we are only checking for lack of coeffs so no need to check jccr*/
|
||||
x, y, depth, cur_pu, lcu,
|
||||
cu_loc, cur_pu, lcu,
|
||||
true,
|
||||
UVG_BOTH_T);
|
||||
if (!cbf_is_set_any(cur_pu->cbf, depth)) {
|
||||
if (!cbf_is_set_any(cur_pu->cbf)) {
|
||||
cur_pu->type = CU_INTER;
|
||||
cur_pu->merge_idx = merge_idx;
|
||||
cur_pu->skipped = true;
|
||||
|
@ -1871,7 +1858,7 @@ static void search_pu_inter(encoder_state_t * const state,
|
|||
info->ref_idx = ref_idx;
|
||||
info->ref = state->frame->ref->images[ref_idx];
|
||||
|
||||
search_pu_inter_ref(info, depth, lcu, cur_pu, amvp);
|
||||
search_pu_inter_ref(info, lcu, cur_pu, amvp);
|
||||
}
|
||||
|
||||
assert(amvp[0].size <= MAX_UNIT_STATS_MAP_SIZE);
|
||||
|
@ -1936,14 +1923,11 @@ static void search_pu_inter(encoder_state_t * const state,
|
|||
info->ref = ref->images[info->ref_idx];
|
||||
|
||||
uvg_inter_get_mv_cand(info->state,
|
||||
info->origin.x,
|
||||
info->origin.y,
|
||||
info->width,
|
||||
info->height,
|
||||
info->mv_cand,
|
||||
unipred_pu,
|
||||
lcu,
|
||||
list);
|
||||
list,
|
||||
cu_loc);
|
||||
|
||||
double frac_cost = MAX_DOUBLE;
|
||||
double frac_bits = MAX_INT;
|
||||
|
@ -1964,8 +1948,8 @@ static void search_pu_inter(encoder_state_t * const state,
|
|||
unipred_pu->inter.mv[list][1] = frac_mv.y;
|
||||
CU_SET_MV_CAND(unipred_pu, list, cu_mv_cand);
|
||||
|
||||
if (state->encoder_control->cfg.rdo >= 2 && cur_pu->part_size == SIZE_2Nx2N) {
|
||||
uvg_cu_cost_inter_rd2(state, x, y, depth, unipred_pu, lcu, &frac_cost, &frac_bits);
|
||||
if (state->encoder_control->cfg.rdo >= 2) {
|
||||
uvg_cu_cost_inter_rd2(state, unipred_pu, lcu, &frac_cost, &frac_bits, cu_loc);
|
||||
}
|
||||
|
||||
amvp[list].cost[key] = frac_cost;
|
||||
|
@ -1987,15 +1971,15 @@ static void search_pu_inter(encoder_state_t * const state,
|
|||
amvp[list].size = n_best;
|
||||
}
|
||||
|
||||
if (state->encoder_control->cfg.rdo >= 2 && cur_pu->part_size == SIZE_2Nx2N && cfg->fme_level == 0) {
|
||||
if (amvp[0].size) uvg_cu_cost_inter_rd2(state, x, y, depth, &amvp[0].unit[best_keys[0]], lcu, &amvp[0].cost[best_keys[0]], &amvp[0].bits[best_keys[0]]);
|
||||
if (amvp[1].size) uvg_cu_cost_inter_rd2(state, x, y, depth, &amvp[1].unit[best_keys[1]], lcu, &amvp[1].cost[best_keys[1]], &amvp[1].bits[best_keys[1]]);
|
||||
if (state->encoder_control->cfg.rdo >= 2 && cfg->fme_level == 0) {
|
||||
if (amvp[0].size) uvg_cu_cost_inter_rd2(state, &amvp[0].unit[best_keys[0]], lcu, &amvp[0].cost[best_keys[0]], &amvp[0].bits[best_keys[0]], cu_loc);
|
||||
if (amvp[1].size) uvg_cu_cost_inter_rd2(state, &amvp[1].unit[best_keys[1]], lcu, &amvp[1].cost[best_keys[1]], &amvp[1].bits[best_keys[1]], cu_loc);
|
||||
}
|
||||
|
||||
// Search bi-pred positions
|
||||
bool can_use_bipred = state->frame->slicetype == UVG_SLICE_B
|
||||
&& cfg->bipred
|
||||
&& width + height >= 16; // 4x8 and 8x4 PBs are restricted to unipred
|
||||
&& cu_loc->width + cu_loc->height >= 16; // 4x8 and 8x4 PBs are restricted to unipred
|
||||
|
||||
if (can_use_bipred) {
|
||||
|
||||
|
@ -2026,25 +2010,23 @@ static void search_pu_inter(encoder_state_t * const state,
|
|||
bipred_pu->skipped = false;
|
||||
|
||||
for (int reflist = 0; reflist < 2; reflist++) {
|
||||
uvg_inter_get_mv_cand(info->state, x, y, width, height, info->mv_cand, bipred_pu, lcu, reflist);
|
||||
uvg_inter_get_mv_cand(info->state, info->mv_cand, bipred_pu, lcu, reflist, cu_loc);
|
||||
}
|
||||
|
||||
uvg_inter_recon_bipred(info->state,
|
||||
ref->images[ref_LX[0][bipred_pu->inter.mv_ref[0]]],
|
||||
ref->images[ref_LX[1][bipred_pu->inter.mv_ref[1]]],
|
||||
x, y,
|
||||
width,
|
||||
height,
|
||||
mv,
|
||||
lcu,
|
||||
mv, lcu,
|
||||
true,
|
||||
false);
|
||||
false,
|
||||
cu_loc
|
||||
);
|
||||
|
||||
const uvg_pixel *rec = &lcu->rec.y[SUB_SCU(y) * LCU_WIDTH + SUB_SCU(x)];
|
||||
const uvg_pixel *src = &lcu->ref.y[SUB_SCU(y) * LCU_WIDTH + SUB_SCU(x)];
|
||||
const uvg_pixel *rec = &lcu->rec.y[SUB_SCU(cu_loc->y) * LCU_WIDTH + SUB_SCU(cu_loc->x)];
|
||||
const uvg_pixel *src = &lcu->ref.y[SUB_SCU(cu_loc->y) * LCU_WIDTH + SUB_SCU(cu_loc->x)];
|
||||
|
||||
best_bipred_cost =
|
||||
uvg_satd_any_size(width, height, rec, LCU_WIDTH, src, LCU_WIDTH);
|
||||
uvg_satd_any_size(cu_loc->width, cu_loc->height, rec, LCU_WIDTH, src, LCU_WIDTH);
|
||||
|
||||
double bitcost[2] = { 0, 0 };
|
||||
|
||||
|
@ -2091,17 +2073,17 @@ static void search_pu_inter(encoder_state_t * const state,
|
|||
}
|
||||
|
||||
// TODO: this probably should have a separate command line option
|
||||
if (cfg->rdo >= 3) search_pu_inter_bipred(info, depth, lcu, &amvp[2]);
|
||||
if (cfg->rdo >= 3) search_pu_inter_bipred(info, lcu, &amvp[2]);
|
||||
|
||||
assert(amvp[2].size <= MAX_UNIT_STATS_MAP_SIZE);
|
||||
uvg_sort_keys_by_cost(&amvp[2]);
|
||||
if (amvp[2].size > 0 && state->encoder_control->cfg.rdo >= 2 && cur_pu->part_size == SIZE_2Nx2N) {
|
||||
uvg_cu_cost_inter_rd2(state, x, y, depth, &amvp[2].unit[amvp[2].keys[0]], lcu, &amvp[2].cost[amvp[2].keys[0]], &amvp[2].bits[amvp[2].keys[0]]);
|
||||
if (amvp[2].size > 0 && state->encoder_control->cfg.rdo >= 2) {
|
||||
uvg_cu_cost_inter_rd2(state, &amvp[2].unit[amvp[2].keys[0]], lcu, &amvp[2].cost[amvp[2].keys[0]], &amvp[2].bits[amvp[2].keys[0]], cu_loc);
|
||||
}
|
||||
}
|
||||
if(cfg->rdo < 2) {
|
||||
int predmode_ctx;
|
||||
const int skip_contest = uvg_get_skip_context(x, y, lcu, NULL, &predmode_ctx);
|
||||
const int skip_contest = uvg_get_skip_context(cu_loc->x, cu_loc->y, lcu, NULL, &predmode_ctx);
|
||||
const double no_skip_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_skip_flag_model[skip_contest], 0);
|
||||
|
||||
const double pred_mode_bits = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_pred_mode_model[predmode_ctx], 0);
|
||||
|
@ -2135,22 +2117,19 @@ static void search_pu_inter(encoder_state_t * const state,
|
|||
* \param inter_cost Return inter cost
|
||||
* \param inter_bitcost Return inter bitcost
|
||||
*/
|
||||
void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
|
||||
int x, int y, int depth,
|
||||
void uvg_cu_cost_inter_rd2(
|
||||
encoder_state_t * const state,
|
||||
cu_info_t* cur_cu,
|
||||
lcu_t *lcu,
|
||||
double *inter_cost,
|
||||
double* inter_bitcost){
|
||||
double* inter_bitcost,
|
||||
const cu_loc_t* const cu_loc){
|
||||
|
||||
int tr_depth = MAX(1, depth);
|
||||
if (cur_cu->part_size != SIZE_2Nx2N) {
|
||||
tr_depth = depth + 1;
|
||||
}
|
||||
uvg_lcu_fill_trdepth(lcu, x, y, depth, tr_depth, UVG_BOTH_T);
|
||||
const int x_px = SUB_SCU(cu_loc->x);
|
||||
const int y_px = SUB_SCU(cu_loc->y);
|
||||
const int width = cu_loc->width;
|
||||
const int height = cu_loc->height;
|
||||
|
||||
const int x_px = SUB_SCU(x);
|
||||
const int y_px = SUB_SCU(y);
|
||||
const int width = LCU_WIDTH >> depth;
|
||||
cabac_data_t cabac_copy;
|
||||
memcpy(&cabac_copy, &state->search_cabac, sizeof(cabac_copy));
|
||||
cabac_data_t* cabac = &state->search_cabac;
|
||||
|
@ -2160,31 +2139,43 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
|
|||
*cur_pu = *cur_cu;
|
||||
|
||||
const bool reconstruct_chroma = state->encoder_control->chroma_format != UVG_CSP_400;
|
||||
uvg_inter_recon_cu(state, lcu, x, y, CU_WIDTH_FROM_DEPTH(depth), true, reconstruct_chroma);
|
||||
uvg_inter_recon_cu(state, lcu, true, reconstruct_chroma, cu_loc);
|
||||
|
||||
int index = y_px * LCU_WIDTH + x_px;
|
||||
double ssd = uvg_pixels_calc_ssd(&lcu->ref.y[index], &lcu->rec.y[index],
|
||||
LCU_WIDTH, LCU_WIDTH,
|
||||
width) * UVG_LUMA_MULT;
|
||||
width, height) * UVG_LUMA_MULT;
|
||||
if (reconstruct_chroma) {
|
||||
int index = y_px / 2 * LCU_WIDTH_C + x_px / 2;
|
||||
double ssd_u = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
width / 2);
|
||||
cu_loc->chroma_width, cu_loc->chroma_height);
|
||||
double ssd_v = uvg_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.v[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
width / 2);
|
||||
cu_loc->chroma_width, cu_loc->chroma_height);
|
||||
ssd += (ssd_u + ssd_v) * UVG_CHROMA_MULT;
|
||||
}
|
||||
double no_cbf_bits;
|
||||
double bits = 0;
|
||||
const int skip_context = uvg_get_skip_context(x, y, lcu, NULL, NULL);
|
||||
if (cur_cu->merged && cur_cu->part_size == SIZE_2Nx2N) {
|
||||
const int skip_context = uvg_get_skip_context(cu_loc->x, cu_loc->y, lcu, NULL, NULL);
|
||||
|
||||
int8_t depth = 0;
|
||||
int8_t mtt_depth = 0;
|
||||
uint32_t splits = cur_cu->split_tree;
|
||||
while (splits & 7) {
|
||||
if ((splits & 7) != QT_SPLIT) {
|
||||
mtt_depth++;
|
||||
}
|
||||
depth++;
|
||||
splits >>= 3;
|
||||
}
|
||||
const split_tree_t splitt_tree = { cur_cu->split_tree, depth, mtt_depth, 0};
|
||||
if (cur_cu->merged) {
|
||||
no_cbf_bits = CTX_ENTROPY_FBITS(&state->cabac.ctx.cu_skip_flag_model[skip_context], 1) + *inter_bitcost;
|
||||
bits += uvg_mock_encode_coding_unit(state, cabac, x, y, depth, lcu, cur_cu, UVG_BOTH_T);
|
||||
bits += uvg_mock_encode_coding_unit(state, cabac, cu_loc, cu_loc, lcu, cur_cu, UVG_BOTH_T, splitt_tree);
|
||||
}
|
||||
else {
|
||||
no_cbf_bits = uvg_mock_encode_coding_unit(state, cabac, x, y, depth, lcu, cur_cu, UVG_BOTH_T);
|
||||
no_cbf_bits = uvg_mock_encode_coding_unit(state, cabac, cu_loc, cu_loc, lcu, cur_cu, UVG_BOTH_T, splitt_tree);
|
||||
bits += no_cbf_bits - CTX_ENTROPY_FBITS(&cabac->ctx.cu_qt_root_cbf_model, 0) + CTX_ENTROPY_FBITS(&cabac->ctx.cu_qt_root_cbf_model, 1);
|
||||
}
|
||||
double no_cbf_cost = ssd + no_cbf_bits * state->lambda;
|
||||
|
@ -2194,20 +2185,20 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
|
|||
state->encoder_control->cfg.chroma_trskip_enable;
|
||||
|
||||
double chroma_cost = 0;
|
||||
if((state->encoder_control->cfg.jccr || can_use_chroma_tr_skip) && cur_cu->depth == cur_cu->tr_depth && reconstruct_chroma) {
|
||||
if((state->encoder_control->cfg.jccr || can_use_chroma_tr_skip) && PU_IS_TU(cur_cu) && reconstruct_chroma) {
|
||||
uvg_quantize_lcu_residual(state,
|
||||
true,
|
||||
false,
|
||||
false, x, y,
|
||||
depth,
|
||||
false,
|
||||
cu_loc,
|
||||
cur_cu,
|
||||
lcu,
|
||||
false,
|
||||
UVG_BOTH_T);
|
||||
ALIGNED(64) uvg_pixel u_pred[LCU_WIDTH_C * LCU_WIDTH_C];
|
||||
ALIGNED(64) uvg_pixel v_pred[LCU_WIDTH_C * LCU_WIDTH_C];
|
||||
uvg_pixels_blit(&lcu->ref.u[index], u_pred, width, width, LCU_WIDTH_C, width);
|
||||
uvg_pixels_blit(&lcu->ref.v[index], v_pred, width, width, LCU_WIDTH_C, width);
|
||||
uvg_pixels_blit(&lcu->ref.u[index], u_pred, width, height, LCU_WIDTH_C, width);
|
||||
uvg_pixels_blit(&lcu->ref.v[index], v_pred, width, height, LCU_WIDTH_C, width);
|
||||
ALIGNED(64) int16_t u_resi[LCU_WIDTH_C * LCU_WIDTH_C];
|
||||
ALIGNED(64) int16_t v_resi[LCU_WIDTH_C * LCU_WIDTH_C];
|
||||
|
||||
|
@ -2216,6 +2207,7 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
|
|||
u_pred,
|
||||
u_resi,
|
||||
width,
|
||||
height,
|
||||
LCU_WIDTH_C,
|
||||
width);
|
||||
uvg_generate_residual(
|
||||
|
@ -2223,19 +2215,17 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
|
|||
v_pred,
|
||||
v_resi,
|
||||
width,
|
||||
height,
|
||||
LCU_WIDTH_C,
|
||||
width);
|
||||
|
||||
uvg_chorma_ts_out_t chorma_ts_out;
|
||||
uvg_chroma_transform_search(
|
||||
state,
|
||||
depth,
|
||||
lcu,
|
||||
&cabac_copy,
|
||||
width,
|
||||
width,
|
||||
cu_loc,
|
||||
index,
|
||||
0,
|
||||
cur_cu,
|
||||
u_pred,
|
||||
v_pred,
|
||||
|
@ -2243,41 +2233,41 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
|
|||
v_resi,
|
||||
&chorma_ts_out,
|
||||
UVG_BOTH_T);
|
||||
cbf_clear(&cur_cu->cbf, depth, COLOR_U);
|
||||
cbf_clear(&cur_cu->cbf, depth, COLOR_V);
|
||||
cbf_clear(&cur_cu->cbf, COLOR_U);
|
||||
cbf_clear(&cur_cu->cbf, COLOR_V);
|
||||
if (chorma_ts_out.best_u_cost + chorma_ts_out.best_v_cost < chorma_ts_out.best_combined_cost) {
|
||||
cur_cu->joint_cb_cr = 0;
|
||||
cur_cu->tr_skip |= (chorma_ts_out.best_u_index == CHROMA_TS) << COLOR_U;
|
||||
cur_cu->tr_skip |= (chorma_ts_out.best_v_index == CHROMA_TS) << COLOR_V;
|
||||
if(chorma_ts_out.best_u_index != NO_RESIDUAL) cbf_set(&cur_cu->cbf, depth, COLOR_U);
|
||||
if(chorma_ts_out.best_v_index != NO_RESIDUAL) cbf_set(&cur_cu->cbf, depth, COLOR_V);
|
||||
if(chorma_ts_out.best_u_index != NO_RESIDUAL) cbf_set(&cur_cu->cbf, COLOR_U);
|
||||
if(chorma_ts_out.best_v_index != NO_RESIDUAL) cbf_set(&cur_cu->cbf, COLOR_V);
|
||||
chroma_cost += chorma_ts_out.best_u_cost + chorma_ts_out.best_v_cost;
|
||||
}
|
||||
else {
|
||||
cur_cu->joint_cb_cr = chorma_ts_out.best_combined_index;
|
||||
if (chorma_ts_out.best_combined_index & 2) cbf_set(&cur_cu->cbf, depth, COLOR_U);
|
||||
if (chorma_ts_out.best_combined_index & 1) cbf_set(&cur_cu->cbf, depth, COLOR_V);
|
||||
if (chorma_ts_out.best_combined_index & 2) cbf_set(&cur_cu->cbf, COLOR_U);
|
||||
if (chorma_ts_out.best_combined_index & 1) cbf_set(&cur_cu->cbf, COLOR_V);
|
||||
chroma_cost += chorma_ts_out.best_combined_cost;
|
||||
}
|
||||
}
|
||||
else {
|
||||
uvg_quantize_lcu_residual(state,
|
||||
true, reconstruct_chroma,
|
||||
reconstruct_chroma && state->encoder_control->cfg.jccr, x, y,
|
||||
depth,
|
||||
reconstruct_chroma && state->encoder_control->cfg.jccr,
|
||||
cu_loc,
|
||||
cur_cu,
|
||||
lcu,
|
||||
false,
|
||||
UVG_BOTH_T);
|
||||
}
|
||||
|
||||
int cbf = cbf_is_set_any(cur_cu->cbf, depth);
|
||||
int cbf = cbf_is_set_any(cur_cu->cbf);
|
||||
|
||||
if(cbf) {
|
||||
*inter_cost = uvg_cu_rd_cost_luma(state, x_px, y_px, depth, cur_cu, lcu);
|
||||
*inter_cost = uvg_cu_rd_cost_luma(state, cu_loc, cur_cu, lcu, 0);
|
||||
if (reconstruct_chroma) {
|
||||
if (cur_cu->depth != cur_cu->tr_depth || !state->encoder_control->cfg.jccr) {
|
||||
*inter_cost += uvg_cu_rd_cost_chroma(state, x_px, y_px, depth, cur_cu, lcu);
|
||||
if (!PU_IS_TU(cur_cu) || !state->encoder_control->cfg.jccr) {
|
||||
*inter_cost += uvg_cu_rd_cost_chroma(state, cur_cu, lcu, cu_loc);
|
||||
}
|
||||
else {
|
||||
*inter_cost += chroma_cost;
|
||||
|
@ -2297,7 +2287,7 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
|
|||
|
||||
if(no_cbf_cost < *inter_cost) {
|
||||
cur_cu->cbf = 0;
|
||||
if (cur_cu->merged && cur_cu->part_size == SIZE_2Nx2N) {
|
||||
if (cur_cu->merged) {
|
||||
cur_cu->skipped = 1;
|
||||
}
|
||||
*inter_cost = no_cbf_cost;
|
||||
|
@ -2321,8 +2311,9 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
|
|||
* \param inter_cost Return inter cost
|
||||
* \param inter_bitcost Return inter bitcost
|
||||
*/
|
||||
void uvg_search_cu_inter(encoder_state_t * const state,
|
||||
int x, int y, int depth,
|
||||
void uvg_search_cu_inter(
|
||||
encoder_state_t * const state,
|
||||
const cu_loc_t* const cu_loc,
|
||||
lcu_t *lcu,
|
||||
double *inter_cost,
|
||||
double* inter_bitcost)
|
||||
|
@ -2338,12 +2329,8 @@ void uvg_search_cu_inter(encoder_state_t * const state,
|
|||
inter_search_info_t info;
|
||||
|
||||
search_pu_inter(state,
|
||||
x, y, depth,
|
||||
SIZE_2Nx2N, 0,
|
||||
lcu,
|
||||
amvp,
|
||||
&merge,
|
||||
&info);
|
||||
cu_loc, lcu, amvp,
|
||||
&merge, &info);
|
||||
|
||||
// Early Skip CU decision
|
||||
if (merge.size == 1 && merge.unit[0].skipped) {
|
||||
|
@ -2385,13 +2372,14 @@ void uvg_search_cu_inter(encoder_state_t * const state,
|
|||
return;
|
||||
}
|
||||
|
||||
const int x_local = SUB_SCU(x);
|
||||
const int y_local = SUB_SCU(y);
|
||||
const int x_local = SUB_SCU(cu_loc->x);
|
||||
const int y_local = SUB_SCU(cu_loc->y);
|
||||
cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, x_local, y_local);
|
||||
*cur_pu = *best_inter_pu;
|
||||
|
||||
uvg_inter_recon_cu(state, lcu, x, y, CU_WIDTH_FROM_DEPTH(depth),
|
||||
true, state->encoder_control->chroma_format != UVG_CSP_400);
|
||||
uvg_inter_recon_cu(state, lcu,
|
||||
true, state->encoder_control->chroma_format != UVG_CSP_400,
|
||||
cu_loc);
|
||||
|
||||
if (*inter_cost < MAX_DOUBLE && cur_pu->inter.mv_dir & 1) {
|
||||
assert(fracmv_within_tile(&info, cur_pu->inter.mv[0][0], cur_pu->inter.mv[0][1]));
|
||||
|
|
|
@ -73,8 +73,9 @@ typedef double uvg_mvd_cost_func(const encoder_state_t *state,
|
|||
int32_t ref_idx,
|
||||
double *bitcost);
|
||||
|
||||
void uvg_search_cu_inter(encoder_state_t * const state,
|
||||
int x, int y, int depth,
|
||||
void uvg_search_cu_inter(
|
||||
encoder_state_t * const state,
|
||||
const cu_loc_t* const cu_loc,
|
||||
lcu_t *lcu,
|
||||
double *inter_cost,
|
||||
double* inter_bitcost);
|
||||
|
@ -85,12 +86,13 @@ unsigned uvg_inter_satd_cost(const encoder_state_t* state,
|
|||
const lcu_t *lcu,
|
||||
int x,
|
||||
int y);
|
||||
void uvg_cu_cost_inter_rd2(encoder_state_t* const state,
|
||||
int x, int y, int depth,
|
||||
void uvg_cu_cost_inter_rd2(
|
||||
encoder_state_t* const state,
|
||||
cu_info_t* cur_cu,
|
||||
lcu_t* lcu,
|
||||
double* inter_cost,
|
||||
double* inter_bitcost);
|
||||
double* inter_bitcost,
|
||||
const cu_loc_t* const cu_loc);
|
||||
|
||||
int uvg_get_skip_context(int x, int y, lcu_t* const lcu, cu_array_t* const cu_a, int* predmode_ctx);
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -43,27 +43,27 @@
|
|||
#include "global.h" // IWYU pragma: keep
|
||||
#include "intra.h"
|
||||
|
||||
double uvg_luma_mode_bits(const encoder_state_t *state, const cu_info_t* const cur_cu, int x, int y, int8_t depth, const lcu_t* lcu);
|
||||
double uvg_luma_mode_bits(const encoder_state_t *state, const cu_info_t* const cur_cu, const cu_loc_t*
|
||||
const cu_loc,
|
||||
const lcu_t* lcu);
|
||||
|
||||
double uvg_chroma_mode_bits(const encoder_state_t *state,
|
||||
int8_t chroma_mode, int8_t luma_mode);
|
||||
|
||||
int8_t uvg_search_cu_intra_chroma(
|
||||
encoder_state_t * const state,
|
||||
const int x_px,
|
||||
const int y_px,
|
||||
const int depth,
|
||||
const cu_loc_t* const cu_loc,
|
||||
lcu_t *lcu,
|
||||
intra_search_data_t* best_cclm,
|
||||
enum uvg_tree_type tree_type);
|
||||
int8_t luma_mode,
|
||||
enum uvg_tree_type tree_type,
|
||||
bool is_separate);
|
||||
|
||||
void uvg_search_cu_intra(
|
||||
encoder_state_t * const state,
|
||||
const int x_px,
|
||||
const int y_px,
|
||||
const int depth,
|
||||
intra_search_data_t* search_data,
|
||||
lcu_t *lcu,
|
||||
enum uvg_tree_type tree_type);
|
||||
enum uvg_tree_type tree_type,
|
||||
const cu_loc_t* const cu_loc);
|
||||
|
||||
#endif // SEARCH_INTRA_H_
|
||||
|
|
File diff suppressed because it is too large
Load diff
4827
src/strategies/avx2/dct_avx2_tables.h
Normal file
4827
src/strategies/avx2/dct_avx2_tables.h
Normal file
File diff suppressed because it is too large
Load diff
1544
src/strategies/avx2/depquant-avx2.c
Normal file
1544
src/strategies/avx2/depquant-avx2.c
Normal file
File diff suppressed because it is too large
Load diff
46
src/strategies/avx2/depquant-avx2.h
Normal file
46
src/strategies/avx2/depquant-avx2.h
Normal file
|
@ -0,0 +1,46 @@
|
|||
#ifndef STRATEGIES_DEPQUANT_AVX2_H_
|
||||
#define STRATEGIES_DEPQUANT_AVX2_H_
|
||||
/*****************************************************************************
|
||||
* This file is part of uvg266 VVC encoder.
|
||||
*
|
||||
* Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright notice, this
|
||||
* list of conditions and the following disclaimer in the documentation and/or
|
||||
* other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
|
||||
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* \ingroup Optimization
|
||||
* \file
|
||||
* Optimizations for AVX2.
|
||||
*/
|
||||
|
||||
#include "global.h" // IWYU pragma: keep
|
||||
|
||||
|
||||
int uvg_strategy_register_depquant_avx2(void* opaque, uint8_t bitdepth);
|
||||
|
||||
#endif //STRATEGIES_DEPQUANT_AVX2_H_
|
|
@ -38,13 +38,14 @@
|
|||
* Functions for writing the coding quadtree and related syntax.
|
||||
*/
|
||||
|
||||
#include "cu.h"
|
||||
#include "encoderstate.h"
|
||||
#include "global.h"
|
||||
|
||||
void uvg_encode_coeff_nxn_avx2(encoder_state_t * const state,
|
||||
cabac_data_t * const cabac,
|
||||
const coeff_t *coeff,
|
||||
uint8_t width,
|
||||
const cu_loc_t *loc,
|
||||
uint8_t type,
|
||||
int8_t scan_mode,
|
||||
int8_t tr_skip,
|
||||
|
|
|
@ -42,10 +42,9 @@
|
|||
#include "strategyselector.h"
|
||||
#include "strategies/missing-intel-intrinsics.h"
|
||||
|
||||
|
||||
/**
|
||||
* \brief Generate angular predictions.
|
||||
* \param log2_width Log2 of width, range 2..5.
|
||||
* \param cu_loc CU location and size data.
|
||||
* \param intra_mode Angular mode in range 2..66.
|
||||
* \param channel_type Color channel.
|
||||
* \param in_ref_above Pointer to -1 index of above reference, length=width*2+1.
|
||||
|
@ -54,20 +53,28 @@
|
|||
* \param multi_ref_idx Reference line index for use with MRL.
|
||||
*/
|
||||
static void uvg_angular_pred_avx2(
|
||||
const int_fast8_t log2_width,
|
||||
const cu_loc_t* const cu_loc,
|
||||
const int_fast8_t intra_mode,
|
||||
const int_fast8_t channel_type,
|
||||
const uvg_pixel *const in_ref_above,
|
||||
const uvg_pixel *const in_ref_left,
|
||||
uvg_pixel *const dst,
|
||||
const uint8_t multi_ref_idx)
|
||||
const uint8_t multi_ref_idx,
|
||||
const uint8_t isp_mode,
|
||||
const int cu_dim)
|
||||
{
|
||||
// ISP_TODO: non-square block implementation, height is passed but not used
|
||||
const int width = channel_type == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
|
||||
const int height = channel_type == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
|
||||
const int log2_width = uvg_g_convert_to_log2[width];
|
||||
const int log2_height = uvg_g_convert_to_log2[height];
|
||||
|
||||
assert(log2_width >= 2 && log2_width <= 5);
|
||||
assert((log2_width >= 2 && log2_width <= 5) && (log2_height >= 2 && log2_height <= 5));
|
||||
assert(intra_mode >= 2 && intra_mode <= 66);
|
||||
|
||||
// TODO: implement handling of MRL
|
||||
uint8_t multi_ref_index = channel_type == COLOR_Y ? multi_ref_idx : 0;
|
||||
uint8_t isp = isp_mode;
|
||||
|
||||
__m256i p_shuf_01 = _mm256_setr_epi8(
|
||||
0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04,
|
||||
|
@ -142,7 +149,6 @@ static void uvg_angular_pred_avx2(
|
|||
//uvg_pixel tmp_ref[2 * 128 + 3 + 33 * MAX_REF_LINE:IDX] = { 0 };
|
||||
uvg_pixel temp_main[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX] = { 0 };
|
||||
uvg_pixel temp_side[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX] = { 0 };
|
||||
const int_fast32_t width = 1 << log2_width;
|
||||
|
||||
int32_t pred_mode = intra_mode; // ToDo: handle WAIP
|
||||
|
||||
|
@ -345,13 +351,13 @@ static void uvg_angular_pred_avx2(
|
|||
|
||||
|
||||
// PDPC
|
||||
bool PDPC_filter = (width >= 4 || channel_type != 0);
|
||||
bool PDPC_filter = ((width >= TR_MIN_WIDTH && height >= TR_MIN_WIDTH) || channel_type != 0);
|
||||
if (pred_mode > 1 && pred_mode < 67) {
|
||||
if (mode_disp < 0 || multi_ref_index) { // Cannot be used with MRL.
|
||||
PDPC_filter = false;
|
||||
}
|
||||
else if (mode_disp > 0) {
|
||||
PDPC_filter = (scale >= 0);
|
||||
PDPC_filter &= (scale >= 0);
|
||||
}
|
||||
}
|
||||
if(PDPC_filter) {
|
||||
|
@ -497,20 +503,27 @@ static void uvg_angular_pred_avx2(
|
|||
|
||||
/**
|
||||
* \brief Generate planar prediction.
|
||||
* \param log2_width Log2 of width, range 2..5.
|
||||
* \param cu_loc CU location and size data.
|
||||
* \param color Color channel.
|
||||
* \param in_ref_above Pointer to -1 index of above reference, length=width*2+1.
|
||||
* \param in_ref_left Pointer to -1 index of left reference, length=width*2+1.
|
||||
* \param dst Buffer of size width*width.
|
||||
*/
|
||||
static void uvg_intra_pred_planar_avx2(
|
||||
const int_fast8_t log2_width,
|
||||
const cu_loc_t* const cu_loc,
|
||||
color_t color,
|
||||
const uint8_t *const ref_top,
|
||||
const uint8_t *const ref_left,
|
||||
uint8_t *const dst)
|
||||
{
|
||||
assert(log2_width >= 2 && log2_width <= 5);
|
||||
// ISP_TODO: non-square block implementation, height is passed but not used
|
||||
const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
|
||||
const int height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
|
||||
const int log2_width = uvg_g_convert_to_log2[width];
|
||||
const int log2_height = uvg_g_convert_to_log2[height];
|
||||
|
||||
assert((log2_width >= 2 && log2_width <= 5) && (log2_height >= 2 && log2_height <= 5));
|
||||
|
||||
const int_fast8_t width = 1 << log2_width;
|
||||
const uint8_t top_right = ref_top[width + 1];
|
||||
const uint8_t bottom_left = ref_left[width + 1];
|
||||
|
||||
|
@ -964,12 +977,17 @@ static void uvg_intra_pred_filtered_dc_avx2(
|
|||
*/
|
||||
static void uvg_pdpc_planar_dc_avx2(
|
||||
const int mode,
|
||||
const int width,
|
||||
const int log2_width,
|
||||
const cu_loc_t* const cu_loc,
|
||||
const color_t color,
|
||||
const uvg_intra_ref *const used_ref,
|
||||
uvg_pixel *const dst)
|
||||
{
|
||||
// ISP_TODO: non-square block implementation, height is passed but not used
|
||||
assert(mode == 0 || mode == 1); // planar or DC
|
||||
const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
|
||||
const int height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
|
||||
const int log2_width = uvg_g_convert_to_log2[width];
|
||||
const int log2_height = uvg_g_convert_to_log2[height];
|
||||
|
||||
__m256i shuf_mask_byte = _mm256_setr_epi8(
|
||||
0, -1, 0, -1, 0, -1, 0, -1,
|
||||
|
|
|
@ -716,8 +716,9 @@ SATD_ANY_SIZE_MULTI_AVX2(quad_avx2, 4)
|
|||
|
||||
static unsigned pixels_calc_ssd_avx2(const uint8_t *const ref, const uint8_t *const rec,
|
||||
const int ref_stride, const int rec_stride,
|
||||
const int width)
|
||||
const int width, const int height)
|
||||
{
|
||||
assert(width == height && "Non square not yet implemented");
|
||||
__m256i ssd_part;
|
||||
__m256i diff = _mm256_setzero_si256();
|
||||
__m128i sum;
|
||||
|
@ -1743,40 +1744,32 @@ static INLINE __m128i get_residual_8x1_avx2(const uint8_t* a_in, const uint8_t*
|
|||
return diff;
|
||||
}
|
||||
|
||||
static void generate_residual_avx2(const uint8_t* ref_in, const uint8_t* pred_in, int16_t* residual, int width, int ref_stride, int pred_stride) {
|
||||
|
||||
static void generate_residual_avx2(const uint8_t* ref_in, const uint8_t* pred_in, int16_t* residual, int width, int height, int ref_stride, int pred_stride) {
|
||||
// ISP_TODO: non-square block implementation, height is passed but not used
|
||||
__m128i diff = _mm_setzero_si128();
|
||||
switch (width) {
|
||||
case 4:
|
||||
diff = get_residual_4x1_avx2(ref_in + 0 * ref_stride, pred_in + 0 * pred_stride);
|
||||
_mm_storel_epi64((__m128i*) & (residual[0]), diff);
|
||||
diff = get_residual_4x1_avx2(ref_in + 1 * ref_stride, pred_in + 1 * pred_stride);
|
||||
_mm_storel_epi64((__m128i*) & (residual[4]), diff);
|
||||
diff = get_residual_4x1_avx2(ref_in + 2 * ref_stride, pred_in + 2 * pred_stride);
|
||||
_mm_storel_epi64((__m128i*) & (residual[8]), diff);
|
||||
diff = get_residual_4x1_avx2(ref_in + 3 * ref_stride, pred_in + 3 * pred_stride);
|
||||
_mm_storel_epi64((__m128i*) & (residual[12]), diff);
|
||||
for (int y = 0; y < height; y+=4) {
|
||||
diff = get_residual_4x1_avx2(ref_in + y * ref_stride, pred_in + y * pred_stride);
|
||||
_mm_storel_epi64((__m128i*) & (residual[y * 4]), diff);
|
||||
diff = get_residual_4x1_avx2(ref_in + (y + 1) * ref_stride, pred_in + (y + 1) * pred_stride);
|
||||
_mm_storel_epi64((__m128i*) & (residual[y * 4 + 4]), diff);
|
||||
diff = get_residual_4x1_avx2(ref_in + (y + 2) * ref_stride, pred_in + (y + 2) * pred_stride);
|
||||
_mm_storel_epi64((__m128i*) & (residual[y * 4 + 8]), diff);
|
||||
diff = get_residual_4x1_avx2(ref_in + (y + 3) * ref_stride, pred_in + (y + 3) * pred_stride);
|
||||
_mm_storel_epi64((__m128i*) & (residual[y * 4 + 12]), diff);
|
||||
}
|
||||
break;
|
||||
case 8:
|
||||
diff = get_residual_8x1_avx2(&ref_in[0 * ref_stride], &pred_in[0 * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[0]), diff);
|
||||
diff = get_residual_8x1_avx2(&ref_in[1 * ref_stride], &pred_in[1 * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[8]), diff);
|
||||
diff = get_residual_8x1_avx2(&ref_in[2 * ref_stride], &pred_in[2 * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[16]), diff);
|
||||
diff = get_residual_8x1_avx2(&ref_in[3 * ref_stride], &pred_in[3 * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[24]), diff);
|
||||
diff = get_residual_8x1_avx2(&ref_in[4 * ref_stride], &pred_in[4 * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[32]), diff);
|
||||
diff = get_residual_8x1_avx2(&ref_in[5 * ref_stride], &pred_in[5 * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[40]), diff);
|
||||
diff = get_residual_8x1_avx2(&ref_in[6 * ref_stride], &pred_in[6 * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[48]), diff);
|
||||
diff = get_residual_8x1_avx2(&ref_in[7 * ref_stride], &pred_in[7 * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[56]), diff);
|
||||
for (int y = 0; y < height; y += 2) {
|
||||
diff = get_residual_8x1_avx2(&ref_in[y * ref_stride], &pred_in[y * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[y * 8]), diff);
|
||||
diff = get_residual_8x1_avx2(&ref_in[(y + 1) * ref_stride], &pred_in[(y + 1) * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[y*8 + 8]), diff);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
for (int y = 0; y < width; ++y) {
|
||||
for (int y = 0; y < height; ++y) {
|
||||
for (int x = 0; x < width; x += 16) {
|
||||
diff = get_residual_8x1_avx2(&ref_in[x + y * ref_stride], &pred_in[x + y * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & residual[x + y * width], diff);
|
||||
|
|
|
@ -380,20 +380,24 @@ void uvg_quant_avx2(const encoder_state_t * const state, const coeff_t * __restr
|
|||
int32_t height, color_t color, int8_t scan_idx, int8_t block_type, int8_t transform_skip, uint8_t lfnst_idx)
|
||||
{
|
||||
const encoder_control_t * const encoder = state->encoder_control;
|
||||
const uint32_t log2_block_size = uvg_g_convert_to_bit[width] + 2;
|
||||
const uint32_t * const scan = uvg_g_sig_last_scan[scan_idx][log2_block_size - 1];
|
||||
const uint32_t log2_tr_width = uvg_g_convert_to_log2[width];
|
||||
const uint32_t log2_tr_height = uvg_g_convert_to_log2[height];
|
||||
const uint32_t* const scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_idx, log2_tr_width, log2_tr_height);
|
||||
|
||||
int32_t qp_scaled = uvg_get_scaled_qp(color, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]);
|
||||
qp_scaled = transform_skip ? MAX(qp_scaled, 4 + 6 * MIN_QP_PRIME_TS) : qp_scaled;
|
||||
uint32_t log2_tr_width = uvg_math_floor_log2(height);
|
||||
uint32_t log2_tr_height = uvg_math_floor_log2(width);
|
||||
bool needs_block_size_trafo_scale = !transform_skip && ((log2_tr_height + log2_tr_width) % 2 == 1);
|
||||
needs_block_size_trafo_scale |= 0; // Non log2 block size
|
||||
|
||||
const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)color;
|
||||
const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled % 6];
|
||||
const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_width + log2_tr_height) >> 1); //!< Represents scaling through forward transform
|
||||
const int64_t q_bits = QUANT_SHIFT + qp_scaled / 6 + (transform_skip ? 0 : transform_shift);
|
||||
const int64_t q_bits = QUANT_SHIFT + qp_scaled / 6 + (transform_skip ? 0 : transform_shift - needs_block_size_trafo_scale);
|
||||
const int32_t add = ((state->frame->slicetype == UVG_SLICE_I) ? 171 : 85) << (q_bits - 9);
|
||||
const int32_t q_bits8 = q_bits - 8;
|
||||
|
||||
const int32_t default_quant_coeff = uvg_g_quant_scales[needs_block_size_trafo_scale][qp_scaled % 6];
|
||||
|
||||
uint32_t ac_sum = 0;
|
||||
int32_t last_cg = -1;
|
||||
|
||||
|
@ -402,7 +406,7 @@ void uvg_quant_avx2(const encoder_state_t * const state, const coeff_t * __restr
|
|||
// Loading once is enough if scaling lists are not off
|
||||
__m256i low_b = _mm256_setzero_si256(), high_b = _mm256_setzero_si256();
|
||||
if (!(state->encoder_control->scaling_list.enable)) {
|
||||
low_b = _mm256_set1_epi32(quant_coeff[0]);
|
||||
low_b = _mm256_set1_epi32(default_quant_coeff);
|
||||
high_b = low_b;
|
||||
}
|
||||
|
||||
|
@ -579,8 +583,9 @@ static INLINE int64_t get_quantized_recon_8x1_avx2(int16_t *residual, const uint
|
|||
return _mm_cvtsi128_si64(_mm_packus_epi16(rec, rec));
|
||||
}
|
||||
|
||||
static void get_quantized_recon_avx2(int16_t *residual, const uint8_t *pred_in, int in_stride, uint8_t *rec_out, int out_stride, int width){
|
||||
static void get_quantized_recon_avx2(int16_t *residual, const uint8_t *pred_in, int in_stride, uint8_t *rec_out, int out_stride, int width, int height){
|
||||
|
||||
if (height == width || width >= 16) {
|
||||
switch (width) {
|
||||
case 4:
|
||||
*(int32_t*) & (rec_out[0 * out_stride]) = get_quantized_recon_4x1_avx2(residual + 0 * width, pred_in + 0 * in_stride);
|
||||
|
@ -599,7 +604,7 @@ static void get_quantized_recon_avx2(int16_t *residual, const uint8_t *pred_in,
|
|||
*(int64_t*)& (rec_out[7 * out_stride]) = get_quantized_recon_8x1_avx2(residual + 7 * width, pred_in + 7 * in_stride);
|
||||
break;
|
||||
default:
|
||||
for (int y = 0; y < width; ++y) {
|
||||
for (int y = 0; y < height; ++y) {
|
||||
for (int x = 0; x < width; x += 16) {
|
||||
*(int64_t*)& (rec_out[x + y * out_stride]) = get_quantized_recon_8x1_avx2(residual + x + y * width, pred_in + x + y * in_stride);
|
||||
*(int64_t*)& (rec_out[(x + 8) + y * out_stride]) = get_quantized_recon_8x1_avx2(residual + (x + 8) + y * width, pred_in + (x + 8) + y * in_stride);
|
||||
|
@ -608,6 +613,32 @@ static void get_quantized_recon_avx2(int16_t *residual, const uint8_t *pred_in,
|
|||
break;
|
||||
}
|
||||
}
|
||||
else {
|
||||
switch (width) {
|
||||
case 4:
|
||||
for (int y = 0; y < height; y += 4) {
|
||||
*(int32_t*)& (rec_out[(y + 0) * out_stride]) = get_quantized_recon_4x1_avx2(residual + (y + 0) * width, pred_in + (y + 0) * in_stride);
|
||||
*(int32_t*)& (rec_out[(y + 1) * out_stride]) = get_quantized_recon_4x1_avx2(residual + (y + 1) * width, pred_in + (y + 1) * in_stride);
|
||||
*(int32_t*)& (rec_out[(y + 2) * out_stride]) = get_quantized_recon_4x1_avx2(residual + (y + 2) * width, pred_in + (y + 2) * in_stride);
|
||||
*(int32_t*)& (rec_out[(y + 3) * out_stride]) = get_quantized_recon_4x1_avx2(residual + (y + 3) * width, pred_in + (y + 3) * in_stride);
|
||||
}
|
||||
break;
|
||||
case 8:
|
||||
for (int y = 0; y < height; ++y) {
|
||||
*(int32_t*)& (rec_out[y * out_stride]) = get_quantized_recon_8x1_avx2(residual + y * width, pred_in + y * in_stride);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
for (int y = 0; y < height; ++y) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
int16_t val = residual[x + y * width] + pred_in[x + y * in_stride];
|
||||
rec_out[x + y * out_stride] = (uvg_pixel)CLIP(0, PIXEL_MAX, val);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Quantize residual and get both the reconstruction and coeffs.
|
||||
|
@ -626,7 +657,7 @@ static void get_quantized_recon_avx2(int16_t *residual, const uint8_t *pred_in,
|
|||
* \returns Whether coeff_out contains any non-zero coefficients.
|
||||
*/
|
||||
int uvg_quantize_residual_avx2(encoder_state_t *const state,
|
||||
const cu_info_t *const cur_cu, const int width, const color_t color,
|
||||
const cu_info_t *const cur_cu, const int width, const int height, const color_t color,
|
||||
const coeff_scan_order_t scan_order, const int use_trskip,
|
||||
const int in_stride, const int out_stride,
|
||||
const uint8_t *const ref_in, const uint8_t *const pred_in,
|
||||
|
@ -637,15 +668,15 @@ int uvg_quantize_residual_avx2(encoder_state_t *const state,
|
|||
// Temporary arrays to pass data to and from uvg_quant and transform functions.
|
||||
ALIGNED(64) int16_t residual[TR_MAX_WIDTH * TR_MAX_WIDTH];
|
||||
ALIGNED(64) coeff_t coeff[TR_MAX_WIDTH * TR_MAX_WIDTH];
|
||||
// ISP_TODO: non-square block implementation, height is passed but not used
|
||||
|
||||
const int height = width; // TODO: height for non-square blocks
|
||||
int has_coeffs = 0;
|
||||
|
||||
assert(width <= TR_MAX_WIDTH);
|
||||
assert(width >= TR_MIN_WIDTH);
|
||||
|
||||
// Get residual. (ref_in - pred_in -> residual)
|
||||
uvg_generate_residual(ref_in, pred_in, residual, width, in_stride, in_stride);
|
||||
uvg_generate_residual(ref_in, pred_in, residual, width, height, in_stride, in_stride);
|
||||
|
||||
if (state->tile->frame->lmcs_aps->m_sliceReshapeInfo.enableChromaAdj && color != COLOR_Y) {
|
||||
int y, x;
|
||||
|
@ -662,40 +693,51 @@ int uvg_quantize_residual_avx2(encoder_state_t *const state,
|
|||
|
||||
// Transform residual. (residual -> coeff)
|
||||
if (use_trskip) {
|
||||
uvg_transformskip(state->encoder_control, residual, coeff, width);
|
||||
uvg_transformskip(state->encoder_control, residual, coeff, width, height);
|
||||
}
|
||||
else {
|
||||
uvg_transform2d(state->encoder_control, residual, coeff, width, color, cur_cu);
|
||||
uvg_transform2d(state->encoder_control, residual, coeff, width, height, color, cur_cu);
|
||||
}
|
||||
|
||||
const uint16_t lfnst_index = color == COLOR_Y ? cur_cu->lfnst_idx : cur_cu->cr_lfnst_idx;
|
||||
|
||||
if (state->encoder_control->cfg.lfnst && cur_cu->type == CU_INTRA) {
|
||||
// Forward low frequency non-separable transform
|
||||
uvg_fwd_lfnst(cur_cu, width, height, color, lfnst_index, coeff, tree_type);
|
||||
uvg_fwd_lfnst(cur_cu, width, height, color, lfnst_index, coeff, tree_type, state->collocated_luma_mode);
|
||||
}
|
||||
|
||||
// Quantize coeffs. (coeff -> coeff_out)
|
||||
|
||||
if (state->encoder_control->cfg.rdoq_enable &&
|
||||
int abs_sum = 0;
|
||||
if(!use_trskip && state->encoder_control->cfg.dep_quant) {
|
||||
uvg_dep_quant(
|
||||
state,
|
||||
cur_cu,
|
||||
width,
|
||||
height,
|
||||
coeff,
|
||||
coeff_out,
|
||||
color,
|
||||
tree_type,
|
||||
&abs_sum,
|
||||
state->encoder_control->cfg.scaling_list);
|
||||
}
|
||||
else if (state->encoder_control->cfg.rdoq_enable &&
|
||||
(width > 4 || !state->encoder_control->cfg.rdoq_skip) && !use_trskip)
|
||||
{
|
||||
int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth;
|
||||
tr_depth += (cur_cu->part_size == SIZE_NxN ? 1 : 0);
|
||||
uvg_rdoq(state, coeff, coeff_out, width, width, color,
|
||||
scan_order, cur_cu->type, tr_depth, cur_cu->cbf, lfnst_index);
|
||||
uvg_rdoq(state, coeff, coeff_out, width, height, color,
|
||||
scan_order, cur_cu->type, cur_cu->cbf, lfnst_index, color == 0 ? cur_cu->tr_idx : 0);
|
||||
}
|
||||
else if (state->encoder_control->cfg.rdoq_enable && use_trskip) {
|
||||
uvg_ts_rdoq(state, coeff, coeff_out, width, width, color,
|
||||
uvg_ts_rdoq(state, coeff, coeff_out, width, height, color,
|
||||
scan_order);
|
||||
}
|
||||
else {
|
||||
uvg_quant(state, coeff, coeff_out, width, width, color,
|
||||
uvg_quant(state, coeff, coeff_out, width, height, color,
|
||||
scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && color == COLOR_Y, lfnst_index);
|
||||
}
|
||||
|
||||
// Check if there are any non-zero coefficients.
|
||||
for (int i = 0; i < width * width; i += 8) {
|
||||
for (int i = 0; i < width * height; i += 8) {
|
||||
__m128i v_quant_coeff = _mm_loadu_si128((__m128i*)&(coeff_out[i]));
|
||||
has_coeffs = !_mm_testz_si128(_mm_set1_epi8(0xFF), v_quant_coeff);
|
||||
if(has_coeffs) break;
|
||||
|
@ -705,25 +747,25 @@ int uvg_quantize_residual_avx2(encoder_state_t *const state,
|
|||
// rec_out.
|
||||
if (has_coeffs && !early_skip) {
|
||||
// Get quantized residual. (coeff_out -> coeff -> residual)
|
||||
uvg_dequant(state, coeff_out, coeff, width, width, color,
|
||||
uvg_dequant(state, coeff_out, coeff, width, height, color,
|
||||
cur_cu->type, cur_cu->tr_idx == MTS_SKIP && color == COLOR_Y);
|
||||
|
||||
if (state->encoder_control->cfg.lfnst && cur_cu->type == CU_INTRA) {
|
||||
// Inverse low frequency non-separable transform
|
||||
uvg_inv_lfnst(cur_cu, width, height, color, lfnst_index, coeff, tree_type);
|
||||
uvg_inv_lfnst(cur_cu, width, height, color, lfnst_index, coeff, tree_type, state->collocated_luma_mode);
|
||||
}
|
||||
if (use_trskip) {
|
||||
uvg_itransformskip(state->encoder_control, residual, coeff, width);
|
||||
uvg_itransformskip(state->encoder_control, residual, coeff, width, height);
|
||||
}
|
||||
else {
|
||||
uvg_itransform2d(state->encoder_control, residual, coeff, width, color, cur_cu);
|
||||
uvg_itransform2d(state->encoder_control, residual, coeff, width, height, color, cur_cu);
|
||||
}
|
||||
|
||||
if (state->tile->frame->lmcs_aps->m_sliceReshapeInfo.enableChromaAdj && color != COLOR_Y) {
|
||||
int y, x;
|
||||
int sign, absval;
|
||||
int maxAbsclipBD = (1 << UVG_BIT_DEPTH) - 1;
|
||||
for (y = 0; y < width; ++y) {
|
||||
for (y = 0; y < height; ++y) {
|
||||
for (x = 0; x < width; ++x) {
|
||||
residual[x + y * width] = (int16_t)CLIP((int16_t)(-maxAbsclipBD - 1), (int16_t)maxAbsclipBD, residual[x + y * width]);
|
||||
sign = residual[x + y * width] >= 0 ? 1 : -1;
|
||||
|
@ -739,14 +781,14 @@ int uvg_quantize_residual_avx2(encoder_state_t *const state,
|
|||
}
|
||||
|
||||
// Get quantized reconstruction. (residual + pred_in -> rec_out)
|
||||
get_quantized_recon_avx2(residual, pred_in, in_stride, rec_out, out_stride, width);
|
||||
get_quantized_recon_avx2(residual, pred_in, in_stride, rec_out, out_stride, width, height);
|
||||
}
|
||||
else if (rec_out != pred_in) {
|
||||
// With no coeffs and rec_out == pred_int we skip copying the coefficients
|
||||
// because the reconstruction is just the prediction.
|
||||
int y, x;
|
||||
|
||||
for (y = 0; y < width; ++y) {
|
||||
for (y = 0; y < height; ++y) {
|
||||
for (x = 0; x < width; ++x) {
|
||||
rec_out[x + y * out_stride] = pred_in[x + y * in_stride];
|
||||
}
|
||||
|
@ -763,20 +805,26 @@ int uvg_quantize_residual_avx2(encoder_state_t *const state,
|
|||
void uvg_dequant_avx2(const encoder_state_t * const state, coeff_t *q_coef, coeff_t *coef, int32_t width, int32_t height,color_t color, int8_t block_type, int8_t transform_skip)
|
||||
{
|
||||
const encoder_control_t * const encoder = state->encoder_control;
|
||||
if (encoder->cfg.dep_quant && !transform_skip) {
|
||||
uvg_dep_quant_dequant(state, block_type, width, height, color, q_coef, coef, encoder->cfg.scaling_list);
|
||||
return;
|
||||
}
|
||||
int32_t shift,add,coeff_q;
|
||||
int32_t n;
|
||||
int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((uvg_math_floor_log2(width) + uvg_math_floor_log2(height)) >> 1); // Represents scaling through forward transform
|
||||
const uint32_t log2_tr_width = uvg_g_convert_to_log2[width];
|
||||
const uint32_t log2_tr_height = uvg_g_convert_to_log2[height];
|
||||
int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_width + log2_tr_height) >> 1);
|
||||
bool needs_block_size_trafo_scale = !transform_skip && ((log2_tr_height + log2_tr_width) % 2 == 1);
|
||||
needs_block_size_trafo_scale |= 0; // Non log2 block size// Represents scaling through forward transform
|
||||
|
||||
|
||||
int32_t qp_scaled = uvg_get_scaled_qp(color, state->qp, (encoder->bitdepth-8)*6, encoder->qp_map[0]);
|
||||
qp_scaled = transform_skip ? MAX(qp_scaled, 4 + 6 * MIN_QP_PRIME_TS) : qp_scaled;
|
||||
|
||||
shift = 20 - QUANT_SHIFT - (transform_skip ? 0 : transform_shift);
|
||||
shift = 20 - QUANT_SHIFT - (transform_skip ? 0 : transform_shift - needs_block_size_trafo_scale);
|
||||
|
||||
if (encoder->scaling_list.enable)
|
||||
{
|
||||
uint32_t log2_tr_width = uvg_math_floor_log2(height) + 2;
|
||||
uint32_t log2_tr_height = uvg_math_floor_log2(width) + 2;
|
||||
int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)(color);
|
||||
|
||||
const int32_t* dequant_coef = encoder->scaling_list.de_quant_coeff[log2_tr_width - 2][log2_tr_height - 2][scalinglist_type][qp_scaled % 6];
|
||||
|
@ -797,7 +845,7 @@ void uvg_dequant_avx2(const encoder_state_t * const state, coeff_t *q_coef, coef
|
|||
}
|
||||
}
|
||||
} else {
|
||||
int32_t scale = uvg_g_inv_quant_scales[qp_scaled%6] << (qp_scaled/6);
|
||||
int32_t scale = uvg_g_inv_quant_scales[needs_block_size_trafo_scale][qp_scaled%6] << (qp_scaled/6);
|
||||
add = 1 << (shift-1);
|
||||
|
||||
__m256i v_scale = _mm256_set1_epi32(scale);
|
||||
|
@ -845,8 +893,9 @@ static uint32_t coeff_abs_sum_avx2(const coeff_t *coeffs, const size_t length)
|
|||
return parts[0] + parts[1] + parts[2] + parts[3];
|
||||
}
|
||||
|
||||
static uint32_t fast_coeff_cost_avx2(const coeff_t *coeff, int32_t width, uint64_t weights)
|
||||
static uint32_t fast_coeff_cost_avx2(const coeff_t *coeff, int32_t width, int32_t height, uint64_t weights)
|
||||
{
|
||||
assert((width == height) && "Non-square block handling not implemented for this function.");
|
||||
const __m256i zero = _mm256_setzero_si256();
|
||||
const __m256i threes = _mm256_set1_epi16(3);
|
||||
const __m256i negate_hibytes = _mm256_set1_epi16(0xff00);
|
||||
|
@ -863,7 +912,7 @@ static uint32_t fast_coeff_cost_avx2(const coeff_t *coeff, int32_t width, uint64
|
|||
__m256i wts_lo = _mm256_broadcastsi128_si256(wts_lo_128);
|
||||
__m256i wts_hi = _mm256_broadcastsi128_si256(wts_hi_128);
|
||||
|
||||
for (int i = 0; i < width * width; i += 32) {
|
||||
for (int i = 0; i < width * height; i += 32) {
|
||||
__m256i curr_lo = _mm256_loadu_si256 ((const __m256i *)(coeff + i));
|
||||
__m256i curr_abs_lo = _mm256_abs_epi16 (curr_lo);
|
||||
__m256i curr_max3_lo = _mm256_min_epu16 (curr_abs_lo, threes);
|
||||
|
|
|
@ -771,6 +771,12 @@ static void fast_inverse_dst_4x4_generic(int8_t bitdepth, const int16_t* input,
|
|||
|
||||
|
||||
// DCT-2
|
||||
// Initializer list for the 2-point DCT-2 coefficient matrix, flattened row by
// row: first row {a, a}, second row {a, -a}.
#define DEFINE_DCT2_P2_MATRIX(a) \
|
||||
{ \
|
||||
a, a, \
|
||||
a, -a \
|
||||
}
|
||||
|
||||
#define DEFINE_DCT2_P4_MATRIX(a,b,c) \
|
||||
{ \
|
||||
a, a, a, a, \
|
||||
|
@ -1002,6 +1008,7 @@ static void fast_inverse_dst_4x4_generic(int8_t bitdepth, const int16_t* input,
|
|||
}
|
||||
|
||||
// DCT-2
|
||||
const int16_t uvg_g_DCT2P2[4] = DEFINE_DCT2_P2_MATRIX(64);
|
||||
const int16_t uvg_g_DCT2P4[16] = DEFINE_DCT2_P4_MATRIX(64, 83, 36);
|
||||
const int16_t uvg_g_DCT2P8[64] = DEFINE_DCT2_P8_MATRIX(64, 83, 36, 89, 75, 50, 18);
|
||||
const int16_t uvg_g_DCT2P16[256] = DEFINE_DCT2_P16_MATRIX(64, 83, 36, 89, 75, 50, 18, 90, 87, 80, 70, 57, 43, 25, 9);
|
||||
|
@ -1020,6 +1027,68 @@ const int16_t uvg_g_DCT8P16[256] = DEFINE_DCT8_P16_MATRIX(88, 88, 87, 85, 81, 77
|
|||
const int16_t uvg_g_DCT8P32[1024] = DEFINE_DCT8_P32_MATRIX(90, 90, 89, 88, 87, 86, 85, 84, 82, 80, 78, 77, 74, 72, 68, 66, 63, 60, 56, 53, 50, 46, 42, 38, 34, 30, 26, 21, 17, 13, 9, 4);
|
||||
|
||||
// ********************************** DCT-2 **********************************
|
||||
static void fastForwardDCT2_B2(const int16_t* src, int16_t* dst, int32_t shift, int line, int skip_line, int skip_line2)
|
||||
{
|
||||
int32_t j;
|
||||
int32_t E, O;
|
||||
int32_t add = (shift > 0) ? (1 << (shift - 1)) : 0;
|
||||
|
||||
const int16_t* iT = uvg_g_DCT2P2;
|
||||
|
||||
int16_t *p_coef = dst;
|
||||
const int reduced_line = line - skip_line;
|
||||
for (j = 0; j < reduced_line; j++)
|
||||
{
|
||||
/* E and O */
|
||||
E = src[0] + src[1];
|
||||
O = src[0] - src[1];
|
||||
|
||||
dst[0] = (iT[0] * E + add) >> shift;
|
||||
dst[line] = (iT[2] * O + add) >> shift;
|
||||
|
||||
|
||||
src += 2;
|
||||
dst++;
|
||||
}
|
||||
if (skip_line)
|
||||
{
|
||||
dst = p_coef + reduced_line;
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
memset(dst, 0, sizeof(int16_t) * skip_line);
|
||||
dst += line;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void fastInverseDCT2_B2(const int16_t* src, int16_t* dst, int shift, int line, int skip_line, int skip_line2)
|
||||
{
|
||||
int32_t j;
|
||||
int32_t E, O;
|
||||
int32_t add = 1 << (shift - 1);
|
||||
|
||||
const int16_t* iT = uvg_g_DCT2P2;
|
||||
|
||||
const int reduced_line = line - skip_line;
|
||||
for (j = 0; j < reduced_line; j++)
|
||||
{
|
||||
/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
|
||||
E = iT[0] * (src[0] + src[line]);
|
||||
O = iT[2] * (src[0] - src[line]);
|
||||
|
||||
/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
|
||||
dst[0] = (short)CLIP(-32768, 32767, (E + add) >> shift);
|
||||
dst[1] = (short)CLIP(-32768, 32767, (O + add) >> shift);
|
||||
|
||||
src++;
|
||||
dst += 2;
|
||||
}
|
||||
if (skip_line)
|
||||
{
|
||||
memset(dst, 0, (skip_line << 1) * sizeof(int16_t));
|
||||
}
|
||||
}
|
||||
|
||||
static void fastForwardDCT2_B4(const int16_t* src, int16_t* dst, int32_t shift, int line, int skip_line, int skip_line2)
|
||||
{
|
||||
int32_t j;
|
||||
|
@ -1366,11 +1435,6 @@ static void fastForwardDCT2_B32(const int16_t* src, int16_t* dst, int32_t shift,
|
|||
dst += line;
|
||||
}
|
||||
}
|
||||
if (skip_line2) {
|
||||
const int reduced_line = line - skip_line2;
|
||||
dst = p_coef + reduced_line * 32;
|
||||
memset(dst, 0, skip_line2 * 32 * sizeof(coeff_t));
|
||||
}
|
||||
}
|
||||
|
||||
static void fastInverseDCT2_B32(const int16_t* src, int16_t* dst, int32_t shift, int line, int skip_line, int skip_line2)
|
||||
|
@ -2417,16 +2481,16 @@ DCT_MTS_NXN_GENERIC(DST1, 32);
|
|||
typedef void partial_tr_func(const int16_t*, int16_t*, int32_t, int, int, int);
|
||||
|
||||
// ToDo: Enable MTS 2x2 and 64x64 transforms
|
||||
static partial_tr_func* dct_table[3][5] = {
|
||||
{ fastForwardDCT2_B4, fastForwardDCT2_B8, fastForwardDCT2_B16, fastForwardDCT2_B32, NULL },
|
||||
{ fastForwardDCT8_B4, fastForwardDCT8_B8, fastForwardDCT8_B16, fastForwardDCT8_B32, NULL },
|
||||
{ fastForwardDST7_B4, fastForwardDST7_B8, fastForwardDST7_B16, fastForwardDST7_B32, NULL },
|
||||
static partial_tr_func* dct_table[3][6] = {
|
||||
{ fastForwardDCT2_B2, fastForwardDCT2_B4, fastForwardDCT2_B8, fastForwardDCT2_B16, fastForwardDCT2_B32, NULL },
|
||||
{ NULL, fastForwardDCT8_B4, fastForwardDCT8_B8, fastForwardDCT8_B16, fastForwardDCT8_B32, NULL },
|
||||
{ NULL, fastForwardDST7_B4, fastForwardDST7_B8, fastForwardDST7_B16, fastForwardDST7_B32, NULL },
|
||||
};
|
||||
|
||||
static partial_tr_func* idct_table[3][5] = {
|
||||
{ fastInverseDCT2_B4, fastInverseDCT2_B8, fastInverseDCT2_B16, fastInverseDCT2_B32, NULL/*fastInverseDCT2_B64*/ },
|
||||
{ fastInverseDCT8_B4, fastInverseDCT8_B8, fastInverseDCT8_B16, fastInverseDCT8_B32, NULL },
|
||||
{ fastInverseDST7_B4, fastInverseDST7_B8, fastInverseDST7_B16, fastInverseDST7_B32, NULL },
|
||||
static partial_tr_func* idct_table[3][6] = {
|
||||
{ fastInverseDCT2_B2, fastInverseDCT2_B4, fastInverseDCT2_B8, fastInverseDCT2_B16, fastInverseDCT2_B32, NULL/*fastInverseDCT2_B64*/ },
|
||||
{ NULL, fastInverseDCT8_B4, fastInverseDCT8_B8, fastInverseDCT8_B16, fastInverseDCT8_B32, NULL },
|
||||
{ NULL, fastInverseDST7_B4, fastInverseDST7_B8, fastInverseDST7_B16, fastInverseDST7_B32, NULL },
|
||||
};
|
||||
|
||||
|
||||
|
@ -2436,11 +2500,12 @@ static const tr_type_t mts_subset_intra[4][2] = { { DST7, DST7 }, { DCT8, DST7 }
|
|||
|
||||
void uvg_get_tr_type(
|
||||
int8_t width,
|
||||
int8_t height,
|
||||
color_t color,
|
||||
const cu_info_t* tu,
|
||||
tr_type_t* hor_out,
|
||||
tr_type_t* ver_out,
|
||||
const int8_t mts_idx)
|
||||
const int8_t mts_type)
|
||||
{
|
||||
*hor_out = DCT2;
|
||||
*ver_out = DCT2;
|
||||
|
@ -2450,13 +2515,19 @@ void uvg_get_tr_type(
|
|||
return;
|
||||
}
|
||||
|
||||
const int height = width;
|
||||
const bool explicit_mts = mts_idx == UVG_MTS_BOTH || (tu->type == CU_INTRA ? mts_idx == UVG_MTS_INTRA : (mts_idx == UVG_MTS_INTER && tu->type == CU_INTER));
|
||||
const bool implicit_mts = tu->type == CU_INTRA && (mts_idx == UVG_MTS_IMPLICIT || mts_idx == UVG_MTS_INTER);
|
||||
const bool explicit_mts = mts_type == UVG_MTS_BOTH || (tu->type == CU_INTRA ? mts_type == UVG_MTS_INTRA : (mts_type == UVG_MTS_INTER && tu->type == CU_INTER));
|
||||
const bool implicit_mts = tu->type == CU_INTRA && (mts_type == UVG_MTS_IMPLICIT || mts_type == UVG_MTS_INTER);
|
||||
|
||||
assert(!(explicit_mts && implicit_mts));
|
||||
const bool is_isp = tu->type == CU_INTRA && tu->intra.isp_mode && color == COLOR_Y ? tu->intra.isp_mode : 0;
|
||||
const int8_t lfnst_idx = color == COLOR_Y ? tu->lfnst_idx : tu->cr_lfnst_idx;
|
||||
// const bool is_sbt = cu->type == CU_INTER && tu->sbt && color == COLOR_Y; // TODO: check SBT here when implemented
|
||||
|
||||
if (implicit_mts)
|
||||
if (is_isp && lfnst_idx) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (implicit_mts || (is_isp && explicit_mts))
|
||||
{
|
||||
bool width_ok = width >= 4 && width <= 16;
|
||||
bool height_ok = height >= 4 && height <= 16;
|
||||
|
@ -2472,6 +2543,10 @@ void uvg_get_tr_type(
|
|||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
TODO: SBT HANDLING
|
||||
*/
|
||||
|
||||
if (explicit_mts)
|
||||
{
|
||||
if (tu->tr_idx > MTS_SKIP) {
|
||||
|
@ -2487,27 +2562,31 @@ static void mts_dct_generic(
|
|||
const color_t color,
|
||||
const cu_info_t* tu,
|
||||
const int8_t width,
|
||||
const int8_t height,
|
||||
const int16_t* input,
|
||||
int16_t* output,
|
||||
const int8_t mts_idx)
|
||||
const int8_t mts_type)
|
||||
{
|
||||
tr_type_t type_hor;
|
||||
tr_type_t type_ver;
|
||||
|
||||
uvg_get_tr_type(width, color, tu, &type_hor, &type_ver, mts_idx);
|
||||
uvg_get_tr_type(width, height, color, tu, &type_hor, &type_ver, mts_type);
|
||||
|
||||
if (type_hor == DCT2 && type_ver == DCT2 && !tu->lfnst_idx && !tu->cr_lfnst_idx)
|
||||
if (type_hor == DCT2 && type_ver == DCT2 && !tu->lfnst_idx && !tu->cr_lfnst_idx && width == height)
|
||||
{
|
||||
dct_func *dct_func = uvg_get_dct_func(width, color, tu->type);
|
||||
dct_func *dct_func = uvg_get_dct_func(width, height, color, tu->type);
|
||||
dct_func(bitdepth, input, output);
|
||||
}
|
||||
else
|
||||
{
|
||||
const int height = width;
|
||||
int skip_width = (type_hor != DCT2 && width == 32) ? 16 : (width > 32 ? width - 32 : 0);
|
||||
int skip_height = (type_ver != DCT2 && height == 32) ? 16 : (height > 32 ? height - 32 : 0);
|
||||
const int log2_width_minus2 = uvg_g_convert_to_bit[width];
|
||||
if(tu->lfnst_idx || tu->cr_lfnst_idx) {
|
||||
const int log2_width_minus1 = uvg_g_convert_to_log2[width] - 1;
|
||||
const int log2_height_minus1 = uvg_g_convert_to_log2[height] - 1;
|
||||
//const int log2_width_minus2 = uvg_g_convert_to_bit[width];
|
||||
//const int log2_height_minus2 = uvg_g_convert_to_bit[height];
|
||||
|
||||
if((tu->lfnst_idx && color == COLOR_Y) || (tu->cr_lfnst_idx && color != COLOR_Y)) {
|
||||
if ((width == 4 && height > 4) || (width > 4 && height == 4))
|
||||
{
|
||||
skip_width = width - 4;
|
||||
|
@ -2520,17 +2599,22 @@ static void mts_dct_generic(
|
|||
}
|
||||
}
|
||||
|
||||
partial_tr_func* dct_hor = dct_table[type_hor][log2_width_minus2];
|
||||
partial_tr_func* dct_ver = dct_table[type_ver][log2_width_minus2];
|
||||
partial_tr_func* dct_hor = width != 1 ? dct_table[type_hor][log2_width_minus1] : NULL;
|
||||
partial_tr_func* dct_ver = height != 1 ? dct_table[type_ver][log2_height_minus1] : NULL;
|
||||
|
||||
int16_t tmp[32 * 32];
|
||||
const int32_t shift_1st = log2_width_minus2 + bitdepth - 7;
|
||||
const int32_t shift_2nd = log2_width_minus2 + 8;
|
||||
|
||||
const int32_t shift_1st = log2_width_minus1 + bitdepth - 8;
|
||||
const int32_t shift_2nd = log2_height_minus1 + 7;
|
||||
if (height == 1) {
|
||||
dct_hor(input, output, shift_1st, height, 0, skip_width);
|
||||
} else if (width == 1) {
|
||||
dct_ver(input, output, log2_height_minus1 + 1 + bitdepth + 6 - 15, width, 0, skip_height);
|
||||
} else {
|
||||
dct_hor(input, tmp, shift_1st, height, 0, skip_width);
|
||||
dct_ver(tmp, output, shift_2nd, width, skip_width, skip_height);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void mts_idct_generic(
|
||||
|
@ -2538,38 +2622,59 @@ static void mts_idct_generic(
|
|||
const color_t color,
|
||||
const cu_info_t* tu,
|
||||
const int8_t width,
|
||||
const int8_t height,
|
||||
const int16_t* input,
|
||||
int16_t* output,
|
||||
const int8_t mts_idx)
|
||||
const int8_t mts_type)
|
||||
{
|
||||
tr_type_t type_hor;
|
||||
tr_type_t type_ver;
|
||||
|
||||
uvg_get_tr_type(width, color, tu, &type_hor, &type_ver, mts_idx);
|
||||
uvg_get_tr_type(width, height, color, tu, &type_hor, &type_ver, mts_type);
|
||||
|
||||
if (type_hor == DCT2 && type_ver == DCT2)
|
||||
if (type_hor == DCT2 && type_ver == DCT2 && !tu->lfnst_idx && !tu->cr_lfnst_idx && width == height)
|
||||
{
|
||||
dct_func *idct_func = uvg_get_idct_func(width, color, tu->type);
|
||||
dct_func *idct_func = uvg_get_idct_func(width, height, color, tu->type);
|
||||
idct_func(bitdepth, input, output);
|
||||
}
|
||||
else
|
||||
{
|
||||
const int height = width;
|
||||
const int skip_width = (type_hor != DCT2 && width == 32) ? 16 : width > 32 ? width - 32 : 0;
|
||||
const int skip_height = (type_ver != DCT2 && height == 32) ? 16 : height > 32 ? height - 32 : 0;
|
||||
const int log2_width_minus2 = uvg_g_convert_to_bit[width];
|
||||
int skip_width = (type_hor != DCT2 && width == 32) ? 16 : width > 32 ? width - 32 : 0;
|
||||
int skip_height = (type_ver != DCT2 && height == 32) ? 16 : height > 32 ? height - 32 : 0;
|
||||
const int log2_width_minus1 = uvg_g_convert_to_log2[width] - 1;
|
||||
const int log2_height_minus1 = uvg_g_convert_to_log2[height] - 1;
|
||||
|
||||
partial_tr_func* idct_hor = idct_table[type_hor][log2_width_minus2];
|
||||
partial_tr_func* idct_ver = idct_table[type_ver][log2_width_minus2];
|
||||
if ((tu->lfnst_idx && color == COLOR_Y) || (tu->cr_lfnst_idx && color != COLOR_Y)) {
|
||||
if ((width == 4 && height > 4) || (width > 4 && height == 4)) {
|
||||
skip_width = width - 4;
|
||||
skip_height = height - 4;
|
||||
}
|
||||
else if ((width >= 8 && height >= 8)) {
|
||||
skip_width = width - 8;
|
||||
skip_height = height - 8;
|
||||
}
|
||||
}
|
||||
|
||||
partial_tr_func* idct_hor = width != 1 ? idct_table[type_hor][log2_width_minus1] : NULL;
|
||||
partial_tr_func* idct_ver = height != 1 ? idct_table[type_ver][log2_height_minus1] : NULL;
|
||||
|
||||
int16_t tmp[32 * 32];
|
||||
const int32_t shift_1st = 7;
|
||||
const int32_t shift_2nd = 20 - bitdepth;
|
||||
const int max_log2_tr_dynamic_range = 15;
|
||||
const int transform_matrix_shift = 6;
|
||||
|
||||
const int32_t shift_1st = transform_matrix_shift + 1;
|
||||
const int32_t shift_2nd = (transform_matrix_shift + max_log2_tr_dynamic_range - 1) - bitdepth;
|
||||
|
||||
if (height == 1) {
|
||||
idct_hor(input, output, shift_2nd + 1, height, 0, skip_width);
|
||||
} else if (width == 1) {
|
||||
idct_ver(input, output, shift_2nd + 1, width, 0, skip_height);
|
||||
} else {
|
||||
idct_ver(input, tmp, shift_1st, width, skip_width, skip_height);
|
||||
idct_hor(tmp, output, shift_2nd, height, 0, skip_width);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int uvg_strategy_register_dct_generic(void* opaque, uint8_t bitdepth)
|
||||
|
@ -2582,6 +2687,7 @@ int uvg_strategy_register_dct_generic(void* opaque, uint8_t bitdepth)
|
|||
success &= uvg_strategyselector_register(opaque, "dct_8x8", "generic", 0, &dct_8x8_generic);
|
||||
success &= uvg_strategyselector_register(opaque, "dct_16x16", "generic", 0, &dct_16x16_generic);
|
||||
success &= uvg_strategyselector_register(opaque, "dct_32x32", "generic", 0, &dct_32x32_generic);
|
||||
//success &= uvg_strategyselector_register(opaque, "dct_non_square", "generic", 0, &dct_non_square_generic);
|
||||
|
||||
success &= uvg_strategyselector_register(opaque, "fast_inverse_dst_4x4", "generic", 0, &fast_inverse_dst_4x4_generic);
|
||||
|
||||
|
|
252
src/strategies/generic/depquant-generic.c
Normal file
252
src/strategies/generic/depquant-generic.c
Normal file
|
@ -0,0 +1,252 @@
|
|||
/*****************************************************************************
|
||||
* This file is part of uvg266 VVC encoder.
|
||||
*
|
||||
* Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright notice, this
|
||||
* list of conditions and the following disclaimer in the documentation and/or
|
||||
* other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
|
||||
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
****************************************************************************/
|
||||
|
||||
#include "strategies/generic/depquant-generic.h"
|
||||
|
||||
#include "dep_quant.h"
|
||||
|
||||
#include "cu.h"
|
||||
#include "encoderstate.h"
|
||||
#include "intra.h"
|
||||
#include "rdo.h"
|
||||
#include "strategyselector.h"
|
||||
#include "transform.h"
|
||||
#include "uvg_math.h"
|
||||
#include "generic/quant-generic.h"
|
||||
/*
 * Cost table consulted by checkRdCostStart() for absolute levels >= 4.
 * First index:  the state's m_goRicePar (four rows are provided).
 * Second index: (absLevel - 4) >> 1, clamped by the caller to RICEMAX - 1.
 * The selected entry is added to the RD cost of the candidate.
 * NOTE(review): every entry is a multiple of 32768 (1 << 15), so these look
 * like bit counts in 15-bit fixed point -- confirm against dep_quant.h.
 */
static const int32_t g_goRiceBits[4][RICEMAX] = {
|
||||
{32768, 65536, 98304, 131072, 163840, 196608, 262144, 262144,
|
||||
327680, 327680, 327680, 327680, 393216, 393216, 393216, 393216,
|
||||
393216, 393216, 393216, 393216, 458752, 458752, 458752, 458752,
|
||||
458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752},
|
||||
{65536, 65536, 98304, 98304, 131072, 131072, 163840, 163840,
|
||||
196608, 196608, 229376, 229376, 294912, 294912, 294912, 294912,
|
||||
360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448,
|
||||
425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984},
|
||||
{98304, 98304, 98304, 98304, 131072, 131072, 131072, 131072,
|
||||
163840, 163840, 163840, 163840, 196608, 196608, 196608, 196608,
|
||||
229376, 229376, 229376, 229376, 262144, 262144, 262144, 262144,
|
||||
327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680},
|
||||
{131072, 131072, 131072, 131072, 131072, 131072, 131072, 131072,
|
||||
163840, 163840, 163840, 163840, 163840, 163840, 163840, 163840,
|
||||
196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608,
|
||||
229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376},
|
||||
};
|
||||
|
||||
|
||||
static INLINE void checkRdCostSkipSbbZeroOut(
|
||||
Decision* decision,
|
||||
const all_depquant_states* const state,
|
||||
int decision_id,
|
||||
int skip_offset) {
|
||||
int64_t rdCost = state->m_rdCost[decision_id + skip_offset] + state->m_sbbFracBits[decision_id + skip_offset][0];
|
||||
decision->rdCost[decision_id] = rdCost;
|
||||
decision->absLevel[decision_id] = 0;
|
||||
decision->prevId[decision_id] = 4 + state->m_stateId[decision_id + skip_offset];
|
||||
}
|
||||
|
||||
/**
 * \brief Offer the "skip sub-block" choice to one decision slot.
 *
 * Same candidate as checkRdCostSkipSbbZeroOut(), but the slot is only
 * overwritten when the candidate cost is strictly lower than the cost
 * currently stored there.
 *
 * \param state        State storage the skip state is read from.
 * \param decisions    Decision record to update.
 * \param decision_id  Slot to test and possibly overwrite.
 * \param skip_offset  Offset of the skip states inside the state storage.
 */
static INLINE void checkRdCostSkipSbb(const all_depquant_states* const state, Decision * decisions, int decision_id, int skip_offset)
{
  const int src = skip_offset + decision_id;
  const int64_t skip_cost = state->m_rdCost[src] + state->m_sbbFracBits[src][0];

  // Keep the existing decision unless skipping is strictly cheaper.
  if (skip_cost >= decisions->rdCost[decision_id]) {
    return;
  }

  decisions->rdCost[decision_id]   = skip_cost;
  decisions->absLevel[decision_id] = 0;
  decisions->prevId[decision_id]   = 4 + state->m_stateId[src];
}
|
||||
|
||||
/**
 * \brief Offer "this coefficient is the first coded one" to a decision slot.
 *
 * The candidate cost is deltaDist + lastOffset plus the rate of the level:
 * levels below 4 read m_coeffFracBits directly, larger levels combine a
 * reduced m_coeffFracBits entry with a g_goRiceBits entry whose column is
 * clamped to RICEMAX - 1. When the candidate is strictly cheaper than the
 * stored decision, the slot takes the candidate with prevId -1.
 *
 * \param state        Start state supplying m_coeffFracBits and m_goRicePar.
 * \param lastOffset   Cost added for signalling the last position.
 * \param pqData       Pre-quantized candidates (deltaDist / absLevel per slot).
 * \param decisions    Decision record to update.
 * \param decision_id  Slot to test and possibly overwrite.
 */
static INLINE void checkRdCostStart(const depquant_state* const state, int32_t lastOffset, const PQData *pqData, Decision *decisions, int decision_id)
{
  int64_t cost = pqData->deltaDist[decision_id] + lastOffset;

  if (pqData->absLevel[decision_id] >= 4) {
    const coeff_t half = (pqData->absLevel[decision_id] - 4) >> 1;
    cost += state->m_coeffFracBits[pqData->absLevel[decision_id] - (half << 1)]
          + g_goRiceBits[state->m_goRicePar][half < RICEMAX ? half : RICEMAX - 1];
  } else {
    cost += state->m_coeffFracBits[pqData->absLevel[decision_id]];
  }

  // Only a strictly cheaper start replaces the current decision.
  if (cost >= decisions->rdCost[decision_id]) {
    return;
  }

  decisions->rdCost[decision_id]   = cost;
  decisions->absLevel[decision_id] = pqData->absLevel[decision_id];
  decisions->prevId[decision_id]   = -1;
}
|
||||
|
||||
|
||||
|
||||
/*
 * Template copied into a Decision by xDecide() before any candidate is
 * evaluated. All eight rdCost slots start at INT64_MAX >> 2; slots 0-3 carry
 * absLevel -1 / prevId -2, slots 4-7 carry absLevel 0 / prevId 4..7.
 */
static const Decision startDec = { .rdCost = {INT64_MAX >> 2, INT64_MAX >> 2, INT64_MAX >> 2, INT64_MAX >> 2, INT64_MAX >> 2, INT64_MAX >> 2, INT64_MAX >> 2, INT64_MAX >> 2},
|
||||
.absLevel = {-1, -1, -1, -1, 0, 0, 0, 0}, .prevId = {-2, -2, -2, -2, 4, 5, 6, 7} };
|
||||
|
||||
/**
 * \brief Produce the four quantization candidates for one coefficient.
 *
 * Starting from the clamped index qIdx (at least 1, at most m_maxQIdx), four
 * consecutive indices are visited. For each one the distortion delta and the
 * resulting absolute level are stored in pqData at slot (index & 3), so every
 * slot of pqData is written exactly once.
 *
 * \param qp         Quantizer parameters (m_QAdd, m_QShift, m_Dist* fields).
 * \param absCoeff   Absolute value of the coefficient.
 * \param pqData     Output candidates.
 * \param quanCoeff  Scaling factor applied to absCoeff.
 */
static INLINE void preQuantCoeff(const quant_block * const qp, const coeff_t absCoeff, PQData* pqData, coeff_t quanCoeff)
{
  int64_t scaledOrg = (int64_t)(absCoeff) * quanCoeff;
  coeff_t qIdx      = MAX(1, (coeff_t)MIN(qp->m_maxQIdx, ((scaledOrg + qp->m_QAdd) >> qp->m_QShift)));
  int64_t scaledAdd = qIdx * qp->m_DistStepAdd - scaledOrg * qp->m_DistOrgFact;

  for (int candidate = 0; candidate < 4; ++candidate) {
    if (candidate > 0) {
      // Advance the distortion term between candidates (not after the last).
      scaledAdd += qp->m_DistStepAdd;
    }
    const int slot = qIdx & 3;
    pqData->deltaDist[slot] = (scaledAdd * qIdx + qp->m_DistAdd) >> qp->m_DistShift;
    // The level is derived from the index *after* it has been incremented.
    ++qIdx;
    pqData->absLevel[slot] = qIdx >> 1;
  }
}
|
||||
|
||||
static void xDecide(
|
||||
all_depquant_states* const all_states,
|
||||
depquant_state* const m_startState,
|
||||
quant_block* qp,
|
||||
const enum ScanPosType spt,
|
||||
const coeff_t absCoeff,
|
||||
const int lastOffset,
|
||||
Decision* decisions,
|
||||
bool zeroOut,
|
||||
coeff_t quanCoeff,
|
||||
const int skip_offset,
|
||||
const int prev_offset)
|
||||
{
|
||||
memcpy(decisions, &startDec, sizeof(Decision));
|
||||
|
||||
if (zeroOut) {
|
||||
if (spt == SCAN_EOCSBB) {
|
||||
checkRdCostSkipSbbZeroOut(decisions, all_states, 0, skip_offset);
|
||||
checkRdCostSkipSbbZeroOut(decisions, all_states, 1, skip_offset);
|
||||
checkRdCostSkipSbbZeroOut(decisions, all_states, 2, skip_offset);
|
||||
checkRdCostSkipSbbZeroOut(decisions, all_states, 3, skip_offset);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
PQData pqData;
|
||||
preQuantCoeff(qp, absCoeff, &pqData, quanCoeff);
|
||||
uvg_dep_quant_check_rd_costs(all_states, spt, &pqData, decisions, 0, 2, prev_offset + 0);
|
||||
uvg_dep_quant_check_rd_costs(all_states, spt, &pqData, decisions, 2, 0, prev_offset + 1);
|
||||
uvg_dep_quant_check_rd_costs(all_states, spt, &pqData, decisions, 1, 3, prev_offset + 2);
|
||||
uvg_dep_quant_check_rd_costs(all_states, spt, &pqData, decisions, 3, 1, prev_offset + 3);
|
||||
if (spt == SCAN_EOCSBB) {
|
||||
checkRdCostSkipSbb(all_states, decisions, 0, skip_offset);
|
||||
checkRdCostSkipSbb(all_states, decisions, 1, skip_offset);
|
||||
checkRdCostSkipSbb(all_states, decisions, 2, skip_offset);
|
||||
checkRdCostSkipSbb(all_states, decisions, 3, skip_offset);
|
||||
}
|
||||
|
||||
checkRdCostStart(m_startState, lastOffset, &pqData, decisions, 0);
|
||||
checkRdCostStart(m_startState, lastOffset, &pqData, decisions, 2);
|
||||
}
|
||||
|
||||
|
||||
static void uvg_dep_quant_decide_and_update_generic(
|
||||
rate_estimator_t* re,
|
||||
context_store* ctxs,
|
||||
struct dep_quant_scan_info const* const scan_info,
|
||||
const coeff_t absCoeff,
|
||||
const uint32_t scan_pos,
|
||||
const uint32_t width_in_sbb,
|
||||
const uint32_t height_in_sbb,
|
||||
const NbInfoSbb next_nb_info_ssb,
|
||||
bool zeroOut,
|
||||
coeff_t quantCoeff,
|
||||
const uint32_t effWidth,
|
||||
const uint32_t effHeight,
|
||||
bool is_chroma)
|
||||
{
|
||||
Decision* decisions = &ctxs->m_trellis[scan_pos];
|
||||
SWAP(ctxs->m_curr_state_offset, ctxs->m_prev_state_offset, int);
|
||||
|
||||
enum ScanPosType spt = 0;
|
||||
if ((scan_pos & 15) == 15 && scan_pos > 16 && scan_pos < effHeight * effWidth - 1)
|
||||
{
|
||||
spt = SCAN_SOCSBB;
|
||||
}
|
||||
else if ((scan_pos & 15) == 0 && scan_pos > 0 && scan_pos < effHeight * effWidth - 16)
|
||||
{
|
||||
spt = SCAN_EOCSBB;
|
||||
}
|
||||
|
||||
xDecide(&ctxs->m_allStates, &ctxs->m_startState, ctxs->m_quant, spt, absCoeff, re->m_lastBitsX[scan_info->pos_x] + re->m_lastBitsY[scan_info->pos_y], decisions, zeroOut, quantCoeff,ctxs->m_skip_state_offset, ctxs->m_prev_state_offset);
|
||||
|
||||
if (scan_pos) {
|
||||
if (!(scan_pos & 15)) {
|
||||
SWAP(ctxs->m_common_context.m_curr_sbb_ctx_offset, ctxs->m_common_context.m_prev_sbb_ctx_offset, int);
|
||||
uvg_dep_quant_update_state_eos(ctxs, scan_pos, scan_info->cg_pos, scan_info->sig_ctx_offset[is_chroma], scan_info->gtx_ctx_offset[is_chroma], width_in_sbb, height_in_sbb, scan_info->next_sbb_right, scan_info->next_sbb_below, decisions, 0);
|
||||
uvg_dep_quant_update_state_eos(ctxs, scan_pos, scan_info->cg_pos, scan_info->sig_ctx_offset[is_chroma], scan_info->gtx_ctx_offset[is_chroma], width_in_sbb, height_in_sbb, scan_info->next_sbb_right, scan_info->next_sbb_below, decisions, 1);
|
||||
uvg_dep_quant_update_state_eos(ctxs, scan_pos, scan_info->cg_pos, scan_info->sig_ctx_offset[is_chroma], scan_info->gtx_ctx_offset[is_chroma], width_in_sbb, height_in_sbb, scan_info->next_sbb_right, scan_info->next_sbb_below, decisions, 2);
|
||||
uvg_dep_quant_update_state_eos(ctxs, scan_pos, scan_info->cg_pos, scan_info->sig_ctx_offset[is_chroma], scan_info->gtx_ctx_offset[is_chroma], width_in_sbb, height_in_sbb, scan_info->next_sbb_right, scan_info->next_sbb_below, decisions, 3);
|
||||
memcpy(decisions->prevId + 4, decisions->prevId, 4 * sizeof(int32_t));
|
||||
memcpy(decisions->absLevel + 4, decisions->absLevel, 4 * sizeof(int32_t));
|
||||
memcpy(decisions->rdCost + 4, decisions->rdCost, 4 * sizeof(int64_t));
|
||||
} else if (!zeroOut) {
|
||||
uvg_dep_quant_update_state(ctxs, next_nb_info_ssb.num, scan_pos, decisions, scan_info->sig_ctx_offset[is_chroma], scan_info->gtx_ctx_offset[is_chroma], next_nb_info_ssb, 4, false, 0);
|
||||
uvg_dep_quant_update_state(ctxs, next_nb_info_ssb.num, scan_pos, decisions, scan_info->sig_ctx_offset[is_chroma], scan_info->gtx_ctx_offset[is_chroma], next_nb_info_ssb, 4, false, 1);
|
||||
uvg_dep_quant_update_state(ctxs, next_nb_info_ssb.num, scan_pos, decisions, scan_info->sig_ctx_offset[is_chroma], scan_info->gtx_ctx_offset[is_chroma], next_nb_info_ssb, 4, false, 2);
|
||||
uvg_dep_quant_update_state(ctxs, next_nb_info_ssb.num, scan_pos, decisions, scan_info->sig_ctx_offset[is_chroma], scan_info->gtx_ctx_offset[is_chroma], next_nb_info_ssb, 4, false, 3);
|
||||
}
|
||||
|
||||
if (spt == SCAN_SOCSBB) {
|
||||
SWAP(ctxs->m_skip_state_offset, ctxs->m_prev_state_offset, int);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void uvg_find_first_non_zero_generic(const coeff_t* srcCoeff, const bool enableScalingLists, const context_store * const dep_quant_context, const uint32_t* const scan, const int32_t* q_coeff, int* firstTestPos, int width, int height)
|
||||
{
|
||||
const int default_quant_coeff = dep_quant_context->m_quant->m_QScale;
|
||||
const int32_t thres = dep_quant_context->m_quant->m_thresLast;
|
||||
int temp = *firstTestPos;
|
||||
for (; temp >= 0; (temp)--) {
|
||||
coeff_t thresTmp = (enableScalingLists) ? (thres / (4 * q_coeff[scan[(temp)]])) : (thres / (4 * default_quant_coeff));
|
||||
if (abs(srcCoeff[scan[(temp)]]) > thresTmp) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
*firstTestPos = temp;
|
||||
}
|
||||
|
||||
int uvg_strategy_register_depquant_generic(void* opaque, uint8_t bitdepth)
|
||||
{
|
||||
bool success = true;
|
||||
|
||||
success &= uvg_strategyselector_register(opaque, "dep_quant_decide_and_update", "generic", 0, &uvg_dep_quant_decide_and_update_generic);
|
||||
success &= uvg_strategyselector_register(opaque, "find_first_non_zero_coeff", "generic", 0, &uvg_find_first_non_zero_generic);
|
||||
|
||||
return success;
|
||||
}
|
50
src/strategies/generic/depquant-generic.h
Normal file
50
src/strategies/generic/depquant-generic.h
Normal file
|
@ -0,0 +1,50 @@
|
|||
#ifndef STRATEGIES_DEPQUANT_GENERIC_H_
|
||||
#define STRATEGIES_DEPQUANT_GENERIC_H_
|
||||
/*****************************************************************************
|
||||
* This file is part of uvg266 VVC encoder.
|
||||
*
|
||||
* Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright notice, this
|
||||
* list of conditions and the following disclaimer in the documentation and/or
|
||||
* other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
|
||||
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* \ingroup Optimization
|
||||
* \file
|
||||
* Generic C implementations of optimized functions.
|
||||
*/
|
||||
|
||||
#include "cu.h"
|
||||
#include "encoderstate.h"
|
||||
#include "global.h" // IWYU pragma: keep
|
||||
#include "uvg266.h"
|
||||
#include "tables.h"
|
||||
|
||||
|
||||
int uvg_strategy_register_depquant_generic(void* opaque, uint8_t bitdepth);
|
||||
|
||||
#endif //STRATEGIES_DEPQUANT_GENERIC_H_
|
|
@ -54,11 +54,16 @@
|
|||
void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
|
||||
cabac_data_t * const cabac,
|
||||
const coeff_t *coeff,
|
||||
uint8_t width,
|
||||
const cu_loc_t * const cu_loc,
|
||||
uint8_t color,
|
||||
int8_t scan_mode,
|
||||
cu_info_t* cur_cu,
|
||||
double* bits_out) {
|
||||
double* bits_out)
|
||||
{
|
||||
const int x = cu_loc->x;
|
||||
const int y = cu_loc->y;
|
||||
const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
|
||||
const int height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
|
||||
|
||||
//const encoder_control_t * const encoder = state->encoder_control;
|
||||
//int c1 = 1;
|
||||
|
@ -75,12 +80,12 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
|
|||
|
||||
// CONSTANTS
|
||||
|
||||
const int height = width; // TODO: height for non-square blocks.
|
||||
const uint32_t log2_block_size = uvg_g_convert_to_bit[width]+2;
|
||||
const uint32_t log2_cg_size = uvg_g_log2_sbb_size[log2_block_size][log2_block_size][0] + uvg_g_log2_sbb_size[log2_block_size][log2_block_size][1];
|
||||
const uint32_t *scan =
|
||||
uvg_g_sig_last_scan[scan_mode][log2_block_size - 1];
|
||||
const uint32_t *scan_cg = g_sig_last_scan_cg[log2_block_size - 1][scan_mode];
|
||||
const uint8_t log2_block_width = uvg_g_convert_to_log2[width];
|
||||
const uint8_t log2_block_height = uvg_g_convert_to_log2[height];
|
||||
|
||||
const uint32_t log2_cg_size = uvg_g_log2_sbb_size[log2_block_width][log2_block_height][0] + uvg_g_log2_sbb_size[log2_block_width][log2_block_height][1];
|
||||
const uint32_t* const scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_mode, log2_block_width, log2_block_height);
|
||||
const uint32_t* const scan_cg = uvg_get_scan_order_table(SCAN_GROUP_UNGROUPED, scan_mode, log2_block_width, log2_block_height);
|
||||
|
||||
|
||||
// Init base contexts according to block type
|
||||
|
@ -90,12 +95,13 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
|
|||
unsigned scan_cg_last = (unsigned)-1;
|
||||
unsigned scan_pos_last = (unsigned)-1;
|
||||
|
||||
for (int i = 0; i < width * width; i++) {
|
||||
for (int i = 0; i < (width * height); ++i) {
|
||||
if (coeff[scan[i]]) {
|
||||
scan_pos_last = i;
|
||||
sig_coeffgroup_flag[scan_cg[i >> log2_cg_size]] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
scan_cg_last = scan_pos_last >> log2_cg_size;
|
||||
|
||||
int pos_last = scan[scan_pos_last];
|
||||
|
@ -120,28 +126,33 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
|
|||
last_coeff_x,
|
||||
last_coeff_y,
|
||||
width,
|
||||
width,
|
||||
height,
|
||||
color,
|
||||
scan_mode,
|
||||
bits_out);
|
||||
|
||||
|
||||
|
||||
uint32_t quant_state_transition_table = 0; //ToDo: dep quant enable changes this
|
||||
uint32_t quant_state_transition_table = state->encoder_control->cfg.dep_quant ? 32040 : 0;
|
||||
int32_t quant_state = 0;
|
||||
uint8_t ctx_offset[16];
|
||||
int32_t temp_diag = -1;
|
||||
int32_t temp_sum = -1;
|
||||
|
||||
int32_t reg_bins = (width*width * 28) >> 4; //8 for 2x2
|
||||
int32_t reg_bins = (width * height * 28) >> 4; //8 for 2x2
|
||||
|
||||
// significant_coeff_flag
|
||||
for (i = scan_cg_last; i >= 0; i--) {
|
||||
|
||||
//int32_t abs_coeff[64*64];
|
||||
const uint32_t log2_cg_width = uvg_g_log2_sbb_size[log2_block_width][log2_block_height][0];
|
||||
const uint32_t log2_cg_height = uvg_g_log2_sbb_size[log2_block_width][log2_block_height][1];
|
||||
const uint32_t cg_width = (MIN((uint8_t)TR_MAX_WIDTH, width) >> log2_cg_width);
|
||||
const uint32_t cg_height = (MIN((uint8_t)TR_MAX_WIDTH, height) >> log2_cg_height);
|
||||
int32_t cg_blk_pos = scan_cg[i];
|
||||
int32_t cg_pos_y = cg_blk_pos / (MIN((uint8_t)32, width) >> (log2_cg_size / 2));
|
||||
int32_t cg_pos_x = cg_blk_pos - (cg_pos_y * (MIN((uint8_t)32, width) >> (log2_cg_size / 2)));
|
||||
int32_t cg_pos_y = cg_blk_pos / (MIN((uint8_t)32, width) >> log2_cg_width);
|
||||
int32_t cg_pos_x = cg_blk_pos - (cg_pos_y * (MIN((uint8_t)32, width) >> log2_cg_width));
|
||||
|
||||
|
||||
// !!! residual_coding_subblock() !!!
|
||||
|
||||
|
@ -151,7 +162,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
|
|||
} else {
|
||||
uint32_t sig_coeff_group = (sig_coeffgroup_flag[cg_blk_pos] != 0);
|
||||
uint32_t ctx_sig = uvg_context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x,
|
||||
cg_pos_y, (MIN((uint8_t)32, width) >> (log2_cg_size / 2)));
|
||||
cg_pos_y, cg_width, cg_height);
|
||||
CABAC_FBITS_UPDATE(cabac, &base_coeff_group_ctx[ctx_sig], sig_coeff_group, bits, "significant_coeffgroup_flag");
|
||||
}
|
||||
|
||||
|
@ -182,7 +193,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
|
|||
|
||||
sig = (coeff[blk_pos] != 0) ? 1 : 0;
|
||||
if (num_non_zero || next_sig_pos != infer_sig_pos) {
|
||||
ctx_sig = uvg_context_get_sig_ctx_idx_abs(coeff, pos_x, pos_y, width, width, color, &temp_diag, &temp_sum);
|
||||
ctx_sig = uvg_context_get_sig_ctx_idx_abs(coeff, pos_x, pos_y, width, height, color, &temp_diag, &temp_sum);
|
||||
cabac_ctx_t* sig_ctx_luma = &(cabac->ctx.cu_sig_model_luma[MAX(0, (quant_state - 1))][ctx_sig]);
|
||||
cabac_ctx_t* sig_ctx_chroma = &(cabac->ctx.cu_sig_model_chroma[MAX(0, (quant_state - 1))][MIN(ctx_sig,7)]);
|
||||
|
||||
|
@ -190,7 +201,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
|
|||
reg_bins--;
|
||||
|
||||
} else if (next_sig_pos != scan_pos_last) {
|
||||
ctx_sig = uvg_context_get_sig_ctx_idx_abs(coeff, pos_x, pos_y, width, width, color, &temp_diag, &temp_sum);
|
||||
ctx_sig = uvg_context_get_sig_ctx_idx_abs(coeff, pos_x, pos_y, width, height, color, &temp_diag, &temp_sum);
|
||||
}
|
||||
|
||||
|
||||
|
@ -256,7 +267,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
|
|||
blk_pos = scan[scan_pos];
|
||||
pos_y = blk_pos / width;
|
||||
pos_x = blk_pos - (pos_y * width);
|
||||
int32_t abs_sum = uvg_abs_sum(coeff, pos_x, pos_y, width, width, 4);
|
||||
int32_t abs_sum = uvg_abs_sum(coeff, pos_x, pos_y, width, height, 4);
|
||||
|
||||
rice_param = g_go_rice_pars[abs_sum];
|
||||
uint32_t second_pass_abs_coeff = abs(coeff[blk_pos]);
|
||||
|
@ -274,7 +285,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
|
|||
pos_y = blk_pos / width;
|
||||
pos_x = blk_pos - (pos_y * width);
|
||||
uint32_t coeff_abs = abs(coeff[blk_pos]);
|
||||
int32_t abs_sum = uvg_abs_sum(coeff, pos_x, pos_y, width, width, 0);
|
||||
int32_t abs_sum = uvg_abs_sum(coeff, pos_x, pos_y, width, height, 0);
|
||||
rice_param = g_go_rice_pars[abs_sum];
|
||||
pos0 = ((quant_state<2)?1:2) << rice_param;
|
||||
uint32_t remainder = (coeff_abs == 0 ? pos0 : coeff_abs <= pos0 ? coeff_abs - 1 : coeff_abs);
|
||||
|
@ -291,7 +302,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
|
|||
|
||||
uint32_t num_signs = num_non_zero;
|
||||
|
||||
if (state->encoder_control->cfg.signhide_enable && (last_nz_pos_in_cg - first_nz_pos_in_cg >= 4)) {
|
||||
if (state->encoder_control->cfg.signhide_enable && !state->encoder_control->cfg.dep_quant && (last_nz_pos_in_cg - first_nz_pos_in_cg >= 4)) {
|
||||
num_signs--;
|
||||
coeff_signs >>= 1;
|
||||
}
|
||||
|
|
|
@ -44,7 +44,7 @@
|
|||
void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
|
||||
cabac_data_t * const cabac,
|
||||
const coeff_t *coeff,
|
||||
uint8_t width,
|
||||
const cu_loc_t * const loc,
|
||||
uint8_t color,
|
||||
int8_t scan_mode,
|
||||
cu_info_t* cur_cu,
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "cu.h"
|
||||
#include "intra.h"
|
||||
#include "uvg266.h"
|
||||
#include "strategyselector.h"
|
||||
|
@ -42,25 +43,32 @@
|
|||
|
||||
/**
|
||||
* \brief Generate angular predictions.
|
||||
* \param log2_width Log2 of width, range 2..5.
|
||||
* \param cu_loc CU location and size data.
|
||||
* \param intra_mode Angular mode in range 2..34.
|
||||
* \param channel_type Color channel.
|
||||
* \param in_ref_above Pointer to -1 index of above reference, length=width*2+1.
|
||||
* \param in_ref_left Pointer to -1 index of left reference, length=width*2+1.
|
||||
* \param in_ref_left Pointer to -1 index of left reference, length=height*2+1.
|
||||
* \param dst Buffer of size width*width.
|
||||
* \param multi_ref_idx Multi reference line index for use with MRL.
|
||||
*/
|
||||
static void uvg_angular_pred_generic(
|
||||
const int_fast8_t log2_width,
|
||||
const cu_loc_t* const cu_loc,
|
||||
const int_fast8_t intra_mode,
|
||||
const int_fast8_t channel_type,
|
||||
const uvg_pixel *const in_ref_above,
|
||||
const uvg_pixel *const in_ref_left,
|
||||
uvg_pixel *const dst,
|
||||
const uint8_t multi_ref_idx)
|
||||
const uint8_t multi_ref_idx,
|
||||
const uint8_t isp_mode,
|
||||
const int cu_dim)
|
||||
{
|
||||
int width = channel_type == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
|
||||
int height = channel_type == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
|
||||
const int log2_width = uvg_g_convert_to_log2[width];
|
||||
const int log2_height = uvg_g_convert_to_log2[height];
|
||||
|
||||
assert(log2_width >= 2 && log2_width <= 5);
|
||||
assert(intra_mode >= 2 && intra_mode <= 66);
|
||||
assert((log2_width >= 2 && log2_width <= 5) && log2_height <= 5);
|
||||
// assert(intra_mode >= 2 && intra_mode <= 66);
|
||||
|
||||
static const int16_t modedisp2sampledisp[32] = { 0, 1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 23, 26, 29, 32, 35, 39, 45, 51, 57, 64, 73, 86, 102, 128, 171, 256, 341, 512, 1024 };
|
||||
static const int16_t modedisp2invsampledisp[32] = { 0, 16384, 8192, 5461, 4096, 2731, 2048, 1638, 1365, 1170, 1024, 910, 819, 712, 630, 565, 512, 468, 420, 364, 321, 287, 256, 224, 191, 161, 128, 96, 64, 48, 32, 16 }; // (512 * 32) / sampledisp
|
||||
|
@ -105,126 +113,105 @@ static void uvg_angular_pred_generic(
|
|||
// Temporary buffer for modes 11-25.
|
||||
// It only needs to be big enough to hold indices from -width to width-1.
|
||||
|
||||
uvg_pixel temp_dst[TR_MAX_WIDTH * TR_MAX_WIDTH];
|
||||
|
||||
|
||||
// TODO: check the correct size for these arrays when MRL is used
|
||||
//uvg_pixel tmp_ref[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX] = { 0 };
|
||||
uvg_pixel temp_main[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX] = { 0 };
|
||||
uvg_pixel temp_side[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX] = { 0 };
|
||||
const int_fast32_t width = 1 << log2_width;
|
||||
uvg_pixel temp_above[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX] = { 0 };
|
||||
uvg_pixel temp_left[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX] = { 0 };
|
||||
|
||||
uint32_t pred_mode = intra_mode; // ToDo: handle WAIP
|
||||
|
||||
uint8_t multi_ref_index = multi_ref_idx;
|
||||
uint8_t isp = isp_mode;
|
||||
|
||||
// Whether to swap references to always project on the left reference row.
|
||||
const bool vertical_mode = intra_mode >= 34;
|
||||
// Modes distance to horizontal or vertical mode.
|
||||
const int_fast8_t mode_disp = vertical_mode ? pred_mode - 50 : -((int32_t)pred_mode - 18);
|
||||
//const int_fast8_t mode_disp = vertical_mode ? intra_mode - 26 : 10 - intra_mode;
|
||||
|
||||
// Sample displacement per column in fractions of 32.
|
||||
const int_fast8_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)];
|
||||
const int16_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)];
|
||||
|
||||
// TODO: replace latter width with height
|
||||
int scale = MIN(2, log2_width - pre_scale[abs(mode_disp)]);
|
||||
const int side_size = vertical_mode ? log2_height : log2_width;
|
||||
int scale = MIN(2, side_size - pre_scale[abs(mode_disp)]);
|
||||
|
||||
// Pointer for the reference we are interpolating from.
|
||||
uvg_pixel *ref_main;
|
||||
// Pointer for the other reference.
|
||||
const uvg_pixel *ref_side;
|
||||
uvg_pixel* work = width == height || vertical_mode ? dst : temp_dst;
|
||||
|
||||
const int top_ref_length = isp_mode == ISP_MODE_VER ? width + cu_dim : width << 1;
|
||||
const int left_ref_length = isp_mode == ISP_MODE_HOR ? height + cu_dim : height << 1;
|
||||
|
||||
// Set ref_main and ref_side such that, when indexed with 0, they point to
|
||||
// index 0 in block coordinates.
|
||||
if (sample_disp < 0) {
|
||||
memcpy(&temp_above[height], &in_ref_above[0], (width + 2 + multi_ref_index) * sizeof(uvg_pixel));
|
||||
memcpy(&temp_left[width], &in_ref_left[0], (height + 2 + multi_ref_index) * sizeof(uvg_pixel));
|
||||
|
||||
// TODO: for non-square blocks, separate loops for x and y is needed
|
||||
for (int i = 0; i <= width + 1 + multi_ref_index; i++) {
|
||||
temp_main[width + i] = (vertical_mode ? in_ref_above[i] : in_ref_left[i]);
|
||||
temp_side[width + i] = (vertical_mode ? in_ref_left[i] : in_ref_above[i]);
|
||||
ref_main = vertical_mode ? temp_above + height : temp_left + width;
|
||||
ref_side = vertical_mode ? temp_left + width : temp_above + height;
|
||||
|
||||
int size_side = vertical_mode ? height : width;
|
||||
for (int i = -size_side; i <= -1; i++) {
|
||||
ref_main[i] = ref_side[MIN((-i * modedisp2invsampledisp[abs(mode_disp)] + 256) >> 9, size_side)];
|
||||
}
|
||||
|
||||
// TODO: take into account non-square blocks
|
||||
ref_main = temp_main + width;
|
||||
ref_side = temp_side + width;
|
||||
|
||||
// TODO: for non square blocks, need to check if width or height is used for reference extension
|
||||
for (int i = -width; i <= -1; i++) {
|
||||
ref_main[i] = ref_side[MIN((-i * modedisp2invsampledisp[abs(mode_disp)] + 256) >> 9, width)];
|
||||
}
|
||||
|
||||
//const uint32_t index_offset = width + 1;
|
||||
//const int32_t last_index = width;
|
||||
//const int_fast32_t most_negative_index = (width * sample_disp) >> 5;
|
||||
//// Negative sample_disp means, we need to use both references.
|
||||
|
||||
//// TODO: update refs to take into account variating block size and shapes
|
||||
//// (height is not always equal to width)
|
||||
//ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1;
|
||||
//ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1;
|
||||
|
||||
//// Move the reference pixels to start from the middle to the later half of
|
||||
//// the tmp_ref, so there is room for negative indices.
|
||||
//for (int_fast32_t x = -1; x < width; ++x) {
|
||||
// tmp_ref[x + index_offset] = ref_main[x];
|
||||
//}
|
||||
//// Get a pointer to block index 0 in tmp_ref.
|
||||
//ref_main = &tmp_ref[index_offset];
|
||||
//tmp_ref[index_offset -1] = tmp_ref[index_offset];
|
||||
|
||||
//// Extend the side reference to the negative indices of main reference.
|
||||
//int_fast32_t col_sample_disp = 128; // rounding for the ">> 8"
|
||||
//int_fast16_t inv_abs_sample_disp = modedisp2invsampledisp[abs(mode_disp)];
|
||||
//// TODO: add 'vertical_mode ? height : width' instead of 'width'
|
||||
//
|
||||
//for (int_fast32_t x = -1; x > most_negative_index; x--) {
|
||||
// col_sample_disp += inv_abs_sample_disp;
|
||||
// int_fast32_t side_index = col_sample_disp >> 8;
|
||||
// tmp_ref[x + index_offset - 1] = ref_side[side_index - 1];
|
||||
//}
|
||||
//tmp_ref[last_index + index_offset] = tmp_ref[last_index + index_offset - 1];
|
||||
//tmp_ref[most_negative_index + index_offset - 1] = tmp_ref[most_negative_index + index_offset];
|
||||
}
|
||||
else {
|
||||
memcpy(&temp_above[0], &in_ref_above[0], (top_ref_length + 1 + multi_ref_index) * sizeof(uvg_pixel));
|
||||
memcpy(&temp_left[0], &in_ref_left[0], (left_ref_length + 1 + multi_ref_index) * sizeof(uvg_pixel));
|
||||
|
||||
// TODO: again, separate loop needed for non-square blocks
|
||||
for (int i = 0; i <= (width << 1) + multi_ref_index; i++) {
|
||||
temp_main[i] = (vertical_mode ? in_ref_above[i] : in_ref_left[i]);
|
||||
temp_side[i] = (vertical_mode ? in_ref_left[i] : in_ref_above[i]);
|
||||
}
|
||||
ref_main = vertical_mode ? temp_above : temp_left;
|
||||
ref_side = vertical_mode ? temp_left : temp_above;
|
||||
|
||||
// TODO: this code block will need to change also when non-square blocks are used
|
||||
// const int log2_ratio = 0;
|
||||
const int s = 0;
|
||||
const int log2_ratio = log2_width - log2_height;
|
||||
const int s = MAX(0, vertical_mode ? log2_ratio : -log2_ratio);
|
||||
const int max_index = (multi_ref_index << s) + 2;
|
||||
const int ref_length = width << 1;
|
||||
const uvg_pixel val = temp_main[ref_length + multi_ref_index];
|
||||
int ref_length;
|
||||
if (isp_mode) {
|
||||
ref_length = vertical_mode ? top_ref_length : left_ref_length;
|
||||
}
|
||||
else {
|
||||
ref_length = vertical_mode ? width << 1 : height << 1;
|
||||
}
|
||||
const uvg_pixel val = ref_main[ref_length + multi_ref_index];
|
||||
for (int j = 1; j <= max_index; j++) {
|
||||
temp_main[ref_length + multi_ref_index + j] = val;
|
||||
ref_main[ref_length + multi_ref_index + j] = val;
|
||||
}
|
||||
}
|
||||
|
||||
ref_main = temp_main;
|
||||
ref_side = temp_side;
|
||||
//// sample_disp >= 0 means we don't need to refer to negative indices,
|
||||
//// which means we can just use the references as is.
|
||||
//ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1;
|
||||
//ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1;
|
||||
|
||||
//memcpy(tmp_ref + width, ref_main, (width*2) * sizeof(uvg_pixel));
|
||||
//ref_main = &tmp_ref[width];
|
||||
//tmp_ref[width-1] = tmp_ref[width];
|
||||
//int8_t last_index = 1 + width*2;
|
||||
//tmp_ref[width + last_index] = tmp_ref[width + last_index - 1];
|
||||
}
|
||||
|
||||
// compensate for line offset in reference line buffers
|
||||
ref_main += multi_ref_index;
|
||||
ref_side += multi_ref_index;
|
||||
if (!vertical_mode) { SWAP(width, height, int) }
|
||||
|
||||
if (sample_disp != 0) {
|
||||
bool use_cubic = true; // Default to cubic filter
|
||||
static const int uvg_intra_hor_ver_dist_thres[8] = { 24, 24, 24, 14, 2, 0, 0, 0 };
|
||||
int filter_threshold = uvg_intra_hor_ver_dist_thres[(log2_width + log2_height) >> 1];
|
||||
int dist_from_vert_or_hor = MIN(abs((int32_t)pred_mode - 50), abs((int32_t)pred_mode - 18));
|
||||
if (dist_from_vert_or_hor > filter_threshold) {
|
||||
if ((abs(sample_disp) & 0x1F) != 0)
|
||||
{
|
||||
use_cubic = false;
|
||||
}
|
||||
}
|
||||
// Cubic must be used if ref line != 0 or if isp mode is != 0
|
||||
if (multi_ref_index || isp) {
|
||||
use_cubic = true;
|
||||
}
|
||||
// The mode is not horizontal or vertical, we have to do interpolation.
|
||||
|
||||
for (int_fast32_t y = 0, delta_pos = sample_disp * (1 + multi_ref_index); y < width; ++y, delta_pos += sample_disp) {
|
||||
for (int_fast32_t y = 0, delta_pos = sample_disp * (1 + multi_ref_index); y < height; ++y, delta_pos += sample_disp) {
|
||||
|
||||
int_fast32_t delta_int = delta_pos >> 5;
|
||||
int_fast32_t delta_fract = delta_pos & (32 - 1);
|
||||
const int16_t filter_coeff[4] = { 16 - (delta_fract >> 1), 32 - (delta_fract >> 1), 16 + (delta_fract >> 1), delta_fract >> 1 };
|
||||
int16_t const* const f = use_cubic ? cubic_filter[delta_fract] : filter_coeff;
|
||||
|
||||
if ((abs(sample_disp) & 0x1F) != 0) {
|
||||
|
||||
|
@ -232,25 +219,7 @@ static void uvg_angular_pred_generic(
|
|||
if (channel_type == 0) {
|
||||
int32_t ref_main_index = delta_int;
|
||||
uvg_pixel p[4];
|
||||
bool use_cubic = true; // Default to cubic filter
|
||||
static const int uvg_intra_hor_ver_dist_thres[8] = { 24, 24, 24, 14, 2, 0, 0, 0 };
|
||||
int filter_threshold = uvg_intra_hor_ver_dist_thres[log2_width];
|
||||
int dist_from_vert_or_hor = MIN(abs((int32_t)pred_mode - 50), abs((int32_t)pred_mode - 18));
|
||||
if (dist_from_vert_or_hor > filter_threshold) {
|
||||
static const int16_t modedisp2sampledisp[32] = { 0, 1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 23, 26, 29, 32, 35, 39, 45, 51, 57, 64, 73, 86, 102, 128, 171, 256, 341, 512, 1024 };
|
||||
const int_fast8_t mode_disp = (pred_mode >= 34) ? pred_mode - 50 : 18 - pred_mode;
|
||||
const int_fast8_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)];
|
||||
if ((abs(sample_disp) & 0x1F) != 0)
|
||||
{
|
||||
use_cubic = false;
|
||||
}
|
||||
}
|
||||
// Cubic must be used if ref line != 0
|
||||
if (multi_ref_index) {
|
||||
use_cubic = true;
|
||||
}
|
||||
const int16_t filter_coeff[4] = { 16 - (delta_fract >> 1), 32 - (delta_fract >> 1), 16 + (delta_fract >> 1), delta_fract >> 1 };
|
||||
int16_t const * const f = use_cubic ? cubic_filter[delta_fract] : filter_coeff;
|
||||
|
||||
// Do 4-tap intra interpolation filtering
|
||||
for (int_fast32_t x = 0; x < width; x++, ref_main_index++) {
|
||||
p[0] = ref_main[ref_main_index];
|
||||
|
@ -258,7 +227,7 @@ static void uvg_angular_pred_generic(
|
|||
p[2] = ref_main[ref_main_index + 2];
|
||||
p[3] = ref_main[ref_main_index + 3];
|
||||
|
||||
dst[y * width + x] = CLIP_TO_PIXEL(((int32_t)(f[0] * p[0]) + (int32_t)(f[1] * p[1]) + (int32_t)(f[2] * p[2]) + (int32_t)(f[3] * p[3]) + 32) >> 6);
|
||||
work[y * width + x] = CLIP_TO_PIXEL(((int32_t)(f[0] * p[0]) + (int32_t)(f[1] * p[1]) + (int32_t)(f[2] * p[2]) + (int32_t)(f[3] * p[3]) + 32) >> 6);
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -268,26 +237,26 @@ static void uvg_angular_pred_generic(
|
|||
for (int_fast32_t x = 0; x < width; ++x) {
|
||||
uvg_pixel ref1 = ref_main[x + delta_int + 1];
|
||||
uvg_pixel ref2 = ref_main[x + delta_int + 2];
|
||||
dst[y * width + x] = ref1 + ((delta_fract * (ref2-ref1) + 16) >> 5);
|
||||
work[y * width + x] = ref1 + ((delta_fract * (ref2-ref1) + 16) >> 5);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Just copy the integer samples
|
||||
for (int_fast32_t x = 0; x < width; x++) {
|
||||
dst[y * width + x] = ref_main[x + delta_int + 1];
|
||||
work[y * width + x] = ref_main[x + delta_int + 1];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// PDPC
|
||||
bool PDPC_filter = (width >= 4 || channel_type != 0);
|
||||
bool PDPC_filter = (width >= TR_MIN_WIDTH && height >= TR_MIN_WIDTH) && multi_ref_index == 0;
|
||||
if (pred_mode > 1 && pred_mode < 67) {
|
||||
if (mode_disp < 0 || multi_ref_index) { // Cannot be used with MRL.
|
||||
PDPC_filter = false;
|
||||
}
|
||||
else if (mode_disp > 0) {
|
||||
PDPC_filter = (scale >= 0);
|
||||
PDPC_filter &= (scale >= 0);
|
||||
}
|
||||
}
|
||||
if(PDPC_filter) {
|
||||
|
@ -297,70 +266,50 @@ static void uvg_angular_pred_generic(
|
|||
|
||||
int wL = 32 >> (2 * x >> scale);
|
||||
const uvg_pixel left = ref_side[y + (inv_angle_sum >> 9) + 1];
|
||||
dst[y * width + x] = dst[y * width + x] + ((wL * (left - dst[y * width + x]) + 32) >> 6);
|
||||
work[y * width + x] = work[y * width + x] + ((wL * (left - work[y * width + x]) + 32) >> 6);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
if (pred_mode == 2 || pred_mode == 66) {
|
||||
int wT = 16 >> MIN(31, ((y << 1) >> scale));
|
||||
for (int x = 0; x < width; x++) {
|
||||
int wL = 16 >> MIN(31, ((x << 1) >> scale));
|
||||
if (wT + wL == 0) break;
|
||||
int c = x + y + 1;
|
||||
if (c >= 2 * width) { wL = 0; }
|
||||
if (c >= 2 * width) { wT = 0; }
|
||||
const uvg_pixel left = (wL != 0) ? ref_side[c] : 0;
|
||||
const uvg_pixel top = (wT != 0) ? ref_main[c] : 0;
|
||||
dst[y * width + x] = CLIP_TO_PIXEL((wL * left + wT * top + (64 - wL - wT) * dst[y * width + x] + 32) >> 6);
|
||||
}
|
||||
} else if (sample_disp == 0 || sample_disp >= 12) {
|
||||
int inv_angle_sum_0 = 2;
|
||||
for (int x = 0; x < width; x++) {
|
||||
inv_angle_sum_0 += modedisp2invsampledisp[abs(mode_disp)];
|
||||
int delta_pos_0 = inv_angle_sum_0 >> 2;
|
||||
int delta_frac_0 = delta_pos_0 & 63;
|
||||
int delta_int_0 = delta_pos_0 >> 6;
|
||||
int delta_y = y + delta_int_0 + 1;
|
||||
// TODO: convert to JVET_K0500_WAIP
|
||||
if (delta_y > width + width - 1) break;
|
||||
|
||||
int wL = 32 >> MIN(31, ((x << 1) >> scale));
|
||||
if (wL == 0) break;
|
||||
const uvg_pixel *p = ref_side + delta_y - 1;
|
||||
uvg_pixel left = p[delta_frac_0 >> 5];
|
||||
dst[y * width + x] = CLIP_TO_PIXEL((wL * left + (64 - wL) * dst[y * width + x] + 32) >> 6);
|
||||
}
|
||||
}*/
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Mode is horizontal or vertical, just copy the pixels.
|
||||
|
||||
// TODO: update outer loop to use height instead of width
|
||||
for (int_fast32_t y = 0; y < width; ++y) {
|
||||
for (int_fast32_t x = 0; x < width; ++x) {
|
||||
dst[y * width + x] = ref_main[x + 1];
|
||||
}
|
||||
// Do not apply PDPC if multi ref line index is other than 0
|
||||
if ((width >= 4 || channel_type != 0) && sample_disp >= 0 && multi_ref_index == 0) {
|
||||
int scale = (log2_width + log2_width - 2) >> 2;
|
||||
// TODO: do not do PDPC if block is in BDPCM mode
|
||||
bool do_pdpc = ((width >= 4 && height >= 4) && sample_disp >= 0 && multi_ref_index == 0 /*&& !bdpcm*/);
|
||||
|
||||
if (do_pdpc) {
|
||||
int scale = (log2_width + log2_height - 2) >> 2;
|
||||
const uvg_pixel top_left = ref_main[0];
|
||||
for (int_fast32_t y = 0; y < height; ++y) {
|
||||
memcpy(&work[y * width], &ref_main[1], width * sizeof(uvg_pixel));
|
||||
const uvg_pixel left = ref_side[1 + y];
|
||||
for (int i = 0; i < MIN(3 << scale, width); i++) {
|
||||
const int wL = 32 >> (2 * i >> scale);
|
||||
const uvg_pixel val = dst[y * width + i];
|
||||
dst[y * width + i] = CLIP_TO_PIXEL(val + ((wL * (left - top_left) + 32) >> 6));
|
||||
for (int_fast32_t x = 0; x < MIN(3 << scale, width); ++x) {
|
||||
const int wL = 32 >> (2 * x >> scale);
|
||||
const uvg_pixel val = work[y * width + x];
|
||||
work[y * width + x] = CLIP_TO_PIXEL(val + ((wL * (left - top_left) + 32) >> 6));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int_fast32_t y = 0; y < height; ++y) {
|
||||
memcpy(&work[y * width], &ref_main[1], width * sizeof(uvg_pixel));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Flip the block if this was a horizontal mode.
|
||||
if (!vertical_mode) {
|
||||
for (int_fast32_t y = 0; y < width - 1; ++y) {
|
||||
if(width == height) {
|
||||
for (int_fast32_t y = 0; y < height - 1; ++y) {
|
||||
for (int_fast32_t x = y + 1; x < width; ++x) {
|
||||
SWAP(dst[y * width + x], dst[x * width + y], uvg_pixel);
|
||||
SWAP(work[y * height + x], work[x * width + y], uvg_pixel);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for(int y = 0; y < width; ++y) {
|
||||
for(int x = 0; x < height; ++x) {
|
||||
dst[x + y * height] = work[y + x * width];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -369,23 +318,32 @@ static void uvg_angular_pred_generic(
|
|||
|
||||
/**
|
||||
* \brief Generate planar prediction.
|
||||
* \param log2_width Log2 of width, range 2..5.
|
||||
* \param cu_loc CU location and size data.
|
||||
* \param color Color channel.
|
||||
* \param in_ref_above Pointer to -1 index of above reference, length=width*2+1.
|
||||
* \param in_ref_left Pointer to -1 index of left reference, length=width*2+1.
|
||||
* \param dst Buffer of size width*width.
|
||||
*/
|
||||
static void uvg_intra_pred_planar_generic(
|
||||
const int_fast8_t log2_width,
|
||||
const cu_loc_t* const cu_loc,
|
||||
color_t color,
|
||||
const uvg_pixel *const ref_top,
|
||||
const uvg_pixel *const ref_left,
|
||||
uvg_pixel *const dst)
|
||||
{
|
||||
// TODO: Add height
|
||||
assert(log2_width >= 2 && log2_width <= 5);
|
||||
const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
|
||||
const int height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
|
||||
const int log2_width = uvg_g_convert_to_log2[width];
|
||||
const int log2_height = uvg_g_convert_to_log2[height];
|
||||
|
||||
const int offset = 1 << (log2_width + log2_height);
|
||||
const int final_shift = 1 + log2_width + log2_height;
|
||||
|
||||
// If ISP is enabled log_dim 1 is possible (limit was previously 2)
|
||||
assert((log2_width >= 2 && log2_width <= 5) && log2_height <= 5);
|
||||
|
||||
const int_fast8_t width = 1 << log2_width;
|
||||
const uvg_pixel top_right = ref_top[width + 1];
|
||||
const uvg_pixel bottom_left = ref_left[width + 1];
|
||||
const uvg_pixel bottom_left = ref_left[height + 1];
|
||||
|
||||
#if 0
|
||||
// Unoptimized version for reference.
|
||||
|
@ -397,18 +355,27 @@ static void uvg_intra_pred_planar_generic(
|
|||
}
|
||||
}
|
||||
#else
|
||||
int_fast16_t top[32];
|
||||
// TODO: get rid of magic numbers. Make a define for this
|
||||
int_fast16_t top[64];
|
||||
int_fast16_t bottom[64];
|
||||
int_fast16_t left[64];
|
||||
int_fast16_t right[64];
|
||||
for (int i = 0; i < width; ++i) {
|
||||
top[i] = ref_top[i + 1] << log2_width;
|
||||
bottom[i] = bottom_left - ref_top[i + 1];
|
||||
top[i] = ref_top[i + 1] << log2_height;
|
||||
}
|
||||
|
||||
for (int y = 0; y < width; ++y) {
|
||||
int_fast16_t hor = (ref_left[y + 1] << log2_width) + width;
|
||||
for (int j = 0; j < height; ++j) {
|
||||
right[j] = top_right - ref_left[j + 1];
|
||||
left[j] = ref_left[j + 1] << log2_width;
|
||||
}
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
int_fast16_t hor = left[y];
|
||||
for (int x = 0; x < width; ++x) {
|
||||
hor += top_right - ref_left[y + 1];
|
||||
top[x] += bottom_left - ref_top[x + 1];
|
||||
dst[y * width + x] = (hor + top[x]) >> (log2_width + 1);
|
||||
//
|
||||
hor += right[y];
|
||||
top[x] += bottom[x];
|
||||
dst[y * width + x] = ((hor << log2_height) + (top[x] << log2_width) + offset) >> final_shift;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -461,25 +428,26 @@ static void uvg_intra_pred_filtered_dc_generic(
|
|||
|
||||
/**
|
||||
* \brief Position Dependent Prediction Combination for Planar and DC modes.
|
||||
* \param log2_width Log2 of width, range 2..5.
|
||||
* \param width Block width matching log2_width.
|
||||
* \param cu_loc CU location and size data.
|
||||
* \param used_ref Pointer to the used reference pixel struct.
|
||||
* \param dst Buffer of size width*width.
|
||||
*/
|
||||
static void uvg_pdpc_planar_dc_generic(
|
||||
const int mode,
|
||||
const int width,
|
||||
const int log2_width,
|
||||
const cu_loc_t* const cu_loc,
|
||||
const color_t color,
|
||||
const uvg_intra_ref *const used_ref,
|
||||
uvg_pixel *const dst)
|
||||
{
|
||||
assert(mode == 0 || mode == 1); // planar or DC
|
||||
const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
|
||||
const int height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
|
||||
const int log2_width = uvg_g_convert_to_log2[width];
|
||||
const int log2_height = uvg_g_convert_to_log2[height];
|
||||
|
||||
// TODO: replace latter log2_width with log2_height
|
||||
const int scale = ((log2_width - 2 + log2_width - 2 + 2) >> 2);
|
||||
const int scale = (log2_width + log2_height - 2) >> 2;
|
||||
|
||||
// TODO: replace width with height
|
||||
for (int y = 0; y < width; y++) {
|
||||
for (int y = 0; y < height; y++) {
|
||||
int wT = 32 >> MIN(31, ((y << 1) >> scale));
|
||||
for (int x = 0; x < width; x++) {
|
||||
int wL = 32 >> MIN(31, ((x << 1) >> scale));
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
|
||||
#include "strategies/generic/picture-generic.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "strategies/strategies-picture.h"
|
||||
|
@ -474,6 +475,577 @@ SATD_DUAL_NXN(64, uvg_pixel)
|
|||
|
||||
SATD_ANY_SIZE_MULTI_GENERIC(quad_generic, 4)
|
||||
|
||||
static uint64_t xCalcHADs2x2(const uvg_pixel* piOrg, const uvg_pixel* piCur, int iStrideOrg, int iStrideCur)
|
||||
{
|
||||
uint64_t satd = 0;
|
||||
coeff_t diff[4], m[4];
|
||||
|
||||
diff[0] = piOrg[0] - piCur[0];
|
||||
diff[1] = piOrg[1] - piCur[1];
|
||||
diff[2] = piOrg[iStrideOrg] - piCur[0 + iStrideCur];
|
||||
diff[3] = piOrg[iStrideOrg + 1] - piCur[1 + iStrideCur];
|
||||
m[0] = diff[0] + diff[2];
|
||||
m[1] = diff[1] + diff[3];
|
||||
m[2] = diff[0] - diff[2];
|
||||
m[3] = diff[1] - diff[3];
|
||||
|
||||
satd += abs(m[0] + m[1]) >> 2;
|
||||
satd += abs(m[0] - m[1]);
|
||||
satd += abs(m[2] + m[3]);
|
||||
satd += abs(m[2] - m[3]);
|
||||
|
||||
return satd;
|
||||
}
|
||||
|
||||
|
||||
/**
 * \brief Hadamard-transformed absolute difference sum for one 16x8 block
 *        (16 samples wide, 8 rows tall).
 *
 * \param piOrg       Pointer to the top-left sample of the first block.
 * \param piCur       Pointer to the top-left sample of the second block.
 * \param iStrideOrg  Distance (in samples) between rows of piOrg.
 * \param iStrideCur  Distance (in samples) between rows of piCur.
 * \return Sum of absolute transform coefficients (DC weighted by 1/4),
 *         scaled by 2 / sqrt(16 * 8) and truncated to an integer.
 */
static uint64_t xCalcHADs16x8(const uvg_pixel* piOrg, const uvg_pixel* piCur, int iStrideOrg, int iStrideCur)
{ // NOTE: a SIMD implementation is still missing (original remark by JCA)
  enum { blk_w = 16, blk_h = 8 };
  int coef[blk_h][blk_w];

  // Residual block, one row per iteration.
  for (int row = 0; row < blk_h; ++row) {
    for (int col = 0; col < blk_w; ++col) {
      coef[row][col] = piOrg[col] - piCur[col];
    }
    piCur += iStrideCur;
    piOrg += iStrideOrg;
  }

  // Horizontal pass: in-place butterflies with spans 8, 4, 2, 1 on each row.
  // Each stage pairs element i with element i + span inside groups of
  // 2 * span elements; sums land in the lower half, differences in the upper.
  for (int row = 0; row < blk_h; ++row) {
    for (int span = blk_w >> 1; span > 0; span >>= 1) {
      for (int base = 0; base < blk_w; base += span << 1) {
        for (int i = base; i < base + span; ++i) {
          const int lo = coef[row][i];
          const int hi = coef[row][i + span];
          coef[row][i]        = lo + hi;
          coef[row][i + span] = lo - hi;
        }
      }
    }
  }

  // Vertical pass: the same butterflies down each column, spans 4, 2, 1.
  for (int col = 0; col < blk_w; ++col) {
    for (int span = blk_h >> 1; span > 0; span >>= 1) {
      for (int base = 0; base < blk_h; base += span << 1) {
        for (int i = base; i < base + span; ++i) {
          const int lo = coef[i][col];
          const int hi = coef[i + span][col];
          coef[i][col]        = lo + hi;
          coef[i + span][col] = lo - hi;
        }
      }
    }
  }

  // Accumulate magnitudes of all coefficients.
  int total = 0;
  for (int row = 0; row < blk_h; ++row) {
    for (int col = 0; col < blk_w; ++col) {
      total += abs(coef[row][col]);
    }
  }

  // Replace the full DC magnitude with a quarter of it.
  const int dc_mag = abs(coef[0][0]);
  total -= dc_mag;
  total += dc_mag >> 2;

  // Normalize for the non-square block size.
  total = (int)(total / sqrt(16.0 * 8) * 2);

  return total;
}
|
||||
|
||||
/**
 * \brief Hadamard-transformed absolute difference sum for one 8x16 block
 *        (8 samples wide, 16 rows tall).
 *
 * \param piOrg       Pointer to the top-left sample of the first block.
 * \param piCur       Pointer to the top-left sample of the second block.
 * \param iStrideOrg  Distance (in samples) between rows of piOrg.
 * \param iStrideCur  Distance (in samples) between rows of piCur.
 * \return Sum of absolute transform coefficients (DC weighted by 1/4),
 *         scaled by 2 / sqrt(16 * 8) and truncated to an integer.
 */
static uint64_t xCalcHADs8x16(const uvg_pixel* piOrg, const uvg_pixel* piCur, int iStrideOrg, int iStrideCur)
{
  enum { blk_w = 8, blk_h = 16 };
  int coef[blk_h][blk_w];

  // Residual block, one row per iteration.
  for (int row = 0; row < blk_h; ++row) {
    for (int col = 0; col < blk_w; ++col) {
      coef[row][col] = piOrg[col] - piCur[col];
    }
    piCur += iStrideCur;
    piOrg += iStrideOrg;
  }

  // Horizontal pass: in-place butterflies with spans 4, 2, 1 on each row.
  // Sums go to the lower half of each group, differences to the upper half.
  for (int row = 0; row < blk_h; ++row) {
    for (int span = blk_w >> 1; span > 0; span >>= 1) {
      for (int base = 0; base < blk_w; base += span << 1) {
        for (int i = base; i < base + span; ++i) {
          const int lo = coef[row][i];
          const int hi = coef[row][i + span];
          coef[row][i]        = lo + hi;
          coef[row][i + span] = lo - hi;
        }
      }
    }
  }

  // Vertical pass: the same butterflies down each column, spans 8, 4, 2, 1.
  for (int col = 0; col < blk_w; ++col) {
    for (int span = blk_h >> 1; span > 0; span >>= 1) {
      for (int base = 0; base < blk_h; base += span << 1) {
        for (int i = base; i < base + span; ++i) {
          const int lo = coef[i][col];
          const int hi = coef[i + span][col];
          coef[i][col]        = lo + hi;
          coef[i + span][col] = lo - hi;
        }
      }
    }
  }

  // Accumulate magnitudes of all coefficients.
  int total = 0;
  for (int row = 0; row < blk_h; ++row) {
    for (int col = 0; col < blk_w; ++col) {
      total += abs(coef[row][col]);
    }
  }

  // Replace the full DC magnitude with a quarter of it.
  const int dc_mag = abs(coef[0][0]);
  total -= dc_mag;
  total += dc_mag >> 2;

  // Normalize for the non-square block size.
  total = (int)(total / sqrt(16.0 * 8) * 2);

  return total;
}
|
||||
|
||||
/**
 * \brief Hadamard-transformed absolute difference sum for one 4x8 block
 *        (4 samples wide, 8 rows tall).
 *
 * \param piOrg       Pointer to the top-left sample of the first block.
 * \param piCur       Pointer to the top-left sample of the second block.
 * \param iStrideOrg  Distance (in samples) between rows of piOrg.
 * \param iStrideCur  Distance (in samples) between rows of piCur.
 * \return Sum of absolute transform coefficients (DC weighted by 1/4),
 *         scaled by 2 / sqrt(4 * 8) and truncated to an integer.
 */
static uint64_t xCalcHADs4x8(const uvg_pixel* piOrg, const uvg_pixel* piCur, int iStrideOrg, int iStrideCur)
{
  enum { blk_w = 4, blk_h = 8 };
  int coef[blk_h][blk_w];

  // Residual block, one row per iteration.
  for (int row = 0; row < blk_h; ++row) {
    for (int col = 0; col < blk_w; ++col) {
      coef[row][col] = piOrg[col] - piCur[col];
    }
    piCur += iStrideCur;
    piOrg += iStrideOrg;
  }

  // Horizontal pass: in-place butterflies with spans 2, 1 on each row.
  // Sums go to the lower half of each group, differences to the upper half.
  for (int row = 0; row < blk_h; ++row) {
    for (int span = blk_w >> 1; span > 0; span >>= 1) {
      for (int base = 0; base < blk_w; base += span << 1) {
        for (int i = base; i < base + span; ++i) {
          const int lo = coef[row][i];
          const int hi = coef[row][i + span];
          coef[row][i]        = lo + hi;
          coef[row][i + span] = lo - hi;
        }
      }
    }
  }

  // Vertical pass: the same butterflies down each column, spans 4, 2, 1.
  for (int col = 0; col < blk_w; ++col) {
    for (int span = blk_h >> 1; span > 0; span >>= 1) {
      for (int base = 0; base < blk_h; base += span << 1) {
        for (int i = base; i < base + span; ++i) {
          const int lo = coef[i][col];
          const int hi = coef[i + span][col];
          coef[i][col]        = lo + hi;
          coef[i + span][col] = lo - hi;
        }
      }
    }
  }

  // Accumulate magnitudes of all coefficients.
  int total = 0;
  for (int row = 0; row < blk_h; ++row) {
    for (int col = 0; col < blk_w; ++col) {
      total += abs(coef[row][col]);
    }
  }

  // Replace the full DC magnitude with a quarter of it.
  const int dc_mag = abs(coef[0][0]);
  total -= dc_mag;
  total += dc_mag >> 2;

  // Normalize for the non-square block size.
  total = (int)(total / sqrt(4.0 * 8) * 2);

  return total;
}
|
||||
|
||||
/**
 * \brief Hadamard-transformed absolute difference sum for one 8x4 block
 *        (8 samples wide, 4 rows tall).
 *
 * \param piOrg       Pointer to the top-left sample of the first block.
 * \param piCur       Pointer to the top-left sample of the second block.
 * \param iStrideOrg  Distance (in samples) between rows of piOrg.
 * \param iStrideCur  Distance (in samples) between rows of piCur.
 * \return Sum of absolute transform coefficients (DC weighted by 1/4),
 *         scaled by 2 / sqrt(4 * 8) and truncated to an integer.
 */
static uint64_t xCalcHADs8x4(const uvg_pixel* piOrg, const uvg_pixel* piCur, int iStrideOrg, int iStrideCur)
{
  enum { blk_w = 8, blk_h = 4 };
  int coef[blk_h][blk_w];

  // Residual block, one row per iteration.
  for (int row = 0; row < blk_h; ++row) {
    for (int col = 0; col < blk_w; ++col) {
      coef[row][col] = piOrg[col] - piCur[col];
    }
    piCur += iStrideCur;
    piOrg += iStrideOrg;
  }

  // Horizontal pass: in-place butterflies with spans 4, 2, 1 on each row.
  // Sums go to the lower half of each group, differences to the upper half.
  for (int row = 0; row < blk_h; ++row) {
    for (int span = blk_w >> 1; span > 0; span >>= 1) {
      for (int base = 0; base < blk_w; base += span << 1) {
        for (int i = base; i < base + span; ++i) {
          const int lo = coef[row][i];
          const int hi = coef[row][i + span];
          coef[row][i]        = lo + hi;
          coef[row][i + span] = lo - hi;
        }
      }
    }
  }

  // Vertical pass: the same butterflies down each column, spans 2, 1.
  for (int col = 0; col < blk_w; ++col) {
    for (int span = blk_h >> 1; span > 0; span >>= 1) {
      for (int base = 0; base < blk_h; base += span << 1) {
        for (int i = base; i < base + span; ++i) {
          const int lo = coef[i][col];
          const int hi = coef[i + span][col];
          coef[i][col]        = lo + hi;
          coef[i + span][col] = lo - hi;
        }
      }
    }
  }

  // Accumulate magnitudes of all coefficients.
  int total = 0;
  for (int row = 0; row < blk_h; ++row) {
    for (int col = 0; col < blk_w; ++col) {
      total += abs(coef[row][col]);
    }
  }

  // Replace the full DC magnitude with a quarter of it.
  const int dc_mag = abs(coef[0][0]);
  total -= dc_mag;
  total += dc_mag >> 2;

  // Normalize for the non-square block size.
  total = (int)(total / sqrt(4.0 * 8) * 2);

  return total;
}
|
||||
|
||||
|
||||
/**
 * \brief Hadamard-transform based distortion for a block of arbitrary size.
 *
 * The block is covered with fixed-size tiles and the per-tile results are
 * summed. The tile shape is picked by the first rule that matches:
 *   - width > height, height % 8 == 0, width % 16 == 0  -> 16x8 tiles
 *   - width < height, width % 8 == 0,  height % 16 == 0 -> 8x16 tiles
 *   - width > height, height % 4 == 0, width % 8 == 0   -> 8x4 tiles
 *   - width < height, width % 4 == 0,  height % 8 == 0  -> 4x8 tiles
 *   - both dimensions multiples of 8                    -> 8x8 tiles
 *   - both dimensions multiples of 4                    -> 4x4 tiles
 *   - both dimensions multiples of 2                    -> 2x2 tiles
 * When no rule matches nothing is accumulated and 0 is returned.
 *
 * \param width        Block width in pixels.
 * \param height       Block height in pixels.
 * \param ref_in       Top-left pixel of the reference block.
 * \param ref_stride   Distance in pixels between consecutive rows of ref_in.
 * \param pred_in      Top-left pixel of the predicted block.
 * \param pred_stride  Distance in pixels between consecutive rows of pred_in.
 * \returns Sum of the tile costs over the whole block.
 */
static uint64_t xGetHADs(int width, int height, const uvg_pixel* ref_in, int ref_stride, const uvg_pixel* pred_in, int pred_stride)
{
  // Row cursors: advanced by one tile height after each row of tiles.
  const uvg_pixel* ref_row  = ref_in;
  const uvg_pixel* pred_row = pred_in;

  const int wider  = width > height;
  const int taller = width < height;

  uint64_t total = 0;
  int row;
  int col;

  if (wider && height % 8 == 0 && width % 16 == 0) {
    // 16 wide, 8 high tiles.
    for (row = 0; row < height; row += 8) {
      for (col = 0; col < width; col += 16) {
        total += xCalcHADs16x8(ref_row + col, pred_row + col, ref_stride, pred_stride);
      }
      ref_row  += 8 * ref_stride;
      pred_row += 8 * pred_stride;
    }
  } else if (taller && width % 8 == 0 && height % 16 == 0) {
    // 8 wide, 16 high tiles.
    for (row = 0; row < height; row += 16) {
      for (col = 0; col < width; col += 8) {
        total += xCalcHADs8x16(ref_row + col, pred_row + col, ref_stride, pred_stride);
      }
      ref_row  += 16 * ref_stride;
      pred_row += 16 * pred_stride;
    }
  } else if (wider && height % 4 == 0 && width % 8 == 0) {
    // 8 wide, 4 high tiles.
    for (row = 0; row < height; row += 4) {
      for (col = 0; col < width; col += 8) {
        total += xCalcHADs8x4(ref_row + col, pred_row + col, ref_stride, pred_stride);
      }
      ref_row  += 4 * ref_stride;
      pred_row += 4 * pred_stride;
    }
  } else if (taller && width % 4 == 0 && height % 8 == 0) {
    // 4 wide, 8 high tiles.
    for (row = 0; row < height; row += 8) {
      for (col = 0; col < width; col += 4) {
        total += xCalcHADs4x8(ref_row + col, pred_row + col, ref_stride, pred_stride);
      }
      ref_row  += 8 * ref_stride;
      pred_row += 8 * pred_stride;
    }
  } else if (height % 8 == 0 && width % 8 == 0) {
    // Square 8x8 tiles; note this kernel takes (pixels, stride) pairs.
    for (row = 0; row < height; row += 8) {
      for (col = 0; col < width; col += 8) {
        total += satd_8x8_subblock_generic(ref_row + col, ref_stride, pred_row + col, pred_stride);
      }
      ref_row  += 8 * ref_stride;
      pred_row += 8 * pred_stride;
    }
  } else if (height % 4 == 0 && width % 4 == 0) {
    // Square 4x4 tiles; same (pixels, stride) argument order as the 8x8 kernel.
    for (row = 0; row < height; row += 4) {
      for (col = 0; col < width; col += 4) {
        total += uvg_satd_4x4_subblock_generic(ref_row + col, ref_stride, pred_row + col, pred_stride);
      }
      ref_row  += 4 * ref_stride;
      pred_row += 4 * pred_stride;
    }
  } else if (height % 2 == 0 && width % 2 == 0) {
    // Square 2x2 tiles.
    for (row = 0; row < height; row += 2) {
      for (col = 0; col < width; col += 2) {
        total += xCalcHADs2x2(ref_row + col, pred_row + col, ref_stride, pred_stride);
      }
      ref_row  += 2 * ref_stride;
      pred_row += 2 * pred_stride;
    }
  }

  // TODO: 10 bit
  return total;
}
|
||||
|
||||
|
||||
// Function macro for defining SAD calculating functions
|
||||
// for fixed size blocks.
|
||||
#define SAD_NXN(n, pixel_type) \
|
||||
|
@ -539,12 +1111,12 @@ SAD_DUAL_NXN(64, uvg_pixel)
|
|||
|
||||
static unsigned pixels_calc_ssd_generic(const uvg_pixel *const ref, const uvg_pixel *const rec,
|
||||
const int ref_stride, const int rec_stride,
|
||||
const int width)
|
||||
const int width, const int height)
|
||||
{
|
||||
int ssd = 0;
|
||||
int y, x;
|
||||
|
||||
for (y = 0; y < width; ++y) {
|
||||
for (y = 0; y < height; ++y) {
|
||||
for (x = 0; x < width; ++x) {
|
||||
int diff = ref[x + y * ref_stride] - rec[x + y * rec_stride];
|
||||
ssd += diff * diff;
|
||||
|
@ -783,10 +1355,10 @@ static double pixel_var_generic(const uvg_pixel *arr, const uint32_t len)
|
|||
|
||||
|
||||
static void generate_residual_generic(const uvg_pixel* ref_in, const uvg_pixel* pred_in, int16_t* residual,
|
||||
int width, int ref_stride, int pred_stride)
|
||||
int width, int height, int ref_stride, int pred_stride)
|
||||
{
|
||||
int y, x;
|
||||
for (y = 0; y < width; ++y) {
|
||||
for (y = 0; y < height; ++y) {
|
||||
for (x = 0; x < width; ++x) {
|
||||
residual[x + y * width] = (int16_t)(ref_in[x + y * ref_stride] - pred_in[x + y * pred_stride]);
|
||||
}
|
||||
|
@ -897,6 +1469,7 @@ int uvg_strategy_register_picture_generic(void* opaque, uint8_t bitdepth)
|
|||
success &= uvg_strategyselector_register(opaque, "satd_32x32_dual", "generic", 0, &satd_32x32_dual_generic);
|
||||
success &= uvg_strategyselector_register(opaque, "satd_64x64_dual", "generic", 0, &satd_64x64_dual_generic);
|
||||
success &= uvg_strategyselector_register(opaque, "satd_any_size", "generic", 0, &satd_any_size_generic);
|
||||
success &= uvg_strategyselector_register(opaque, "satd_any_size_vtm", "generic", 0, &xGetHADs);
|
||||
success &= uvg_strategyselector_register(opaque, "satd_any_size_quad", "generic", 0, &satd_any_size_quad_generic);
|
||||
|
||||
success &= uvg_strategyselector_register(opaque, "pixels_calc_ssd", "generic", 0, &pixels_calc_ssd_generic);
|
||||
|
|
|
@ -44,7 +44,6 @@
|
|||
#include "fast_coeff_cost.h"
|
||||
#include "reshape.h"
|
||||
|
||||
#define QUANT_SHIFT 14
|
||||
/**
|
||||
* \brief quantize transformed coefficients
|
||||
*
|
||||
|
@ -62,22 +61,28 @@ void uvg_quant_generic(
|
|||
uint8_t lfnst_idx)
|
||||
{
|
||||
const encoder_control_t * const encoder = state->encoder_control;
|
||||
const uint32_t log2_block_size = uvg_g_convert_to_bit[width] + 2;
|
||||
const uint32_t * const scan = uvg_g_sig_last_scan[scan_idx][log2_block_size - 1];
|
||||
const uint32_t log2_tr_width = uvg_g_convert_to_log2[width];
|
||||
const uint32_t log2_tr_height = uvg_g_convert_to_log2[height];
|
||||
const uint32_t * const scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_idx, log2_tr_width, log2_tr_height);
|
||||
|
||||
int32_t qp_scaled = uvg_get_scaled_qp(color, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]);
|
||||
qp_scaled = transform_skip ? MAX(qp_scaled, 4 + 6 * MIN_QP_PRIME_TS) : qp_scaled;
|
||||
uint32_t log2_tr_width = uvg_math_floor_log2(height);
|
||||
uint32_t log2_tr_height = uvg_math_floor_log2(width);
|
||||
bool needs_block_size_trafo_scale = !transform_skip && ((log2_tr_height + log2_tr_width) % 2 == 1);
|
||||
needs_block_size_trafo_scale |= 0; // Non log2 block size
|
||||
|
||||
const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)color;
|
||||
const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled % 6];
|
||||
const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_height + log2_tr_width) >> 1); //!< Represents scaling through forward transform
|
||||
const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_height + log2_tr_width) >> 1) - needs_block_size_trafo_scale; //!< Represents scaling through forward transform
|
||||
const int64_t q_bits = QUANT_SHIFT + qp_scaled / 6 + (transform_skip ? 0 : transform_shift );
|
||||
const int32_t add = ((state->frame->slicetype == UVG_SLICE_I) ? 171 : 85) << (q_bits - 9);
|
||||
const int32_t q_bits8 = q_bits - 8;
|
||||
|
||||
const int32_t default_quant_coeff = uvg_g_quant_scales[needs_block_size_trafo_scale][qp_scaled % 6];
|
||||
|
||||
uint32_t ac_sum = 0;
|
||||
|
||||
const bool use_scaling_list = state->encoder_control->cfg.scaling_list != UVG_SCALING_LIST_OFF;
|
||||
|
||||
if(lfnst_idx == 0){
|
||||
for (int32_t n = 0; n < width * height; n++) {
|
||||
int32_t level = coef[n];
|
||||
|
@ -86,7 +91,7 @@ void uvg_quant_generic(
|
|||
|
||||
sign = (level < 0 ? -1 : 1);
|
||||
|
||||
int32_t curr_quant_coeff = quant_coeff[n];
|
||||
int32_t curr_quant_coeff = use_scaling_list ? quant_coeff[n] : default_quant_coeff;
|
||||
level = (int32_t)((abs_level * curr_quant_coeff + add) >> q_bits);
|
||||
ac_sum += level;
|
||||
|
||||
|
@ -237,6 +242,7 @@ int uvg_quant_cbcr_residual_generic(
|
|||
encoder_state_t* const state,
|
||||
const cu_info_t* const cur_cu,
|
||||
const int width,
|
||||
const int height,
|
||||
const coeff_scan_order_t scan_order,
|
||||
const int in_stride, const int out_stride,
|
||||
const uvg_pixel* const u_ref_in,
|
||||
|
@ -247,28 +253,28 @@ int uvg_quant_cbcr_residual_generic(
|
|||
uvg_pixel* v_rec_out,
|
||||
coeff_t* coeff_out,
|
||||
bool early_skip,
|
||||
int lmcs_chroma_adj, enum uvg_tree_type tree_type
|
||||
) {
|
||||
int lmcs_chroma_adj, enum uvg_tree_type tree_type)
|
||||
{
|
||||
ALIGNED(64) int16_t u_residual[TR_MAX_WIDTH * TR_MAX_WIDTH];
|
||||
ALIGNED(64) int16_t v_residual[TR_MAX_WIDTH * TR_MAX_WIDTH];
|
||||
ALIGNED(64) int16_t combined_residual[TR_MAX_WIDTH * TR_MAX_WIDTH];
|
||||
ALIGNED(64) coeff_t coeff[TR_MAX_WIDTH * TR_MAX_WIDTH];
|
||||
|
||||
// TODO: this function is not fully converted to handle non-square blocks
|
||||
{
|
||||
int y, x;
|
||||
for (y = 0; y < width; ++y) {
|
||||
for (y = 0; y < height; ++y) {
|
||||
for (x = 0; x < width; ++x) {
|
||||
u_residual[x + y * width] = (int16_t)(u_ref_in[x + y * in_stride] - u_pred_in[x + y * in_stride]);
|
||||
v_residual[x + y * width] = (int16_t)(v_ref_in[x + y * in_stride] - v_pred_in[x + y * in_stride]);
|
||||
}
|
||||
}
|
||||
}
|
||||
uvg_generate_residual(u_ref_in, u_pred_in, u_residual, width, in_stride, in_stride);
|
||||
uvg_generate_residual(v_ref_in, v_pred_in, v_residual, width, in_stride, in_stride);
|
||||
uvg_generate_residual(u_ref_in, u_pred_in, u_residual, width, height, in_stride, in_stride);
|
||||
uvg_generate_residual(v_ref_in, v_pred_in, v_residual, width, height, in_stride, in_stride);
|
||||
|
||||
|
||||
const int cbf_mask = cur_cu->joint_cb_cr * (state->frame->jccr_sign ? -1 : 1);
|
||||
for (int y = 0; y < width; y++)
|
||||
for (int y = 0; y < height; y++)
|
||||
{
|
||||
for (int x = 0; x < width; x++)
|
||||
{
|
||||
|
@ -305,33 +311,44 @@ int uvg_quant_cbcr_residual_generic(
|
|||
}
|
||||
|
||||
|
||||
uvg_transform2d(state->encoder_control, combined_residual, coeff, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, cur_cu);
|
||||
if(cur_cu->cr_lfnst_idx) {
|
||||
uvg_fwd_lfnst(cur_cu, width, width, COLOR_UV, cur_cu->cr_lfnst_idx, coeff, tree_type);
|
||||
uvg_transform2d(state->encoder_control, combined_residual, coeff, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, cur_cu);
|
||||
uint8_t lfnst_idx = tree_type == UVG_CHROMA_T ? cur_cu->cr_lfnst_idx : cur_cu->lfnst_idx;
|
||||
if(lfnst_idx) {
|
||||
uvg_fwd_lfnst(cur_cu, width, height, COLOR_UV, lfnst_idx, coeff, tree_type, state->collocated_luma_mode);
|
||||
}
|
||||
|
||||
if (state->encoder_control->cfg.rdoq_enable &&
|
||||
int abs_sum = 0;
|
||||
if (!false && state->encoder_control->cfg.dep_quant) {
|
||||
uvg_dep_quant(
|
||||
state,
|
||||
cur_cu,
|
||||
width,
|
||||
height,
|
||||
coeff,
|
||||
coeff_out,
|
||||
COLOR_U,
|
||||
tree_type,
|
||||
&abs_sum,
|
||||
state->encoder_control->cfg.scaling_list);
|
||||
}
|
||||
else if (state->encoder_control->cfg.rdoq_enable &&
|
||||
(width > 4 || !state->encoder_control->cfg.rdoq_skip))
|
||||
{
|
||||
int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth;
|
||||
tr_depth += (cur_cu->part_size == SIZE_NxN ? 1 : 0);
|
||||
uvg_rdoq(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
|
||||
scan_order, cur_cu->type, tr_depth, cur_cu->cbf,
|
||||
cur_cu->cr_lfnst_idx);
|
||||
uvg_rdoq(state, coeff, coeff_out, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
|
||||
scan_order, cur_cu->type, cur_cu->cbf, lfnst_idx, 0);
|
||||
}
|
||||
else if (state->encoder_control->cfg.rdoq_enable && false) {
|
||||
uvg_ts_rdoq(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 2 ? COLOR_V : COLOR_U,
|
||||
uvg_ts_rdoq(state, coeff, coeff_out, width, height, cur_cu->joint_cb_cr == 2 ? COLOR_V : COLOR_U,
|
||||
scan_order);
|
||||
}
|
||||
else {
|
||||
uvg_quant(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
|
||||
scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false, cur_cu->lfnst_idx);
|
||||
uvg_quant(state, coeff, coeff_out, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
|
||||
scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false, lfnst_idx);
|
||||
}
|
||||
|
||||
int8_t has_coeffs = 0;
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < width * width; ++i) {
|
||||
for (i = 0; i < width * height; ++i) {
|
||||
if (coeff_out[i] != 0) {
|
||||
has_coeffs = 1;
|
||||
break;
|
||||
|
@ -342,13 +359,13 @@ int uvg_quant_cbcr_residual_generic(
|
|||
if (has_coeffs && !early_skip) {
|
||||
|
||||
// Get quantized residual. (coeff_out -> coeff -> residual)
|
||||
uvg_dequant(state, coeff_out, coeff, width, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
|
||||
uvg_dequant(state, coeff_out, coeff, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
|
||||
cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false);
|
||||
if (cur_cu->cr_lfnst_idx) {
|
||||
uvg_inv_lfnst(cur_cu, width, width, COLOR_UV, cur_cu->cr_lfnst_idx, coeff, tree_type);
|
||||
if (lfnst_idx) {
|
||||
uvg_inv_lfnst(cur_cu, width, height, COLOR_UV, lfnst_idx, coeff, tree_type, state->collocated_luma_mode);
|
||||
}
|
||||
|
||||
uvg_itransform2d(state->encoder_control, combined_residual, coeff, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, cur_cu);
|
||||
uvg_itransform2d(state->encoder_control, combined_residual, coeff, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, cur_cu);
|
||||
|
||||
|
||||
//if (state->tile->frame->lmcs_aps->m_sliceReshapeInfo.enableChromaAdj && color != COLOR_Y) {
|
||||
|
@ -371,7 +388,7 @@ int uvg_quant_cbcr_residual_generic(
|
|||
//}
|
||||
const int temp = cur_cu->joint_cb_cr * (state->frame->jccr_sign ? -1 : 1);
|
||||
// Get quantized reconstruction. (residual + pred_in -> rec_out)
|
||||
for (int y = 0; y < width; y++) {
|
||||
for (int y = 0; y < height; y++) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
if (temp == 2) {
|
||||
u_residual[x + y * width] = combined_residual[x + y * width];
|
||||
|
@ -400,7 +417,7 @@ int uvg_quant_cbcr_residual_generic(
|
|||
}
|
||||
}
|
||||
}
|
||||
for (int y = 0; y < width; ++y) {
|
||||
for (int y = 0; y < height; ++y) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
int16_t u_val = u_residual[x + y * width] + u_pred_in[x + y * in_stride];
|
||||
u_rec_out[x + y * out_stride] = (uvg_pixel)CLIP(0, PIXEL_MAX, u_val);
|
||||
|
@ -413,7 +430,7 @@ int uvg_quant_cbcr_residual_generic(
|
|||
// With no coeffs and rec_out == pred_int we skip copying the coefficients
|
||||
// because the reconstruction is just the prediction.
|
||||
|
||||
for (int y = 0; y < width; ++y) {
|
||||
for (int y = 0; y < height; ++y) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
u_rec_out[x + y * out_stride] = u_pred_in[x + y * in_stride];
|
||||
v_rec_out[x + y * out_stride] = v_pred_in[x + y * in_stride];
|
||||
|
@ -441,7 +458,7 @@ int uvg_quant_cbcr_residual_generic(
|
|||
* \returns Whether coeff_out contains any non-zero coefficients.
|
||||
*/
|
||||
int uvg_quantize_residual_generic(encoder_state_t *const state,
|
||||
const cu_info_t *const cur_cu, const int width, const color_t color,
|
||||
const cu_info_t *const cur_cu, const int width, const int height, const color_t color,
|
||||
const coeff_scan_order_t scan_order, const int use_trskip,
|
||||
const int in_stride, const int out_stride,
|
||||
const uvg_pixel *const ref_in, const uvg_pixel *const pred_in,
|
||||
|
@ -454,19 +471,19 @@ int uvg_quantize_residual_generic(encoder_state_t *const state,
|
|||
|
||||
int has_coeffs = 0;
|
||||
|
||||
assert(width <= TR_MAX_WIDTH);
|
||||
assert(width >= TR_MIN_WIDTH);
|
||||
|
||||
const int height = width; // TODO: height for non-square blocks
|
||||
// With ISP these checks no longer apply, since width and height 2 is now possible
|
||||
// With MTT even 1x16 and 16x1 ISP splits are possible
|
||||
//assert(width <= TR_MAX_WIDTH && height <= TR_MAX_WIDTH);
|
||||
//assert(width >= TR_MIN_WIDTH && height >= TR_MIN_WIDTH);
|
||||
|
||||
// Get residual. (ref_in - pred_in -> residual)
|
||||
uvg_generate_residual(ref_in, pred_in, residual, width, in_stride, in_stride);
|
||||
uvg_generate_residual(ref_in, pred_in, residual, width, height, in_stride, in_stride);
|
||||
|
||||
if (state->tile->frame->lmcs_aps->m_sliceReshapeInfo.enableChromaAdj && color != COLOR_Y) {
|
||||
int y, x;
|
||||
int sign, absval;
|
||||
int maxAbsclipBD = (1 << UVG_BIT_DEPTH) - 1;
|
||||
for (y = 0; y < width; ++y) {
|
||||
for (y = 0; y < height; ++y) {
|
||||
for (x = 0; x < width; ++x) {
|
||||
sign = residual[x + y * width] >= 0 ? 1 : -1;
|
||||
absval = sign * residual[x + y * width];
|
||||
|
@ -477,43 +494,54 @@ int uvg_quantize_residual_generic(encoder_state_t *const state,
|
|||
|
||||
// Transform residual. (residual -> coeff)
|
||||
if (use_trskip) {
|
||||
uvg_transformskip(state->encoder_control, residual, coeff, width);
|
||||
uvg_transformskip(state->encoder_control, residual, coeff, width, height);
|
||||
}
|
||||
else {
|
||||
uvg_transform2d(state->encoder_control, residual, coeff, width, color, cur_cu);
|
||||
uvg_transform2d(state->encoder_control, residual, coeff, width, height, color, cur_cu);
|
||||
}
|
||||
|
||||
const uint8_t lfnst_index = color == COLOR_Y ? cur_cu->lfnst_idx : cur_cu->cr_lfnst_idx;
|
||||
const uint8_t lfnst_index = tree_type != UVG_CHROMA_T || color == COLOR_Y ? cur_cu->lfnst_idx : cur_cu->cr_lfnst_idx;
|
||||
|
||||
if (state->encoder_control->cfg.lfnst && cur_cu->type == CU_INTRA) {
|
||||
// Forward low frequency non-separable transform
|
||||
uvg_fwd_lfnst(cur_cu, width, height, color, lfnst_index, coeff, tree_type);
|
||||
uvg_fwd_lfnst(cur_cu, width, height, color, lfnst_index, coeff, tree_type, state->collocated_luma_mode);
|
||||
}
|
||||
|
||||
|
||||
// Quantize coeffs. (coeff -> coeff_out)
|
||||
|
||||
if (state->encoder_control->cfg.rdoq_enable &&
|
||||
int abs_sum = 0;
|
||||
if (!use_trskip && state->encoder_control->cfg.dep_quant) {
|
||||
uvg_dep_quant(
|
||||
state,
|
||||
cur_cu,
|
||||
width,
|
||||
height,
|
||||
coeff,
|
||||
coeff_out,
|
||||
color,
|
||||
tree_type,
|
||||
&abs_sum,
|
||||
state->encoder_control->cfg.scaling_list);
|
||||
}
|
||||
else if (state->encoder_control->cfg.rdoq_enable &&
|
||||
(width > 4 || !state->encoder_control->cfg.rdoq_skip) && !use_trskip)
|
||||
{
|
||||
int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth;
|
||||
tr_depth += (cur_cu->part_size == SIZE_NxN ? 1 : 0);
|
||||
uvg_rdoq(state, coeff, coeff_out, width, width, color,
|
||||
scan_order, cur_cu->type, tr_depth, cur_cu->cbf,
|
||||
lfnst_index);
|
||||
uvg_rdoq(state, coeff, coeff_out, width, height, color,
|
||||
scan_order, cur_cu->type, cur_cu->cbf, lfnst_index, color == 0 ? cur_cu->tr_idx : 0);
|
||||
} else if(state->encoder_control->cfg.rdoq_enable && use_trskip) {
|
||||
uvg_ts_rdoq(state, coeff, coeff_out, width, width, color,
|
||||
uvg_ts_rdoq(state, coeff, coeff_out, width, height, color,
|
||||
scan_order);
|
||||
} else {
|
||||
|
||||
uvg_quant(state, coeff, coeff_out, width, width, color,
|
||||
uvg_quant(state, coeff, coeff_out, width, height, color,
|
||||
scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && color == COLOR_Y, lfnst_index);
|
||||
}
|
||||
|
||||
// Check if there are any non-zero coefficients.
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < width * width; ++i) {
|
||||
for (i = 0; i < width * height; ++i) {
|
||||
if (coeff_out[i] != 0) {
|
||||
has_coeffs = 1;
|
||||
break;
|
||||
|
@ -527,25 +555,25 @@ int uvg_quantize_residual_generic(encoder_state_t *const state,
|
|||
int y, x;
|
||||
|
||||
// Get quantized residual. (coeff_out -> coeff -> residual)
|
||||
uvg_dequant(state, coeff_out, coeff, width, width, color,
|
||||
uvg_dequant(state, coeff_out, coeff, width, height, color,
|
||||
cur_cu->type, cur_cu->tr_idx == MTS_SKIP && color == COLOR_Y);
|
||||
|
||||
if (state->encoder_control->cfg.lfnst && cur_cu->type == CU_INTRA) {
|
||||
// Inverse low frequency non-separable transform
|
||||
uvg_inv_lfnst(cur_cu, width, height, color, lfnst_index, coeff, tree_type);
|
||||
uvg_inv_lfnst(cur_cu, width, height, color, lfnst_index, coeff, tree_type, state->collocated_luma_mode);
|
||||
}
|
||||
if (use_trskip) {
|
||||
uvg_itransformskip(state->encoder_control, residual, coeff, width);
|
||||
uvg_itransformskip(state->encoder_control, residual, coeff, width, height);
|
||||
}
|
||||
else {
|
||||
uvg_itransform2d(state->encoder_control, residual, coeff, width, color, cur_cu);
|
||||
uvg_itransform2d(state->encoder_control, residual, coeff, width, height, color, cur_cu);
|
||||
}
|
||||
|
||||
if (state->tile->frame->lmcs_aps->m_sliceReshapeInfo.enableChromaAdj && color != COLOR_Y) {
|
||||
int y, x;
|
||||
int sign, absval;
|
||||
int maxAbsclipBD = (1 << UVG_BIT_DEPTH) - 1;
|
||||
for (y = 0; y < width; ++y) {
|
||||
for (y = 0; y < height; ++y) {
|
||||
for (x = 0; x < width; ++x) {
|
||||
residual[x + y * width] = (int16_t)CLIP((int16_t)(-maxAbsclipBD - 1), (int16_t)maxAbsclipBD, residual[x + y * width]);
|
||||
sign = residual[x + y * width] >= 0 ? 1 : -1;
|
||||
|
@ -561,7 +589,7 @@ int uvg_quantize_residual_generic(encoder_state_t *const state,
|
|||
}
|
||||
|
||||
// Get quantized reconstruction. (residual + pred_in -> rec_out)
|
||||
for (y = 0; y < width; ++y) {
|
||||
for (y = 0; y < height; ++y) {
|
||||
for (x = 0; x < width; ++x) {
|
||||
int16_t val = residual[x + y * width] + pred_in[x + y * in_stride];
|
||||
rec_out[x + y * out_stride] = (uvg_pixel)CLIP(0, PIXEL_MAX, val);
|
||||
|
@ -573,7 +601,7 @@ int uvg_quantize_residual_generic(encoder_state_t *const state,
|
|||
// because the reconstruction is just the prediction.
|
||||
int y, x;
|
||||
|
||||
for (y = 0; y < width; ++y) {
|
||||
for (y = 0; y < height; ++y) {
|
||||
for (x = 0; x < width; ++x) {
|
||||
rec_out[x + y * out_stride] = pred_in[x + y * in_stride];
|
||||
}
|
||||
|
@ -590,23 +618,29 @@ int uvg_quantize_residual_generic(encoder_state_t *const state,
|
|||
void uvg_dequant_generic(const encoder_state_t * const state, coeff_t *q_coef, coeff_t *coef, int32_t width, int32_t height,color_t color, int8_t block_type, int8_t transform_skip)
|
||||
{
|
||||
const encoder_control_t * const encoder = state->encoder_control;
|
||||
if(encoder->cfg.dep_quant && !transform_skip) {
|
||||
uvg_dep_quant_dequant(state, block_type, width, height, color, q_coef, coef, encoder->cfg.scaling_list);
|
||||
return;
|
||||
}
|
||||
int32_t shift,add,coeff_q;
|
||||
int32_t n;
|
||||
int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((uvg_math_floor_log2(width) + uvg_math_floor_log2(height)) >> 1); // Represents scaling through forward transform
|
||||
const uint32_t log2_tr_width = uvg_g_convert_to_log2[width];
|
||||
const uint32_t log2_tr_height = uvg_g_convert_to_log2[height];
|
||||
int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_width + log2_tr_height) >> 1); // Represents scaling through forward transform
|
||||
|
||||
bool needs_block_size_trafo_scale = !transform_skip && ((log2_tr_height + log2_tr_width) % 2 == 1);
|
||||
needs_block_size_trafo_scale |= 0; // Non log2 block size
|
||||
|
||||
int32_t qp_scaled = uvg_get_scaled_qp(color, state->qp, (encoder->bitdepth-8)*6, encoder->qp_map[0]);
|
||||
qp_scaled = transform_skip ? MAX(qp_scaled, 4 + 6 * MIN_QP_PRIME_TS) : qp_scaled;
|
||||
|
||||
shift = 20 - QUANT_SHIFT - (transform_skip ? 0 : transform_shift);
|
||||
shift = 20 - QUANT_SHIFT - (transform_skip ? 0 : transform_shift - needs_block_size_trafo_scale);
|
||||
|
||||
if (encoder->scaling_list.enable)
|
||||
{
|
||||
uint32_t log2_tr_width = uvg_math_floor_log2(height) + 2;
|
||||
uint32_t log2_tr_height = uvg_math_floor_log2(width) + 2;
|
||||
int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)(color);
|
||||
|
||||
const int32_t *dequant_coef = encoder->scaling_list.de_quant_coeff[log2_tr_width -2][log2_tr_height -2][scalinglist_type][qp_scaled%6];
|
||||
const int32_t *dequant_coef = encoder->scaling_list.de_quant_coeff[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled%6];
|
||||
shift += 4;
|
||||
|
||||
if (shift >qp_scaled / 6) {
|
||||
|
@ -624,7 +658,7 @@ void uvg_dequant_generic(const encoder_state_t * const state, coeff_t *q_coef, c
|
|||
}
|
||||
}
|
||||
} else {
|
||||
int32_t scale = uvg_g_inv_quant_scales[qp_scaled%6] << (qp_scaled/6);
|
||||
int32_t scale = uvg_g_inv_quant_scales[needs_block_size_trafo_scale][qp_scaled%6] << (qp_scaled/6);
|
||||
add = 1 << (shift-1);
|
||||
|
||||
for (n = 0; n < width * height; n++) {
|
||||
|
@ -651,14 +685,15 @@ static INLINE void get_coeff_weights(uint64_t wts_packed, uint16_t *weights)
|
|||
weights[3] = (wts_packed >> 48) & 0xffff;
|
||||
}
|
||||
|
||||
static uint32_t fast_coeff_cost_generic(const coeff_t *coeff, int32_t width, uint64_t weights)
|
||||
static uint32_t fast_coeff_cost_generic(const coeff_t *coeff, int32_t width, int32_t height, uint64_t weights)
|
||||
{
|
||||
assert((width == height) && "Non-square block handling not implemented for this function.");
|
||||
uint32_t sum = 0;
|
||||
uint16_t weights_unpacked[4];
|
||||
|
||||
get_coeff_weights(weights, weights_unpacked);
|
||||
|
||||
for (int32_t i = 0; i < width * width; i++) {
|
||||
for (int32_t i = 0; i < width * height; i++) {
|
||||
int16_t curr = coeff[i];
|
||||
uint32_t curr_abs = abs(curr);
|
||||
if (curr_abs > 3) {
|
||||
|
|
|
@ -44,8 +44,6 @@
|
|||
#include "uvg266.h"
|
||||
#include "tables.h"
|
||||
|
||||
#define QUANT_SHIFT 14
|
||||
|
||||
int uvg_strategy_register_quant_generic(void* opaque, uint8_t bitdepth);
|
||||
void uvg_quant_generic(
|
||||
const encoder_state_t * const state,
|
||||
|
@ -60,7 +58,7 @@ void uvg_quant_generic(
|
|||
uint8_t lfnst_idx);
|
||||
|
||||
int uvg_quantize_residual_generic(encoder_state_t *const state,
|
||||
const cu_info_t *const cur_cu, const int width, const color_t color,
|
||||
const cu_info_t *const cur_cu, const int width, const int height, const color_t color,
|
||||
const coeff_scan_order_t scan_order, const int use_trskip,
|
||||
const int in_stride, const int out_stride,
|
||||
const uvg_pixel *const ref_in, const uvg_pixel *const pred_in,
|
||||
|
@ -71,6 +69,7 @@ int uvg_quant_cbcr_residual_generic(
|
|||
encoder_state_t* const state,
|
||||
const cu_info_t* const cur_cu,
|
||||
const int width,
|
||||
const int height,
|
||||
const coeff_scan_order_t scan_order,
|
||||
const int in_stride, const int out_stride,
|
||||
const uvg_pixel* const u_ref_in,
|
||||
|
|
|
@ -44,6 +44,7 @@ dct_func * uvg_dct_4x4 = 0;
|
|||
dct_func * uvg_dct_8x8 = 0;
|
||||
dct_func * uvg_dct_16x16 = 0;
|
||||
dct_func * uvg_dct_32x32 = 0;
|
||||
dct_func * uvg_dct_non_square = 0;
|
||||
|
||||
dct_func * uvg_fast_inverse_dst_4x4 = 0;
|
||||
|
||||
|
@ -56,16 +57,19 @@ void(*uvg_mts_dct)(int8_t bitdepth,
|
|||
color_t color,
|
||||
const cu_info_t *tu,
|
||||
int8_t width,
|
||||
int8_t height,
|
||||
const int16_t *input,
|
||||
int16_t *output,
|
||||
const int8_t mts_idx);
|
||||
const int8_t mts_type);
|
||||
|
||||
void(*uvg_mts_idct)(int8_t bitdepth,
|
||||
color_t color,
|
||||
const cu_info_t *tu,
|
||||
int8_t width,
|
||||
int8_t height,
|
||||
const int16_t *input,
|
||||
int16_t *output,
|
||||
const int8_t mts_idx);
|
||||
const int8_t mts_type);
|
||||
|
||||
|
||||
int uvg_strategy_register_dct(void* opaque, uint8_t bitdepth) {
|
||||
|
@ -90,8 +94,13 @@ int uvg_strategy_register_dct(void* opaque, uint8_t bitdepth) {
|
|||
*
|
||||
* \returns Pointer to the function.
|
||||
*/
|
||||
dct_func * uvg_get_dct_func(int8_t width, color_t color, cu_type_t type)
|
||||
dct_func * uvg_get_dct_func(int8_t width, int8_t height, color_t color, cu_type_t type)
|
||||
{
|
||||
if (width != height) {
|
||||
// Non-square block. Return generic dct for non-square blocks.
|
||||
assert(false && "This should never be called at this point. Non-square stuff is done inside mts_dct function.");
|
||||
//return uvg_dct_non_square;
|
||||
}
|
||||
switch (width) {
|
||||
case 4:
|
||||
//if (color == COLOR_Y && type == CU_INTRA) {
|
||||
|
@ -119,8 +128,13 @@ dct_func * uvg_get_dct_func(int8_t width, color_t color, cu_type_t type)
|
|||
*
|
||||
* \returns Pointer to the function.
|
||||
*/
|
||||
dct_func * uvg_get_idct_func(int8_t width, color_t color, cu_type_t type)
|
||||
dct_func * uvg_get_idct_func(int8_t width, int8_t height, color_t color, cu_type_t type)
|
||||
{
|
||||
if (width != height) {
|
||||
// Non-square block. Return generic dct for non-square blocks.
|
||||
assert(false && "This should never be called at this point. Non-square stuff is done inside mts_idct function.");
|
||||
//return uvg_idct_non_square;
|
||||
}
|
||||
switch (width) {
|
||||
case 4:
|
||||
//if (color == COLOR_Y && type == CU_INTRA) {
|
||||
|
|
|
@ -51,6 +51,7 @@ extern dct_func * uvg_dct_4x4;
|
|||
extern dct_func * uvg_dct_8x8;
|
||||
extern dct_func * uvg_dct_16x16;
|
||||
extern dct_func * uvg_dct_32x32;
|
||||
extern dct_func * uvg_dct_non_square;
|
||||
|
||||
extern dct_func * uvg_fast_inverse_dst_4x4;
|
||||
|
||||
|
@ -64,9 +65,10 @@ typedef void (mts_dct_func)(
|
|||
color_t color,
|
||||
const cu_info_t* tu,
|
||||
int8_t width,
|
||||
int8_t height,
|
||||
const int16_t* input,
|
||||
int16_t* output,
|
||||
const int8_t mts_idx);
|
||||
const int8_t mts_type);
|
||||
|
||||
extern mts_dct_func* uvg_mts_dct;
|
||||
|
||||
|
@ -75,15 +77,16 @@ typedef void (mts_idct_func)(
|
|||
color_t color,
|
||||
const cu_info_t* tu,
|
||||
int8_t width,
|
||||
int8_t height,
|
||||
const int16_t* input,
|
||||
int16_t* output,
|
||||
const int8_t mts_idx);
|
||||
const int8_t mts_type);
|
||||
|
||||
extern mts_idct_func* uvg_mts_idct;
|
||||
|
||||
int uvg_strategy_register_dct(void* opaque, uint8_t bitdepth);
|
||||
dct_func * uvg_get_dct_func(int8_t width, color_t color, cu_type_t type);
|
||||
dct_func * uvg_get_idct_func(int8_t width, color_t color, cu_type_t type);
|
||||
dct_func * uvg_get_dct_func(int8_t width, int8_t height, color_t color, cu_type_t type);
|
||||
dct_func * uvg_get_idct_func(int8_t width, int8_t height, color_t color, cu_type_t type);
|
||||
|
||||
|
||||
|
||||
|
|
55
src/strategies/strategies-depquant.c
Normal file
55
src/strategies/strategies-depquant.c
Normal file
|
@ -0,0 +1,55 @@
|
|||
/*****************************************************************************
|
||||
* This file is part of uvg266 VVC encoder.
|
||||
*
|
||||
* Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright notice, this
|
||||
* list of conditions and the following disclaimer in the documentation and/or
|
||||
* other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
|
||||
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
****************************************************************************/
|
||||
|
||||
#include "strategies/strategies-depquant.h"
|
||||
|
||||
#include "strategies/avx2/depquant-avx2.h"
|
||||
#include "strategies/generic/depquant-generic.h"
|
||||
#include "strategyselector.h"
|
||||
|
||||
|
||||
// Define function pointers.
|
||||
dep_quant_decide_and_update_func* uvg_dep_quant_decide_and_update;
|
||||
find_first_non_zero_coeff_func* uvg_find_first_non_zero_coeff;
|
||||
|
||||
|
||||
/* Registers the dep_quant strategy implementations with the strategy selector. */
/* The generic implementation is always registered; the AVX2 implementation is */
/* registered only when uvg_g_hardware_flags.intel_flags.avx2 is set. */
/* `opaque` and `bitdepth` are forwarded unchanged to each registration call. */
/* Returns the AND-accumulated result of the registration calls that were made. */
int uvg_strategy_register_depquant(void *opaque, uint8_t bitdepth)
|
||||
{
|
||||
bool success = true;
|
||||
|
||||
success &= uvg_strategy_register_depquant_generic(opaque, bitdepth);
|
||||
|
||||
if (uvg_g_hardware_flags.intel_flags.avx2) {
|
||||
success &= uvg_strategy_register_depquant_avx2(opaque, bitdepth);
|
||||
}
|
||||
return success;
|
||||
}
|
88
src/strategies/strategies-depquant.h
Normal file
88
src/strategies/strategies-depquant.h
Normal file
|
@ -0,0 +1,88 @@
|
|||
#ifndef STRATEGIES_DEPQUANT_H_
|
||||
#define STRATEGIES_DEPQUANT_H_
|
||||
/*****************************************************************************
|
||||
* This file is part of uvg266 VVC encoder.
|
||||
*
|
||||
* Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright notice, this
|
||||
* list of conditions and the following disclaimer in the documentation and/or
|
||||
* other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
|
||||
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* \ingroup Optimization
|
||||
* \file
|
||||
* Interface for dependent quantization (dep_quant) functions.
|
||||
*/
|
||||
|
||||
#include "encoder.h"
|
||||
#include "encoderstate.h"
|
||||
#include "global.h" // IWYU pragma: keep
|
||||
#include "uvg266.h"
|
||||
#include "dep_quant.h"
|
||||
|
||||
|
||||
// Declare function types.
|
||||
typedef int(dep_quant_decide_and_update_func)(
|
||||
rate_estimator_t* re,
|
||||
context_store* ctxs,
|
||||
struct dep_quant_scan_info const* const scan_info,
|
||||
const coeff_t absCoeff,
|
||||
const uint32_t scan_pos,
|
||||
const uint32_t width_in_sbb,
|
||||
const uint32_t height_in_sbb,
|
||||
const NbInfoSbb next_nb_info_ssb,
|
||||
bool zeroOut,
|
||||
coeff_t quantCoeff,
|
||||
const uint32_t effWidth,
|
||||
const uint32_t effHeight,
|
||||
bool is_chroma);
|
||||
|
||||
typedef void (find_first_non_zero_coeff_func)(
|
||||
const coeff_t* srcCoeff,
|
||||
const bool enableScalingLists,
|
||||
const context_store* const dep_quant_context,
|
||||
const uint32_t* const scan,
|
||||
const int32_t* q_coeff,
|
||||
int* firstTestPos,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
|
||||
// Declare function pointers.
|
||||
extern dep_quant_decide_and_update_func* uvg_dep_quant_decide_and_update;
|
||||
extern find_first_non_zero_coeff_func* uvg_find_first_non_zero_coeff;
|
||||
|
||||
int uvg_strategy_register_depquant(void* opaque, uint8_t bitdepth);
|
||||
|
||||
|
||||
#define STRATEGIES_DEPQUANT_EXPORTS \
|
||||
{"dep_quant_decide_and_update", (void**)&uvg_dep_quant_decide_and_update}, \
|
||||
{"find_first_non_zero_coeff", (void**)&uvg_find_first_non_zero_coeff}, \
|
||||
|
||||
|
||||
|
||||
#endif //STRATEGIES_DEPQUANT_H_
|
|
@ -49,7 +49,7 @@
|
|||
typedef unsigned (encode_coeff_nxn_func)(encoder_state_t * const state,
|
||||
cabac_data_t * const cabac,
|
||||
const coeff_t *coeff,
|
||||
uint8_t width,
|
||||
const cu_loc_t * const loc,
|
||||
uint8_t color,
|
||||
int8_t scan_mode,
|
||||
cu_info_t* cur_cu,
|
||||
|
|
|
@ -38,22 +38,26 @@
|
|||
* Interface for intra prediction functions.
|
||||
*/
|
||||
|
||||
#include "cu.h"
|
||||
#include "global.h" // IWYU pragma: keep
|
||||
#include "intra.h"
|
||||
#include "uvg266.h"
|
||||
|
||||
|
||||
typedef void (angular_pred_func)(
|
||||
const int_fast8_t log2_width,
|
||||
const cu_loc_t* const cu_loc,
|
||||
const int_fast8_t intra_mode,
|
||||
const int_fast8_t channel_type,
|
||||
const uvg_pixel *const in_ref_above,
|
||||
const uvg_pixel *const in_ref_left,
|
||||
uvg_pixel *const dst,
|
||||
const uint8_t multi_ref_idx);
|
||||
const uint8_t multi_ref_idx,
|
||||
const uint8_t isp_mode,
|
||||
const int cu_dim);
|
||||
|
||||
typedef void (intra_pred_planar_func)(
|
||||
const int_fast8_t log2_width,
|
||||
const cu_loc_t* const cu_loc,
|
||||
color_t color,
|
||||
const uvg_pixel *const ref_top,
|
||||
const uvg_pixel *const ref_left,
|
||||
uvg_pixel *const dst);
|
||||
|
@ -67,8 +71,8 @@ typedef void (intra_pred_filtered_dc_func)(
|
|||
|
||||
typedef void (pdpc_planar_dc_func)(
|
||||
const int mode,
|
||||
const int width,
|
||||
const int log2_width,
|
||||
const cu_loc_t* const cu_loc,
|
||||
const color_t color,
|
||||
const uvg_intra_ref *const used_ref,
|
||||
uvg_pixel *const dst);
|
||||
|
||||
|
|
|
@ -37,6 +37,7 @@
|
|||
#include "strategies/generic/picture-generic.h"
|
||||
#include "strategies/sse2/picture-sse2.h"
|
||||
#include "strategies/sse41/picture-sse41.h"
|
||||
#include "strategies/sse42/picture-sse42.h"
|
||||
#include "strategyselector.h"
|
||||
|
||||
|
||||
|
@ -70,6 +71,7 @@ cost_pixel_nxn_multi_func * uvg_satd_32x32_dual = 0;
|
|||
cost_pixel_nxn_multi_func * uvg_satd_64x64_dual = 0;
|
||||
|
||||
cost_pixel_any_size_func * uvg_satd_any_size = 0;
|
||||
cost_pixel_any_size_func * uvg_satd_any_size_vtm = 0;
|
||||
cost_pixel_any_size_multi_func * uvg_satd_any_size_quad = 0;
|
||||
|
||||
pixels_calc_ssd_func * uvg_pixels_calc_ssd = 0;
|
||||
|
@ -115,13 +117,14 @@ int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth) {
|
|||
/**
|
||||
* \brief Get a function that calculates SATD for NxN block.
|
||||
*
|
||||
* \param n Width of the region for which SATD is calculated.
|
||||
* \param width Width of the region for which SATD is calculated.
|
||||
*
|
||||
* \returns Pointer to cost_pixel_nxn_func.
|
||||
*/
|
||||
cost_pixel_nxn_func * uvg_pixels_get_satd_func(unsigned n)
|
||||
cost_pixel_nxn_func * uvg_pixels_get_satd_func(unsigned width, unsigned height)
|
||||
{
|
||||
switch (n) {
|
||||
if(width == height) {
|
||||
switch (width) {
|
||||
case 4:
|
||||
return uvg_satd_4x4;
|
||||
case 8:
|
||||
|
@ -136,18 +139,21 @@ cost_pixel_nxn_func * uvg_pixels_get_satd_func(unsigned n)
|
|||
return NULL;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Get a function that calculates SAD for NxN block.
|
||||
*
|
||||
* \param n Width of the region for which SAD is calculated.
|
||||
* \param width Width of the region for which SAD is calculated.
|
||||
*
|
||||
* \returns Pointer to cost_pixel_nxn_func.
|
||||
*/
|
||||
cost_pixel_nxn_func * uvg_pixels_get_sad_func(unsigned n)
|
||||
cost_pixel_nxn_func * uvg_pixels_get_sad_func(unsigned width, unsigned height)
|
||||
{
|
||||
switch (n) {
|
||||
if(width == height) {
|
||||
switch (width) {
|
||||
case 4:
|
||||
return uvg_sad_4x4;
|
||||
case 8:
|
||||
|
@ -162,17 +168,21 @@ cost_pixel_nxn_func * uvg_pixels_get_sad_func(unsigned n)
|
|||
return NULL;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Get a function that calculates SATDs for 2 NxN blocks.
|
||||
*
|
||||
* \param n Width of the region for which SATD is calculated.
|
||||
* \param width Width of the region for which SATD is calculated.
|
||||
* \param height Height of the region for which SATD is calculated.
|
||||
*
|
||||
* \returns Pointer to cost_pixel_nxn_multi_func.
|
||||
*/
|
||||
cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned n)
|
||||
cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned width, unsigned height)
|
||||
{
|
||||
switch (n) {
|
||||
if(width == height) {
|
||||
switch (width) {
|
||||
case 4:
|
||||
return uvg_satd_4x4_dual;
|
||||
case 8:
|
||||
|
@ -187,18 +197,21 @@ cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned n)
|
|||
return NULL;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Get a function that calculates SADs for 2 NxN blocks.
|
||||
*
|
||||
* \param n Width of the region for which SAD is calculated.
|
||||
* \param width Width of the region for which SAD is calculated.
|
||||
*
|
||||
* \returns Pointer to cost_pixel_nxn_multi_func.
|
||||
*/
|
||||
cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n)
|
||||
cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned width, unsigned height)
|
||||
{
|
||||
switch (n) {
|
||||
if(width == height) {
|
||||
switch (width) {
|
||||
case 4:
|
||||
return uvg_sad_4x4_dual;
|
||||
case 8:
|
||||
|
@ -213,6 +226,8 @@ cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n)
|
|||
return NULL;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Precomputed CRC32C lookup table for polynomial 0x04C11DB7
|
||||
const uint32_t uvg_crc_table[256] = {
|
||||
|
|
|
@ -124,7 +124,7 @@ typedef unsigned (cost_pixel_any_size_func)(
|
|||
typedef void (cost_pixel_nxn_multi_func)(const pred_buffer preds, const uvg_pixel *orig, unsigned num_modes, unsigned *costs_out);
|
||||
typedef void (cost_pixel_any_size_multi_func)(int width, int height, const uvg_pixel **preds, const int stride, const uvg_pixel *orig, const int orig_stride, unsigned num_modes, unsigned *costs_out, int8_t *valid);
|
||||
|
||||
typedef unsigned (pixels_calc_ssd_func)(const uvg_pixel *const ref, const uvg_pixel *const rec, const int ref_stride, const int rec_stride, const int width);
|
||||
typedef unsigned (pixels_calc_ssd_func)(const uvg_pixel *const ref, const uvg_pixel *const rec, const int ref_stride, const int rec_stride, const int width, const int height);
|
||||
typedef optimized_sad_func_ptr_t (get_optimized_sad_func)(int32_t);
|
||||
typedef uint32_t (ver_sad_func)(const uvg_pixel *pic_data, const uvg_pixel *ref_data,
|
||||
int32_t block_width, int32_t block_height,
|
||||
|
@ -149,7 +149,7 @@ typedef void (inter_recon_bipred_func)(lcu_t * const lcu,
|
|||
|
||||
typedef double (pixel_var_func)(const uvg_pixel *buf, const uint32_t len);
|
||||
|
||||
typedef void (generate_residual_func)(const uvg_pixel* ref_in, const uvg_pixel* pred_in, int16_t* residual, int width, int ref_stride, int pred_stride);
|
||||
typedef void (generate_residual_func)(const uvg_pixel* ref_in, const uvg_pixel* pred_in, int16_t* residual, int width, int height, int ref_stride, int pred_stride);
|
||||
|
||||
|
||||
extern const uint32_t uvg_crc_table[256];
|
||||
|
@ -175,6 +175,7 @@ extern cost_pixel_nxn_func * uvg_satd_16x16;
|
|||
extern cost_pixel_nxn_func * uvg_satd_32x32;
|
||||
extern cost_pixel_nxn_func * uvg_satd_64x64;
|
||||
extern cost_pixel_any_size_func *uvg_satd_any_size;
|
||||
extern cost_pixel_any_size_func *uvg_satd_any_size_vtm;
|
||||
|
||||
extern cost_pixel_nxn_multi_func * uvg_sad_4x4_dual;
|
||||
extern cost_pixel_nxn_multi_func * uvg_sad_8x8_dual;
|
||||
|
@ -203,8 +204,8 @@ extern pixel_var_func *uvg_pixel_var;
|
|||
extern generate_residual_func* uvg_generate_residual;
|
||||
|
||||
int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth);
|
||||
cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned n);
|
||||
cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n);
|
||||
cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned width, unsigned height);
|
||||
cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned width, unsigned height);
|
||||
|
||||
#define STRATEGIES_PICTURE_EXPORTS \
|
||||
{"crc32c_4x4", (void**) &uvg_crc32c_4x4}, \
|
||||
|
@ -221,6 +222,7 @@ cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n);
|
|||
{"satd_32x32", (void**) &uvg_satd_32x32}, \
|
||||
{"satd_64x64", (void**) &uvg_satd_64x64}, \
|
||||
{"satd_any_size", (void**) &uvg_satd_any_size}, \
|
||||
{"satd_any_size_vtm", (void**) &uvg_satd_any_size_vtm}, \
|
||||
{"sad_4x4_dual", (void**) &uvg_sad_4x4_dual}, \
|
||||
{"sad_8x8_dual", (void**) &uvg_sad_8x8_dual}, \
|
||||
{"sad_16x16_dual", (void**) &uvg_sad_16x16_dual}, \
|
||||
|
|
|
@ -46,7 +46,8 @@ coeff_abs_sum_func *uvg_coeff_abs_sum;
|
|||
fast_coeff_cost_func *uvg_fast_coeff_cost;
|
||||
|
||||
|
||||
int uvg_strategy_register_quant(void* opaque, uint8_t bitdepth) {
|
||||
int uvg_strategy_register_quant(void *opaque, uint8_t bitdepth)
|
||||
{
|
||||
bool success = true;
|
||||
|
||||
success &= uvg_strategy_register_quant_generic(opaque, bitdepth);
|
||||
|
|
|
@ -45,12 +45,23 @@
|
|||
#include "tables.h"
|
||||
|
||||
// Declare function pointers.
|
||||
typedef unsigned (quant_func)(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width,
|
||||
int32_t height, color_t color, int8_t scan_idx, int8_t block_type, int8_t transform_skip, uint8_t lfnst_idx);
|
||||
typedef unsigned (quant_func)(
|
||||
const encoder_state_t * const state,
|
||||
coeff_t *coef,
|
||||
coeff_t *q_coef,
|
||||
int32_t width,
|
||||
int32_t height,
|
||||
color_t color,
|
||||
int8_t scan_idx,
|
||||
int8_t block_type,
|
||||
int8_t transform_skip,
|
||||
uint8_t lfnst_idx);
|
||||
|
||||
typedef unsigned (quant_cbcr_func)(
|
||||
encoder_state_t* const state,
|
||||
const cu_info_t* const cur_cu,
|
||||
const int width,
|
||||
const int height,
|
||||
const coeff_scan_order_t scan_order,
|
||||
const int in_stride, const int out_stride,
|
||||
const uvg_pixel* const u_ref_in,
|
||||
|
@ -63,16 +74,19 @@ typedef unsigned (quant_cbcr_func)(
|
|||
bool early_skip,
|
||||
int lmcs_chroma_adj,
|
||||
enum uvg_tree_type tree_type);
|
||||
|
||||
typedef unsigned (quant_residual_func)(encoder_state_t *const state,
|
||||
const cu_info_t *const cur_cu, const int width, const color_t color,
|
||||
const cu_info_t *const cur_cu, const int width, const int height, const color_t color,
|
||||
const coeff_scan_order_t scan_order, const int use_trskip,
|
||||
const int in_stride, const int out_stride,
|
||||
const uvg_pixel *const ref_in, const uvg_pixel *const pred_in,
|
||||
uvg_pixel *rec_out, coeff_t *coeff_out,
|
||||
bool early_skip, int lmcs_chroma_adj, enum uvg_tree_type tree_type);
|
||||
|
||||
typedef unsigned (dequant_func)(const encoder_state_t * const state, coeff_t *q_coef, coeff_t *coef, int32_t width,
|
||||
int32_t height, color_t color, int8_t block_type, int8_t transform_skip);
|
||||
typedef uint32_t (fast_coeff_cost_func)(const coeff_t *coeff, int32_t width, uint64_t weights);
|
||||
|
||||
typedef uint32_t (fast_coeff_cost_func)(const coeff_t *coeff, int32_t width, int32_t height, uint64_t weights);
|
||||
|
||||
typedef uint32_t (coeff_abs_sum_func)(const coeff_t *coeffs, size_t length);
|
||||
|
||||
|
|
|
@ -107,6 +107,10 @@ int uvg_strategyselector_init(int32_t cpuid, uint8_t bitdepth) {
|
|||
fprintf(stderr, "uvg_strategy_register_encode failed!\n");
|
||||
return 0;
|
||||
}
|
||||
if (!uvg_strategy_register_depquant(&strategies, bitdepth)) {
|
||||
fprintf(stderr, "uvg_strategy_register_depquant failed!\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
while(cur_strategy_to_select->fptr) {
|
||||
*(cur_strategy_to_select->fptr) = strategyselector_choose_for(&strategies, cur_strategy_to_select->strategy_type);
|
||||
|
|
|
@ -108,6 +108,7 @@ int uvg_strategyselector_register(void *opaque, const char *type, const char *st
|
|||
#include "strategies/strategies-intra.h"
|
||||
#include "strategies/strategies-sao.h"
|
||||
#include "strategies/strategies-encode.h"
|
||||
#include "strategies/strategies-depquant.h"
|
||||
#include "strategies/strategies-alf.h"
|
||||
|
||||
static const strategy_to_select_t strategies_to_select[] = {
|
||||
|
@ -120,6 +121,7 @@ static const strategy_to_select_t strategies_to_select[] = {
|
|||
STRATEGIES_SAO_EXPORTS
|
||||
STRATEGIES_ENCODE_EXPORTS
|
||||
STRATEGIES_ALF_EXPORTS
|
||||
STRATEGIES_DEPQUANT_EXPORTS
|
||||
{ NULL, NULL },
|
||||
};
|
||||
|
||||
|
|
2542
src/tables.c
2542
src/tables.c
File diff suppressed because it is too large
Load diff
|
@ -134,6 +134,15 @@ typedef enum
|
|||
*/
|
||||
extern const uint32_t* const uvg_g_sig_last_scan[3][5];
|
||||
extern const int8_t uvg_g_convert_to_bit[LCU_WIDTH + 1];
|
||||
extern const int8_t uvg_g_convert_to_log2[LCU_WIDTH + 1];
|
||||
extern const uint32_t uvg_g_log2_sbb_size[7 + 1][7 + 1][2];
|
||||
|
||||
#define SCAN_GROUP_TYPES 2
|
||||
#define MAX_LOG2_INDEX 7
|
||||
|
||||
#define SCAN_GROUP_UNGROUPED 0
|
||||
#define SCAN_GROUP_4X4 1
|
||||
|
||||
const uint32_t* const uvg_get_scan_order_table(int scan_group, int scan_type, int log2_w, int log2_h);
|
||||
|
||||
#endif //TABLES_H_
|
||||
|
|
667
src/transform.c
667
src/transform.c
File diff suppressed because it is too large
Load diff
|
@ -44,23 +44,28 @@
|
|||
#include "global.h" // IWYU pragma: keep
|
||||
|
||||
extern const uint8_t uvg_g_chroma_scale[58];
|
||||
extern const int16_t uvg_g_inv_quant_scales[6];
|
||||
extern const int16_t uvg_g_quant_scales[6];
|
||||
extern const int16_t uvg_g_inv_quant_scales[2][6];
|
||||
extern const int16_t uvg_g_quant_scales[2][6];
|
||||
|
||||
void uvg_transformskip(const encoder_control_t *encoder, int16_t *block,int16_t *coeff, int8_t block_size);
|
||||
void uvg_itransformskip(const encoder_control_t *encoder, int16_t *block,int16_t *coeff, int8_t block_size);
|
||||
#define COEFF_ORDER_LINEAR 0
|
||||
#define COEFF_ORDER_CU 1
|
||||
|
||||
void uvg_transformskip(const encoder_control_t *encoder, int16_t *block,int16_t *coeff, int8_t width, int8_t height);
|
||||
void uvg_itransformskip(const encoder_control_t *encoder, int16_t *block,int16_t *coeff, int8_t width, int8_t height);
|
||||
|
||||
void uvg_transform2d(const encoder_control_t * const encoder,
|
||||
int16_t *block,
|
||||
int16_t *coeff,
|
||||
int8_t block_size,
|
||||
int8_t block_width,
|
||||
int8_t block_height,
|
||||
color_t color,
|
||||
const cu_info_t *tu);
|
||||
|
||||
void uvg_itransform2d(const encoder_control_t * const encoder,
|
||||
int16_t *block,
|
||||
int16_t *coeff,
|
||||
int8_t block_size,
|
||||
int8_t block_width,
|
||||
int8_t block_height,
|
||||
color_t color,
|
||||
const cu_info_t *tu);
|
||||
|
||||
|
@ -69,11 +74,12 @@ int32_t uvg_get_scaled_qp(color_t color, int8_t qp, int8_t qp_offset, int8_t con
|
|||
|
||||
void uvg_derive_lfnst_constraints(
|
||||
cu_info_t* const pred_cu,
|
||||
const int depth,
|
||||
bool* constraints,
|
||||
const coeff_t* coeff,
|
||||
const int width,
|
||||
const int height);
|
||||
const int height,
|
||||
const vector2d_t * const ,
|
||||
color_t color);
|
||||
|
||||
typedef struct {
|
||||
double best_u_cost;
|
||||
|
@ -82,6 +88,10 @@ typedef struct {
|
|||
int best_u_index;
|
||||
int best_v_index;
|
||||
int best_combined_index;
|
||||
uint64_t u_distortion;
|
||||
uint64_t v_distortion;
|
||||
double u_bits;
|
||||
double v_bits;
|
||||
} uvg_chorma_ts_out_t;
|
||||
|
||||
void uvg_quantize_lcu_residual(
|
||||
|
@ -89,9 +99,7 @@ void uvg_quantize_lcu_residual(
|
|||
bool luma,
|
||||
bool chroma,
|
||||
const bool jccr,
|
||||
int32_t x,
|
||||
int32_t y,
|
||||
uint8_t depth,
|
||||
const cu_loc_t* cu_loc,
|
||||
cu_info_t *cur_cu,
|
||||
lcu_t* lcu,
|
||||
bool early_skip,
|
||||
|
@ -99,13 +107,10 @@ void uvg_quantize_lcu_residual(
|
|||
|
||||
void uvg_chroma_transform_search(
|
||||
encoder_state_t* const state,
|
||||
int depth,
|
||||
lcu_t* const lcu,
|
||||
cabac_data_t* temp_cabac,
|
||||
int8_t width,
|
||||
int8_t height,
|
||||
const cu_loc_t* const cu_loc,
|
||||
const int offset,
|
||||
const uint8_t mode,
|
||||
cu_info_t* pred_cu,
|
||||
uvg_pixel u_pred[1024],
|
||||
uvg_pixel v_pred[1024],
|
||||
|
@ -130,7 +135,8 @@ void uvg_fwd_lfnst(
|
|||
const color_t color,
|
||||
const uint16_t lfnst_idx,
|
||||
coeff_t *coeffs,
|
||||
enum uvg_tree_type tree_type);
|
||||
enum uvg_tree_type tree_type,
|
||||
int8_t luma_mode);
|
||||
|
||||
void uvg_inv_lfnst(
|
||||
const cu_info_t* cur_cu,
|
||||
|
@ -139,6 +145,7 @@ void uvg_inv_lfnst(
|
|||
const color_t color,
|
||||
const uint16_t lfnst_idx,
|
||||
coeff_t* coeffs,
|
||||
enum uvg_tree_type tree_type);
|
||||
enum uvg_tree_type tree_type,
|
||||
int8_t luma_mode);
|
||||
|
||||
#endif
|
||||
|
|
10
src/uvg266.h
10
src/uvg266.h
|
@ -338,7 +338,6 @@ typedef struct uvg_config
|
|||
int32_t trskip_max_size; /*!< \brief Transform skip max block size. */
|
||||
enum uvg_mts mts; /*< \brief flag to enable multiple transform selection*/
|
||||
int32_t mts_implicit; /*< \brief flag to enable implicit multiple transform selection*/
|
||||
int32_t tr_depth_intra; /*!< \brief Maximum transform depth for intra. */
|
||||
enum uvg_ime_algorithm ime_algorithm; /*!< \brief Integer motion estimation algorithm. */
|
||||
int32_t fme_level; /*!< \brief Fractional pixel motion estimation level (0: disabled, 1: enabled). */
|
||||
int8_t source_scan_type; /*!< \brief Source scan type (0: progressive, 1: top field first, 2: bottom field first).*/
|
||||
|
@ -526,6 +525,8 @@ typedef struct uvg_config
|
|||
/** \brief enable low frequency non-separable transform */
|
||||
int8_t lfnst;
|
||||
|
||||
/** \brief enable intra sub partitions*/
|
||||
int8_t isp;
|
||||
|
||||
int8_t jccr;
|
||||
|
||||
|
@ -542,9 +543,16 @@ typedef struct uvg_config
|
|||
|
||||
uint8_t dual_tree;
|
||||
|
||||
uint8_t min_qt_size[3]; /* intra, inter, dual tree chroma*/
|
||||
uint8_t max_bt_size[3]; /* intra, inter, dual tree chroma*/
|
||||
uint8_t max_tt_size[3]; /* intra, inter, dual tree chroma*/
|
||||
|
||||
uint8_t max_btt_depth[3]; /* intra, inter, dual tree chroma*/
|
||||
|
||||
uint8_t intra_rough_search_levels;
|
||||
|
||||
uint8_t ibc; /* \brief Intra Block Copy parameter */
|
||||
uint8_t dep_quant;
|
||||
} uvg_config;
|
||||
|
||||
/**
|
||||
|
|
|
@ -61,7 +61,7 @@ videoframe_t * uvg_videoframe_alloc(int32_t width,
|
|||
frame->sao_chroma = MALLOC(sao_info_t, frame->width_in_lcu * frame->height_in_lcu);
|
||||
if (cclm) {
|
||||
assert(chroma_format == UVG_CSP_420);
|
||||
frame->cclm_luma_rec = MALLOC(uvg_pixel, (((width + 7) & ~7) + FRAME_PADDING_LUMA) * (((height + 7) & ~7) + FRAME_PADDING_LUMA) / 4);
|
||||
frame->cclm_luma_rec = MALLOC(uvg_pixel, (((width + 7) & ~7) + FRAME_PADDING_LUMA) * (((height + 15) & ~7) + FRAME_PADDING_LUMA) / 4);
|
||||
frame->cclm_luma_rec_top_line = MALLOC(uvg_pixel, (((width + 7) & ~7) + FRAME_PADDING_LUMA) / 2 * CEILDIV(height, 64));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,7 +30,7 @@ def main(state_file: Path, ctx_names: list, ctx_count: int = 332, ctx_size: int
|
|||
with open(state_file, "rb") as file:
|
||||
try:
|
||||
while True:
|
||||
type_, x, y, depth, tree_type = file.read(15).decode().split()
|
||||
type_, x, y, depth, tree_type = file.read(23).decode().split()
|
||||
# Reset stored data at the beginning of the frame
|
||||
if x == '0' and y == '0' and type_ == "S" and tree_type != "2":
|
||||
if not was_zero_last:
|
||||
|
@ -38,7 +38,7 @@ def main(state_file: Path, ctx_names: list, ctx_count: int = 332, ctx_size: int
|
|||
ctx_store = dict()
|
||||
e_store = set()
|
||||
was_zero_last = True
|
||||
else:
|
||||
elif int(x) >= 64 and int(y) >= 64:
|
||||
was_zero_last = False
|
||||
|
||||
ctx = file.read(ctx_count * ctx_size)
|
||||
|
|
|
@ -111,7 +111,8 @@ static void setup_tests()
|
|||
tu.tr_idx = MTS_DST7_DST7 + trafo;
|
||||
tu.lfnst_idx = 0;
|
||||
tu.cr_lfnst_idx = 0;
|
||||
mts_generic(UVG_BIT_DEPTH, COLOR_Y, &tu, 1 << (LCU_MIN_LOG_W + block), dct_bufs[trafo*NUM_SIZES+block], dct_result[trafo][block], UVG_MTS_BOTH);
|
||||
tu.intra.isp_mode = 0;
|
||||
mts_generic(UVG_BIT_DEPTH, COLOR_Y, &tu, 1 << (LCU_MIN_LOG_W + block), 1 << (LCU_MIN_LOG_W + block), dct_bufs[trafo*NUM_SIZES+block], dct_result[trafo][block], UVG_MTS_BOTH);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -134,7 +135,8 @@ static void setup_tests()
|
|||
tu.tr_idx = MTS_DST7_DST7 + trafo;
|
||||
tu.lfnst_idx = 0;
|
||||
tu.cr_lfnst_idx = 0;
|
||||
idct_generic(UVG_BIT_DEPTH, COLOR_Y, &tu, 1 << (LCU_MIN_LOG_W + block), dct_bufs[trafo * NUM_SIZES + block], idct_result[trafo][block], UVG_MTS_BOTH);
|
||||
tu.intra.isp_mode = 0;
|
||||
idct_generic(UVG_BIT_DEPTH, COLOR_Y, &tu, 1 << (LCU_MIN_LOG_W + block), 1 << (LCU_MIN_LOG_W + block), dct_bufs[trafo * NUM_SIZES + block], idct_result[trafo][block], UVG_MTS_BOTH);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -156,6 +158,7 @@ TEST dct(void)
|
|||
{
|
||||
char testname[100];
|
||||
for (int blocksize = 0; blocksize < NUM_SIZES; blocksize++) {
|
||||
size_t size = 1 << (LCU_MIN_LOG_W + blocksize);
|
||||
for (int trafo = 0; trafo < NUM_TRANSFORM; trafo++) {
|
||||
sprintf(testname, "Block: %d x %d, trafo: %d", 1 << (LCU_MIN_LOG_W + blocksize), 1 << (LCU_MIN_LOG_W + blocksize), trafo);
|
||||
cu_info_t tu;
|
||||
|
@ -163,15 +166,21 @@ TEST dct(void)
|
|||
tu.tr_idx = MTS_DST7_DST7 + trafo;
|
||||
tu.lfnst_idx = 0;
|
||||
tu.cr_lfnst_idx = 0;
|
||||
tu.intra.isp_mode = 0;
|
||||
|
||||
int16_t* buf = dct_bufs[trafo * NUM_SIZES + blocksize];
|
||||
ALIGNED(32) int16_t test_result[LCU_WIDTH * LCU_WIDTH] = { 0 };
|
||||
|
||||
test_env.tested_func(UVG_BIT_DEPTH, COLOR_Y, &tu, 1 << (LCU_MIN_LOG_W + blocksize), buf, test_result, UVG_MTS_BOTH);
|
||||
test_env.tested_func(UVG_BIT_DEPTH, COLOR_Y, &tu, 1 << (LCU_MIN_LOG_W + blocksize), 1 << (LCU_MIN_LOG_W + blocksize), buf, test_result, UVG_MTS_BOTH);
|
||||
|
||||
for (int i = 0; i < LCU_WIDTH * LCU_WIDTH; ++i) {
|
||||
for (int y = 0; y < size; ++y) {
|
||||
if (y>= 16) break;
|
||||
for (int x = 0; x < size; ++x) {
|
||||
if (x >= 16) break;
|
||||
int i = y * size + x;
|
||||
ASSERT_EQm(testname, test_result[i], dct_result[trafo][blocksize][i]);
|
||||
}
|
||||
}
|
||||
//fprintf(stderr, "PASS: %s\r\n", testname);
|
||||
}
|
||||
}
|
||||
|
@ -188,11 +197,14 @@ TEST idct(void)
|
|||
cu_info_t tu;
|
||||
tu.type = CU_INTRA;
|
||||
tu.tr_idx = MTS_DST7_DST7 + trafo;
|
||||
tu.lfnst_idx = 0;
|
||||
tu.cr_lfnst_idx = 0;
|
||||
tu.intra.isp_mode = 0;
|
||||
|
||||
int16_t* buf = dct_bufs[trafo * NUM_SIZES + blocksize];
|
||||
ALIGNED(32) int16_t test_result[LCU_WIDTH * LCU_WIDTH] = { 0 };
|
||||
|
||||
test_env.tested_func(UVG_BIT_DEPTH, COLOR_Y, &tu, 1 << (LCU_MIN_LOG_W + blocksize), buf, test_result, UVG_MTS_BOTH);
|
||||
test_env.tested_func(UVG_BIT_DEPTH, COLOR_Y, &tu, 1 << (LCU_MIN_LOG_W + blocksize), 1 << (LCU_MIN_LOG_W + blocksize), buf, test_result, UVG_MTS_BOTH);
|
||||
|
||||
for (int i = 0; i < LCU_WIDTH * LCU_WIDTH; ++i) {
|
||||
ASSERT_EQm(testname, test_result[i], idct_result[trafo][blocksize][i]);
|
||||
|
|
|
@ -46,8 +46,11 @@ TEST test_get_spatial_merge_cand(void)
|
|||
|
||||
merge_candidates_t cand = { 0 };
|
||||
|
||||
get_spatial_merge_candidates(64 + 32, 64, // x, y
|
||||
32, 24, // width, height
|
||||
cu_loc_t cu_loc;
|
||||
uvg_cu_loc_ctor(&cu_loc, 64 + 32, 64, // x, y
|
||||
32, 24); // width, height)
|
||||
|
||||
get_spatial_merge_candidates(&cu_loc,
|
||||
1920, 1080, // picture size
|
||||
&lcu,
|
||||
&cand,
|
||||
|
|
|
@ -6,10 +6,10 @@ set -eu
|
|||
|
||||
cabacfile="$(mktemp)"
|
||||
|
||||
valgrind_test 256x128 10 yuv420p --preset veryslow --rd 3 --mip --jccr --mrl --lfnst -p 1 --owf 0 --no-wpp --cabac-debug-file="${cabacfile}"
|
||||
valgrind_test 256x128 10 yuv420p --no-cpuid --preset veryslow --pu-depth-intra 0-8 --mtt-depth-intra 3 --mtt-depth-intra-chroma 3 --cclm --rd 3 --mip --jccr --mrl --lfnst -p 1 --owf 0 --no-wpp --cabac-debug-file="${cabacfile}"
|
||||
python3 check_cabac_state_consistency.py "${cabacfile}"
|
||||
|
||||
valgrind_test 256x128 10 yuv420p --preset veryslow --rd 3 --mip --jccr --mrl --lfnst --dual-tree -p 1 --owf 0 --no-wpp --cabac-debug-file="${cabacfile}"
|
||||
valgrind_test 256x128 10 yuv420p --no-cpuid --preset veryslow --pu-depth-intra 0-8 --mtt-depth-intra 3 --mtt-depth-intra-chroma 3 --cclm --rd 3 --mip --jccr --mrl --lfnst --dual-tree -p 1 --owf 0 --no-wpp --cabac-debug-file="${cabacfile}"
|
||||
python3 check_cabac_state_consistency.py "${cabacfile}"
|
||||
|
||||
rm -rf "${cabacfile}"
|
||||
|
|
|
@ -19,3 +19,5 @@ valgrind_test $common_args --jccr --rdoq --rd=2 --mts=intra
|
|||
valgrind_test $common_args --rd=3 --cclm --jccr
|
||||
valgrind_test $common_args --lfnst
|
||||
valgrind_test $common_args --lfnst --rd=3 --cclm --mip --dual-tree --fast-residual-cost 0
|
||||
valgrind_test $common_args --rd=2 --isp --cpuid=0 --fast-residual-cost 0
|
||||
valgrind_test $common_args --rd=2 --isp --cpuid=0 --lfnst --mts=intra --fast-residual-cost 0
|
||||
|
|
14
tests/test_mtt.sh
Executable file
14
tests/test_mtt.sh
Executable file
|
@ -0,0 +1,14 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Test multi-type tree (MTT) intra coding.
|
||||
|
||||
set -eu
|
||||
|
||||
. "${0%/*}/util.sh"
|
||||
|
||||
common_args='264x130 10 yuv420p -p1 --preset=ultrafast --threads=0 --no-cpuid --no-wpp --fast-residual-cost 0'
|
||||
valgrind_test $common_args --rd=0 --mtt-depth-intra 1 --pu-depth-intra 2-3
|
||||
valgrind_test $common_args --rd=3 --mtt-depth-intra 1 --pu-depth-intra 0-5
|
||||
valgrind_test $common_args --rd=3 --mtt-depth-intra 3 --pu-depth-intra 0-8
|
||||
valgrind_test $common_args --rd=3 --mtt-depth-intra 3 --mtt-depth-intra-chroma 3 --dual-tree --pu-depth-intra 0-8
|
||||
valgrind_test $common_args --rd=3 --rdoq --jccr --isp --lfnst --mip --mrl --mts intra --cclm --mtt-depth-intra 3 --mtt-depth-intra-chroma 3 --dual-tree --pu-depth-intra 0-8
|
|
@ -51,7 +51,7 @@ static void init_sig_last_scan(uint32_t *buff_d, uint32_t *buff_h,
|
|||
uint32_t *buff_v,
|
||||
int32_t width, int32_t height)
|
||||
{
|
||||
uint32_t num_scan_pos = width * width;
|
||||
uint32_t num_scan_pos = width * height;
|
||||
uint32_t next_scan_pos = 0;
|
||||
int32_t xx, yy, x, y;
|
||||
uint32_t scan_line;
|
||||
|
|
Loading…
Reference in a new issue