From 03d15759c15066a0bb684a5ebe38c0ed346a5e9e Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Thu, 1 Dec 2022 14:38:35 +0200 Subject: [PATCH] [mtt] WIP --- src/cfg.c | 67 +++++++++++++++++++++++++++++++++------- src/cli.c | 27 +++++++++++++--- src/cu.c | 17 +++++----- src/encode_coding_tree.c | 8 +++-- src/global.h | 6 ++-- src/intra.c | 13 ++++++-- src/search.c | 11 ++++--- src/uvg266.h | 8 ++--- 8 files changed, 116 insertions(+), 41 deletions(-) diff --git a/src/cfg.c b/src/cfg.c index e183bb05..0ac0c079 100644 --- a/src/cfg.c +++ b/src/cfg.c @@ -227,9 +227,9 @@ int uvg_config_init(uvg_config *cfg) cfg->min_qt_size[1] = 4; cfg->min_qt_size[2] = 4; - cfg->max_btt_depth[0] = 2; + cfg->max_btt_depth[0] = 0; cfg->max_btt_depth[1] = 0; - cfg->max_btt_depth[2] = 1; + cfg->max_btt_depth[2] = 0; cfg->max_tt_size[0] = 64; cfg->max_bt_size[0] = 64; @@ -345,7 +345,7 @@ static int parse_tiles_specification(const char* const arg, int32_t * const ntil return 1; } -/* + static int parse_uint8(const char *numstr,uint8_t* number,int min, int max) { char *tail; @@ -361,7 +361,7 @@ static int parse_uint8(const char *numstr,uint8_t* number,int min, int max) return 1; } } -*/ + static int parse_int8(const char *numstr,int8_t* number,int min, int max) { char *tail; @@ -377,7 +377,7 @@ static int parse_int8(const char *numstr,int8_t* number,int min, int max) return 1; } } -/* + static int parse_array(const char *array, uint8_t *coeff_key, int size, int min, int max) { @@ -401,15 +401,15 @@ static int parse_array(const char *array, uint8_t *coeff_key, int size, free(key); return 0; } - else if (idual_tree = atobool(value); } + else if OPT("mtt-depth-intra") { + cfg->max_btt_depth[0] = atoi(value); + } + else if OPT("mtt-depth-intra-chroma") { + cfg->max_btt_depth[2] = atoi(value); + } + else if OPT("mtt-depth-inter") { + cfg->max_btt_depth[1] = atoi(value); + } + else if OPT("max-bt-size") { + uint8_t sizes[3]; + const int got = parse_array(value, sizes, 3, 0, 128); + if (got == 1) { 
+ cfg->max_bt_size[0] = sizes[0]; + cfg->max_bt_size[1] = sizes[0]; + cfg->max_bt_size[2] = sizes[0]; + } + else if (got == 3) { + cfg->max_bt_size[0] = sizes[0]; + cfg->max_bt_size[1] = sizes[1]; + cfg->max_bt_size[2] = sizes[2]; + } else { + fprintf(stderr, "Incorrect amount of values provided for max-bt-size\n"); + return 0; + } + } + else if OPT("max-tt-size") { + uint8_t sizes[3]; + const int got = parse_array(value, sizes, 3, 0, 128); + if (got == 1) { + cfg->max_tt_size[0] = sizes[0]; + cfg->max_tt_size[1] = sizes[0]; + cfg->max_tt_size[2] = sizes[0]; + } + else if (got == 3) { + cfg->max_tt_size[0] = sizes[0]; + cfg->max_tt_size[1] = sizes[1]; + cfg->max_tt_size[2] = sizes[2]; + } else { + fprintf(stderr, "Incorrect amount of values provided for max-tt-size\n"); + return 0; + } + } else { return 0; } diff --git a/src/cli.c b/src/cli.c index 5b069b33..d5006374 100644 --- a/src/cli.c +++ b/src/cli.c @@ -192,6 +192,11 @@ static const struct option long_options[] = { { "dual-tree", no_argument, NULL, 0 }, { "no-dual-tree", no_argument, NULL, 0 }, { "cabac-debug-file", required_argument, NULL, 0 }, + {"mtt-depth-intra", required_argument, NULL, 0 }, + {"mtt-depth-inter", required_argument, NULL, 0 }, + {"mtt-depth-intra-chroma", required_argument, NULL, 0 }, + {"max-bt-size", required_argument, NULL, 0 }, + {"max-tt-size", required_argument, NULL, 0 }, {0, 0, 0, 0} }; @@ -601,14 +606,14 @@ void print_help(void) " - 2: + 1/2-pixel diagonal\n" " - 3: + 1/4-pixel horizontal and vertical\n" " - 4: + 1/4-pixel diagonal\n" - " --pu-depth-inter - : Inter prediction units sizes [0-3]\n" - " - 0, 1, 2, 3: from 64x64 to 8x8\n" + " --pu-depth-inter - : Maximum and minimum split depths where\n" + " inter search is performed 0..8. 
[0-3]\n" " - Accepts a list of values separated by ','\n" " for setting separate depths per GOP layer\n" " (values can be omitted to use the first\n" " value for the respective layer).\n" - " --pu-depth-intra - : Intra prediction units sizes [1-4]\n" - " - 0, 1, 2, 3, 4: from 64x64 to 4x4\n" + " --pu-depth-intra - : Maximum and minimum split depths where\n" + " intra search is performed 0..8. [1-4]\n" " - Accepts a list of values separated by ','\n" " for setting separate depths per GOP layer\n" " (values can be omitted to use the first\n" @@ -616,6 +621,20 @@ void print_help(void) " --ml-pu-depth-intra : Predict the pu-depth-intra using machine\n" " learning trees, overrides the\n" " --pu-depth-intra parameter. [disabled]\n" + " --mtt-depth-intra : Depth of mtt for intra slices 0..3.[0]\n" + " --mtt-depth-intra-chroma : Depth of mtt for chroma dual tree in\n" + " intra slices 0..3.[0]\n" + " --mtt-depth-inter : Depth of mtt for inter slices 0..3.[0]\n" + " --max-bt-size : maximum size for a CU resulting from\n" + " a bt split. A singular value shared for all\n" + " or a list of three values for the different\n" + " slices types (intra, inter, intra-chroma)\n" + " can be provided. [64, 64, 32]\n" + " --max-tt-size : maximum size for a CU resulting from\n" + " a tt split. A singular value shared for all\n" + " or a list of three values for the different\n" + " slices types (intra, inter, intra-chroma)\n" + " can be provided. [64, 64, 32]\n" " --(no-)combine-intra-cus: Whether the encoder tries to code a cu\n" " on lower depth even when search is not\n" " performed on said depth. 
Should only\n" diff --git a/src/cu.c b/src/cu.c index 0c8dd9f7..9908d43e 100644 --- a/src/cu.c +++ b/src/cu.c @@ -330,6 +330,7 @@ int uvg_get_split_locs( const int half_height = origin->height >> 1; const int quarter_width = origin->width >> 2; const int quarter_height = origin->height >> 2; + if (origin->width == 4 && separate_chroma) *separate_chroma = 1; switch (split) { case NO_SPLIT: @@ -350,7 +351,7 @@ int uvg_get_split_locs( case BT_VER_SPLIT: uvg_cu_loc_ctor(&out[0], origin->x, origin->y, half_width, origin->height); uvg_cu_loc_ctor(&out[1], origin->x + half_width, origin->y, half_width, origin->height); - if (half_width == 4 && separate_chroma) *separate_chroma = 1; + if ((half_width == 4 || half_width * origin->height < 64) && separate_chroma) *separate_chroma = 1; return 2; case TT_HOR_SPLIT: uvg_cu_loc_ctor(&out[0], origin->x, origin->y, origin->width, quarter_height); @@ -362,7 +363,7 @@ int uvg_get_split_locs( uvg_cu_loc_ctor(&out[0], origin->x, origin->y, quarter_width, origin->height); uvg_cu_loc_ctor(&out[1], origin->x + quarter_width, origin->y, half_width, origin->height); uvg_cu_loc_ctor(&out[2], origin->x + quarter_width + half_width, origin->y, quarter_width, origin->height); - if (quarter_width == 4 && separate_chroma) *separate_chroma = 1; + if ((quarter_width == 4 || quarter_width * origin->height < 64) && separate_chroma) *separate_chroma = 1; return 3; } return 0; @@ -390,10 +391,10 @@ int uvg_get_possible_splits(const encoder_state_t * const state, const int slice_type = state->frame->is_irap ? (tree_type == UVG_CHROMA_T ? 
2 : 0) : 1; const unsigned max_btd = state->encoder_control->cfg.max_btt_depth[slice_type]; // +currImplicitBtDepth; - const unsigned max_bt_size = state->encoder_control->cfg.max_bt_size[slice_type]; - const unsigned min_bt_size = 1 << MIN_SIZE; - const unsigned max_tt_size = state->encoder_control->cfg.max_tt_size[slice_type]; - const unsigned min_tt_size = 1 << MIN_SIZE; + const unsigned max_bt_size = state->encoder_control->cfg.max_bt_size[slice_type] >> (tree_type == UVG_CHROMA_T); + const unsigned min_bt_size = 1 << MIN_SIZE >> (tree_type == UVG_CHROMA_T); + const unsigned max_tt_size = state->encoder_control->cfg.max_tt_size[slice_type] >> (tree_type == UVG_CHROMA_T); + const unsigned min_tt_size = 1 << MIN_SIZE >> (tree_type == UVG_CHROMA_T); const unsigned min_qt_size = state->encoder_control->cfg.min_qt_size[slice_type]; splits[NO_SPLIT] = splits[QT_SPLIT] = splits[BT_HOR_SPLIT] = splits[TT_HOR_SPLIT] = splits[BT_VER_SPLIT] = splits[TT_VER_SPLIT] = true; @@ -459,12 +460,12 @@ int uvg_get_possible_splits(const encoder_state_t * const state, //if (modeType == MODE_TYPE_INTER && width * height == 32) splits[BT_VER_SPLIT] = splits[BT_HOR_SPLIT] = false; - if (cu_loc->chroma_height <= min_tt_size || height > max_tt_size || width > max_tt_size) + if (height <= 2 * min_tt_size || height > max_tt_size || width > max_tt_size) splits[TT_HOR_SPLIT] = false; if (width > 64 || height > 64) splits[TT_HOR_SPLIT] = false; if (tree_type == UVG_CHROMA_T && width * height <= 16 * 2) splits[TT_HOR_SPLIT] = false; - if (cu_loc->chroma_width <= min_tt_size || width > max_tt_size || height > max_tt_size) + if (width <= 2 * min_tt_size || width > max_tt_size || height > max_tt_size) splits[TT_VER_SPLIT] = false; if (width > 64 || height > 64) splits[TT_VER_SPLIT] = false; if (tree_type == UVG_CHROMA_T && (width * height <= 16 * 2 || width == 8)) splits[TT_VER_SPLIT] = false; diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index 13321248..1d3a0710 100644 --- 
a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -657,7 +657,7 @@ static void encode_transform_coeff( split_cu_loc[i].chroma_height = split_cu_loc[i].height; } encode_transform_coeff(state, &split_cu_loc[i], only_chroma, - coeff, NULL, tree_type, true, false, luma_cbf_ctx, &split_cu_loc[i], &split_cu_loc[i]); + coeff, NULL, tree_type, true, false, luma_cbf_ctx, &split_cu_loc[i], chroma_loc); } return; } @@ -1391,14 +1391,16 @@ void uvg_encode_coding_tree( uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc[split], separate_chroma ? chroma_loc :(tree_type == UVG_CHROMA_T ? &chroma_tree_loc : &new_cu_loc[split]), - new_split_tree, !separate_chroma || split == splits - 1); + new_split_tree, !separate_chroma || (split == splits - 1 && has_chroma)); } return; } } DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_CU_TYPE, abs_x, abs_y, cu_width, cu_height, (cur_cu->type == CU_INTRA) ? 0 : 1); - + + fprintf(stderr, "%4d %4d %2d %2d %d\n", x, y, cu_width, cu_height, has_chroma); + if (ctrl->cfg.lossless) { cabac->cur_ctx = &cabac->ctx.cu_transquant_bypass; CABAC_BIN(cabac, 1, "cu_transquant_bypass_flag"); diff --git a/src/global.h b/src/global.h index 7f1c1103..55d59752 100644 --- a/src/global.h +++ b/src/global.h @@ -145,11 +145,11 @@ typedef int16_t mv_t; #define INTERNAL_MV_PREC 4 // Internal motion vector precision, 4 = 1/16 pel -//! Limits for prediction block sizes. 0 = 64x64, 4 = 4x4. +//! Limits for prediction block sizes. #define PU_DEPTH_INTER_MIN 0 -#define PU_DEPTH_INTER_MAX 3 +#define PU_DEPTH_INTER_MAX 8 #define PU_DEPTH_INTRA_MIN 0 -#define PU_DEPTH_INTRA_MAX 4 +#define PU_DEPTH_INTRA_MAX 8 //! 
Maximum number of layers in GOP structure (for allocating structures) #define MAX_GOP_LAYERS 6 diff --git a/src/intra.c b/src/intra.c index 09c66c13..439910f5 100644 --- a/src/intra.c +++ b/src/intra.c @@ -289,8 +289,15 @@ bool uvg_cclm_is_allowed(const encoder_state_t* const state, const cu_loc_t * co if (tree_type != UVG_CHROMA_T) { return true; } - uint32_t chroma_split = GET_SPLITDATA(cur_cu, 0); - if((chroma_split == BT_VER_SPLIT || chroma_split == TT_VER_SPLIT || chroma_split == TT_HOR_SPLIT) && GET_SPLITDATA(cur_cu, 1) == NO_SPLIT) return false; + uint32_t chroma_split_depth0 = GET_SPLITDATA(cur_cu, 0); + uint32_t chroma_split_depth1 = GET_SPLITDATA(cur_cu, 1); + bool allow = false; + if (chroma_split_depth0 == QT_SPLIT || (chroma_split_depth0 == BT_HOR_SPLIT && chroma_split_depth1 == BT_VER_SPLIT)) allow = true; + else if (chroma_split_depth0 == NO_SPLIT) allow = true; + else if (chroma_split_depth0 == BT_HOR_SPLIT && chroma_split_depth1 == NO_SPLIT) allow = true; + if (!allow) { + return false; + } const cu_info_t* const luma_cu = uvg_cu_array_at_const(state->tile->frame->cu_array, luma_loc->x, luma_loc->y); uint32_t split = GET_SPLITDATA(luma_cu, 0); if (split != QT_SPLIT && split != NO_SPLIT) { @@ -318,7 +325,7 @@ static void get_cclm_parameters( uvg_intra_ref* luma_src, uvg_intra_references*chroma_ref, int16_t *a, int16_t*b, int16_t*shift) { - const int base_unit_size = 1 << (6 - PU_DEPTH_INTRA_MAX); + const int base_unit_size = 4; // TODO: take into account YUV422 const int unit_w = base_unit_size >> 1; diff --git a/src/search.c b/src/search.c index bd0d0cc5..696bd691 100644 --- a/src/search.c +++ b/src/search.c @@ -214,7 +214,7 @@ static void work_tree_copy_up( copy_cu_info (from, to, cu_loc, tree_type); copy_cu_pixels(from, to, cu_loc, cu_loc != chroma_loc && tree_type == UVG_LUMA_T ? UVG_LUMA_T : tree_type); copy_cu_coeffs(cu_loc, from, to, joint, cu_loc != chroma_loc && tree_type == UVG_LUMA_T ? 
UVG_LUMA_T : tree_type); - if (cu_loc != chroma_loc && tree_type != UVG_LUMA_T) { + if (chroma_loc && tree_type != UVG_LUMA_T) { copy_cu_pixels(from, to, chroma_loc, UVG_CHROMA_T); copy_cu_coeffs(chroma_loc, from, to, joint, UVG_CHROMA_T); } @@ -1170,7 +1170,7 @@ static double search_cu( recon_luma, recon_chroma); - if((!recon_chroma && state->encoder_control->chroma_format != UVG_CSP_400 ) + if((!recon_chroma && state->encoder_control->chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) || tree_type == UVG_CHROMA_T) { intra_search.pred_cu.intra.mode_chroma = cur_cu->intra.mode_chroma; uvg_intra_recon_cu(state, @@ -1329,7 +1329,10 @@ static double search_cu( cabac_data_t best_split_cabac; memcpy(&post_seach_cabac, &state->search_cabac, sizeof(post_seach_cabac)); for (int split_type = QT_SPLIT; split_type <= TT_VER_SPLIT; ++split_type) { - if (!can_split[split_type] || (tree_type == UVG_CHROMA_T && split_type == TT_HOR_SPLIT && cu_loc->chroma_height == 8)) continue; + if (!can_split[split_type] + || (tree_type == UVG_CHROMA_T && split_type == TT_HOR_SPLIT && cu_loc->chroma_height == 8) + || (tree_type == UVG_CHROMA_T && split_type == BT_HOR_SPLIT && cu_loc->chroma_height == 4)) + continue; split_tree_t new_split = { split_tree.split_tree | split_type << (split_tree.current_depth * 3), split_tree.current_depth + 1, @@ -1397,7 +1400,7 @@ static double search_cu( &new_cu_loc[split], separate_chroma ? 
chroma_loc : &new_cu_loc[split], &split_lcu[split_type -1], tree_type, new_split, - !separate_chroma || split == splits - 1); + !separate_chroma || (split == splits - 1 && has_chroma)); // If there is no separate chroma the block will always have chroma, otherwise it is the last block of the split that has the chroma if (split_cost > cost || split_cost > best_split_cost) { break; diff --git a/src/uvg266.h b/src/uvg266.h index 4adc7b7a..c0aac5a2 100644 --- a/src/uvg266.h +++ b/src/uvg266.h @@ -543,11 +543,11 @@ typedef struct uvg_config uint8_t dual_tree; - uint8_t min_qt_size[3]; /* intra, inter, dual tree chroma*/ - uint8_t max_bt_size[3]; - uint8_t max_tt_size[3]; + uint8_t min_qt_size[3]; /* intra, inter, dual tree chroma*/ + uint8_t max_bt_size[3]; /* intra, inter, dual tree chroma*/ + uint8_t max_tt_size[3]; /* intra, inter, dual tree chroma*/ - uint8_t max_btt_depth[3]; + uint8_t max_btt_depth[3]; /* intra, inter, dual tree chroma*/ } uvg_config; /**