diff --git a/README.md b/README.md index ccc0c7f9..37f12ee3 100644 --- a/README.md +++ b/README.md @@ -196,8 +196,16 @@ Compression tools: - 4: + 1/4-pixel diagonal --pu-depth-inter - : Inter prediction units sizes [0-3] - 0, 1, 2, 3: from 64x64 to 8x8 + - Accepts a list of values separated by ',' + for setting separate depths per GOP layer + (values can be omitted to use the first + value for the respective layer). --pu-depth-intra - : Intra prediction units sizes [1-4] - 0, 1, 2, 3, 4: from 64x64 to 4x4 + - Accepts a list of values separated by ',' + for setting separate depths per GOP layer + (values can be omitted to use the first + value for the respective layer). --ml-pu-depth-intra : Predict the pu-depth-intra using machine learning trees, overrides the --pu-depth-intra parameter. [disabled] diff --git a/doc/kvazaar.1 b/doc/kvazaar.1 index 362db5d3..56450655 100644 --- a/doc/kvazaar.1 +++ b/doc/kvazaar.1 @@ -1,4 +1,4 @@ -.TH KVAZAAR "1" "February 2020" "kvazaar v1.3.0" "User Commands" +.TH KVAZAAR "1" "March 2020" "kvazaar v1.3.0" "User Commands" .SH NAME kvazaar \- open source HEVC encoder .SH SYNOPSIS @@ -254,10 +254,18 @@ Fractional pixel motion estimation level [4] \fB\-\-pu\-depth\-inter \- Inter prediction units sizes [0\-3] \- 0, 1, 2, 3: from 64x64 to 8x8 + \- Accepts a list of values separated by ',' + for setting separate depths per GOP layer + (values can be omitted to use the first + value for the respective layer). .TP \fB\-\-pu\-depth\-intra \- Intra prediction units sizes [1\-4] \- 0, 1, 2, 3, 4: from 64x64 to 4x4 + \- Accepts a list of values separated by ',' + for setting separate depths per GOP layer + (values can be omitted to use the first + value for the respective layer). 
/**
 * \brief Parse a comma separated list of PU depth ranges, one per GOP layer.
 *
 * Every non-empty entry has the form "<min>-<max>" and is stored at the
 * index given by its position in the list. Empty entries (e.g. the second
 * one in "1-3,,2-3") leave the corresponding array elements untouched, so
 * whatever the caller put there beforehand is preserved.
 *
 * The list is scanned in place; the input string is neither copied nor
 * modified. Entry positions are derived from the actual separators, so the
 * textual width of an entry has no influence on which layer it is stored to.
 *
 * \param array       NUL-terminated list to parse.
 * \param depths_min  Output array receiving the minimum depths.
 * \param depths_max  Output array receiving the maximum depths.
 * \param size        Number of elements in depths_min and depths_max.
 * \return 1 on success, 0 if an entry is malformed, contains a negative
 *         depth, or is located at an index >= size.
 */
static int parse_pu_depth_list( const char *array, int32_t *depths_min, int32_t *depths_max, int size )
{
  if( array == NULL || depths_min == NULL || depths_max == NULL ) {
    return 0;
  }

  const char *cursor = array;
  int layer = 0;

  for( ;; ) {
    // Length of the current entry, i.e. everything up to the next ',' or
    // the end of the string.
    const size_t entry_len = strcspn( cursor, "," );

    if( entry_len > 0 ) {
      // Parse into plain ints: "%d" requires int *, which is not guaranteed
      // to be the same type as int32_t *.
      int depth_min = 0;
      int depth_max = 0;
      int consumed = 0;

      if( layer >= size ) {
        fprintf( stderr, "parsing failed : too many values.\n" );
        return 0;
      }

      // %n reports how many characters were consumed; requiring it to match
      // the entry length rejects trailing junk such as "1-3x".
      if( sscanf( cursor, "%d-%d%n", &depth_min, &depth_max, &consumed ) != 2 ||
          (size_t)consumed != entry_len ) {
        return 0;
      }

      // Negative values are used elsewhere to mark a layer as unset, so an
      // explicitly given negative depth would silently be ignored.
      if( depth_min < 0 || depth_max < 0 ) {
        return 0;
      }

      // Store only after the whole entry has been validated.
      depths_min[layer] = depth_min;
      depths_max[layer] = depth_max;
    }

    cursor += entry_len;
    if( *cursor == '\0' ) {
      break;
    }

    // Step over the separator and move on to the next layer.
    cursor++;
    layer++;
  }

  return 1;
}
PU_DEPTH_INTRA_MAX)) - { - fprintf(stderr, "Input error: illegal value for --pu-depth-intra (%d-%d)\n", - cfg->pu_depth_intra.min, cfg->pu_depth_intra.max); - error = 1; - } else if (cfg->pu_depth_intra.min > cfg->pu_depth_intra.max) { - fprintf(stderr, "Input error: Intra PU depth min (%d) > max (%d)\n", - cfg->pu_depth_intra.min, cfg->pu_depth_intra.max); - error = 1; + if( !WITHIN( cfg->pu_depth_inter.min[i], PU_DEPTH_INTER_MIN, PU_DEPTH_INTER_MAX ) || + !WITHIN( cfg->pu_depth_inter.max[i], PU_DEPTH_INTER_MIN, PU_DEPTH_INTER_MAX ) ) + { + fprintf( stderr, "Input error: illegal value for --pu-depth-inter (%d-%d)\n", + cfg->pu_depth_inter.min[i], cfg->pu_depth_inter.max[i] ); + error = 1; + } + else if( cfg->pu_depth_inter.min[i] > cfg->pu_depth_inter.max[i] ) + { + fprintf( stderr, "Input error: Inter PU depth min (%d) > max (%d)\n", + cfg->pu_depth_inter.min[i], cfg->pu_depth_inter.max[i] ); + error = 1; + } + + if( cfg->pu_depth_intra.min[i] < 0 || cfg->pu_depth_intra.max[i] < 0 ) continue; + + if( !WITHIN( cfg->pu_depth_intra.min[i], PU_DEPTH_INTRA_MIN, PU_DEPTH_INTRA_MAX ) || + !WITHIN( cfg->pu_depth_intra.max[i], PU_DEPTH_INTRA_MIN, PU_DEPTH_INTRA_MAX ) ) + { + fprintf( stderr, "Input error: illegal value for --pu-depth-intra (%d-%d)\n", + cfg->pu_depth_intra.min[i], cfg->pu_depth_intra.max[i] ); + error = 1; + } + else if( cfg->pu_depth_intra.min[i] > cfg->pu_depth_intra.max[i] ) + { + fprintf( stderr, "Input error: Intra PU depth min (%d) > max (%d)\n", + cfg->pu_depth_intra.min[i], cfg->pu_depth_intra.max[i] ); + error = 1; + } } // Tile separation should be at round position in terms of LCU, should be monotonic, and should not start by 0 diff --git a/src/cli.c b/src/cli.c index 5b74aaa3..a628abc6 100644 --- a/src/cli.c +++ b/src/cli.c @@ -499,8 +499,16 @@ void print_help(void) " - 4: + 1/4-pixel diagonal\n" " --pu-depth-inter - : Inter prediction units sizes [0-3]\n" " - 0, 1, 2, 3: from 64x64 to 8x8\n" + " - Accepts a list of values separated by ','\n" 
+ " for setting separate depths per GOP layer\n" + " (values can be omitted to use the first\n" + " value for the respective layer).\n" " --pu-depth-intra - : Intra prediction units sizes [1-4]\n" " - 0, 1, 2, 3, 4: from 64x64 to 4x4\n" + " - Accepts a list of values separated by ','\n" + " for setting separate depths per GOP layer\n" + " (values can be omitted to use the first\n" + " value for the respective layer).\n" " --ml-pu-depth-intra : Predict the pu-depth-intra using machine\n" " learning trees, overrides the\n" " --pu-depth-intra parameter. [disabled]\n" diff --git a/src/encoder.c b/src/encoder.c index 3b7a7e75..ffddfe02 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -608,11 +608,16 @@ encoder_control_t* kvz_encoder_control_init(const kvz_config *const cfg) #endif //KVZ_DEBUG } - assert(WITHIN(encoder->cfg.pu_depth_inter.min, PU_DEPTH_INTER_MIN, PU_DEPTH_INTER_MAX)); - assert(WITHIN(encoder->cfg.pu_depth_inter.max, PU_DEPTH_INTER_MIN, PU_DEPTH_INTER_MAX)); - assert(WITHIN(encoder->cfg.pu_depth_intra.min, PU_DEPTH_INTRA_MIN, PU_DEPTH_INTRA_MAX)); - assert(WITHIN(encoder->cfg.pu_depth_intra.max, PU_DEPTH_INTRA_MIN, PU_DEPTH_INTRA_MAX)); + for( size_t i = 0; i < KVZ_MAX_GOP_LAYERS; i++ ) + { + if( encoder->cfg.pu_depth_inter.min[i] < 0 || cfg->pu_depth_inter.max[i] < 0 ) continue; + assert( WITHIN( encoder->cfg.pu_depth_inter.min[i], PU_DEPTH_INTER_MIN, PU_DEPTH_INTER_MAX ) ); + assert( WITHIN( encoder->cfg.pu_depth_inter.max[i], PU_DEPTH_INTER_MIN, PU_DEPTH_INTER_MAX ) ); + if( encoder->cfg.pu_depth_intra.min[i] < 0 || cfg->pu_depth_intra.max[i] < 0 ) continue; + assert( WITHIN( encoder->cfg.pu_depth_intra.min[i], PU_DEPTH_INTRA_MIN, PU_DEPTH_INTRA_MAX ) ); + assert( WITHIN( encoder->cfg.pu_depth_intra.max[i], PU_DEPTH_INTRA_MIN, PU_DEPTH_INTRA_MAX ) ); + } // Disable in-loop filters, sign hiding and transform skip when using // lossless coding. 
if (encoder->cfg.lossless) { diff --git a/src/kvazaar.h b/src/kvazaar.h index 6dbb50e8..8792cbc6 100644 --- a/src/kvazaar.h +++ b/src/kvazaar.h @@ -64,11 +64,11 @@ extern "C" { */ #define KVZ_MAX_GOP_LENGTH 32 - -/** -* Maximum amount of GoP layers. -*/ + /** + * Maximum amount of GoP layers. + */ #define KVZ_MAX_GOP_LAYERS 6 + /** * Size of data chunks. */ @@ -319,8 +319,8 @@ typedef struct kvz_config int32_t cpuid; struct { - int32_t min; - int32_t max; + int32_t min[KVZ_MAX_GOP_LAYERS]; + int32_t max[KVZ_MAX_GOP_LAYERS]; } pu_depth_inter, pu_depth_intra; int32_t add_encoder_info; diff --git a/src/search.c b/src/search.c index de03f0e5..a09dacb4 100644 --- a/src/search.c +++ b/src/search.c @@ -471,6 +471,8 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, return 0; } + int gop_layer = ctrl->cfg.gop_len != 0 ? ctrl->cfg.gop[state->frame->gop_offset].layer - 1 : 0; + // Assign correct depth limit constraint_t* constr = state->constraint; if(constr->ml_intra_depth_ctu) { @@ -478,11 +480,11 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, pu_depth_intra.max = constr->ml_intra_depth_ctu->_mat_lower_depth[(x_local >> 3) + (y_local >> 3) * 8]; } else { - pu_depth_intra.min = ctrl->cfg.pu_depth_intra.min; - pu_depth_intra.max = ctrl->cfg.pu_depth_intra.max; + pu_depth_intra.min = ctrl->cfg.pu_depth_intra.min[gop_layer] >= 0 ? ctrl->cfg.pu_depth_intra.min[gop_layer] : ctrl->cfg.pu_depth_intra.min[0]; + pu_depth_intra.max = ctrl->cfg.pu_depth_intra.max[gop_layer] >= 0 ? ctrl->cfg.pu_depth_intra.max[gop_layer] : ctrl->cfg.pu_depth_intra.max[0]; } - pu_depth_inter.min = ctrl->cfg.pu_depth_inter.min; - pu_depth_inter.max = ctrl->cfg.pu_depth_inter.max; + pu_depth_inter.min = ctrl->cfg.pu_depth_inter.min[gop_layer] >= 0 ? ctrl->cfg.pu_depth_inter.min[gop_layer] : ctrl->cfg.pu_depth_inter.min[0]; + pu_depth_inter.max = ctrl->cfg.pu_depth_inter.max[gop_layer] >= 0 ? 
#!/bin/sh

# Exercise the --pu-depth-inter / --pu-depth-intra options, both with a
# single range and with per-GOP-layer lists.

set -eu
. "${0%/*}/util.sh"

common_args='264x130 8 --preset=ultrafast --gop=8'

# Baseline run without any explicit depth constraint.
valgrind_test $common_args

# Valid specifications: a single range, a two entry list and a list with
# omitted entries. Each one is tried for inter first, then for intra.
for depths in 1-3 1-3,2-3 ,1-3,,,2-3,2-2; do
  for kind in inter intra; do
    valgrind_test $common_args "--pu-depth-${kind}=${depths}"
  done
done

# Invalid input: the last value sits in the seventh list position.
# NOTE(review): the third argument of encode_test is presumably the expected
# exit status -- confirm against util.sh.
for kind in intra inter; do
  encode_test 264x130 1 1 "--pu-depth-${kind}=1-2,,1-3,1-3,,,1-1"
done