/**
 * \brief Parse a comma-separated list of "min-max" PU depth ranges.
 *
 * The k-th comma-separated entry of the list is written to depths_min[k] and
 * depths_max[k]. A zero-length entry (a leading comma, two consecutive commas
 * or a trailing comma) leaves slot k untouched, so the caller can pre-fill the
 * arrays with a sentinel to tell "not given" apart from an explicit range.
 *
 * Entry boundaries are located with strchr() on the input itself. Every entry
 * is therefore mapped to the right slot regardless of how many characters it
 * occupies, and no heap copy or strtok() state is needed.
 *
 * No range checking of the parsed values is done here.
 *
 * \param array       NUL-terminated list, e.g. "1-3,,2-3"
 * \param depths_min  output array for the range minimums, at least size long
 * \param depths_max  output array for the range maximums, at least size long
 * \param size        number of slots available in both output arrays
 * \return 1 on success, 0 if an entry is malformed, an argument is NULL or a
 *         non-empty entry does not fit in size slots
 */
static int parse_pu_depth_list( const char *array, int32_t *depths_min, int32_t *depths_max, int size )
{
  if (array == NULL || depths_min == NULL || depths_max == NULL) {
    return 0;
  }

  const char *entry = array;

  for (int i = 0; entry != NULL; i++) {
    const char *comma = strchr(entry, ',');
    const int is_empty = (comma == entry) || (*entry == '\0');

    if (!is_empty) {
      if (i >= size) {
        fprintf(stderr, "parsing failed : too many values.\n");
        return 0;
      }

      // Scan into plain ints so that %d always matches the argument type,
      // and store only after both numbers of the entry parsed.
      // %d never consumes a comma, so the scan cannot run into the next entry.
      int min_depth = 0;
      int max_depth = 0;
      if (sscanf(entry, "%d-%d", &min_depth, &max_depth) != 2) {
        return 0;
      }

      depths_min[i] = min_depth;
      depths_max[i] = max_depth;
    }

    // Continue after the separator, or stop when this was the last entry.
    entry = (comma != NULL) ? comma + 1 : NULL;
  }

  return 1;
}
PU_DEPTH_INTRA_MAX)) - { - fprintf(stderr, "Input error: illegal value for --pu-depth-intra (%d-%d)\n", - cfg->pu_depth_intra.min, cfg->pu_depth_intra.max); - error = 1; - } else if (cfg->pu_depth_intra.min > cfg->pu_depth_intra.max) { - fprintf(stderr, "Input error: Intra PU depth min (%d) > max (%d)\n", - cfg->pu_depth_intra.min, cfg->pu_depth_intra.max); - error = 1; + if( !WITHIN( cfg->pu_depth_inter.min[i], PU_DEPTH_INTER_MIN, PU_DEPTH_INTER_MAX ) || + !WITHIN( cfg->pu_depth_inter.max[i], PU_DEPTH_INTER_MIN, PU_DEPTH_INTER_MAX ) ) + { + fprintf( stderr, "Input error: illegal value for --pu-depth-inter (%d-%d)\n", + cfg->pu_depth_inter.min[i], cfg->pu_depth_inter.max[i] ); + error = 1; + } + else if( cfg->pu_depth_inter.min[i] > cfg->pu_depth_inter.max[i] ) + { + fprintf( stderr, "Input error: Inter PU depth min (%d) > max (%d)\n", + cfg->pu_depth_inter.min[i], cfg->pu_depth_inter.max[i] ); + error = 1; + } + + if( cfg->pu_depth_intra.min[i] < 0 || cfg->pu_depth_intra.max[i] < 0 ) continue; + + if( !WITHIN( cfg->pu_depth_intra.min[i], PU_DEPTH_INTRA_MIN, PU_DEPTH_INTRA_MAX ) || + !WITHIN( cfg->pu_depth_intra.max[i], PU_DEPTH_INTRA_MIN, PU_DEPTH_INTRA_MAX ) ) + { + fprintf( stderr, "Input error: illegal value for --pu-depth-intra (%d-%d)\n", + cfg->pu_depth_intra.min[i], cfg->pu_depth_intra.max[i] ); + error = 1; + } + else if( cfg->pu_depth_intra.min[i] > cfg->pu_depth_intra.max[i] ) + { + fprintf( stderr, "Input error: Intra PU depth min (%d) > max (%d)\n", + cfg->pu_depth_intra.min[i], cfg->pu_depth_intra.max[i] ); + error = 1; + } } // Tile separation should be at round position in terms of LCU, should be monotonic, and should not start by 0 diff --git a/src/encoder.c b/src/encoder.c index f17060d1..10dfc30a 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -608,11 +608,16 @@ encoder_control_t* kvz_encoder_control_init(const kvz_config *const cfg) #endif //KVZ_DEBUG } - assert(WITHIN(encoder->cfg.pu_depth_inter.min, PU_DEPTH_INTER_MIN, PU_DEPTH_INTER_MAX)); 
- assert(WITHIN(encoder->cfg.pu_depth_inter.max, PU_DEPTH_INTER_MIN, PU_DEPTH_INTER_MAX)); - assert(WITHIN(encoder->cfg.pu_depth_intra.min, PU_DEPTH_INTRA_MIN, PU_DEPTH_INTRA_MAX)); - assert(WITHIN(encoder->cfg.pu_depth_intra.max, PU_DEPTH_INTRA_MIN, PU_DEPTH_INTRA_MAX)); + for( size_t i = 0; i < MAX_GOP_LAYERS; i++ ) + { + if( encoder->cfg.pu_depth_inter.min[i] < 0 || cfg->pu_depth_inter.max[i] < 0 ) continue; + assert( WITHIN( encoder->cfg.pu_depth_inter.min[i], PU_DEPTH_INTER_MIN, PU_DEPTH_INTER_MAX ) ); + assert( WITHIN( encoder->cfg.pu_depth_inter.max[i], PU_DEPTH_INTER_MIN, PU_DEPTH_INTER_MAX ) ); + if( encoder->cfg.pu_depth_intra.min[i] < 0 || cfg->pu_depth_intra.max[i] < 0 ) continue; + assert( WITHIN( encoder->cfg.pu_depth_intra.min[i], PU_DEPTH_INTRA_MIN, PU_DEPTH_INTRA_MAX ) ); + assert( WITHIN( encoder->cfg.pu_depth_intra.max[i], PU_DEPTH_INTRA_MIN, PU_DEPTH_INTRA_MAX ) ); + } // Disable in-loop filters, sign hiding and transform skip when using // lossless coding. if (encoder->cfg.lossless) { diff --git a/src/kvazaar.h b/src/kvazaar.h index bc4e657a..84994c11 100644 --- a/src/kvazaar.h +++ b/src/kvazaar.h @@ -308,8 +308,8 @@ typedef struct kvz_config int32_t cpuid; struct { - int32_t min; - int32_t max; + int32_t min[KVZ_MAX_GOP_LENGTH]; + int32_t max[KVZ_MAX_GOP_LENGTH]; } pu_depth_inter, pu_depth_intra; int32_t add_encoder_info; diff --git a/src/search.c b/src/search.c index 4673b3a6..5f9619c2 100644 --- a/src/search.c +++ b/src/search.c @@ -471,6 +471,8 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, return 0; } + int gop_layer = ctrl->cfg.gop_len != 1 ? 
ctrl->cfg.gop[state->frame->gop_offset].layer - 1 : 0; + // Assign correct depth limit constraint_t* constr = state->constraint; if(constr->ml_intra_depth_ctu) { @@ -478,11 +480,11 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, pu_depth_intra.max = constr->ml_intra_depth_ctu->_mat_lower_depth[(x_local >> 3) + (y_local >> 3) * 8]; } else { - pu_depth_intra.min = ctrl->cfg.pu_depth_intra.min; - pu_depth_intra.max = ctrl->cfg.pu_depth_intra.max; + pu_depth_intra.min = ctrl->cfg.pu_depth_intra.min[gop_layer] >= 0 ? ctrl->cfg.pu_depth_intra.min[gop_layer] : ctrl->cfg.pu_depth_intra.min[0]; + pu_depth_intra.max = ctrl->cfg.pu_depth_intra.max[gop_layer] >= 0 ? ctrl->cfg.pu_depth_intra.max[gop_layer] : ctrl->cfg.pu_depth_intra.max[0]; } - pu_depth_inter.min = ctrl->cfg.pu_depth_inter.min; - pu_depth_inter.max = ctrl->cfg.pu_depth_inter.max; + pu_depth_inter.min = ctrl->cfg.pu_depth_inter.min[gop_layer] >= 0 ? ctrl->cfg.pu_depth_inter.min[gop_layer] : ctrl->cfg.pu_depth_inter.min[0]; + pu_depth_inter.max = ctrl->cfg.pu_depth_inter.max[gop_layer] >= 0 ? ctrl->cfg.pu_depth_inter.max[gop_layer] : ctrl->cfg.pu_depth_inter.max[0]; cur_cu = LCU_GET_CU_AT_PX(lcu, x_local, y_local); // Assign correct depth diff --git a/src/strategies/missing-intel-intrinsics.h b/src/strategies/missing-intel-intrinsics.h index 7dfab54d..f132f033 100644 --- a/src/strategies/missing-intel-intrinsics.h +++ b/src/strategies/missing-intel-intrinsics.h @@ -20,6 +20,9 @@ // Some Visual Studio headers apparently lack these pseudoinstructions #if COMPILE_INTEL_AVX2 + #ifndef _mm256_bsrli_epi128 + #define _mm256_bsrli_epi128(a, imm8) _mm256_srli_si256((a), (imm8)) + #endif #ifndef _mm256_insert_epi32 #define _mm256_insert_epi32(a, i, index) (_mm256_blend_epi32((a), _mm256_set1_epi32(i), (1 << (index)))) #endif