From 31c5ff0f160eed9fa13093bc56f445fe5cf8c78b Mon Sep 17 00:00:00 2001 From: Ari Koivula Date: Wed, 28 Sep 2016 23:05:21 +0300 Subject: [PATCH] Add cross-platform core number detection Well, turns out pthread_num_processors_np isn't standard so we need to do this crap. Threw in hyper threading detection as a bonus. --- src/cfg.c | 4 +--- src/encoder.c | 20 ++++++++++++++++++++ src/strategyselector.c | 25 +++++++++++++++++++++++++ src/strategyselector.h | 5 +++++ 4 files changed, 51 insertions(+), 3 deletions(-) diff --git a/src/cfg.c b/src/cfg.c index 5a02fadb..82dc6135 100644 --- a/src/cfg.c +++ b/src/cfg.c @@ -23,7 +23,6 @@ #include #include #include -#include kvz_config *kvz_config_alloc(void) @@ -99,8 +98,7 @@ int kvz_config_init(kvz_config *cfg) cfg->slice_addresses_in_ts = MALLOC(int32_t, 1); cfg->slice_addresses_in_ts[0] = 0; - // Set number of threads to equal number of processors available. - cfg->threads = pthread_num_processors_np(); + cfg->threads = -1; cfg->cpuid = 1; // Defaults for what sizes of PUs are tried. diff --git a/src/encoder.c b/src/encoder.c index b686e158..0f5cb9fa 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -24,6 +24,7 @@ #include #include "cfg.h" +#include "strategyselector.h" static int encoder_control_init_gop_layer_weights(encoder_control_t * const); @@ -98,6 +99,21 @@ static int select_owf_auto(const kvz_config *const cfg) } } + +static unsigned cfg_num_threads(void) +{ + unsigned cpus = kvz_g_hardware_flags.physical_cpu_count; + unsigned fake_cpus = kvz_g_hardware_flags.logical_cpu_count - cpus; + + // Default to 4 if we don't know the number of CPUs. + if (cpus == 0) return 4; + + // 1.5 times the number of physical cores seems to be a good compromise + // when hyperthreading is available on Haswell. + return cpus + fake_cpus / 2; +} + + /** * \brief Allocate and initialize an encoder control structure. * @@ -112,6 +128,10 @@ encoder_control_t* kvz_encoder_control_init(kvz_config *const cfg) { goto init_failed; } + if (cfg->threads == -1) { + cfg->threads = cfg_num_threads(); + } + if (cfg->gop_len > 0) { if (cfg->tmvp_enable) { cfg->tmvp_enable = false; diff --git a/src/strategyselector.c b/src/strategyselector.c index 60819ca9..e3a49e5f 100644 --- a/src/strategyselector.c +++ b/src/strategyselector.c @@ -24,6 +24,15 @@ #include #include +#ifdef _WIN32 +#include +#elif MACOS +#include +#include +#else +#include +#endif + hardware_flags_t kvz_g_hardware_flags; hardware_flags_t kvz_g_strategies_in_use; hardware_flags_t kvz_g_strategies_available; @@ -410,6 +419,7 @@ static void set_hardware_flags(int32_t cpuid) { CPUID1_EDX_MMX = 1 << 23, CPUID1_EDX_SSE = 1 << 25, CPUID1_EDX_SSE2 = 1 << 26, + CPUID1_EDX_HYPER_THREADING = 1 << 28, }; enum { CPUID1_ECX_SSE3 = 1 << 0, @@ -430,6 +440,21 @@ static void set_hardware_flags(int32_t cpuid) { // Dig CPU features with cpuid get_cpuid(1, 0, &cpuid1); + +#ifdef _WIN32 + SYSTEM_INFO systeminfo; + GetSystemInfo(&systeminfo); + + kvz_g_hardware_flags.logical_cpu_count = systeminfo.dwNumberOfProcessors; +#else + kvz_g_hardware_flags.logical_cpu_count = sysconf(_SC_NPROCESSORS_ONLN); +#endif + + kvz_g_hardware_flags.physical_cpu_count = kvz_g_hardware_flags.logical_cpu_count; + kvz_g_hardware_flags.intel_flags.hyper_threading = cpuid1.edx & CPUID1_EDX_HYPER_THREADING; + if (kvz_g_hardware_flags.intel_flags.hyper_threading) { + kvz_g_hardware_flags.physical_cpu_count /= 2; + } // EDX if (cpuid1.edx & CPUID1_EDX_MMX) kvz_g_hardware_flags.intel_flags.mmx = 1; diff --git a/src/strategyselector.h b/src/strategyselector.h index 1eae4d5a..4c6819ea 100644 --- a/src/strategyselector.h +++ b/src/strategyselector.h @@ -63,6 +63,8 @@ typedef struct { int sse42; int avx; int avx2; + + bool hyper_threading; } intel_flags; struct { @@ -72,6 +74,9 @@ typedef struct { struct { int neon; } arm_flags; + + int logical_cpu_count; + int physical_cpu_count; } hardware_flags_t; extern hardware_flags_t kvz_g_hardware_flags;