Add cross-platform core number detection

It turns out pthread_num_processors_np isn't standard, so we need to
detect the number of cores ourselves on each platform. Threw in
hyper-threading detection as a bonus.
This commit is contained in:
Ari Koivula 2016-09-28 23:05:21 +03:00
parent 8c7351eac8
commit 31c5ff0f16
4 changed files with 51 additions and 3 deletions

View file

@ -23,7 +23,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
kvz_config *kvz_config_alloc(void)
@ -99,8 +98,7 @@ int kvz_config_init(kvz_config *cfg)
cfg->slice_addresses_in_ts = MALLOC(int32_t, 1);
cfg->slice_addresses_in_ts[0] = 0;
// Set number of threads to equal number of processors available.
cfg->threads = pthread_num_processors_np();
cfg->threads = -1;
cfg->cpuid = 1;
// Defaults for what sizes of PUs are tried.

View file

@ -24,6 +24,7 @@
#include <stdlib.h>
#include "cfg.h"
#include "strategyselector.h"
static int encoder_control_init_gop_layer_weights(encoder_control_t * const);
@ -98,6 +99,21 @@ static int select_owf_auto(const kvz_config *const cfg)
}
}
/**
 * \brief Pick the default number of threads from the detected CPU counts.
 *
 * Reads physical_cpu_count and logical_cpu_count from kvz_g_hardware_flags.
 * Falls back to 4 when neither count is usable.
 *
 * \return Number of threads to use; always at least 1.
 */
static unsigned cfg_num_threads(void)
{
  // The counts are signed ints, so validate them before converting to
  // unsigned: a negative value (failed detection) must not wrap around into
  // an enormous thread count.
  const int logical = kvz_g_hardware_flags.logical_cpu_count;
  int physical = kvz_g_hardware_flags.physical_cpu_count;

  if (physical <= 0) {
    // Default to 4 if we don't know the number of CPUs.
    if (logical <= 0) return 4;

    // The physical count is unusable but the logical count is known (this
    // can happen when a single logical CPU is halved for hyper-threading),
    // so use the logical count instead of guessing.
    physical = logical;
  }

  // Logical CPUs that are not backed by their own physical core. Clamp to
  // zero so inconsistent counts cannot underflow.
  const int fake_cpus = (logical > physical) ? (logical - physical) : 0;

  // 1.5 times the number of physical cores seems to be a good compromise
  // when hyperthreading is available on Haswell.
  return (unsigned)physical + (unsigned)fake_cpus / 2;
}
/**
* \brief Allocate and initialize an encoder control structure.
*
@ -112,6 +128,10 @@ encoder_control_t* kvz_encoder_control_init(kvz_config *const cfg) {
goto init_failed;
}
if (cfg->threads == -1) {
cfg->threads = cfg_num_threads();
}
if (cfg->gop_len > 0) {
if (cfg->tmvp_enable) {
cfg->tmvp_enable = false;

View file

@ -24,6 +24,15 @@
#include <stdlib.h>
#include <string.h>
#ifdef _WIN32
#include <windows.h>
#elif MACOS
#include <sys/param.h>
#include <sys/sysctl.h>
#else
#include <unistd.h>
#endif
hardware_flags_t kvz_g_hardware_flags;
hardware_flags_t kvz_g_strategies_in_use;
hardware_flags_t kvz_g_strategies_available;
@ -410,6 +419,7 @@ static void set_hardware_flags(int32_t cpuid) {
CPUID1_EDX_MMX = 1 << 23,
CPUID1_EDX_SSE = 1 << 25,
CPUID1_EDX_SSE2 = 1 << 26,
CPUID1_EDX_HYPER_THREADING = 1 << 28,
};
enum {
CPUID1_ECX_SSE3 = 1 << 0,
@ -431,6 +441,21 @@ static void set_hardware_flags(int32_t cpuid) {
// Dig CPU features with cpuid
get_cpuid(1, 0, &cpuid1);
#ifdef _WIN32
SYSTEM_INFO systeminfo;
GetSystemInfo(&systeminfo);
kvz_g_hardware_flags.logical_cpu_count = systeminfo.dwNumberOfProcessors;
#else
kvz_g_hardware_flags.logical_cpu_count = sysconf(_SC_NPROCESSORS_ONLN);
#endif
kvz_g_hardware_flags.physical_cpu_count = kvz_g_hardware_flags.logical_cpu_count;
kvz_g_hardware_flags.intel_flags.hyper_threading = cpuid1.edx & CPUID1_EDX_HYPER_THREADING;
if (kvz_g_hardware_flags.intel_flags.hyper_threading) {
kvz_g_hardware_flags.physical_cpu_count /= 2;
}
// EDX
if (cpuid1.edx & CPUID1_EDX_MMX) kvz_g_hardware_flags.intel_flags.mmx = 1;
if (cpuid1.edx & CPUID1_EDX_SSE) kvz_g_hardware_flags.intel_flags.sse = 1;

View file

@ -63,6 +63,8 @@ typedef struct {
int sse42;
int avx;
int avx2;
bool hyper_threading;
} intel_flags;
struct {
@ -72,6 +74,9 @@ typedef struct {
struct {
int neon;
} arm_flags;
int logical_cpu_count;
int physical_cpu_count;
} hardware_flags_t;
extern hardware_flags_t kvz_g_hardware_flags;