Merge branch 'cpuid-fix'

This commit is contained in:
Ari Koivula 2015-07-09 11:40:46 +03:00
commit c94d91061c

View file

@ -172,31 +172,46 @@ static void* strategyselector_choose_for(const strategy_list_t * const strategie
#if COMPILE_INTEL #if COMPILE_INTEL
#if defined(__GNUC__) typedef struct {
unsigned int eax;
unsigned int ebx;
unsigned int ecx;
unsigned int edx;
} cpuid_t;
// CPUID adapters for different compilers.
# if defined(__GNUC__)
#include <cpuid.h> #include <cpuid.h>
static INLINE int get_cpuid(unsigned int level, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { static INLINE int get_cpuid(unsigned level, unsigned sublevel, cpuid_t *cpu_info) {
return __get_cpuid(level, eax, ebx, ecx, edx); if (__get_cpuid_max(level & 0x80000000, NULL) < level) return 0;
__cpuid_count(level, sublevel, cpu_info->eax, cpu_info->ebx, cpu_info->ecx, cpu_info->edx);
return 1;
} }
#else # elif defined(_MSC_VER)
#include <intrin.h> #include <intrin.h>
//Adapter from __cpuid (VS) to __get_cpuid (GNU C). static INLINE int get_cpuid(unsigned level, unsigned sublevel, cpuid_t *cpu_info) {
static INLINE int get_cpuid(unsigned int level, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { int vendor_info[4] = { 0, 0, 0, 0 };
int CPUInfo[4] = {*eax, *ebx, *ecx, *edx}; __cpuidex(vendor_info, 0, 0);
__cpuid(CPUInfo, 0);
// check if the CPU supports the cpuid instruction. // Check highest supported function.
if (CPUInfo[0] != 0) { if (level > vendor_info[0]) return 0;
__cpuid(CPUInfo, level);
*eax = CPUInfo[0]; int ms_cpu_info[4] = { cpu_info->eax, cpu_info->ebx, cpu_info->ecx, cpu_info->edx };
*ebx = CPUInfo[1]; __cpuidex(ms_cpu_info, level, sublevel);
*ecx = CPUInfo[2]; cpu_info->eax = ms_cpu_info[0];
*edx = CPUInfo[3]; cpu_info->ebx = ms_cpu_info[1];
return 1; cpu_info->ecx = ms_cpu_info[2];
} cpu_info->edx = ms_cpu_info[3];
return 1;
}
# else
static INLINE int get_cpuid(unsigned level, unsigned sublevel, cpuid_t *cpu_info)
{
return 0; return 0;
} }
#endif //defined(__GNUC__) # endif
#endif // COMPILE_INTEL
#endif
#if COMPILE_POWERPC #if COMPILE_POWERPC
#include <unistd.h> #include <unistd.h>
@ -244,54 +259,68 @@ static void set_hardware_flags(int32_t cpuid) {
#if COMPILE_INTEL #if COMPILE_INTEL
if (cpuid) { if (cpuid) {
unsigned int eax = 0, ebx = 0, ecx = 0, edx =0; cpuid_t cpuid1 = { 0, 0, 0, 0 };
/* CPU feature bits */ /* CPU feature bits */
enum { BIT_SSE3 = 0, BIT_SSSE3 = 9, BIT_SSE41 = 19, BIT_SSE42 = 20, enum {
BIT_MMX = 24, BIT_SSE = 25, BIT_SSE2 = 26, CPUID1_EDX_MMX = 1 << 23,
BIT_OSXSAVE = 27, BIT_AVX = 28}; CPUID1_EDX_SSE = 1 << 25,
enum { XCR0_XMM = 1, XCR0_YMM = 2 }; CPUID1_EDX_SSE2 = 1 << 26,
};
enum {
CPUID1_ECX_SSE3 = 1 << 0,
CPUID1_ECX_SSSE3 = 1 << 9,
CPUID1_ECX_SSE41 = 1 << 19,
CPUID1_ECX_SSE42 = 1 << 20,
CPUID1_ECX_XSAVE = 1 << 26,
CPUID1_ECX_OSXSAVE = 1 << 27,
CPUID1_ECX_AVX = 1 << 28,
};
enum {
CPUID7_EBX_AVX2 = 1 << 5,
};
enum {
XGETBV_XCR0_XMM = 1 << 1,
XGETBV_XCR0_YMM = 1 << 2,
};
// Dig CPU features with cpuid // Dig CPU features with cpuid
get_cpuid(1, &eax, &ebx, &ecx, &edx); get_cpuid(1, 0, &cpuid1);
// EDX // EDX
if (edx & (1<<BIT_MMX)) g_hardware_flags.intel_flags.mmx = 1; if (cpuid1.edx & CPUID1_EDX_MMX) g_hardware_flags.intel_flags.mmx = 1;
if (edx & (1<<BIT_SSE)) g_hardware_flags.intel_flags.sse = 1; if (cpuid1.edx & CPUID1_EDX_SSE) g_hardware_flags.intel_flags.sse = 1;
if (edx & (1<<BIT_SSE2)) g_hardware_flags.intel_flags.sse2 = 1; if (cpuid1.edx & CPUID1_EDX_SSE2) g_hardware_flags.intel_flags.sse2 = 1;
// ECX // ECX
if (ecx & (1<<BIT_SSE3)) g_hardware_flags.intel_flags.sse3 = 1;; if (cpuid1.ecx & CPUID1_ECX_SSE3) g_hardware_flags.intel_flags.sse3 = 1;;
if (ecx & (1<<BIT_SSSE3)) g_hardware_flags.intel_flags.ssse3 = 1; if (cpuid1.ecx & CPUID1_ECX_SSSE3) g_hardware_flags.intel_flags.ssse3 = 1;
if (ecx & (1<<BIT_SSE41)) g_hardware_flags.intel_flags.sse41 = 1; if (cpuid1.ecx & CPUID1_ECX_SSE41) g_hardware_flags.intel_flags.sse41 = 1;
if (ecx & (1<<BIT_SSE42)) g_hardware_flags.intel_flags.sse42 = 1; if (cpuid1.ecx & CPUID1_ECX_SSE42) g_hardware_flags.intel_flags.sse42 = 1;
// Check hardware and OS support for AVX. // Check hardware and OS support for xsave and xgetbv.
if (ecx & (1 << BIT_OSXSAVE)) { if (cpuid1.ecx & (CPUID1_ECX_XSAVE | CPUID1_ECX_OSXSAVE)) {
uint64_t xcr0 = 0; uint64_t xcr0 = 0;
// Use _XCR_XFEATURE_ENABLED_MASK to check if _xgetbv intrinsic is // Use _XCR_XFEATURE_ENABLED_MASK to check if _xgetbv intrinsic is
// supported by the compiler. // supported by the compiler.
#ifdef _XCR_XFEATURE_ENABLED_MASK #ifdef _XCR_XFEATURE_ENABLED_MASK
xcr0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); xcr0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
#elif defined(__GNUC__) #elif defined(__GNUC__)
uint32_t geax = 0; unsigned eax = 0, edx = 0;
// Apparently there are some older assemblers that don't support xgetbv, asm("xgetbv" : "=a"(eax), "=d"(edx) : "c" (0));
// so we use the byte sequence for xgetbv just in case. xcr0 = (uint64_t)edx << 32 | eax;
//__asm__("xgetbv" : "=a" (geax), "=d" (gedx) : "c" (0));
__asm__(".byte 0x0f, 0x01, 0xd0" : "=a" (geax) : "c" (0) : "edx");
// edx is spillover, but we don't care about those bits.
xcr0 = geax;
#endif #endif
bool avx_support = ecx & (1 << BIT_AVX) || false; bool avx_support = cpuid1.ecx & CPUID1_ECX_AVX || false;
bool xmm_support = xcr0 & (1 << XCR0_XMM); bool xmm_support = xcr0 & XGETBV_XCR0_XMM || false;
bool ymm_support = xcr0 & (1 << XCR0_YMM); bool ymm_support = xcr0 & XGETBV_XCR0_YMM || false;
if (avx_support && xmm_support && ymm_support) { if (avx_support && xmm_support && ymm_support) {
g_hardware_flags.intel_flags.avx = 1; g_hardware_flags.intel_flags.avx = 1;
} }
}
if (g_hardware_flags.intel_flags.avx) { if (g_hardware_flags.intel_flags.avx) {
get_cpuid(7, &eax, &ebx, &ecx, &edx); cpuid_t cpuid7 = { 0, 0, 0, 0 };
if (ebx & (1 << 5)) g_hardware_flags.intel_flags.avx2 = 1; get_cpuid(7, 0, &cpuid7);
if (cpuid7.ebx & CPUID7_EBX_AVX2) g_hardware_flags.intel_flags.avx2 = 1;
}
} }
} }