| //! x86 run-time feature detection is OS independent. |
| |
| #[cfg(target_arch = "x86")] |
| use crate::arch::x86::*; |
| #[cfg(target_arch = "x86_64")] |
| use crate::arch::x86_64::*; |
| |
| use crate::mem; |
| |
| use crate::detect::{bit, cache, Feature}; |
| |
| /// Performs run-time feature detection. |
| #[inline] |
| pub fn check_for(x: Feature) -> bool { |
| cache::test(x as u32, detect_features) |
| } |
| |
| /// Run-time feature detection on x86 works by using the CPUID instruction. |
| /// |
| /// The [CPUID Wikipedia page][wiki_cpuid] contains |
| /// all the information about which flags to set to query which values, and in |
| /// which registers these are reported. |
| /// |
| /// The definitive references are: |
| /// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: |
| /// Instruction Set Reference, A-Z][intel64_ref]. |
| /// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and |
| /// System Instructions][amd64_ref]. |
| /// |
| /// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID |
| /// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf |
| /// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf |
| #[allow(clippy::similar_names)] |
| fn detect_features() -> cache::Initializer { |
| let mut value = cache::Initializer::default(); |
| |
| // If the x86 CPU does not support the CPUID instruction then it is too |
| // old to support any of the currently-detectable features. |
| if !has_cpuid() { |
| return value; |
| } |
| |
| // Calling `__cpuid`/`__cpuid_count` from here on is safe because the CPU |
| // has `cpuid` support. |
| |
| // 0. EAX = 0: Basic Information: |
| // - EAX returns the "Highest Function Parameter", that is, the maximum |
| // leaf value for subsequent calls of `cpuinfo` in range [0, |
| // 0x8000_0000]. - The vendor ID is stored in 12 u8 ascii chars, |
| // returned in EBX, EDX, and ECX (in that order): |
| let (max_basic_leaf, vendor_id) = unsafe { |
| let CpuidResult { |
| eax: max_basic_leaf, |
| ebx, |
| ecx, |
| edx, |
| } = __cpuid(0); |
| let vendor_id: [[u8; 4]; 3] = [ |
| mem::transmute(ebx), |
| mem::transmute(edx), |
| mem::transmute(ecx), |
| ]; |
| let vendor_id: [u8; 12] = mem::transmute(vendor_id); |
| (max_basic_leaf, vendor_id) |
| }; |
| |
| if max_basic_leaf < 1 { |
| // Earlier Intel 486, CPUID not implemented |
| return value; |
| } |
| |
| // EAX = 1, ECX = 0: Queries "Processor Info and Feature Bits"; |
| // Contains information about most x86 features. |
| let CpuidResult { |
| ecx: proc_info_ecx, |
| edx: proc_info_edx, |
| .. |
| } = unsafe { __cpuid(0x0000_0001_u32) }; |
| |
| // EAX = 7, ECX = 0: Queries "Extended Features"; |
| // Contains information about bmi,bmi2, and avx2 support. |
| let (extended_features_ebx, extended_features_ecx) = if max_basic_leaf >= 7 { |
| let CpuidResult { ebx, ecx, .. } = unsafe { __cpuid(0x0000_0007_u32) }; |
| (ebx, ecx) |
| } else { |
| (0, 0) // CPUID does not support "Extended Features" |
| }; |
| |
| // EAX = 0x8000_0000, ECX = 0: Get Highest Extended Function Supported |
| // - EAX returns the max leaf value for extended information, that is, |
| // `cpuid` calls in range [0x8000_0000; u32::MAX]: |
| let CpuidResult { |
| eax: extended_max_basic_leaf, |
| .. |
| } = unsafe { __cpuid(0x8000_0000_u32) }; |
| |
| // EAX = 0x8000_0001, ECX=0: Queries "Extended Processor Info and Feature |
| // Bits" |
| let extended_proc_info_ecx = if extended_max_basic_leaf >= 1 { |
| let CpuidResult { ecx, .. } = unsafe { __cpuid(0x8000_0001_u32) }; |
| ecx |
| } else { |
| 0 |
| }; |
| |
| { |
| // borrows value till the end of this scope: |
| let mut enable = |r, rb, f| { |
| if bit::test(r as usize, rb) { |
| value.set(f as u32); |
| } |
| }; |
| |
| enable(proc_info_ecx, 0, Feature::sse3); |
| enable(proc_info_ecx, 1, Feature::pclmulqdq); |
| enable(proc_info_ecx, 9, Feature::ssse3); |
| enable(proc_info_ecx, 13, Feature::cmpxchg16b); |
| enable(proc_info_ecx, 19, Feature::sse4_1); |
| enable(proc_info_ecx, 20, Feature::sse4_2); |
| enable(proc_info_ecx, 23, Feature::popcnt); |
| enable(proc_info_ecx, 25, Feature::aes); |
| enable(proc_info_ecx, 29, Feature::f16c); |
| enable(proc_info_ecx, 30, Feature::rdrand); |
| enable(extended_features_ebx, 18, Feature::rdseed); |
| enable(extended_features_ebx, 19, Feature::adx); |
| enable(extended_features_ebx, 11, Feature::rtm); |
| enable(proc_info_edx, 4, Feature::tsc); |
| enable(proc_info_edx, 23, Feature::mmx); |
| enable(proc_info_edx, 24, Feature::fxsr); |
| enable(proc_info_edx, 25, Feature::sse); |
| enable(proc_info_edx, 26, Feature::sse2); |
| enable(extended_features_ebx, 29, Feature::sha); |
| |
| enable(extended_features_ebx, 3, Feature::bmi); |
| enable(extended_features_ebx, 8, Feature::bmi2); |
| |
| // `XSAVE` and `AVX` support: |
| let cpu_xsave = bit::test(proc_info_ecx as usize, 26); |
| if cpu_xsave { |
| // 0. Here the CPU supports `XSAVE`. |
| |
| // 1. Detect `OSXSAVE`, that is, whether the OS is AVX enabled and |
| // supports saving the state of the AVX/AVX2 vector registers on |
| // context-switches, see: |
| // |
| // - [intel: is avx enabled?][is_avx_enabled], |
| // - [mozilla: sse.cpp][mozilla_sse_cpp]. |
| // |
| // [is_avx_enabled]: https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled |
| // [mozilla_sse_cpp]: https://hg.mozilla.org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190 |
| let cpu_osxsave = bit::test(proc_info_ecx as usize, 27); |
| |
| if cpu_osxsave { |
| // 2. The OS must have signaled the CPU that it supports saving and |
| // restoring the: |
| // |
| // * SSE -> `XCR0.SSE[1]` |
| // * AVX -> `XCR0.AVX[2]` |
| // * AVX-512 -> `XCR0.AVX-512[7:5]`. |
| // |
| // by setting the corresponding bits of `XCR0` to `1`. |
| // |
| // This is safe because the CPU supports `xsave` |
| // and the OS has set `osxsave`. |
| let xcr0 = unsafe { _xgetbv(0) }; |
| // Test `XCR0.SSE[1]` and `XCR0.AVX[2]` with the mask `0b110 == 6`: |
| let os_avx_support = xcr0 & 6 == 6; |
| // Test `XCR0.AVX-512[7:5]` with the mask `0b1110_0000 == 224`: |
| let os_avx512_support = xcr0 & 224 == 224; |
| |
| // Only if the OS and the CPU support saving/restoring the AVX |
| // registers we enable `xsave` support: |
| if os_avx_support { |
| // See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED |
| // FEATURES" in the "Intel® 64 and IA-32 Architectures Software |
| // Developer’s Manual, Volume 1: Basic Architecture": |
| // |
| // "Software enables the XSAVE feature set by setting |
| // CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4 |
| // instruction). If this bit is 0, execution of any of XGETBV, |
| // XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV |
| // causes an invalid-opcode exception (#UD)" |
| // |
| enable(proc_info_ecx, 26, Feature::xsave); |
| |
| // For `xsaveopt`, `xsavec`, and `xsaves` we need to query: |
| // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, |
| // ECX = 1): |
| if max_basic_leaf >= 0xd { |
| let CpuidResult { |
| eax: proc_extended_state1_eax, |
| .. |
| } = unsafe { __cpuid_count(0xd_u32, 1) }; |
| enable(proc_extended_state1_eax, 0, Feature::xsaveopt); |
| enable(proc_extended_state1_eax, 1, Feature::xsavec); |
| enable(proc_extended_state1_eax, 3, Feature::xsaves); |
| } |
| |
| // FMA (uses 256-bit wide registers): |
| enable(proc_info_ecx, 12, Feature::fma); |
| |
| // And AVX/AVX2: |
| enable(proc_info_ecx, 28, Feature::avx); |
| enable(extended_features_ebx, 5, Feature::avx2); |
| |
| // For AVX-512 the OS also needs to support saving/restoring |
| // the extended state, only then we enable AVX-512 support: |
| if os_avx512_support { |
| enable(extended_features_ebx, 16, Feature::avx512f); |
| enable(extended_features_ebx, 17, Feature::avx512dq); |
| enable(extended_features_ebx, 21, Feature::avx512_ifma); |
| enable(extended_features_ebx, 26, Feature::avx512pf); |
| enable(extended_features_ebx, 27, Feature::avx512er); |
| enable(extended_features_ebx, 28, Feature::avx512cd); |
| enable(extended_features_ebx, 30, Feature::avx512bw); |
| enable(extended_features_ebx, 31, Feature::avx512vl); |
| enable(extended_features_ecx, 1, Feature::avx512_vbmi); |
| enable(extended_features_ecx, 14, Feature::avx512_vpopcntdq); |
| } |
| } |
| } |
| } |
| |
| // This detects ABM on AMD CPUs and LZCNT on Intel CPUs. |
| // On intel CPUs with popcnt, lzcnt implements the |
| // "missing part" of ABM, so we map both to the same |
| // internal feature. |
| // |
| // The `is_x86_feature_detected!("lzcnt")` macro then |
| // internally maps to Feature::abm. |
| enable(extended_proc_info_ecx, 5, Feature::abm); |
| // As Hygon Dhyana originates from AMD technology and shares most of the architecture with |
| // AMD's family 17h, but with different CPU Vendor ID("HygonGenuine")/Family series |
| // number(Family 18h). |
| // |
| // For CPUID feature bits, Hygon Dhyana(family 18h) share the same definition with AMD |
| // family 17h. |
| // |
| // Related AMD CPUID specification is https://www.amd.com/system/files/TechDocs/25481.pdf. |
| // Related Hygon kernel patch can be found on |
| // http://lkml.kernel.org/r/5ce86123a7b9dad925ac583d88d2f921040e859b.1538583282.git.puwen@hygon.cn |
| if vendor_id == *b"AuthenticAMD" || vendor_id == *b"HygonGenuine" { |
| // These features are available on AMD arch CPUs: |
| enable(extended_proc_info_ecx, 6, Feature::sse4a); |
| enable(extended_proc_info_ecx, 21, Feature::tbm); |
| } |
| } |
| |
| value |
| } |
| |
#[cfg(test)]
mod tests {
    extern crate cupid;

    /// Prints one detection result using the given format string.
    ///
    /// Both arguments are forwarded as raw tokens so that the string literals
    /// reach `println!` and `is_x86_feature_detected!` unchanged.
    macro_rules! show {
        ($fmt:tt, $feature:tt) => {
            println!($fmt, is_x86_feature_detected!($feature))
        };
    }

    /// Asserts that our detection of `$feature` equals `$expected`.
    macro_rules! agree {
        ($feature:tt, $expected:expr) => {
            assert_eq!(is_x86_feature_detected!($feature), $expected)
        };
    }

    /// Dumps the detection result of every queried feature to stdout.
    #[test]
    fn dump() {
        show!("aes: {:?}", "aes");
        show!("pclmulqdq: {:?}", "pclmulqdq");
        show!("rdrand: {:?}", "rdrand");
        show!("rdseed: {:?}", "rdseed");
        show!("tsc: {:?}", "tsc");
        show!("sse: {:?}", "sse");
        show!("sse2: {:?}", "sse2");
        show!("sse3: {:?}", "sse3");
        show!("ssse3: {:?}", "ssse3");
        show!("sse4.1: {:?}", "sse4.1");
        show!("sse4.2: {:?}", "sse4.2");
        show!("sse4a: {:?}", "sse4a");
        show!("sha: {:?}", "sha");
        show!("avx: {:?}", "avx");
        show!("avx2: {:?}", "avx2");
        show!("avx512f {:?}", "avx512f");
        show!("avx512cd {:?}", "avx512cd");
        show!("avx512er {:?}", "avx512er");
        show!("avx512pf {:?}", "avx512pf");
        show!("avx512bw {:?}", "avx512bw");
        show!("avx512dq {:?}", "avx512dq");
        show!("avx512vl {:?}", "avx512vl");
        show!("avx512_ifma {:?}", "avx512ifma");
        show!("avx512_vbmi {:?}", "avx512vbmi");
        show!("avx512_vpopcntdq {:?}", "avx512vpopcntdq");
        show!("fma: {:?}", "fma");
        show!("abm: {:?}", "abm");
        show!("bmi: {:?}", "bmi1");
        show!("bmi2: {:?}", "bmi2");
        show!("tbm: {:?}", "tbm");
        show!("popcnt: {:?}", "popcnt");
        show!("lzcnt: {:?}", "lzcnt");
        show!("fxsr: {:?}", "fxsr");
        show!("xsave: {:?}", "xsave");
        show!("xsaveopt: {:?}", "xsaveopt");
        show!("xsaves: {:?}", "xsaves");
        show!("xsavec: {:?}", "xsavec");
        show!("cmpxchg16b: {:?}", "cmpxchg16b");
        show!("adx: {:?}", "adx");
        show!("rtm: {:?}", "rtm");
    }

    /// Cross-checks our detection results against the `cupid` crate.
    #[test]
    fn compare_with_cupid() {
        // Panics if `cupid` cannot provide its master information record.
        let cpu = cupid::master().unwrap();

        agree!("aes", cpu.aesni());
        agree!("pclmulqdq", cpu.pclmulqdq());
        agree!("rdrand", cpu.rdrand());
        agree!("rdseed", cpu.rdseed());
        agree!("tsc", cpu.tsc());
        agree!("sse", cpu.sse());
        agree!("sse2", cpu.sse2());
        agree!("sse3", cpu.sse3());
        agree!("ssse3", cpu.ssse3());
        agree!("sse4.1", cpu.sse4_1());
        agree!("sse4.2", cpu.sse4_2());
        agree!("sse4a", cpu.sse4a());
        agree!("sha", cpu.sha());
        agree!("avx", cpu.avx());
        agree!("avx2", cpu.avx2());
        agree!("avx512f", cpu.avx512f());
        agree!("avx512cd", cpu.avx512cd());
        agree!("avx512er", cpu.avx512er());
        agree!("avx512pf", cpu.avx512pf());
        agree!("avx512bw", cpu.avx512bw());
        agree!("avx512dq", cpu.avx512dq());
        agree!("avx512vl", cpu.avx512vl());
        agree!("avx512ifma", cpu.avx512_ifma());
        agree!("avx512vbmi", cpu.avx512_vbmi());
        agree!("avx512vpopcntdq", cpu.avx512_vpopcntdq());
        agree!("fma", cpu.fma());
        agree!("bmi1", cpu.bmi1());
        agree!("bmi2", cpu.bmi2());
        agree!("popcnt", cpu.popcnt());
        // `abm` and `lzcnt` are both checked against cupid's `lzcnt` flag.
        agree!("abm", cpu.lzcnt());
        agree!("tbm", cpu.tbm());
        agree!("lzcnt", cpu.lzcnt());
        agree!("xsave", cpu.xsave());
        agree!("xsaveopt", cpu.xsaveopt());
        agree!("xsavec", cpu.xsavec_and_xrstor());
        agree!("xsaves", cpu.xsaves_xrstors_and_ia32_xss());
        agree!("cmpxchg16b", cpu.cmpxchg16b());
        agree!("adx", cpu.adx());
        agree!("rtm", cpu.rtm());
    }
}