| // SPDX-License-Identifier: Apache-2.0 OR MIT |
| |
| // Atomic{I,U}128 implementation on PowerPC64. |
| // |
// powerpc64 on pwr8+ supports 128-bit atomics:
| // https://github.com/llvm/llvm-project/commit/549e118e93c666914a1045fde38a2cac33e1e445 |
| // https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll |
| // https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/test/CodeGen/PowerPC/atomics-i128.ll |
| // |
| // powerpc64le is pwr8+ by default https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/lib/Target/PowerPC/PPC.td#L663 |
| // See also https://github.com/rust-lang/rust/issues/59932 |
| // |
| // Note that we do not separate LL and SC into separate functions, but handle |
| // them within a single asm block. This is because it is theoretically possible |
| // for the compiler to insert operations that might clear the reservation between |
| // LL and SC. See aarch64.rs for details. |
| // |
| // Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use |
| // this module and use intrinsics.rs instead. |
| // |
| // Refs: |
| // - Power ISA https://openpowerfoundation.org/specifications/isa |
| // - AIX Assembler language reference https://www.ibm.com/docs/en/aix/7.3?topic=aix-assembler-language-reference |
| // - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit |
| // |
| // Generated asm: |
| // - powerpc64 (pwr8) https://godbolt.org/z/nG5dGa38a |
| // - powerpc64le https://godbolt.org/z/6c99s75e4 |
| |
| include!("macros.rs"); |
| |
// Fallback implementation that the run-time dispatchers in this file select when
// detection reports that quadword atomics are unavailable. It is not compiled when
// quadword-atomics is enabled at compile time, because no dispatch happens then.
#[cfg(not(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
)))]
#[path = "../fallback/outline_atomics.rs"]
mod fallback;
| |
// Run-time CPU feature detection (`detect::detect().has_quadword_atomics()`), used to
// choose between the pwr8 implementation and the fallback.
//
// On musl with static linking, it seems that getauxval is not always available.
// See detect/auxv.rs for more.
//
// The module is compiled only when all of the following cfg layers hold.
// 1. Outline atomics have not been explicitly disabled.
#[cfg(not(portable_atomic_no_outline_atomics))]
// 2. Outline atomics have been opted into (or this is a test build).
#[cfg(any(test, portable_atomic_outline_atomics))] // TODO(powerpc64): currently disabled by default
// 3. quadword-atomics is not already enabled at compile time (or this is a test build).
#[cfg(any(
    test,
    not(any(
        target_feature = "quadword-atomics",
        portable_atomic_target_feature = "quadword-atomics",
    )),
))]
// 4. The target is one of the OS/env combinations listed here.
#[cfg(any(
    all(
        target_os = "linux",
        any(
            target_env = "gnu",
            all(any(target_env = "musl", target_env = "ohos"), not(target_feature = "crt-static")),
            portable_atomic_outline_atomics,
        ),
    ),
    target_os = "android",
    target_os = "freebsd",
))]
#[path = "detect/auxv.rs"]
mod detect;
| |
| use core::{arch::asm, sync::atomic::Ordering}; |
| |
| use crate::utils::{Pair, U128}; |
| |
// Debug-build sanity check used at the top of the `*_pwr8` functions: asserts that
// run-time detection reports quadword atomics as available.
//
// When quadword-atomics is enabled at compile time the cfg removes the block, so the
// macro expands to nothing (the `detect` module is not referenced in that case).
macro_rules! debug_assert_pwr8 {
    () => {
        #[cfg(not(any(
            target_feature = "quadword-atomics",
            portable_atomic_target_feature = "quadword-atomics",
        )))]
        {
            debug_assert!(detect::detect().has_quadword_atomics());
        }
    };
}
| |
// Assembler directives that open a power8 region inside an asm block: the current
// `.machine` setting is pushed and then switched to power8.
//
// Refs: https://www.ibm.com/docs/en/aix/7.3?topic=ops-machine-pseudo-op
//
// This is similar to #[target_feature(enable = "quadword-atomics")], except that there are
// no compiler guarantees regarding (un)inlining, and the scope is within an asm
// block rather than a function. We use this directive because #[target_feature(enable = "quadword-atomics")]
// is not supported as of Rust 1.70-nightly.
//
// start_pwr8 and end_pwr8 must be used in pairs.
//
// Note: If power8 instructions are not available at compile-time, we must guarantee that
// the function that uses it is not inlined into a function where it is not
// clear whether power8 instructions are available. Otherwise, (even if we checked whether
// power8 instructions are available at run-time) optimizations that reorder its
// instructions across the if condition might introduce undefined behavior.
// (see also https://rust-lang.github.io/rfcs/2045-target-feature.html#safely-inlining-target_feature-functions-on-more-contexts)
// However, our code uses the ifunc helper macro that works with function pointers,
// so we don't have to worry about this unless calling without helper macro.
macro_rules! start_pwr8 {
    () => {
        ".machine push\n.machine power8"
    };
}
// Assembler directive that closes a power8 region: pops the `.machine` setting that
// start_pwr8 pushed. Must always be paired with a preceding start_pwr8 in the same
// asm block.
macro_rules! end_pwr8 {
    () => {
        ".machine pop"
    };
}
| |
// Dispatches on a run-time `Ordering` and expands `$op!(acquire, release)` with the
// fence instructions for that ordering.
//
// The `$op` macros in this file emit the second argument (release fence) before the
// LL/SC sequence and the first argument (acquire fence) after it. An empty string
// means no fence is emitted at that position.
//
// | ordering | before (release) | after (acquire) |
// | Relaxed  | -                | -               |
// | Acquire  | -                | lwsync          |
// | Release  | lwsync           | -               |
// | AcqRel   | lwsync           | lwsync          |
// | SeqCst   | sync             | lwsync          |
macro_rules! atomic_rmw {
    ($op:ident, $order:ident) => {
        match $order {
            Ordering::Relaxed => $op!("", ""),
            Ordering::Acquire => $op!("lwsync", ""),
            Ordering::Release => $op!("", "lwsync"),
            Ordering::AcqRel => $op!("lwsync", "lwsync"),
            Ordering::SeqCst => $op!("lwsync", "sync"),
            // `Ordering` is non-exhaustive, so a catch-all arm is required.
            _ => unreachable!("{:?}", $order),
        }
    };
}
| |
// Reports whether the EQ bit of cr0 is set in `r`.
//
// Callers in this file pass the value written by `mfcr`; the EQ bit of cr0 is bit 29
// (mask 0x20000000) of that value.
#[inline]
fn extract_cr0(r: u64) -> bool {
    const CR0_EQ: u64 = 1 << 29;
    (r & CR0_EQ) == CR0_EQ
}
| |
// 128-bit atomic load entry point.
//
// When quadword-atomics is enabled at compile time, `atomic_load` is simply an alias
// of `atomic_load_pwr8`. Otherwise it is a dispatcher that, per ordering, selects
// either the pwr8 implementation or the fallback based on run-time detection.
#[cfg(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
))]
use atomic_load_pwr8 as atomic_load;
#[cfg(not(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
)))]
#[inline]
unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 {
    // One alias per load ordering, each binding `atomic_load_pwr8` to a fixed
    // `Ordering` so the result has the `unsafe fn(*mut u128) -> u128` signature that
    // `ifunc!` is given below. (`fn_alias!` is defined outside this file.)
    fn_alias! {
        // inline(never) is just a hint and also not strictly necessary
        // because we use ifunc helper macro, but used for clarity.
        #[inline(never)]
        unsafe fn(src: *mut u128) -> u128;
        atomic_load_pwr8_relaxed = atomic_load_pwr8(Ordering::Relaxed);
        atomic_load_pwr8_acquire = atomic_load_pwr8(Ordering::Acquire);
        atomic_load_pwr8_seqcst = atomic_load_pwr8(Ordering::SeqCst);
    }
    // SAFETY: the caller must uphold the safety contract.
    // we only call atomic_load_pwr8 if quadword-atomics is available.
    unsafe {
        match order {
            Ordering::Relaxed => {
                ifunc!(unsafe fn(src: *mut u128) -> u128 {
                    if detect::detect().has_quadword_atomics() {
                        atomic_load_pwr8_relaxed
                    } else {
                        fallback::atomic_load_non_seqcst
                    }
                })
            }
            Ordering::Acquire => {
                ifunc!(unsafe fn(src: *mut u128) -> u128 {
                    if detect::detect().has_quadword_atomics() {
                        atomic_load_pwr8_acquire
                    } else {
                        // Relaxed and Acquire share the same non-SeqCst fallback.
                        fallback::atomic_load_non_seqcst
                    }
                })
            }
            Ordering::SeqCst => {
                ifunc!(unsafe fn(src: *mut u128) -> u128 {
                    if detect::detect().has_quadword_atomics() {
                        atomic_load_pwr8_seqcst
                    } else {
                        fallback::atomic_load_seqcst
                    }
                })
            }
            // Any other ordering is treated as unreachable for loads.
            _ => unreachable!("{:?}", order),
        }
    }
}
// 128-bit atomic load using the `lq` instruction.
//
// As the debug assertions show, `src` must be 16-byte aligned and quadword atomics
// must be available on the running CPU.
//
// - Relaxed: a plain `lq`.
// - Acquire: `lq` followed by the cmpd/bne-/isync sequence.
// - SeqCst: like Acquire, with a `sync` emitted before the `lq`.
#[inline]
unsafe fn atomic_load_pwr8(src: *mut u128, order: Ordering) -> u128 {
    debug_assert!(src as usize % 16 == 0);
    debug_assert_pwr8!();

    // SAFETY: the caller must uphold the safety contract.
    //
    // Refs: "3.3.4 Fixed Point Load and Store Quadword Instructions" of Power ISA
    unsafe {
        let (out_hi, out_lo);
        // `$release` is the fence (possibly empty) emitted before the load.
        macro_rules! atomic_load_acquire {
            ($release:tt) => {
                asm!(
                    start_pwr8!(),
                    $release,
                    "lq %r4, 0({src})",
                    // Lightweight acquire sync
                    // Refs: https://github.com/boostorg/atomic/blob/boost-1.79.0/include/boost/atomic/detail/core_arch_ops_gcc_ppc.hpp#L47-L62
                    "cmpd %cr7, %r4, %r4",
                    "bne- %cr7, 2f",
                    "2:",
                    "isync",
                    end_pwr8!(),
                    src = in(reg_nonzero) ptr_reg!(src),
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
                    out("r4") out_hi,
                    out("r5") out_lo,
                    // cr7 is written by the cmpd above.
                    out("cr7") _,
                    options(nostack, preserves_flags),
                )
            };
        }
        match order {
            Ordering::Relaxed => {
                asm!(
                    start_pwr8!(),
                    "lq %r4, 0({src})",
                    end_pwr8!(),
                    src = in(reg_nonzero) ptr_reg!(src),
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
                    out("r4") out_hi,
                    out("r5") out_lo,
                    options(nostack, preserves_flags, readonly),
                );
            }
            Ordering::Acquire => atomic_load_acquire!(""),
            Ordering::SeqCst => atomic_load_acquire!("sync"),
            _ => unreachable!("{:?}", order),
        }
        // Reassemble the two 64-bit halves into a u128.
        U128 { pair: Pair { hi: out_hi, lo: out_lo } }.whole
    }
}
| |
// 128-bit atomic store entry point.
//
// When quadword-atomics is enabled at compile time, `atomic_store` is simply an alias
// of `atomic_store_pwr8`. Otherwise it is a dispatcher that, per ordering, selects
// either the pwr8 implementation or the fallback based on run-time detection.
#[cfg(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
))]
use atomic_store_pwr8 as atomic_store;
#[cfg(not(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
)))]
#[inline]
unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
    // One alias per store ordering, each binding `atomic_store_pwr8` to a fixed
    // `Ordering` so the result has the `unsafe fn(*mut u128, u128)` signature that
    // `ifunc!` is given below. (`fn_alias!` is defined outside this file.)
    fn_alias! {
        // inline(never) is just a hint and also not strictly necessary
        // because we use ifunc helper macro, but used for clarity.
        #[inline(never)]
        unsafe fn(dst: *mut u128, val: u128);
        atomic_store_pwr8_relaxed = atomic_store_pwr8(Ordering::Relaxed);
        atomic_store_pwr8_release = atomic_store_pwr8(Ordering::Release);
        atomic_store_pwr8_seqcst = atomic_store_pwr8(Ordering::SeqCst);
    }
    // SAFETY: the caller must uphold the safety contract.
    // we only call atomic_store_pwr8 if quadword-atomics is available.
    unsafe {
        match order {
            Ordering::Relaxed => {
                ifunc!(unsafe fn(dst: *mut u128, val: u128) {
                    if detect::detect().has_quadword_atomics() {
                        atomic_store_pwr8_relaxed
                    } else {
                        fallback::atomic_store_non_seqcst
                    }
                });
            }
            Ordering::Release => {
                ifunc!(unsafe fn(dst: *mut u128, val: u128) {
                    if detect::detect().has_quadword_atomics() {
                        atomic_store_pwr8_release
                    } else {
                        // Relaxed and Release share the same non-SeqCst fallback.
                        fallback::atomic_store_non_seqcst
                    }
                });
            }
            Ordering::SeqCst => {
                ifunc!(unsafe fn(dst: *mut u128, val: u128) {
                    if detect::detect().has_quadword_atomics() {
                        atomic_store_pwr8_seqcst
                    } else {
                        fallback::atomic_store_seqcst
                    }
                });
            }
            // Any other ordering is treated as unreachable for stores.
            _ => unreachable!("{:?}", order),
        }
    }
}
// 128-bit atomic store using the `stq` instruction.
//
// As the debug assertions show, `dst` must be 16-byte aligned and quadword atomics
// must be available on the running CPU.
//
// - Relaxed: a plain `stq`.
// - Release: `lwsync` before the `stq`.
// - SeqCst: `sync` before the `stq`.
#[inline]
unsafe fn atomic_store_pwr8(dst: *mut u128, val: u128, order: Ordering) {
    debug_assert!(dst as usize % 16 == 0);
    debug_assert_pwr8!();

    // SAFETY: the caller must uphold the safety contract.
    //
    // Refs: "3.3.4 Fixed Point Load and Store Quadword Instructions" of Power ISA
    unsafe {
        // Split the value into hi/lo 64-bit halves for the r4/r5 register pair.
        let val = U128 { whole: val };
        // `$release` is the fence (possibly empty) emitted before the store.
        macro_rules! atomic_store {
            ($release:tt) => {
                asm!(
                    start_pwr8!(),
                    $release,
                    "stq %r4, 0({dst})",
                    end_pwr8!(),
                    dst = in(reg_nonzero) ptr_reg!(dst),
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
                    in("r4") val.pair.hi,
                    in("r5") val.pair.lo,
                    options(nostack, preserves_flags),
                )
            };
        }
        match order {
            Ordering::Relaxed => atomic_store!(""),
            Ordering::Release => atomic_store!("lwsync"),
            Ordering::SeqCst => atomic_store!("sync"),
            _ => unreachable!("{:?}", order),
        }
    }
}
| |
| #[inline] |
| unsafe fn atomic_compare_exchange( |
| dst: *mut u128, |
| old: u128, |
| new: u128, |
| success: Ordering, |
| failure: Ordering, |
| ) -> Result<u128, u128> { |
| let success = crate::utils::upgrade_success_ordering(success, failure); |
| |
| #[cfg(any( |
| target_feature = "quadword-atomics", |
| portable_atomic_target_feature = "quadword-atomics", |
| ))] |
| // SAFETY: the caller must uphold the safety contract. |
| // cfg guarantees that quadword atomics instructions are available at compile-time. |
| let (prev, ok) = unsafe { atomic_compare_exchange_pwr8(dst, old, new, success) }; |
| #[cfg(not(any( |
| target_feature = "quadword-atomics", |
| portable_atomic_target_feature = "quadword-atomics", |
| )))] |
| // SAFETY: the caller must uphold the safety contract. |
| let (prev, ok) = unsafe { atomic_compare_exchange_ifunc(dst, old, new, success) }; |
| if ok { |
| Ok(prev) |
| } else { |
| Err(prev) |
| } |
| } |
// Strong 128-bit compare-and-exchange using an lqarx/stqcx. loop.
//
// Returns the previously stored value together with a flag that is true when the
// new value was stored. The loop retries while the store-conditional fails, and
// leaves the loop without storing when the loaded value differs from `old`.
//
// As the debug assertions show, `dst` must be 16-byte aligned and quadword atomics
// must be available on the running CPU.
#[inline]
unsafe fn atomic_compare_exchange_pwr8(
    dst: *mut u128,
    old: u128,
    new: u128,
    order: Ordering,
) -> (u128, bool) {
    debug_assert!(dst as usize % 16 == 0);
    debug_assert_pwr8!();

    // SAFETY: the caller must uphold the safety contract.
    //
    // Refs: "4.6.2.2 128-bit Load And Reserve and Store Conditional Instructions" of Power ISA
    unsafe {
        let old = U128 { whole: old };
        let new = U128 { whole: new };
        let (mut prev_hi, mut prev_lo);
        // Receives the condition register contents read by `mfcr`.
        let mut r;
        macro_rules! cmpxchg {
            ($acquire:tt, $release:tt) => {
                asm!(
                    start_pwr8!(),
                    $release,
                    "2:",
                        "lqarx %r8, 0, {dst}",
                        // XOR both halves against `old` and OR the results: zero (EQ set) iff equal.
                        "xor {tmp_lo}, %r9, {old_lo}",
                        "xor {tmp_hi}, %r8, {old_hi}",
                        "or. {tmp_lo}, {tmp_lo}, {tmp_hi}",
                        "bne %cr0, 3f", // jump if compare failed
                        "stqcx. %r6, 0, {dst}",
                        "bne %cr0, 2b", // continue loop if store failed
                    "3:",
                    // if compare failed EQ bit is cleared, if stqcx succeeds EQ bit is set.
                    "mfcr {tmp_lo}",
                    $acquire,
                    end_pwr8!(),
                    dst = in(reg_nonzero) ptr_reg!(dst),
                    old_hi = in(reg) old.pair.hi,
                    old_lo = in(reg) old.pair.lo,
                    tmp_hi = out(reg) _,
                    tmp_lo = out(reg) r,
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
                    in("r6") new.pair.hi,
                    in("r7") new.pair.lo,
                    out("r8") prev_hi,
                    out("r9") prev_lo,
                    out("cr0") _,
                    options(nostack, preserves_flags),
                )
            };
        }
        atomic_rmw!(cmpxchg, order);
        (U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole, extract_cr0(r))
    }
}
| |
| // Always use strong CAS for outline-atomics. |
| #[cfg(not(any( |
| target_feature = "quadword-atomics", |
| portable_atomic_target_feature = "quadword-atomics", |
| )))] |
| use atomic_compare_exchange as atomic_compare_exchange_weak; |
| #[cfg(any( |
| target_feature = "quadword-atomics", |
| portable_atomic_target_feature = "quadword-atomics", |
| ))] |
| #[inline] |
| unsafe fn atomic_compare_exchange_weak( |
| dst: *mut u128, |
| old: u128, |
| new: u128, |
| success: Ordering, |
| failure: Ordering, |
| ) -> Result<u128, u128> { |
| let success = crate::utils::upgrade_success_ordering(success, failure); |
| |
| // SAFETY: the caller must uphold the safety contract. |
| // cfg guarantees that quadword atomics instructions are available at compile-time. |
| let (prev, ok) = unsafe { atomic_compare_exchange_weak_pwr8(dst, old, new, success) }; |
| if ok { |
| Ok(prev) |
| } else { |
| Err(prev) |
| } |
| } |
// Weak 128-bit compare-and-exchange: a single lqarx/stqcx. attempt with no retry.
//
// Returns the previously loaded value together with a flag that is true only when
// the comparison matched and the store-conditional succeeded. Unlike the strong
// variant, a failed store-conditional is reported as failure instead of retried.
//
// Only compiled when quadword-atomics is enabled at compile time.
#[cfg(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
))]
#[inline]
unsafe fn atomic_compare_exchange_weak_pwr8(
    dst: *mut u128,
    old: u128,
    new: u128,
    order: Ordering,
) -> (u128, bool) {
    debug_assert!(dst as usize % 16 == 0);
    debug_assert_pwr8!();

    // SAFETY: the caller must uphold the safety contract.
    //
    // Refs: "4.6.2.2 128-bit Load And Reserve and Store Conditional Instructions" of Power ISA
    unsafe {
        let old = U128 { whole: old };
        let new = U128 { whole: new };
        let (mut prev_hi, mut prev_lo);
        // Receives the condition register contents read by `mfcr`.
        let mut r;
        macro_rules! cmpxchg_weak {
            ($acquire:tt, $release:tt) => {
                asm!(
                    start_pwr8!(),
                    $release,
                    "lqarx %r8, 0, {dst}",
                    // XOR both halves against `old` and OR the results: zero (EQ set) iff equal.
                    "xor {tmp_lo}, %r9, {old_lo}",
                    "xor {tmp_hi}, %r8, {old_hi}",
                    "or. {tmp_lo}, {tmp_lo}, {tmp_hi}",
                    "bne %cr0, 3f", // jump if compare failed
                    "stqcx. %r6, 0, {dst}",
                    "3:",
                    // if compare or stqcx failed EQ bit is cleared, if stqcx succeeds EQ bit is set.
                    "mfcr {tmp_lo}",
                    $acquire,
                    end_pwr8!(),
                    dst = in(reg_nonzero) ptr_reg!(dst),
                    old_hi = in(reg) old.pair.hi,
                    old_lo = in(reg) old.pair.lo,
                    tmp_hi = out(reg) _,
                    tmp_lo = out(reg) r,
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
                    in("r6") new.pair.hi,
                    in("r7") new.pair.lo,
                    out("r8") prev_hi,
                    out("r9") prev_lo,
                    out("cr0") _,
                    options(nostack, preserves_flags),
                )
            };
        }
        atomic_rmw!(cmpxchg_weak, order);
        (U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole, extract_cr0(r))
    }
}
| |
// 128-bit atomic swap: stores `val` and returns the previous value.
//
// When quadword-atomics is enabled at compile time, `atomic_swap` is an alias of this
// function.
#[cfg(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
))]
use atomic_swap_pwr8 as atomic_swap;
// Do not use atomic_rmw_ll_sc_3 because it needs extra MR to implement swap.
#[inline]
unsafe fn atomic_swap_pwr8(dst: *mut u128, val: u128, order: Ordering) -> u128 {
    debug_assert!(dst as usize % 16 == 0);
    debug_assert_pwr8!();

    // SAFETY: the caller must uphold the safety contract.
    unsafe {
        let val = U128 { whole: val };
        let (mut prev_hi, mut prev_lo);
        macro_rules! swap {
            ($acquire:tt, $release:tt) => {
                asm!(
                    start_pwr8!(),
                    $release,
                    "2:",
                        // Load the previous value into r6/r7, then try to store `val` from r8/r9.
                        "lqarx %r6, 0, {dst}",
                        "stqcx. %r8, 0, {dst}",
                        // Retry until the store-conditional succeeds.
                        "bne %cr0, 2b",
                    $acquire,
                    end_pwr8!(),
                    dst = in(reg_nonzero) ptr_reg!(dst),
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
                    out("r6") prev_hi,
                    out("r7") prev_lo,
                    in("r8") val.pair.hi,
                    in("r9") val.pair.lo,
                    out("cr0") _,
                    options(nostack, preserves_flags),
                )
            };
        }
        atomic_rmw!(swap, order);
        U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
    }
}
| |
/// Atomic RMW by LL/SC loop (3 arguments)
/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;`
///
/// Defines `$name`, which returns the previous value, and (only when
/// quadword-atomics is enabled at compile time) re-exports it as `$reexport_name`.
///
/// `[$($reg)*]` is a list of extra asm operands (e.g. clobbers) required by `$op`.
///
/// $op can use the following registers:
/// - val_hi/val_lo pair: val argument (read-only for `$op`)
/// - r6/r7 pair: previous value loaded by ll (read-only for `$op`)
/// - r8/r9 pair: new value that will be stored by sc
macro_rules! atomic_rmw_ll_sc_3 {
    ($name:ident as $reexport_name:ident, [$($reg:tt)*], $($op:tt)*) => {
        #[cfg(any(
            target_feature = "quadword-atomics",
            portable_atomic_target_feature = "quadword-atomics",
        ))]
        use $name as $reexport_name;
        #[inline]
        unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 {
            debug_assert!(dst as usize % 16 == 0);
            debug_assert_pwr8!();
            // SAFETY: the caller must uphold the safety contract.
            unsafe {
                let val = U128 { whole: val };
                let (mut prev_hi, mut prev_lo);
                macro_rules! op {
                    ($acquire:tt, $release:tt) => {
                        asm!(
                            start_pwr8!(),
                            $release,
                            "2:",
                                "lqarx %r6, 0, {dst}",
                                // Computes the new value into r8/r9.
                                $($op)*
                                "stqcx. %r8, 0, {dst}",
                                // Retry until the store-conditional succeeds.
                                "bne %cr0, 2b",
                            $acquire,
                            end_pwr8!(),
                            dst = in(reg_nonzero) ptr_reg!(dst),
                            val_hi = in(reg) val.pair.hi,
                            val_lo = in(reg) val.pair.lo,
                            $($reg)*
                            // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                            // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
                            out("r6") prev_hi,
                            out("r7") prev_lo,
                            out("r8") _, // new (hi)
                            out("r9") _, // new (lo)
                            out("cr0") _,
                            options(nostack, preserves_flags),
                        )
                    };
                }
                atomic_rmw!(op, order);
                U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
            }
        }
    };
}
/// Atomic RMW by LL/SC loop (2 arguments)
/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
///
/// Defines `$name`, which returns the previous value, and (only when
/// quadword-atomics is enabled at compile time) re-exports it as `$reexport_name`.
///
/// `[$($reg)*]` is a list of extra asm operands (e.g. inputs or clobbers) required by `$op`.
///
/// $op can use the following registers:
/// - r6/r7 pair: previous value loaded by ll (read-only for `$op`)
/// - r8/r9 pair: new value that will be stored by sc
macro_rules! atomic_rmw_ll_sc_2 {
    ($name:ident as $reexport_name:ident, [$($reg:tt)*], $($op:tt)*) => {
        #[cfg(any(
            target_feature = "quadword-atomics",
            portable_atomic_target_feature = "quadword-atomics",
        ))]
        use $name as $reexport_name;
        #[inline]
        unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 {
            debug_assert!(dst as usize % 16 == 0);
            debug_assert_pwr8!();
            // SAFETY: the caller must uphold the safety contract.
            unsafe {
                let (mut prev_hi, mut prev_lo);
                macro_rules! op {
                    ($acquire:tt, $release:tt) => {
                        asm!(
                            start_pwr8!(),
                            $release,
                            "2:",
                                "lqarx %r6, 0, {dst}",
                                // Computes the new value into r8/r9.
                                $($op)*
                                "stqcx. %r8, 0, {dst}",
                                // Retry until the store-conditional succeeds.
                                "bne %cr0, 2b",
                            $acquire,
                            end_pwr8!(),
                            dst = in(reg_nonzero) ptr_reg!(dst),
                            $($reg)*
                            // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                            // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
                            out("r6") prev_hi,
                            out("r7") prev_lo,
                            out("r8") _, // new (hi)
                            out("r9") _, // new (lo)
                            out("cr0") _,
                            options(nostack, preserves_flags),
                        )
                    };
                }
                atomic_rmw!(op, order);
                U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
            }
        }
    };
}
| |
// 128-bit add (previous + val): the low halves are added with `addc`, then the high
// halves with `adde`, which consumes the carry. The carry lives in XER, hence the
// `xer` clobber.
atomic_rmw_ll_sc_3! {
    atomic_add_pwr8 as atomic_add, [out("xer") _,],
    "addc %r9, {val_lo}, %r7",
    "adde %r8, {val_hi}, %r6",
}
// 128-bit subtract (previous - val): the low halves are subtracted with `subc`, then
// the high halves with `subfe`, which consumes the carry. The carry lives in XER,
// hence the `xer` clobber.
atomic_rmw_ll_sc_3! {
    atomic_sub_pwr8 as atomic_sub, [out("xer") _,],
    "subc %r9, %r7, {val_lo}",
    "subfe %r8, {val_hi}, %r6",
}
// Bitwise operations are applied independently to the low (r7 -> r9) and high
// (r6 -> r8) 64-bit halves, so no extra operands or clobbers are needed.

// 128-bit AND.
atomic_rmw_ll_sc_3! {
    atomic_and_pwr8 as atomic_and, [],
    "and %r9, {val_lo}, %r7",
    "and %r8, {val_hi}, %r6",
}
// 128-bit NAND.
atomic_rmw_ll_sc_3! {
    atomic_nand_pwr8 as atomic_nand, [],
    "nand %r9, {val_lo}, %r7",
    "nand %r8, {val_hi}, %r6",
}
// 128-bit OR.
atomic_rmw_ll_sc_3! {
    atomic_or_pwr8 as atomic_or, [],
    "or %r9, {val_lo}, %r7",
    "or %r8, {val_hi}, %r6",
}
// 128-bit XOR.
atomic_rmw_ll_sc_3! {
    atomic_xor_pwr8 as atomic_xor, [],
    "xor %r9, {val_lo}, %r7",
    "xor %r8, {val_hi}, %r6",
}
// 128-bit min/max.
//
// In every variant the high halves decide the result unless they are equal, in which
// case the low halves are compared as unsigned. The signed variants compare the high
// halves as signed into cr1 (hence the `cr1` clobber) and additionally as unsigned
// into cr0 for the equality test. The unsigned variants only use cr0.

// Signed max.
atomic_rmw_ll_sc_3! {
    atomic_max_pwr8 as atomic_max, [out("cr1") _,],
    "cmpld %r7, {val_lo}",        // (unsigned) compare lo 64-bit, store result to cr0
    "iselgt %r9, %r7, {val_lo}",  // select lo 64-bit based on GT bit in cr0
    "cmpd %cr1, %r6, {val_hi}",   // (signed) compare hi 64-bit, store result to cr1
    "isel %r8, %r7, {val_lo}, 5", // select lo 64-bit based on GT bit in cr1
    "cmpld %r6, {val_hi}",        // (unsigned) compare hi 64-bit, store result to cr0
    "iseleq %r9, %r9, %r8",       // select lo 64-bit based on EQ bit in cr0
    "isel %r8, %r6, {val_hi}, 5", // select hi 64-bit based on GT bit in cr1
}
// Unsigned max.
atomic_rmw_ll_sc_3! {
    atomic_umax_pwr8 as atomic_umax, [],
    "cmpld %r7, {val_lo}",       // compare lo 64-bit, store result to cr0
    "iselgt %r9, %r7, {val_lo}", // select lo 64-bit based on GT bit in cr0
    "cmpld %r6, {val_hi}",       // compare hi 64-bit, store result to cr0
    "iselgt %r8, %r7, {val_lo}", // select lo 64-bit based on GT bit in cr0
    "iseleq %r9, %r9, %r8",      // select lo 64-bit based on EQ bit in cr0
    "iselgt %r8, %r6, {val_hi}", // select hi 64-bit based on GT bit in cr0
}
// Signed min.
atomic_rmw_ll_sc_3! {
    atomic_min_pwr8 as atomic_min, [out("cr1") _,],
    "cmpld %r7, {val_lo}",        // (unsigned) compare lo 64-bit, store result to cr0
    "isellt %r9, %r7, {val_lo}",  // select lo 64-bit based on LT bit in cr0
    "cmpd %cr1, %r6, {val_hi}",   // (signed) compare hi 64-bit, store result to cr1
    "isel %r8, %r7, {val_lo}, 4", // select lo 64-bit based on LT bit in cr1
    "cmpld %r6, {val_hi}",        // (unsigned) compare hi 64-bit, store result to cr0
    "iseleq %r9, %r9, %r8",       // select lo 64-bit based on EQ bit in cr0
    "isel %r8, %r6, {val_hi}, 4", // select hi 64-bit based on LT bit in cr1
}
// Unsigned min.
atomic_rmw_ll_sc_3! {
    atomic_umin_pwr8 as atomic_umin, [],
    "cmpld %r7, {val_lo}",       // compare lo 64-bit, store result to cr0
    "isellt %r9, %r7, {val_lo}", // select lo 64-bit based on LT bit in cr0
    "cmpld %r6, {val_hi}",       // compare hi 64-bit, store result to cr0
    "isellt %r8, %r7, {val_lo}", // select lo 64-bit based on LT bit in cr0
    "iseleq %r9, %r9, %r8",      // select lo 64-bit based on EQ bit in cr0
    "isellt %r8, %r6, {val_hi}", // select hi 64-bit based on LT bit in cr0
}
| |
| #[cfg(any( |
| target_feature = "quadword-atomics", |
| portable_atomic_target_feature = "quadword-atomics", |
| ))] |
| use atomic_not_pwr8 as atomic_not; |
| #[inline] |
| unsafe fn atomic_not_pwr8(dst: *mut u128, order: Ordering) -> u128 { |
| // SAFETY: the caller must uphold the safety contract. |
| unsafe { atomic_xor_pwr8(dst, !0, order) } |
| } |
| |
// 128-bit atomic negation; returns the previous value.
//
// The low half is negated as `0 - lo` and the high half is then processed by
// `subfze` using the resulting carry. The carry lives in XER, hence the `xer` clobber.
// Two variants exist, selected by the `portable_atomic_llvm_16` cfg.

// This variant uses `subfic` with an immediate 0.
#[cfg(portable_atomic_llvm_16)]
atomic_rmw_ll_sc_2! {
    atomic_neg_pwr8 as atomic_neg, [out("xer") _,],
    "subfic %r9, %r7, 0",
    "subfze %r8, %r6",
}
// LLVM 15 miscompiles subfic.
// This variant passes the zero in a register and uses `subc` instead.
#[cfg(not(portable_atomic_llvm_16))]
atomic_rmw_ll_sc_2! {
    atomic_neg_pwr8 as atomic_neg, [zero = in(reg) 0_u64, out("xer") _,],
    "subc %r9, {zero}, %r7",
    "subfze %r8, %r6",
}
| |
// Defines a run-time dispatching wrapper `$name($($arg)*, order)` for an RMW operation.
//
// The generated function is compiled only when quadword-atomics is NOT enabled at
// compile time. For each of the five orderings it selects, based on run-time
// detection, between:
// - `$pwr8_fn` bound to that ordering (via `fn_alias!`), and
// - `fallback::$non_seqcst_fallback_fn` (Relaxed/Acquire/Release/AcqRel) or
//   `fallback::$seqcst_fallback_fn` (SeqCst).
//
// `fn_alias!` and `ifunc!` are defined outside this file.
macro_rules! atomic_rmw_with_ifunc {
    (
        unsafe fn $name:ident($($arg:tt)*) $(-> $ret_ty:ty)?;
        pwr8 = $pwr8_fn:ident;
        non_seqcst_fallback = $non_seqcst_fallback_fn:ident;
        seqcst_fallback = $seqcst_fallback_fn:ident;
    ) => {
        #[cfg(not(any(
            target_feature = "quadword-atomics",
            portable_atomic_target_feature = "quadword-atomics",
        )))]
        #[inline]
        unsafe fn $name($($arg)*, order: Ordering) $(-> $ret_ty)? {
            fn_alias! {
                // inline(never) is just a hint and also not strictly necessary
                // because we use ifunc helper macro, but used for clarity.
                #[inline(never)]
                unsafe fn($($arg)*) $(-> $ret_ty)?;
                pwr8_relaxed_fn = $pwr8_fn(Ordering::Relaxed);
                pwr8_acquire_fn = $pwr8_fn(Ordering::Acquire);
                pwr8_release_fn = $pwr8_fn(Ordering::Release);
                pwr8_acqrel_fn = $pwr8_fn(Ordering::AcqRel);
                pwr8_seqcst_fn = $pwr8_fn(Ordering::SeqCst);
            }
            // SAFETY: the caller must uphold the safety contract.
            // we only call pwr8_fn if quadword-atomics is available.
            unsafe {
                match order {
                    Ordering::Relaxed => {
                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
                            if detect::detect().has_quadword_atomics() {
                                pwr8_relaxed_fn
                            } else {
                                fallback::$non_seqcst_fallback_fn
                            }
                        })
                    }
                    Ordering::Acquire => {
                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
                            if detect::detect().has_quadword_atomics() {
                                pwr8_acquire_fn
                            } else {
                                fallback::$non_seqcst_fallback_fn
                            }
                        })
                    }
                    Ordering::Release => {
                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
                            if detect::detect().has_quadword_atomics() {
                                pwr8_release_fn
                            } else {
                                fallback::$non_seqcst_fallback_fn
                            }
                        })
                    }
                    Ordering::AcqRel => {
                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
                            if detect::detect().has_quadword_atomics() {
                                pwr8_acqrel_fn
                            } else {
                                fallback::$non_seqcst_fallback_fn
                            }
                        })
                    }
                    Ordering::SeqCst => {
                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
                            if detect::detect().has_quadword_atomics() {
                                pwr8_seqcst_fn
                            } else {
                                fallback::$seqcst_fallback_fn
                            }
                        })
                    }
                    // `Ordering` is non-exhaustive, so a catch-all arm is required.
                    _ => unreachable!("{:?}", order),
                }
            }
        }
    };
}
| |
// Run-time dispatching wrappers for every RMW operation. Each one pairs the pwr8
// implementation defined in this file with the matching non-SeqCst and SeqCst
// functions of the `fallback` module. Per the macro definition, they are compiled
// only when quadword-atomics is not enabled at compile time.

// Strong CAS; returns (previous value, success flag). Called by `atomic_compare_exchange`.
atomic_rmw_with_ifunc! {
    unsafe fn atomic_compare_exchange_ifunc(dst: *mut u128, old: u128, new: u128) -> (u128, bool);
    pwr8 = atomic_compare_exchange_pwr8;
    non_seqcst_fallback = atomic_compare_exchange_non_seqcst;
    seqcst_fallback = atomic_compare_exchange_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_swap(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_swap_pwr8;
    non_seqcst_fallback = atomic_swap_non_seqcst;
    seqcst_fallback = atomic_swap_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_add(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_add_pwr8;
    non_seqcst_fallback = atomic_add_non_seqcst;
    seqcst_fallback = atomic_add_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_sub(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_sub_pwr8;
    non_seqcst_fallback = atomic_sub_non_seqcst;
    seqcst_fallback = atomic_sub_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_and(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_and_pwr8;
    non_seqcst_fallback = atomic_and_non_seqcst;
    seqcst_fallback = atomic_and_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_nand(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_nand_pwr8;
    non_seqcst_fallback = atomic_nand_non_seqcst;
    seqcst_fallback = atomic_nand_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_or(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_or_pwr8;
    non_seqcst_fallback = atomic_or_non_seqcst;
    seqcst_fallback = atomic_or_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_xor(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_xor_pwr8;
    non_seqcst_fallback = atomic_xor_non_seqcst;
    seqcst_fallback = atomic_xor_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_max(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_max_pwr8;
    non_seqcst_fallback = atomic_max_non_seqcst;
    seqcst_fallback = atomic_max_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_umax(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_umax_pwr8;
    non_seqcst_fallback = atomic_umax_non_seqcst;
    seqcst_fallback = atomic_umax_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_min(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_min_pwr8;
    non_seqcst_fallback = atomic_min_non_seqcst;
    seqcst_fallback = atomic_min_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_umin(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_umin_pwr8;
    non_seqcst_fallback = atomic_umin_non_seqcst;
    seqcst_fallback = atomic_umin_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_not(dst: *mut u128) -> u128;
    pwr8 = atomic_not_pwr8;
    non_seqcst_fallback = atomic_not_non_seqcst;
    seqcst_fallback = atomic_not_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_neg(dst: *mut u128) -> u128;
    pwr8 = atomic_neg_pwr8;
    non_seqcst_fallback = atomic_neg_non_seqcst;
    seqcst_fallback = atomic_neg_seqcst;
}
| |
| #[inline] |
| fn is_lock_free() -> bool { |
| #[cfg(any( |
| target_feature = "quadword-atomics", |
| portable_atomic_target_feature = "quadword-atomics", |
| ))] |
| { |
| // lqarx and stqcx. instructions are statically available. |
| true |
| } |
| #[cfg(not(any( |
| target_feature = "quadword-atomics", |
| portable_atomic_target_feature = "quadword-atomics", |
| )))] |
| { |
| detect::detect().has_quadword_atomics() |
| } |
| } |
// `true` exactly when quadword-atomics is enabled at compile time, through either the
// `target_feature` or the `portable_atomic_target_feature` cfg.
const IS_ALWAYS_LOCK_FREE: bool = cfg!(target_feature = "quadword-atomics")
    || cfg!(portable_atomic_target_feature = "quadword-atomics");
| |
// Instantiate the public 128-bit atomic types through the `atomic128!` macro, which
// is defined outside this file (presumably in the included macros.rs; confirm).
// The signed type is given the signed max/min functions and the unsigned type the
// unsigned ones.
atomic128!(AtomicI128, i128, atomic_max, atomic_min);
atomic128!(AtomicU128, u128, atomic_umax, atomic_umin);
| |
// Unit and stress tests, generated by test macros defined outside this file.
#[cfg(test)]
mod tests {
    use super::*;

    test_atomic_int!(i128);
    test_atomic_int!(u128);

    // load/store/swap implementation is not affected by signedness, so it is
    // enough to test only unsigned types.
    stress_test!(u128);
}