Upgrade crossbeam-utils to 0.8.14 am: 23cafcf3ba

Original change: https://android-review.googlesource.com/c/platform/external/rust/crates/crossbeam-utils/+/2335403

Change-Id: I5926f2935d5795a776370e5c2c7c9794a0c425e2
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
index 0d40a83..cef469e 100644
--- a/.cargo_vcs_info.json
+++ b/.cargo_vcs_info.json
@@ -1,6 +1,6 @@
 {
   "git": {
-    "sha1": "2988f873f87d2263a7fd2b9465fb9c28f43a6490"
+    "sha1": "366276a4dde8bd6b4bdab531c09e6ab1ff38c407"
   },
   "path_in_vcs": "crossbeam-utils"
 }
\ No newline at end of file
diff --git a/Android.bp b/Android.bp
index 5f17007..43b9d5a 100644
--- a/Android.bp
+++ b/Android.bp
@@ -44,7 +44,7 @@
     host_supported: true,
     crate_name: "crossbeam_utils",
     cargo_env_compat: true,
-    cargo_pkg_version: "0.8.7",
+    cargo_pkg_version: "0.8.14",
     srcs: ["src/lib.rs"],
     test_suites: ["general-tests"],
     auto_gen_config: true,
@@ -54,12 +54,10 @@
     edition: "2018",
     features: [
         "default",
-        "lazy_static",
         "std",
     ],
     rustlibs: [
         "libcfg_if",
-        "liblazy_static",
         "librand",
     ],
     proc_macros: ["librustversion"],
@@ -69,19 +67,17 @@
     name: "crossbeam-utils_test_defaults",
     crate_name: "crossbeam_utils",
     cargo_env_compat: true,
-    cargo_pkg_version: "0.8.7",
+    cargo_pkg_version: "0.8.14",
     test_suites: ["general-tests"],
     auto_gen_config: true,
     edition: "2018",
     features: [
         "default",
-        "lazy_static",
         "std",
     ],
     rustlibs: [
         "libcfg_if",
         "libcrossbeam_utils",
-        "liblazy_static",
         "librand",
     ],
     proc_macros: ["librustversion"],
@@ -152,17 +148,15 @@
     host_supported: true,
     crate_name: "crossbeam_utils",
     cargo_env_compat: true,
-    cargo_pkg_version: "0.8.7",
+    cargo_pkg_version: "0.8.14",
     srcs: ["src/lib.rs"],
     edition: "2018",
     features: [
         "default",
-        "lazy_static",
         "std",
     ],
     rustlibs: [
         "libcfg_if",
-        "liblazy_static",
     ],
     apex_available: [
         "//apex_available:platform",
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 98088c5..8e0fe35 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,35 @@
+# Version 0.8.14
+
+- Fix build script bug introduced in 0.8.13. (#932)
+
+# Version 0.8.13
+
+**Note:** This release has been yanked due to a regression fixed in 0.8.14.
+
+- Improve support for custom targets. (#922)
+
+# Version 0.8.12
+
+- Remove the dependency on the `once_cell` crate to restore the MSRV. (#913)
+- Work around [rust-lang#98302](https://github.com/rust-lang/rust/issues/98302), which causes a compile error on windows-gnu when LTO is enabled. (#913)
+
+# Version 0.8.11
+
+- Bump the minimum supported Rust version to 1.38. (#877)
+
+# Version 0.8.10
+
+- Fix unsoundness of `AtomicCell` on types containing niches. (#834)
+  This fix contains breaking changes, but they are allowed because this is a soundness bug fix. See #834 for more.
+
+# Version 0.8.9
+
+- Replace lazy_static with once_cell. (#817)
+
+# Version 0.8.8
+
+- Fix a bug when unstable `loom` support is enabled. (#787)
+
 # Version 0.8.7
 
 - Add `AtomicCell<{i*,u*}>::{fetch_max,fetch_min}`. (#785)
@@ -6,24 +38,34 @@
 
 # Version 0.8.6
 
+**Note:** This release has been yanked. See [GHSA-qc84-gqf4-9926](https://github.com/crossbeam-rs/crossbeam/security/advisories/GHSA-qc84-gqf4-9926) for details.
+
 - Re-add `AtomicCell<{i,u}64>::{fetch_add,fetch_sub,fetch_and,fetch_or,fetch_xor}` that were accidentally removed in 0.8.0 on targets that do not support `Atomic{I,U}64`. (#767)
 - Re-add `AtomicCell<{i,u}128>::{fetch_add,fetch_sub,fetch_and,fetch_or,fetch_xor}` that were accidentally removed in 0.8.0. (#767)
 
 # Version 0.8.5
 
+**Note:** This release has been yanked. See [GHSA-qc84-gqf4-9926](https://github.com/crossbeam-rs/crossbeam/security/advisories/GHSA-qc84-gqf4-9926) for details.
+
 - Add `AtomicCell::fetch_update`. (#704)
 - Support targets that do not have atomic CAS on stable Rust. (#698)
 
 # Version 0.8.4
 
+**Note:** This release has been yanked. See [GHSA-qc84-gqf4-9926](https://github.com/crossbeam-rs/crossbeam/security/advisories/GHSA-qc84-gqf4-9926) for details.
+
 - Bump `loom` dependency to version 0.5. (#686)
 
 # Version 0.8.3
 
+**Note:** This release has been yanked. See [GHSA-qc84-gqf4-9926](https://github.com/crossbeam-rs/crossbeam/security/advisories/GHSA-qc84-gqf4-9926) for details.
+
 - Make `loom` dependency optional. (#666)
 
 # Version 0.8.2
 
+**Note:** This release has been yanked. See [GHSA-qc84-gqf4-9926](https://github.com/crossbeam-rs/crossbeam/security/advisories/GHSA-qc84-gqf4-9926) for details.
+
 - Deprecate `AtomicCell::compare_and_swap`. Use `AtomicCell::compare_exchange` instead. (#619)
 - Add `Parker::park_deadline`. (#563)
 - Improve implementation of `CachePadded`. (#636)
@@ -31,6 +73,8 @@
 
 # Version 0.8.1
 
+**Note:** This release has been yanked. See [GHSA-qc84-gqf4-9926](https://github.com/crossbeam-rs/crossbeam/security/advisories/GHSA-qc84-gqf4-9926) for details.
+
 - Make `AtomicCell::is_lock_free` always const fn. (#600)
 - Fix a bug in `seq_lock_wide`. (#596)
 - Remove `const_fn` dependency. (#600)
@@ -38,6 +82,8 @@
 
 # Version 0.8.0
 
+**Note:** This release has been yanked. See [GHSA-qc84-gqf4-9926](https://github.com/crossbeam-rs/crossbeam/security/advisories/GHSA-qc84-gqf4-9926) for details.
+
 - Bump the minimum supported Rust version to 1.36.
 - Remove deprecated `AtomicCell::get_mut()` and `Backoff::is_complete()` methods.
 - Remove `alloc` feature.
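
Several of the entries above (notably the niche-soundness fix in 0.8.10) hinge on whether `AtomicCell<T>` takes the lock-free fast path or the global-seqlock fallback. A minimal sketch to probe which path a given type gets, assuming crossbeam-utils 0.8.14 as a dependency:

```rust
use crossbeam_utils::atomic::AtomicCell;

fn main() {
    // Types that can be transmuted into a native atomic are lock-free;
    // everything else is guarded by the seqlock array in atomic_cell.rs.
    assert!(AtomicCell::<usize>::is_lock_free());
    println!("u128 lock-free: {}", AtomicCell::<u128>::is_lock_free());
    println!("[u8; 3] lock-free: {}", AtomicCell::<[u8; 3]>::is_lock_free());
}
```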
diff --git a/Cargo.toml b/Cargo.toml
index 10d3240..6fe3f9f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,11 +11,12 @@
 
 [package]
 edition = "2018"
-rust-version = "1.36"
+rust-version = "1.38"
 name = "crossbeam-utils"
-version = "0.8.7"
+version = "0.8.14"
 description = "Utilities for concurrent programming"
 homepage = "https://github.com/crossbeam-rs/crossbeam/tree/master/crossbeam-utils"
+readme = "README.md"
 keywords = [
     "scoped",
     "thread",
@@ -34,10 +35,6 @@
 [dependencies.cfg-if]
 version = "1"
 
-[dependencies.lazy_static]
-version = "1.4.0"
-optional = true
-
 [dev-dependencies.rand]
 version = "0.8"
 
@@ -47,7 +44,7 @@
 [features]
 default = ["std"]
 nightly = []
-std = ["lazy_static"]
+std = []
 
 [target."cfg(crossbeam_loom)".dependencies.loom]
 version = "0.5"
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
index 73508a9..1674775 100644
--- a/Cargo.toml.orig
+++ b/Cargo.toml.orig
@@ -4,9 +4,9 @@
 # - Update CHANGELOG.md
 # - Update README.md
 # - Create "crossbeam-utils-X.Y.Z" git tag
-version = "0.8.7"
+version = "0.8.14"
 edition = "2018"
-rust-version = "1.36"
+rust-version = "1.38"
 license = "MIT OR Apache-2.0"
 repository = "https://github.com/crossbeam-rs/crossbeam"
 homepage = "https://github.com/crossbeam-rs/crossbeam/tree/master/crossbeam-utils"
@@ -19,7 +19,7 @@
 
 # Enable to use APIs that require `std`.
 # This is enabled by default.
-std = ["lazy_static"]
+std = []
 
 # These features are no longer used.
 # TODO: remove in the next major version.
@@ -32,7 +32,6 @@
 
 [dependencies]
 cfg-if = "1"
-lazy_static = { version = "1.4.0", optional = true }
 
 # Enable the use of loom for concurrency testing.
 #
diff --git a/METADATA b/METADATA
index 2c55b77..a114c69 100644
--- a/METADATA
+++ b/METADATA
@@ -1,3 +1,7 @@
+# This project was upgraded with external_updater.
+# Usage: tools/external_updater/updater.sh update rust/crates/crossbeam-utils
+# For more info, check https://cs.android.com/android/platform/superproject/+/master:tools/external_updater/README.md
+
 name: "crossbeam-utils"
 description: "Utilities for concurrent programming"
 third_party {
@@ -7,13 +11,13 @@
   }
   url {
     type: ARCHIVE
-    value: "https://static.crates.io/crates/crossbeam-utils/crossbeam-utils-0.8.7.crate"
+    value: "https://static.crates.io/crates/crossbeam-utils/crossbeam-utils-0.8.14.crate"
   }
-  version: "0.8.7"
+  version: "0.8.14"
   license_type: NOTICE
   last_upgrade_date {
     year: 2022
-    month: 3
-    day: 1
+    month: 12
+    day: 8
   }
 }
diff --git a/README.md b/README.md
index 6e9a8e4..c06ea60 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
 https://crates.io/crates/crossbeam-utils)
 [![Documentation](https://docs.rs/crossbeam-utils/badge.svg)](
 https://docs.rs/crossbeam-utils)
-[![Rust 1.36+](https://img.shields.io/badge/rust-1.36+-lightgray.svg)](
+[![Rust 1.38+](https://img.shields.io/badge/rust-1.38+-lightgray.svg)](
 https://www.rust-lang.org)
 [![chat](https://img.shields.io/discord/569610676205781012.svg?logo=discord)](https://discord.com/invite/JXYwgWZ)
 
@@ -55,7 +55,7 @@
 
 Crossbeam Utils supports stable Rust releases going back at least six months,
 and every time the minimum supported Rust version is increased, a new minor
-version is released. Currently, the minimum supported Rust version is 1.36.
+version is released. Currently, the minimum supported Rust version is 1.38.
 
 ## License
 
diff --git a/build-common.rs b/build-common.rs
new file mode 100644
index 0000000..e91bb4d
--- /dev/null
+++ b/build-common.rs
@@ -0,0 +1,13 @@
+// The target triplets have the form 'arch-vendor-system'.
+//
+// When building for Linux (e.g. the 'system' part is
+// 'linux-something'), replace the vendor with 'unknown'
+// so that mapping to rust standard targets happens correctly.
+fn convert_custom_linux_target(target: String) -> String {
+    let mut parts: Vec<&str> = target.split('-').collect();
+    let system = parts.get(2);
+    if system == Some(&"linux") {
+        parts[1] = "unknown";
+    };
+    parts.join("-")
+}
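
A quick check of what the new helper does, with the function reproduced from build-common.rs above so the sketch runs standalone; `acme` is a made-up vendor for illustration:

```rust
fn convert_custom_linux_target(target: String) -> String {
    let mut parts: Vec<&str> = target.split('-').collect();
    if parts.get(2) == Some(&"linux") {
        parts[1] = "unknown"; // map a custom vendor to a rustc-known triple
    }
    parts.join("-")
}

fn main() {
    // Only the vendor field of a Linux triple is rewritten.
    assert_eq!(
        convert_custom_linux_target("armv7-acme-linux-gnueabihf".to_string()),
        "armv7-unknown-linux-gnueabihf"
    );
    // Non-Linux triples pass through unchanged.
    assert_eq!(
        convert_custom_linux_target("thumbv6m-none-eabi".to_string()),
        "thumbv6m-none-eabi"
    );
}
```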
diff --git a/build.rs b/build.rs
index a7557fd..617162f 100644
--- a/build.rs
+++ b/build.rs
@@ -27,10 +27,11 @@
 use std::env;
 
 include!("no_atomic.rs");
+include!("build-common.rs");
 
 fn main() {
     let target = match env::var("TARGET") {
-        Ok(target) => target,
+        Ok(target) => convert_custom_linux_target(target),
         Err(e) => {
             println!(
                 "cargo:warning={}: unable to get TARGET environment variable: {}",
@@ -41,10 +42,9 @@
         }
     };
 
-    // Note that this is `no_*`, not `has_*`. This allows treating
-    // `cfg(target_has_atomic = "ptr")` as true when the build script doesn't
-    // run. This is needed for compatibility with non-cargo build systems that
-    // don't run the build script.
+    // Note that this is `no_*`, not `has_*`. This allows treating the latest
+    // stable rustc as being used when the build script doesn't run. This is
+    // useful for non-cargo build systems that don't run the build script.
     if NO_ATOMIC_CAS.contains(&&*target) {
         println!("cargo:rustc-cfg=crossbeam_no_atomic_cas");
     }
diff --git a/no_atomic.rs b/no_atomic.rs
index 90ac60a..8ce0d31 100644
--- a/no_atomic.rs
+++ b/no_atomic.rs
@@ -2,13 +2,17 @@
 // It is not intended for manual editing.
 
 const NO_ATOMIC_CAS: &[&str] = &[
+    "armv4t-none-eabi",
+    "armv5te-none-eabi",
     "avr-unknown-gnu-atmega328",
     "bpfeb-unknown-none",
     "bpfel-unknown-none",
     "msp430-none-elf",
     "riscv32i-unknown-none-elf",
+    "riscv32im-unknown-none-elf",
     "riscv32imc-unknown-none-elf",
     "thumbv4t-none-eabi",
+    "thumbv5te-none-eabi",
     "thumbv6m-none-eabi",
 ];
 
@@ -17,7 +21,9 @@
     "arm-linux-androideabi",
     "armebv7r-none-eabi",
     "armebv7r-none-eabihf",
+    "armv4t-none-eabi",
     "armv4t-unknown-linux-gnueabi",
+    "armv5te-none-eabi",
     "armv5te-unknown-linux-gnueabi",
     "armv5te-unknown-linux-musleabi",
     "armv5te-unknown-linux-uclibceabi",
@@ -31,6 +37,7 @@
     "mips-unknown-linux-musl",
     "mips-unknown-linux-uclibc",
     "mipsel-sony-psp",
+    "mipsel-sony-psx",
     "mipsel-unknown-linux-gnu",
     "mipsel-unknown-linux-musl",
     "mipsel-unknown-linux-uclibc",
@@ -49,10 +56,12 @@
     "riscv32gc-unknown-linux-gnu",
     "riscv32gc-unknown-linux-musl",
     "riscv32i-unknown-none-elf",
+    "riscv32im-unknown-none-elf",
     "riscv32imac-unknown-none-elf",
-    "riscv32imc-esp-espidf",
+    "riscv32imac-unknown-xous-elf",
     "riscv32imc-unknown-none-elf",
     "thumbv4t-none-eabi",
+    "thumbv5te-none-eabi",
     "thumbv6m-none-eabi",
     "thumbv7em-none-eabi",
     "thumbv7em-none-eabihf",
@@ -65,7 +74,9 @@
 #[allow(dead_code)] // Only crossbeam-utils uses this.
 const NO_ATOMIC: &[&str] = &[
     "avr-unknown-gnu-atmega328",
+    "mipsel-sony-psx",
     "msp430-none-elf",
     "riscv32i-unknown-none-elf",
+    "riscv32im-unknown-none-elf",
     "riscv32imc-unknown-none-elf",
 ];
diff --git a/src/atomic/atomic_cell.rs b/src/atomic/atomic_cell.rs
index 8a49464..7941c5c 100644
--- a/src/atomic/atomic_cell.rs
+++ b/src/atomic/atomic_cell.rs
@@ -5,16 +5,14 @@
 use core::cell::UnsafeCell;
 use core::cmp;
 use core::fmt;
-use core::mem;
+use core::mem::{self, ManuallyDrop, MaybeUninit};
 use core::sync::atomic::Ordering;
 
-#[cfg(not(crossbeam_loom))]
 use core::ptr;
 
 #[cfg(feature = "std")]
 use std::panic::{RefUnwindSafe, UnwindSafe};
 
-#[cfg(not(crossbeam_loom))]
 use super::seq_lock::SeqLock;
 
 /// A thread-safe mutable memory location.
@@ -32,13 +30,20 @@
 /// [`Acquire`]: std::sync::atomic::Ordering::Acquire
 /// [`Release`]: std::sync::atomic::Ordering::Release
 #[repr(transparent)]
-pub struct AtomicCell<T: ?Sized> {
+pub struct AtomicCell<T> {
     /// The inner value.
     ///
     /// If this value can be transmuted into a primitive atomic type, it will be treated as such.
     /// Otherwise, all potentially concurrent operations on this data will be protected by a global
     /// lock.
-    value: UnsafeCell<T>,
+    ///
+    /// Using MaybeUninit to prevent code outside the cell from observing partially initialized state:
+    /// <https://github.com/crossbeam-rs/crossbeam/issues/833>
+    ///
+    /// Note:
+    /// - we'll never store an uninitialized `T`, because our API only accepts initialized `T`.
+    /// - this `MaybeUninit` does *not* fix <https://github.com/crossbeam-rs/crossbeam/issues/315>.
+    value: UnsafeCell<MaybeUninit<T>>,
 }
 
 unsafe impl<T: Send> Send for AtomicCell<T> {}
@@ -61,12 +66,15 @@
     /// ```
     pub const fn new(val: T) -> AtomicCell<T> {
         AtomicCell {
-            value: UnsafeCell::new(val),
+            value: UnsafeCell::new(MaybeUninit::new(val)),
         }
     }
 
     /// Consumes the atomic and returns the contained value.
     ///
+    /// This is safe because passing `self` by value guarantees that no other threads are
+    /// concurrently accessing the atomic data.
+    ///
     /// # Examples
     ///
     /// ```
@@ -78,7 +86,13 @@
     /// assert_eq!(v, 7);
     /// ```
     pub fn into_inner(self) -> T {
-        self.value.into_inner()
+        let this = ManuallyDrop::new(self);
+        // SAFETY:
+        // - passing `self` by value guarantees that no other threads are concurrently
+        //   accessing the atomic data
+        // - the raw pointer passed in is valid because we got it from an owned value.
+        // - `ManuallyDrop` prevents double dropping `T`
+        unsafe { this.as_ptr().read() }
     }
 
     /// Returns `true` if operations on values of this type are lock-free.
@@ -131,7 +145,7 @@
             drop(self.swap(val));
         } else {
             unsafe {
-                atomic_store(self.value.get(), val);
+                atomic_store(self.as_ptr(), val);
             }
         }
     }
@@ -150,11 +164,9 @@
     /// assert_eq!(a.load(), 8);
     /// ```
     pub fn swap(&self, val: T) -> T {
-        unsafe { atomic_swap(self.value.get(), val) }
+        unsafe { atomic_swap(self.as_ptr(), val) }
     }
-}
 
-impl<T: ?Sized> AtomicCell<T> {
     /// Returns a raw pointer to the underlying data in this atomic cell.
     ///
     /// # Examples
@@ -168,7 +180,7 @@
     /// ```
     #[inline]
     pub fn as_ptr(&self) -> *mut T {
-        self.value.get()
+        self.value.get().cast::<T>()
     }
 }
 
@@ -204,7 +216,7 @@
     /// assert_eq!(a.load(), 7);
     /// ```
     pub fn load(&self) -> T {
-        unsafe { atomic_load(self.value.get()) }
+        unsafe { atomic_load(self.as_ptr()) }
     }
 }
 
@@ -256,7 +268,7 @@
     /// assert_eq!(a.load(), 2);
     /// ```
     pub fn compare_exchange(&self, current: T, new: T) -> Result<T, T> {
-        unsafe { atomic_compare_exchange_weak(self.value.get(), current, new) }
+        unsafe { atomic_compare_exchange_weak(self.as_ptr(), current, new) }
     }
 
     /// Fetches the value, and applies a function to it that returns an optional
@@ -294,6 +306,22 @@
     }
 }
 
+// `MaybeUninit` prevents `T` from being dropped, so we need to implement `Drop`
+// for `AtomicCell` to avoid leaks of non-`Copy` types.
+impl<T> Drop for AtomicCell<T> {
+    fn drop(&mut self) {
+        if mem::needs_drop::<T>() {
+            // SAFETY:
+            // - the mutable reference guarantees that no other threads are concurrently accessing the atomic data
+            // - the raw pointer passed in is valid because we got it from a reference
+            // - `MaybeUninit` prevents double dropping `T`
+            unsafe {
+                self.as_ptr().drop_in_place();
+            }
+        }
+    }
+}
+
 macro_rules! impl_arithmetic {
     ($t:ty, fallback, $example:tt) => {
         impl AtomicCell<$t> {
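
The new `Drop` impl above is what keeps the `MaybeUninit` layout leak-free: `MaybeUninit<T>` never runs `T`'s destructor on its own. A standalone sketch of the same mechanics:

```rust
use std::mem::MaybeUninit;

fn main() {
    // Dropping a MaybeUninit does NOT run the inner destructor;
    // this String's heap buffer is leaked.
    let leaked = MaybeUninit::new(String::from("leaked"));
    drop(leaked);

    // What AtomicCell::drop now does instead: drop the value in place.
    let mut kept = MaybeUninit::new(String::from("freed"));
    unsafe { kept.as_mut_ptr().drop_in_place() }; // destructor runs here
}
```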
@@ -313,19 +341,11 @@
             /// ```
             #[inline]
             pub fn fetch_add(&self, val: $t) -> $t {
-                #[cfg(crossbeam_loom)]
-                {
-                    let _ = val;
-                    unimplemented!("loom does not support non-atomic atomic ops");
-                }
-                #[cfg(not(crossbeam_loom))]
-                {
-                    let _guard = lock(self.value.get() as usize).write();
-                    let value = unsafe { &mut *(self.value.get()) };
-                    let old = *value;
-                    *value = value.wrapping_add(val);
-                    old
-                }
+                let _guard = lock(self.as_ptr() as usize).write();
+                let value = unsafe { &mut *(self.as_ptr()) };
+                let old = *value;
+                *value = value.wrapping_add(val);
+                old
             }
 
             /// Decrements the current value by `val` and returns the previous value.
@@ -344,19 +364,11 @@
             /// ```
             #[inline]
             pub fn fetch_sub(&self, val: $t) -> $t {
-                #[cfg(crossbeam_loom)]
-                {
-                    let _ = val;
-                    unimplemented!("loom does not support non-atomic atomic ops");
-                }
-                #[cfg(not(crossbeam_loom))]
-                {
-                    let _guard = lock(self.value.get() as usize).write();
-                    let value = unsafe { &mut *(self.value.get()) };
-                    let old = *value;
-                    *value = value.wrapping_sub(val);
-                    old
-                }
+                let _guard = lock(self.as_ptr() as usize).write();
+                let value = unsafe { &mut *(self.as_ptr()) };
+                let old = *value;
+                *value = value.wrapping_sub(val);
+                old
             }
 
             /// Applies bitwise "and" to the current value and returns the previous value.
@@ -373,19 +385,11 @@
             /// ```
             #[inline]
             pub fn fetch_and(&self, val: $t) -> $t {
-                #[cfg(crossbeam_loom)]
-                {
-                    let _ = val;
-                    unimplemented!("loom does not support non-atomic atomic ops");
-                }
-                #[cfg(not(crossbeam_loom))]
-                {
-                    let _guard = lock(self.value.get() as usize).write();
-                    let value = unsafe { &mut *(self.value.get()) };
-                    let old = *value;
-                    *value &= val;
-                    old
-                }
+                let _guard = lock(self.as_ptr() as usize).write();
+                let value = unsafe { &mut *(self.as_ptr()) };
+                let old = *value;
+                *value &= val;
+                old
             }
 
             /// Applies bitwise "nand" to the current value and returns the previous value.
@@ -402,19 +406,11 @@
             /// ```
             #[inline]
             pub fn fetch_nand(&self, val: $t) -> $t {
-                #[cfg(crossbeam_loom)]
-                {
-                    let _ = val;
-                    unimplemented!("loom does not support non-atomic atomic ops");
-                }
-                #[cfg(not(crossbeam_loom))]
-                {
-                    let _guard = lock(self.value.get() as usize).write();
-                    let value = unsafe { &mut *(self.value.get()) };
-                    let old = *value;
-                    *value = !(old & val);
-                    old
-                }
+                let _guard = lock(self.as_ptr() as usize).write();
+                let value = unsafe { &mut *(self.as_ptr()) };
+                let old = *value;
+                *value = !(old & val);
+                old
             }
 
             /// Applies bitwise "or" to the current value and returns the previous value.
@@ -431,19 +427,11 @@
             /// ```
             #[inline]
             pub fn fetch_or(&self, val: $t) -> $t {
-                #[cfg(crossbeam_loom)]
-                {
-                    let _ = val;
-                    unimplemented!("loom does not support non-atomic atomic ops");
-                }
-                #[cfg(not(crossbeam_loom))]
-                {
-                    let _guard = lock(self.value.get() as usize).write();
-                    let value = unsafe { &mut *(self.value.get()) };
-                    let old = *value;
-                    *value |= val;
-                    old
-                }
+                let _guard = lock(self.as_ptr() as usize).write();
+                let value = unsafe { &mut *(self.as_ptr()) };
+                let old = *value;
+                *value |= val;
+                old
             }
 
             /// Applies bitwise "xor" to the current value and returns the previous value.
@@ -460,19 +448,11 @@
             /// ```
             #[inline]
             pub fn fetch_xor(&self, val: $t) -> $t {
-                #[cfg(crossbeam_loom)]
-                {
-                    let _ = val;
-                    unimplemented!("loom does not support non-atomic atomic ops");
-                }
-                #[cfg(not(crossbeam_loom))]
-                {
-                    let _guard = lock(self.value.get() as usize).write();
-                    let value = unsafe { &mut *(self.value.get()) };
-                    let old = *value;
-                    *value ^= val;
-                    old
-                }
+                let _guard = lock(self.as_ptr() as usize).write();
+                let value = unsafe { &mut *(self.as_ptr()) };
+                let old = *value;
+                *value ^= val;
+                old
             }
 
             /// Compares and sets the maximum of the current value and `val`,
@@ -490,19 +470,11 @@
             /// ```
             #[inline]
             pub fn fetch_max(&self, val: $t) -> $t {
-                #[cfg(crossbeam_loom)]
-                {
-                    let _ = val;
-                    unimplemented!("loom does not support non-atomic atomic ops");
-                }
-                #[cfg(not(crossbeam_loom))]
-                {
-                    let _guard = lock(self.value.get() as usize).write();
-                    let value = unsafe { &mut *(self.value.get()) };
-                    let old = *value;
-                    *value = cmp::max(old, val);
-                    old
-                }
+                let _guard = lock(self.as_ptr() as usize).write();
+                let value = unsafe { &mut *(self.as_ptr()) };
+                let old = *value;
+                *value = cmp::max(old, val);
+                old
             }
 
             /// Compares and sets the minimum of the current value and `val`,
@@ -520,19 +492,11 @@
             /// ```
             #[inline]
             pub fn fetch_min(&self, val: $t) -> $t {
-                #[cfg(crossbeam_loom)]
-                {
-                    let _ = val;
-                    unimplemented!("loom does not support non-atomic atomic ops");
-                }
-                #[cfg(not(crossbeam_loom))]
-                {
-                    let _guard = lock(self.value.get() as usize).write();
-                    let value = unsafe { &mut *(self.value.get()) };
-                    let old = *value;
-                    *value = cmp::min(old, val);
-                    old
-                }
+                let _guard = lock(self.as_ptr() as usize).write();
+                let value = unsafe { &mut *(self.as_ptr()) };
+                let old = *value;
+                *value = cmp::min(old, val);
+                old
             }
         }
     };
@@ -555,22 +519,14 @@
             #[inline]
             pub fn fetch_add(&self, val: $t) -> $t {
                 if can_transmute::<$t, $atomic>() {
-                    let a = unsafe { &*(self.value.get() as *const $atomic) };
+                    let a = unsafe { &*(self.as_ptr() as *const $atomic) };
                     a.fetch_add(val, Ordering::AcqRel)
                 } else {
-                    #[cfg(crossbeam_loom)]
-                    {
-                        let _ = val;
-                        unimplemented!("loom does not support non-atomic atomic ops");
-                    }
-                    #[cfg(not(crossbeam_loom))]
-                    {
-                        let _guard = lock(self.value.get() as usize).write();
-                        let value = unsafe { &mut *(self.value.get()) };
-                        let old = *value;
-                        *value = value.wrapping_add(val);
-                        old
-                    }
+                    let _guard = lock(self.as_ptr() as usize).write();
+                    let value = unsafe { &mut *(self.as_ptr()) };
+                    let old = *value;
+                    *value = value.wrapping_add(val);
+                    old
                 }
             }
 
@@ -591,22 +547,14 @@
             #[inline]
             pub fn fetch_sub(&self, val: $t) -> $t {
                 if can_transmute::<$t, $atomic>() {
-                    let a = unsafe { &*(self.value.get() as *const $atomic) };
+                    let a = unsafe { &*(self.as_ptr() as *const $atomic) };
                     a.fetch_sub(val, Ordering::AcqRel)
                 } else {
-                    #[cfg(crossbeam_loom)]
-                    {
-                        let _ = val;
-                        unimplemented!("loom does not support non-atomic atomic ops");
-                    }
-                    #[cfg(not(crossbeam_loom))]
-                    {
-                        let _guard = lock(self.value.get() as usize).write();
-                        let value = unsafe { &mut *(self.value.get()) };
-                        let old = *value;
-                        *value = value.wrapping_sub(val);
-                        old
-                    }
+                    let _guard = lock(self.as_ptr() as usize).write();
+                    let value = unsafe { &mut *(self.as_ptr()) };
+                    let old = *value;
+                    *value = value.wrapping_sub(val);
+                    old
                 }
             }
 
@@ -625,22 +573,14 @@
             #[inline]
             pub fn fetch_and(&self, val: $t) -> $t {
                 if can_transmute::<$t, $atomic>() {
-                    let a = unsafe { &*(self.value.get() as *const $atomic) };
+                    let a = unsafe { &*(self.as_ptr() as *const $atomic) };
                     a.fetch_and(val, Ordering::AcqRel)
                 } else {
-                    #[cfg(crossbeam_loom)]
-                    {
-                        let _ = val;
-                        unimplemented!("loom does not support non-atomic atomic ops");
-                    }
-                    #[cfg(not(crossbeam_loom))]
-                    {
-                        let _guard = lock(self.value.get() as usize).write();
-                        let value = unsafe { &mut *(self.value.get()) };
-                        let old = *value;
-                        *value &= val;
-                        old
-                    }
+                    let _guard = lock(self.as_ptr() as usize).write();
+                    let value = unsafe { &mut *(self.as_ptr()) };
+                    let old = *value;
+                    *value &= val;
+                    old
                 }
             }
 
@@ -659,22 +599,14 @@
             #[inline]
             pub fn fetch_nand(&self, val: $t) -> $t {
                 if can_transmute::<$t, $atomic>() {
-                    let a = unsafe { &*(self.value.get() as *const $atomic) };
+                    let a = unsafe { &*(self.as_ptr() as *const $atomic) };
                     a.fetch_nand(val, Ordering::AcqRel)
                 } else {
-                    #[cfg(crossbeam_loom)]
-                    {
-                        let _ = val;
-                        unimplemented!("loom does not support non-atomic atomic ops");
-                    }
-                    #[cfg(not(crossbeam_loom))]
-                    {
-                        let _guard = lock(self.value.get() as usize).write();
-                        let value = unsafe { &mut *(self.value.get()) };
-                        let old = *value;
-                        *value = !(old & val);
-                        old
-                    }
+                    let _guard = lock(self.as_ptr() as usize).write();
+                    let value = unsafe { &mut *(self.as_ptr()) };
+                    let old = *value;
+                    *value = !(old & val);
+                    old
                 }
             }
 
@@ -693,22 +625,14 @@
             #[inline]
             pub fn fetch_or(&self, val: $t) -> $t {
                 if can_transmute::<$t, $atomic>() {
-                    let a = unsafe { &*(self.value.get() as *const $atomic) };
+                    let a = unsafe { &*(self.as_ptr() as *const $atomic) };
                     a.fetch_or(val, Ordering::AcqRel)
                 } else {
-                    #[cfg(crossbeam_loom)]
-                    {
-                        let _ = val;
-                        unimplemented!("loom does not support non-atomic atomic ops");
-                    }
-                    #[cfg(not(crossbeam_loom))]
-                    {
-                        let _guard = lock(self.value.get() as usize).write();
-                        let value = unsafe { &mut *(self.value.get()) };
-                        let old = *value;
-                        *value |= val;
-                        old
-                    }
+                    let _guard = lock(self.as_ptr() as usize).write();
+                    let value = unsafe { &mut *(self.as_ptr()) };
+                    let old = *value;
+                    *value |= val;
+                    old
                 }
             }
 
@@ -727,22 +651,14 @@
             #[inline]
             pub fn fetch_xor(&self, val: $t) -> $t {
                 if can_transmute::<$t, $atomic>() {
-                    let a = unsafe { &*(self.value.get() as *const $atomic) };
+                    let a = unsafe { &*(self.as_ptr() as *const $atomic) };
                     a.fetch_xor(val, Ordering::AcqRel)
                 } else {
-                    #[cfg(crossbeam_loom)]
-                    {
-                        let _ = val;
-                        unimplemented!("loom does not support non-atomic atomic ops");
-                    }
-                    #[cfg(not(crossbeam_loom))]
-                    {
-                        let _guard = lock(self.value.get() as usize).write();
-                        let value = unsafe { &mut *(self.value.get()) };
-                        let old = *value;
-                        *value ^= val;
-                        old
-                    }
+                    let _guard = lock(self.as_ptr() as usize).write();
+                    let value = unsafe { &mut *(self.as_ptr()) };
+                    let old = *value;
+                    *value ^= val;
+                    old
                 }
             }
 
@@ -765,19 +681,11 @@
                     // TODO: Atomic*::fetch_max requires Rust 1.45.
                     self.fetch_update(|old| Some(cmp::max(old, val))).unwrap()
                 } else {
-                    #[cfg(crossbeam_loom)]
-                    {
-                        let _ = val;
-                        unimplemented!("loom does not support non-atomic atomic ops");
-                    }
-                    #[cfg(not(crossbeam_loom))]
-                    {
-                        let _guard = lock(self.value.get() as usize).write();
-                        let value = unsafe { &mut *(self.value.get()) };
-                        let old = *value;
-                        *value = cmp::max(old, val);
-                        old
-                    }
+                    let _guard = lock(self.as_ptr() as usize).write();
+                    let value = unsafe { &mut *(self.as_ptr()) };
+                    let old = *value;
+                    *value = cmp::max(old, val);
+                    old
                 }
             }
 
@@ -800,19 +708,11 @@
                     // TODO: Atomic*::fetch_min requires Rust 1.45.
                     self.fetch_update(|old| Some(cmp::min(old, val))).unwrap()
                 } else {
-                    #[cfg(crossbeam_loom)]
-                    {
-                        let _ = val;
-                        unimplemented!("loom does not support non-atomic atomic ops");
-                    }
-                    #[cfg(not(crossbeam_loom))]
-                    {
-                        let _guard = lock(self.value.get() as usize).write();
-                        let value = unsafe { &mut *(self.value.get()) };
-                        let old = *value;
-                        *value = cmp::min(old, val);
-                        old
-                    }
+                    let _guard = lock(self.as_ptr() as usize).write();
+                    let value = unsafe { &mut *(self.as_ptr()) };
+                    let old = *value;
+                    *value = cmp::min(old, val);
+                    old
                 }
             }
         }
@@ -868,7 +768,7 @@
     /// ```
     #[inline]
     pub fn fetch_and(&self, val: bool) -> bool {
-        let a = unsafe { &*(self.value.get() as *const AtomicBool) };
+        let a = unsafe { &*(self.as_ptr() as *const AtomicBool) };
         a.fetch_and(val, Ordering::AcqRel)
     }
 
@@ -892,7 +792,7 @@
     /// ```
     #[inline]
     pub fn fetch_nand(&self, val: bool) -> bool {
-        let a = unsafe { &*(self.value.get() as *const AtomicBool) };
+        let a = unsafe { &*(self.as_ptr() as *const AtomicBool) };
         a.fetch_nand(val, Ordering::AcqRel)
     }
 
@@ -913,7 +813,7 @@
     /// ```
     #[inline]
     pub fn fetch_or(&self, val: bool) -> bool {
-        let a = unsafe { &*(self.value.get() as *const AtomicBool) };
+        let a = unsafe { &*(self.as_ptr() as *const AtomicBool) };
         a.fetch_or(val, Ordering::AcqRel)
     }
 
@@ -934,7 +834,7 @@
     /// ```
     #[inline]
     pub fn fetch_xor(&self, val: bool) -> bool {
-        let a = unsafe { &*(self.value.get() as *const AtomicBool) };
+        let a = unsafe { &*(self.as_ptr() as *const AtomicBool) };
         a.fetch_xor(val, Ordering::AcqRel)
     }
 }
@@ -976,7 +876,6 @@
 /// scalability.
 #[inline]
 #[must_use]
-#[cfg(not(crossbeam_loom))]
 fn lock(addr: usize) -> &'static SeqLock {
     // The number of locks is a prime number because we want to make sure `addr % LEN` gets
     // dispersed across all locks.
@@ -1003,12 +902,7 @@
     const LEN: usize = 97;
     #[allow(clippy::declare_interior_mutable_const)]
     const L: SeqLock = SeqLock::new();
-    static LOCKS: [SeqLock; LEN] = [
-        L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
-        L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
-        L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
-        L, L, L, L, L, L, L,
-    ];
+    static LOCKS: [SeqLock; LEN] = [L; LEN];
 
     // If the modulus is a constant number, the compiler will use crazy math to transform this into
     // a sequence of cheap arithmetic operations rather than using the slow modulo instruction.
@@ -1067,9 +961,6 @@
             // TODO: AtomicU128 is unstable
             // atomic!(@check, $t, atomic::AtomicU128, $a, $atomic_op);
 
-            #[cfg(crossbeam_loom)]
-            unimplemented!("loom does not support non-atomic atomic ops");
-            #[cfg(not(crossbeam_loom))]
             break $fallback_op;
         }
     };
@@ -1077,7 +968,7 @@
 
 /// Returns `true` if operations on `AtomicCell<T>` are lock-free.
 const fn atomic_is_lock_free<T>() -> bool {
-    // HACK(taiki-e): This is equivalent to `atomic! { T, _a, true, false }`, but can be used in const fn even in Rust 1.36.
+    // HACK(taiki-e): This is equivalent to `atomic! { T, _a, true, false }`, but can be used in const fn even in our MSRV (Rust 1.38).
     let is_lock_free = can_transmute::<T, AtomicUnit>()
         | can_transmute::<T, atomic::AtomicU8>()
         | can_transmute::<T, atomic::AtomicU16>()
@@ -1113,10 +1004,11 @@
                 // discard the data when a data race is detected. The proper solution would be to
                 // do atomic reads and atomic writes, but we can't atomically read and write all
                 // kinds of data since `AtomicU8` is not available on stable Rust yet.
-                let val = ptr::read_volatile(src);
+                // Load as `MaybeUninit` because we may load a value that is not valid as `T`.
+                let val = ptr::read_volatile(src.cast::<MaybeUninit<T>>());
 
                 if lock.validate_read(stamp) {
-                    return val;
+                    return val.assume_init();
                 }
             }
 
@@ -1176,6 +1068,7 @@
 ///
 /// This operation uses the `AcqRel` ordering. If possible, an atomic instruction is used, and a
 /// global lock otherwise.
+#[allow(clippy::let_unit_value)]
 unsafe fn atomic_compare_exchange_weak<T>(dst: *mut T, mut current: T, new: T) -> Result<T, T>
 where
     T: Copy + Eq,
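
None of the changes in this file alter the public API; a minimal usage sketch of the surface they touch, assuming crossbeam-utils 0.8.14:

```rust
use crossbeam_utils::atomic::AtomicCell;

fn main() {
    let a = AtomicCell::new(7i32);
    assert_eq!(a.fetch_add(1), 7); // lock-free: i32 maps onto AtomicI32
    assert_eq!(a.swap(10), 8);
    assert_eq!(a.compare_exchange(10, 11), Ok(10));
    // into_inner consumes the cell, so no other thread can observe it;
    // with the MaybeUninit layout it now reads the value out via ManuallyDrop.
    assert_eq!(a.into_inner(), 11);
}
```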
diff --git a/src/atomic/mod.rs b/src/atomic/mod.rs
index fc713fc..3896785 100644
--- a/src/atomic/mod.rs
+++ b/src/atomic/mod.rs
@@ -24,9 +24,14 @@
 }
 
 #[cfg(not(crossbeam_no_atomic_cas))]
+// We cannot provide AtomicCell under cfg(crossbeam_loom) because loom's atomic
+// types have a different in-memory representation than the underlying type.
+// TODO: The latest loom supports fences, so fallback using seqlock may be available.
+#[cfg(not(crossbeam_loom))]
 mod atomic_cell;
 mod consume;
 
 #[cfg(not(crossbeam_no_atomic_cas))]
+#[cfg(not(crossbeam_loom))]
 pub use self::atomic_cell::AtomicCell;
 pub use self::consume::AtomicConsume;
diff --git a/src/backoff.rs b/src/backoff.rs
index 1012f06..9e256aa 100644
--- a/src/backoff.rs
+++ b/src/backoff.rs
@@ -201,6 +201,7 @@
     /// assert_eq!(ready.load(SeqCst), false);
     /// spin_wait(&ready);
     /// assert_eq!(ready.load(SeqCst), true);
+    /// # std::thread::sleep(std::time::Duration::from_millis(500)); // wait for background threads to close: https://github.com/rust-lang/miri/issues/1371
     /// ```
     ///
     /// [`AtomicBool`]: std::sync::atomic::AtomicBool
@@ -269,6 +270,7 @@
     /// assert_eq!(ready.load(SeqCst), false);
     /// blocking_wait(&ready);
     /// assert_eq!(ready.load(SeqCst), true);
+    /// # std::thread::sleep(std::time::Duration::from_millis(500)); // wait for background threads to close: https://github.com/rust-lang/miri/issues/1371
     /// ```
     ///
     /// [`AtomicBool`]: std::sync::atomic::AtomicBool
diff --git a/src/cache_padded.rs b/src/cache_padded.rs
index 822e831..b5d5d33 100644
--- a/src/cache_padded.rs
+++ b/src/cache_padded.rs
@@ -39,9 +39,9 @@
 /// let addr1 = &*array[0] as *const i8 as usize;
 /// let addr2 = &*array[1] as *const i8 as usize;
 ///
-/// assert!(addr2 - addr1 >= 64);
-/// assert_eq!(addr1 % 64, 0);
-/// assert_eq!(addr2 % 64, 0);
+/// assert!(addr2 - addr1 >= 32);
+/// assert_eq!(addr1 % 32, 0);
+/// assert_eq!(addr2 % 32, 0);
 /// ```
 ///
 /// When building a concurrent queue with a head and a tail index, it is wise to place them in
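
The doc assertions drop from 64 to 32 because `CachePadded`'s alignment is target-dependent (e.g. 128 bytes on x86_64/aarch64, 32 on some 32-bit targets); 32 is the smallest multiple the docs can assert portably. A sketch to inspect the values on the build host:

```rust
use crossbeam_utils::CachePadded;
use std::mem::{align_of, size_of};

fn main() {
    // Both are target-dependent; on x86_64 this typically prints 128.
    println!("align = {}", align_of::<CachePadded<i8>>());
    println!("size  = {}", size_of::<CachePadded<i8>>());
}
```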
diff --git a/src/sync/mod.rs b/src/sync/mod.rs
index eeb740c..f9eec71 100644
--- a/src/sync/mod.rs
+++ b/src/sync/mod.rs
@@ -4,6 +4,8 @@
 //! * [`ShardedLock`], a sharded reader-writer lock with fast concurrent reads.
 //! * [`WaitGroup`], for synchronizing the beginning or end of some computation.
 
+#[cfg(not(crossbeam_loom))]
+mod once_lock;
 mod parker;
 #[cfg(not(crossbeam_loom))]
 mod sharded_lock;
diff --git a/src/sync/once_lock.rs b/src/sync/once_lock.rs
new file mode 100644
index 0000000..c1fefc9
--- /dev/null
+++ b/src/sync/once_lock.rs
@@ -0,0 +1,103 @@
+// Based on unstable std::sync::OnceLock.
+//
+// Source: https://github.com/rust-lang/rust/blob/8e9c93df464b7ada3fc7a1c8ccddd9dcb24ee0a0/library/std/src/sync/once_lock.rs
+
+use core::cell::UnsafeCell;
+use core::mem::MaybeUninit;
+use core::sync::atomic::{AtomicBool, Ordering};
+use std::sync::Once;
+
+pub(crate) struct OnceLock<T> {
+    once: Once,
+    // Once::is_completed requires Rust 1.43, so use this to track whether the value has been initialized.
+    is_initialized: AtomicBool,
+    value: UnsafeCell<MaybeUninit<T>>,
+    // Unlike std::sync::OnceLock, we don't need PhantomData here because
+    // we don't use #[may_dangle].
+}
+
+unsafe impl<T: Sync + Send> Sync for OnceLock<T> {}
+unsafe impl<T: Send> Send for OnceLock<T> {}
+
+impl<T> OnceLock<T> {
+    /// Creates a new empty cell.
+    #[must_use]
+    pub(crate) const fn new() -> Self {
+        Self {
+            once: Once::new(),
+            is_initialized: AtomicBool::new(false),
+            value: UnsafeCell::new(MaybeUninit::uninit()),
+        }
+    }
+
+    /// Gets the contents of the cell, initializing it with `f` if the cell
+    /// was empty.
+    ///
+    /// Many threads may call `get_or_init` concurrently with different
+    /// initializing functions, but it is guaranteed that only one function
+    /// will be executed.
+    ///
+    /// # Panics
+    ///
+    /// If `f` panics, the panic is propagated to the caller, and the cell
+    /// remains uninitialized.
+    ///
+    /// It is an error to reentrantly initialize the cell from `f`. The
+    /// exact outcome is unspecified. The current implementation deadlocks, but
+    /// this may be changed to a panic in the future.
+    pub(crate) fn get_or_init<F>(&self, f: F) -> &T
+    where
+        F: FnOnce() -> T,
+    {
+        // Fast path check
+        if self.is_initialized() {
+            // SAFETY: The inner value has been initialized
+            return unsafe { self.get_unchecked() };
+        }
+        self.initialize(f);
+
+        debug_assert!(self.is_initialized());
+
+        // SAFETY: The inner value has been initialized
+        unsafe { self.get_unchecked() }
+    }
+
+    #[inline]
+    fn is_initialized(&self) -> bool {
+        self.is_initialized.load(Ordering::Acquire)
+    }
+
+    #[cold]
+    fn initialize<F>(&self, f: F)
+    where
+        F: FnOnce() -> T,
+    {
+        let slot = self.value.get().cast::<T>();
+        let is_initialized = &self.is_initialized;
+
+        self.once.call_once(|| {
+            let value = f();
+            unsafe {
+                slot.write(value);
+            }
+            is_initialized.store(true, Ordering::Release);
+        });
+    }
+
+    /// # Safety
+    ///
+    /// The value must be initialized
+    unsafe fn get_unchecked(&self) -> &T {
+        debug_assert!(self.is_initialized());
+        &*self.value.get().cast::<T>()
+    }
+}
+
+impl<T> Drop for OnceLock<T> {
+    fn drop(&mut self) {
+        if self.is_initialized() {
+            // SAFETY: The inner value has been initialized
+            unsafe { self.value.get().cast::<T>().drop_in_place() };
+        }
+    }
+}
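
This private `OnceLock` is what lets the crate drop `lazy_static` (see the sharded_lock.rs hunk below) while keeping MSRV 1.38. For comparison only, the same pattern with std's `OnceLock`, stable since Rust 1.70 and therefore unusable here; `registry` is a hypothetical name:

```rust
use std::sync::{Mutex, OnceLock};

fn registry() -> &'static Mutex<Vec<u64>> {
    // Lazily initialized on first call, mirroring thread_indices() below.
    static REGISTRY: OnceLock<Mutex<Vec<u64>>> = OnceLock::new();
    REGISTRY.get_or_init(|| Mutex::new(Vec::new()))
}

fn main() {
    registry().lock().unwrap().push(1);
    assert_eq!(registry().lock().unwrap().len(), 1);
}
```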
diff --git a/src/sync/parker.rs b/src/sync/parker.rs
index 531f5a5..e791c44 100644
--- a/src/sync/parker.rs
+++ b/src/sync/parker.rs
@@ -44,6 +44,7 @@
 ///
 /// // Wakes up when `u.unpark()` provides the token.
 /// p.park();
+/// # std::thread::sleep(std::time::Duration::from_millis(500)); // wait for background threads to close: https://github.com/rust-lang/miri/issues/1371
 /// ```
 ///
 /// [`park`]: Parker::park
@@ -241,6 +242,7 @@
     ///
     /// // Wakes up when `u.unpark()` provides the token.
     /// p.park();
+    /// # std::thread::sleep(std::time::Duration::from_millis(500)); // wait for background threads to close: https://github.com/rust-lang/miri/issues/1371
     /// ```
     ///
     /// [`park`]: Parker::park
@@ -262,7 +264,7 @@
     /// # let _ = unsafe { Unparker::from_raw(raw) };
     /// ```
     pub fn into_raw(this: Unparker) -> *const () {
-        Arc::into_raw(this.inner) as *const ()
+        Arc::into_raw(this.inner).cast::<()>()
     }
 
     /// Converts a raw pointer into an `Unparker`.
@@ -284,7 +286,7 @@
     /// ```
     pub unsafe fn from_raw(ptr: *const ()) -> Unparker {
         Unparker {
-            inner: Arc::from_raw(ptr as *const Inner),
+            inner: Arc::from_raw(ptr.cast::<Inner>()),
         }
     }
 }
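
The `cast::<()>()` changes are stylistic (clippy's preferred spelling of the `as` cast); the raw-pointer round-trip contract is unchanged. A sketch, assuming crossbeam-utils as a dependency:

```rust
use crossbeam_utils::sync::{Parker, Unparker};

fn main() {
    let p = Parker::new();
    let u = p.unparker().clone();

    // from_raw must only be fed a pointer produced by into_raw.
    let raw: *const () = Unparker::into_raw(u);
    let u = unsafe { Unparker::from_raw(raw) };

    u.unpark();
    p.park(); // returns immediately: the token was already provided
}
```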
diff --git a/src/sync/sharded_lock.rs b/src/sync/sharded_lock.rs
index 163d34c..b43c55e 100644
--- a/src/sync/sharded_lock.rs
+++ b/src/sync/sharded_lock.rs
@@ -9,8 +9,8 @@
 use std::sync::{Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard};
 use std::thread::{self, ThreadId};
 
+use crate::sync::once_lock::OnceLock;
 use crate::CachePadded;
-use lazy_static::lazy_static;
 
 /// The number of shards per sharded lock. Must be a power of two.
 const NUM_SHARDS: usize = 8;
@@ -583,12 +583,16 @@
     next_index: usize,
 }
 
-lazy_static! {
-    static ref THREAD_INDICES: Mutex<ThreadIndices> = Mutex::new(ThreadIndices {
-        mapping: HashMap::new(),
-        free_list: Vec::new(),
-        next_index: 0,
-    });
+fn thread_indices() -> &'static Mutex<ThreadIndices> {
+    static THREAD_INDICES: OnceLock<Mutex<ThreadIndices>> = OnceLock::new();
+    fn init() -> Mutex<ThreadIndices> {
+        Mutex::new(ThreadIndices {
+            mapping: HashMap::new(),
+            free_list: Vec::new(),
+            next_index: 0,
+        })
+    }
+    THREAD_INDICES.get_or_init(init)
 }
 
 /// A registration of a thread with an index.
@@ -601,7 +605,7 @@
 
 impl Drop for Registration {
     fn drop(&mut self) {
-        let mut indices = THREAD_INDICES.lock().unwrap();
+        let mut indices = thread_indices().lock().unwrap();
         indices.mapping.remove(&self.thread_id);
         indices.free_list.push(self.index);
     }
@@ -610,7 +614,7 @@
 thread_local! {
     static REGISTRATION: Registration = {
         let thread_id = thread::current().id();
-        let mut indices = THREAD_INDICES.lock().unwrap();
+        let mut indices = thread_indices().lock().unwrap();
 
         let index = match indices.free_list.pop() {
             Some(i) => i,
diff --git a/src/sync/wait_group.rs b/src/sync/wait_group.rs
index 4206ee4..19d6074 100644
--- a/src/sync/wait_group.rs
+++ b/src/sync/wait_group.rs
@@ -1,6 +1,3 @@
-// Necessary for using `Mutex<usize>` for conditional variables
-#![allow(clippy::mutex_atomic)]
-
 use crate::primitive::sync::{Arc, Condvar, Mutex};
 use std::fmt;
 
@@ -42,6 +39,7 @@
 ///
 /// // Block until all threads have finished their work.
 /// wg.wait();
+/// # std::thread::sleep(std::time::Duration::from_millis(500)); // wait for background threads to close: https://github.com/rust-lang/miri/issues/1371
 /// ```
 ///
 /// [`Barrier`]: std::sync::Barrier
@@ -100,6 +98,7 @@
     ///
     /// // Block until both threads have reached `wait()`.
     /// wg.wait();
+    /// # std::thread::sleep(std::time::Duration::from_millis(500)); // wait for background threads to close: https://github.com/rust-lang/miri/issues/1371
     /// ```
     pub fn wait(self) {
         if *self.inner.count.lock().unwrap() == 1 {
diff --git a/src/thread.rs b/src/thread.rs
index a59a4f5..f1086d9 100644
--- a/src/thread.rs
+++ b/src/thread.rs
@@ -130,7 +130,8 @@
 /// All child threads that haven't been manually joined will be automatically joined just before
 /// this function invocation ends. If all joined threads have successfully completed, `Ok` is
 /// returned with the return value of `f`. If any of the joined threads has panicked, an `Err` is
-/// returned containing errors from panicked threads.
+/// returned containing errors from panicked threads. Note that if panics are implemented by
+/// aborting the process, no error is returned; see the notes on [std::panic::catch_unwind].
 ///
 /// # Examples
 ///
@@ -490,7 +491,8 @@
 impl<T> ScopedJoinHandle<'_, T> {
     /// Waits for the thread to finish and returns its result.
     ///
-    /// If the child thread panics, an error is returned.
+    /// If the child thread panics, an error is returned. Note that if panics are implemented by
+    /// aborting the process, no error is returned; see the notes on [std::panic::catch_unwind].
     ///
     /// # Panics
     ///
diff --git a/tests/atomic_cell.rs b/tests/atomic_cell.rs
index da7a6e1..a1d1022 100644
--- a/tests/atomic_cell.rs
+++ b/tests/atomic_cell.rs
@@ -330,3 +330,51 @@
     let x = AtomicCell::new(Test::FieldLess);
     assert_eq!(x.load(), Test::FieldLess);
 }
+
+// https://github.com/crossbeam-rs/crossbeam/issues/833
+#[rustversion::since(1.40)] // const_constructor requires Rust 1.40
+#[test]
+fn issue_833() {
+    use std::num::NonZeroU128;
+    use std::sync::atomic::{AtomicBool, Ordering};
+    use std::thread;
+
+    #[cfg(miri)]
+    const N: usize = 10_000;
+    #[cfg(not(miri))]
+    const N: usize = 1_000_000;
+
+    #[allow(dead_code)]
+    enum Enum {
+        NeverConstructed,
+        Cell(AtomicCell<NonZeroU128>),
+    }
+
+    static STATIC: Enum = Enum::Cell(AtomicCell::new(match NonZeroU128::new(1) {
+        Some(nonzero) => nonzero,
+        None => unreachable!(),
+    }));
+    static FINISHED: AtomicBool = AtomicBool::new(false);
+
+    let handle = thread::spawn(|| {
+        let cell = match &STATIC {
+            Enum::NeverConstructed => unreachable!(),
+            Enum::Cell(cell) => cell,
+        };
+        let x = NonZeroU128::new(0xFFFF_FFFF_FFFF_FFFF_0000_0000_0000_0000).unwrap();
+        let y = NonZeroU128::new(0x0000_0000_0000_0000_FFFF_FFFF_FFFF_FFFF).unwrap();
+        while !FINISHED.load(Ordering::Relaxed) {
+            cell.store(x);
+            cell.store(y);
+        }
+    });
+
+    for _ in 0..N {
+        if let Enum::NeverConstructed = STATIC {
+            unreachable!(":(");
+        }
+    }
+
+    FINISHED.store(true, Ordering::Relaxed);
+    handle.join().unwrap();
+}
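
The test works because `NonZeroU128` has a niche: the forbidden all-zero bit pattern is where the enum stores its discriminant, so a torn seqlock write could momentarily make `STATIC` read as `NeverConstructed`. A tiny demonstration of that layout fact:

```rust
use std::mem::size_of;
use std::num::NonZeroU128;

fn main() {
    // Option (like the two-variant enum in issue_833) needs no extra tag
    // byte: the None case is encoded in NonZeroU128's forbidden zero value.
    assert_eq!(size_of::<NonZeroU128>(), size_of::<Option<NonZeroU128>>());
}
```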
diff --git a/tests/sharded_lock.rs b/tests/sharded_lock.rs
index 0718b44..8b0ae8e 100644
--- a/tests/sharded_lock.rs
+++ b/tests/sharded_lock.rs
@@ -22,7 +22,7 @@
 fn frob() {
     const N: u32 = 10;
     #[cfg(miri)]
-    const M: usize = 100;
+    const M: usize = 50;
     #[cfg(not(miri))]
     const M: usize = 1000;
 
diff --git a/tests/wait_group.rs b/tests/wait_group.rs
index b6c2a24..0ec4a72 100644
--- a/tests/wait_group.rs
+++ b/tests/wait_group.rs
@@ -36,6 +36,7 @@
 }
 
 #[test]
+#[cfg_attr(miri, ignore)] // this test makes timing assumptions, but Miri is so slow it violates them
 fn wait_and_drop() {
     let wg = WaitGroup::new();
     let (tx, rx) = mpsc::channel();
@@ -51,8 +52,8 @@
         });
     }
 
-    // At this point, all spawned threads should be sleeping, so we shouldn't get anything from the
-    // channel.
+    // At this point, all spawned threads should be in `thread::sleep`, so we shouldn't get anything
+    // from the channel.
     assert!(rx.try_recv().is_err());
 
     wg.wait();