Snap for 8564071 from 12223f22cfdb705bcf72b627190ab4111dd43ea0 to mainline-wifi-release

Change-Id: Idb71cf2177fd6f5258a3580e1aab4667815cf460
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
index 07d5794..56f48b2 100644
--- a/.cargo_vcs_info.json
+++ b/.cargo_vcs_info.json
@@ -1,5 +1,5 @@
 {
   "git": {
-    "sha1": "7fe431c5f3af6a374812e9fdcb3c7059ca94175c"
+    "sha1": "95e2993afe52104d6d585173ddedb3da6afba533"
   }
 }
diff --git a/.travis.yml b/.travis.yml
index 7807456..6a1c30f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -21,7 +21,8 @@
     - travis-cargo build
     - travis-cargo test
     - travis-cargo doc -- --no-deps
-    - rustdoc --test README.md -L target/debug
+    # TODO: Reenable later
+    #- rustdoc --test README.md -L target/debug
 
 after_success:
     - curl https://mvdnes.github.io/rust-docs/travis-doc-upload.sh | bash
diff --git a/Android.bp b/Android.bp
index 5d95d6d..6d0e88c 100644
--- a/Android.bp
+++ b/Android.bp
@@ -33,47 +33,66 @@
 
 rust_library {
     name: "libspin",
-    // has rustc warnings
     host_supported: true,
     crate_name: "spin",
+    cargo_env_compat: true,
+    cargo_pkg_version: "0.9.2",
     srcs: ["src/lib.rs"],
     edition: "2015",
     features: [
-        "default",
+        "once",
         "std",
-        "ticket_mutex",
     ],
     apex_available: [
         "//apex_available:platform",
+        "com.android.compos",
         "com.android.resolv",
+        "com.android.virt",
     ],
     min_sdk_version: "29",
 }
 
-rust_defaults {
-    name: "spin_defaults",
+rust_test {
+    name: "spin_test_src_lib",
+    host_supported: true,
     crate_name: "spin",
-    // has rustc warnings
+    cargo_env_compat: true,
+    cargo_pkg_version: "0.9.2",
     srcs: ["src/lib.rs"],
     test_suites: ["general-tests"],
     auto_gen_config: true,
-    edition: "2015",
-    features: [
-        "default",
-        "std",
-        "ticket_mutex",
-    ],
-}
-
-rust_test_host {
-    name: "spin_host_test_src_lib",
-    defaults: ["spin_defaults"],
     test_options: {
         unit_test: true,
     },
+    edition: "2015",
+    features: [
+        "once",
+        "std",
+    ],
 }
 
-rust_test {
-    name: "spin_device_test_src_lib",
-    defaults: ["spin_defaults"],
+rust_library_rlib {
+    name: "libspin_nostd",
+    host_supported: true,
+    crate_name: "spin",
+    cargo_env_compat: true,
+    cargo_pkg_version: "0.9.2",
+    srcs: ["src/lib.rs"],
+    edition: "2015",
+    features: [
+        "mutex",
+        "spin_mutex",
+    ],
+    apex_available: [
+        "//apex_available:platform",
+        "com.android.virt",
+    ],
+    min_sdk_version: "29",
 }
+
+
+// Errors when listing tests:
+// error[E0433]: failed to resolve: could not find `Mutex` in `spin`
+// error[E0433]: failed to resolve: could not find `RwLock` in `spin`
+// error: could not compile `spin` due to 2 previous errors
+// error: build failed
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 462d3c6..abbeee1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,43 @@
 
 ### Fixed
 
+# [0.9.2] - 2021-07-09
+
+### Changed
+
+- Improved `Once` performance by reducing the memory footprint of internal state to one byte
+
+### Fixed
+
+- Improved performance of `Once` by relaxing ordering guarantees and removing redundant checks
+
+# [0.9.1] - 2021-06-21
+
+### Added
+
+- Default type parameter on `Once` for better ergonomics
+
+# [0.9.0] - 2021-03-18
+
+### Changed
+
+- Placed all major API features behind feature flags
+
+### Fixed
+
+- A compilation bug with the `lock_api` feature
+
+# [0.8.0] - 2021-03-15
+
+### Added
+
+- `Once::get_unchecked`
+- `RelaxStrategy` trait with type parameter on all locks to support switching between relax strategies
+
+### Changed
+
+- `lock_api1` feature is now named `lock_api`
+
 # [0.7.1] - 2021-01-12
 
 ### Fixed
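To make the 0.9.1 ergonomics entry above concrete: with the new default type parameter, `Once` no longer needs an explicit `Once<()>` for the plain "run exactly once" case. A minimal sketch with hypothetical names (`INIT`, `init_logging`), assuming spin 0.9.x with the `once` feature enabled:

```rust
use spin::Once;

// Since 0.9.1 the type parameter defaults to `()`, so a bare one-time flag
// needs no `Once<()>` annotation.
static INIT: Once = Once::new();

fn init_logging() {
    INIT.call_once(|| {
        // One-time setup; later calls skip the closure entirely.
        println!("logging initialised");
    });
}

fn main() {
    init_logging();
    init_logging(); // no-op on the second call
    assert!(INIT.get().is_some());
}
```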
diff --git a/Cargo.toml b/Cargo.toml
index b15f61c..c32199c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,19 +12,29 @@
 
 [package]
 name = "spin"
-version = "0.7.1"
+version = "0.9.2"
 authors = ["Mathijs van de Nes <git@mathijs.vd-nes.nl>", "John Ericson <git@JohnEricson.me>", "Joshua Barretto <joshua.s.barretto@gmail.com>"]
 description = "Spin-based synchronization primitives"
 keywords = ["spinlock", "mutex", "rwlock"]
 license = "MIT"
 repository = "https://github.com/mvdnes/spin-rs.git"
+[package.metadata.docs.rs]
+all-features = true
+rustdoc-args = ["--cfg", "docsrs"]
 [dependencies.lock_api_crate]
 version = "0.4"
 optional = true
 package = "lock_api"
 
 [features]
-default = ["ticket_mutex"]
+barrier = ["mutex"]
+default = ["lock_api", "mutex", "spin_mutex", "rwlock", "once", "lazy", "barrier"]
+lazy = ["once"]
 lock_api = ["lock_api_crate"]
+mutex = []
+once = []
+rwlock = []
+spin_mutex = ["mutex"]
 std = []
-ticket_mutex = []
+ticket_mutex = ["mutex"]
+use_ticket_mutex = ["mutex", "ticket_mutex"]
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
index ba4591d..ee6fb09 100644
--- a/Cargo.toml.orig
+++ b/Cargo.toml.orig
@@ -1,6 +1,6 @@
 [package]
 name = "spin"
-version = "0.7.1"
+version = "0.9.2"
 authors = [
 	"Mathijs van de Nes <git@mathijs.vd-nes.nl>",
 	"John Ericson <git@JohnEricson.me>",
@@ -15,7 +15,38 @@
 lock_api_crate = { package = "lock_api", version = "0.4", optional = true }
 
 [features]
-default = ["ticket_mutex"]
+default = ["lock_api", "mutex", "spin_mutex", "rwlock", "once", "lazy", "barrier"]
+
+# Enables `Mutex`. Must be used with either `spin_mutex` or `use_ticket_mutex`.
+mutex = []
+
+# Enables `SpinMutex` and the default spin mutex implementation for `Mutex`.
+spin_mutex = ["mutex"]
+
+# Enables `TicketMutex`.
+ticket_mutex = ["mutex"]
+
+# Enables the non-default ticket mutex implementation for `Mutex`.
+use_ticket_mutex = ["mutex", "ticket_mutex"]
+
+# Enables `RwLock`.
+rwlock = []
+
+# Enables `Once`.
+once = []
+
+# Enables `Lazy`.
+lazy = ["once"]
+
+# Enables `Barrier`. Because this feature uses `mutex`, either `spin_mutex` or `use_ticket_mutex` must be enabled.
+barrier = ["mutex"]
+
+# Enables `lock_api`-compatible types that use the primitives in this crate internally.
 lock_api = ["lock_api_crate"]
-ticket_mutex = []
+
+# Enables std-only features such as yield-relaxing.
 std = []
+
+[package.metadata.docs.rs]
+all-features = true
+rustdoc-args = ["--cfg", "docsrs"]
diff --git a/METADATA b/METADATA
index b2396cb..0213653 100644
--- a/METADATA
+++ b/METADATA
@@ -7,13 +7,13 @@
   }
   url {
     type: ARCHIVE
-    value: "https://static.crates.io/crates/spin/spin-0.7.1.crate"
+    value: "https://static.crates.io/crates/spin/spin-0.9.2.crate"
   }
-  version: "0.7.1"
+  version: "0.9.2"
   license_type: NOTICE
   last_upgrade_date {
     year: 2021
-    month: 1
-    day: 12
+    month: 8
+    day: 9
   }
 }
diff --git a/README.md b/README.md
index 43a91dd..3d7d758 100644
--- a/README.md
+++ b/README.md
@@ -18,14 +18,14 @@
 
 ## Features
 
-- `Mutex`, `RwLock` and `Once` equivalents
+- `Mutex`, `RwLock`, `Once`, `Lazy` and `Barrier` equivalents
 - Support for `no_std` environments
 - [`lock_api`](https://crates.io/crates/lock_api) compatibility
 - Upgradeable `RwLock` guards
 - Guards can be sent and shared between threads
 - Guard leaking
-- `std` feature to enable yield to the OS scheduler in busy loops
-- `Mutex` can become a ticket lock
+- Ticket locks
+- Different strategies for dealing with contention
 
 ## Usage
 
@@ -38,7 +38,7 @@
 ## Example
 
 When calling `lock` on a `Mutex` you will get a guard value that provides access
-to the data. When this guard is dropped, the lock will be unlocked.
+to the data. When this guard is dropped, the mutex will become available again.
 
 ```rust
 extern crate spin;
@@ -50,19 +50,19 @@
     let thread = thread::spawn({
         let counter = counter.clone();
         move || {
-            for _ in 0..10 {
+            for _ in 0..100 {
                 *counter.lock() += 1;
             }
         }
     });
 
-    for _ in 0..10 {
+    for _ in 0..100 {
         *counter.lock() += 1;
     }
 
     thread.join().unwrap();
 
-    assert_eq!(*counter.lock(), 20);
+    assert_eq!(*counter.lock(), 200);
 }
 ```
 
@@ -70,11 +70,27 @@
 
 The crate comes with a few feature flags that you may wish to use.
 
-- `lock_api` enabled support for [`lock_api`](https://crates.io/crates/lock_api)
+- `mutex` enables the `Mutex` type.
 
-- `ticket_mutex` uses a ticket lock for the implementation of `Mutex`
+- `spin_mutex` enables the `SpinMutex` type.
 
-- `std` enables support for thread yielding instead of spinning
+- `ticket_mutex` enables the `TicketMutex` type.
+
+- `use_ticket_mutex` switches to a ticket lock for the implementation of `Mutex`. This
+  is recommended only on targets for which ordinary spinning locks perform very badly
+  because it will change the implementation used by other crates that depend on `spin`.
+
+- `rwlock` enables the `RwLock` type.
+
+- `once` enables the `Once` type.
+
+- `lazy` enables the `Lazy` type.
+
+- `barrier` enables the `Barrier` type.
+
+- `lock_api` enables support for [`lock_api`](https://crates.io/crates/lock_api)
+
+- `std` enables support for thread yielding instead of spinning.
 
 ## Remarks
 
@@ -89,7 +105,16 @@
 
 - Locks will not be poisoned in case of failure.
 - Threads will not yield to the OS scheduler when they encounter a lock that cannot be
-accessed. Instead, they will 'spin' in a busy loop until the lock becomes available.
+  accessed. Instead, they will 'spin' in a busy loop until the lock becomes available.
+
+Many of the feature flags listed above are enabled by default. If you're writing a
+library, we recommend disabling those that you don't use to avoid increasing compilation
+time for your crate's users. You can do this like so:
+
+```
+[dependencies]
+spin = { version = "x.y", default-features = false, features = [...] }
+```
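As a rough sketch of what the trimmed-down configuration used by the new `libspin_nostd` variant still offers (only `mutex` and `spin_mutex`; `RwLock`, `Once`, `Lazy` and `Barrier` are compiled out), with `COUNTER` and `bump` being hypothetical names:

```rust
use spin::mutex::SpinMutex;

// With only `mutex` + `spin_mutex` enabled, the concrete `SpinMutex` type
// (and the `Mutex` alias backed by it) is what remains usable.
static COUNTER: SpinMutex<u32> = SpinMutex::new(0);

fn bump() -> u32 {
    let mut guard = COUNTER.lock();
    *guard += 1;
    *guard
}

fn main() {
    assert_eq!(bump(), 1);
    assert_eq!(bump(), 2);
}
```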
 
 ## License
 
diff --git a/TEST_MAPPING b/TEST_MAPPING
index 6d31624..777e539 100644
--- a/TEST_MAPPING
+++ b/TEST_MAPPING
@@ -1,55 +1,72 @@
-// Generated by cargo2android.py for tests that depend on this crate.
+// Generated by update_crate_tests.py for tests that depend on this crate.
 {
+  "imports": [
+    {
+      "path": "external/rust/crates/quiche"
+    },
+    {
+      "path": "external/rust/crates/ring"
+    },
+    {
+      "path": "external/rust/crates/webpki"
+    }
+  ],
   "presubmit": [
     {
-      "name": "ring_device_test_tests_rand_tests"
+      "name": "apkdmverity.test"
     },
     {
-      "name": "ring_device_test_tests_signature_tests"
+      "name": "authfs_device_test_src_lib"
     },
     {
-      "name": "ring_device_test_tests_hkdf_tests"
+      "name": "doh_unit_test"
     },
     {
-      "name": "ring_device_test_tests_hmac_tests"
+      "name": "libapkverify.integration_test"
     },
     {
-      "name": "ring_device_test_tests_ecdsa_tests"
+      "name": "libapkverify.test"
     },
     {
-      "name": "ring_device_test_tests_agreement_tests"
+      "name": "libidsig.test"
     },
     {
-      "name": "ring_device_test_tests_ed25519_tests"
+      "name": "microdroid_manager_test"
     },
     {
-      "name": "spin_device_test_src_lib"
+      "name": "spin_test_src_lib"
     },
     {
-      "name": "ring_device_test_tests_rsa_tests"
+      "name": "virtualizationservice_device_test"
+    }
+  ],
+  "presubmit-rust": [
+    {
+      "name": "apkdmverity.test"
     },
     {
-      "name": "ring_device_test_tests_quic_tests"
+      "name": "authfs_device_test_src_lib"
     },
     {
-      "name": "ring_device_test_tests_pbkdf2_tests"
+      "name": "doh_unit_test"
     },
     {
-      "name": "ring_device_test_tests_constant_time_tests"
+      "name": "libapkverify.integration_test"
     },
     {
-      "name": "ring_device_test_src_lib"
+      "name": "libapkverify.test"
     },
     {
-      "name": "ring_device_test_tests_aead_tests"
+      "name": "libidsig.test"
     },
     {
-      "name": "ring_device_test_tests_digest_tests",
-      "options": [
-        {
-          "test-timeout": "600000"
-        }
-      ]
+      "name": "microdroid_manager_test"
+    },
+    {
+      "name": "spin_test_src_lib"
+    },
+    {
+      "name": "virtualizationservice_device_test"
     }
   ]
 }
diff --git a/cargo2android.json b/cargo2android.json
index 5441827..086d38a 100644
--- a/cargo2android.json
+++ b/cargo2android.json
@@ -1,12 +1,15 @@
 {
+  "add-toplevel-block": "cargo2android_nostd.bp",
   "apex-available": [
     "//apex_available:platform",
-    "com.android.resolv"
+    "com.android.compos",
+    "com.android.resolv",
+    "com.android.virt"
   ],
   "dependencies": true,
   "device": true,
+  "features": "once,std",
   "min-sdk-version": "29",
-  "patch": "patches/Android.bp.patch",
   "run": true,
   "tests": true
 }
\ No newline at end of file
diff --git a/cargo2android_nostd.bp b/cargo2android_nostd.bp
new file mode 100644
index 0000000..49d0318
--- /dev/null
+++ b/cargo2android_nostd.bp
@@ -0,0 +1,18 @@
+rust_library_rlib {
+    name: "libspin_nostd",
+    host_supported: true,
+    crate_name: "spin",
+    cargo_env_compat: true,
+    cargo_pkg_version: "0.9.2",
+    srcs: ["src/lib.rs"],
+    edition: "2015",
+    features: [
+        "mutex",
+        "spin_mutex",
+    ],
+    apex_available: [
+        "//apex_available:platform",
+        "com.android.virt",
+    ],
+    min_sdk_version: "29",
+}
diff --git a/patches/Android.bp.patch b/patches/Android.bp.patch
deleted file mode 100644
index f606cb9..0000000
--- a/patches/Android.bp.patch
+++ /dev/null
@@ -1,20 +0,0 @@
-diff --git a/Android.bp b/Android.bp
-index 96531ab..43db8ef 100644
---- a/Android.bp
-+++ b/Android.bp
-@@ -39,6 +39,7 @@ rust_library {
-     edition: "2015",
-     features: [
-         "default",
-+        "std",
-         "ticket_mutex",
-     ],
-    apex_available: [
-@@ -56,6 +62,7 @@ rust_defaults {
-     edition: "2015",
-     features: [
-         "default",
-+        "std",
-         "ticket_mutex",
-     ],
- }
diff --git a/patches/disable_panic_tests.patch b/patches/disable_panic_tests.patch
index 658ee32..52610fb 100644
--- a/patches/disable_panic_tests.patch
+++ b/patches/disable_panic_tests.patch
@@ -34,10 +34,10 @@
      fn panic() {
          use ::std::panic;
  
-diff --git a/src/rw_lock.rs b/src/rw_lock.rs
+diff --git a/src/rwlock.rs b/src/rwlock.rs
 index 5c009cf..ed50407 100644
---- a/src/rw_lock.rs
-+++ b/src/rw_lock.rs
+--- a/src/rwlock.rs
++++ b/src/rwlock.rs
 @@ -932,6 +932,7 @@ mod tests {
      }
  
diff --git a/src/barrier.rs b/src/barrier.rs
index 073944f..7a13890 100644
--- a/src/barrier.rs
+++ b/src/barrier.rs
@@ -1,22 +1,20 @@
 //! Synchronization primitive allowing multiple threads to synchronize the
 //! beginning of some computation.
 //!
-//! Implementation adopted the 'Barrier' type of the standard library. See:
-//! https://doc.rust-lang.org/std/sync/struct.Barrier.html
+//! Implementation adapted from the 'Barrier' type of the standard library. See:
+//! <https://doc.rust-lang.org/std/sync/struct.Barrier.html>
 //!
 //! Copyright 2014 The Rust Project Developers. See the COPYRIGHT
 //! file at the top-level directory of this distribution and at
-//! http://rust-lang.org/COPYRIGHT.
-//! 
+//! <http://rust-lang.org/COPYRIGHT>.
+//!
 //! Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-//! http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-//! <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+//! <http://www.apache.org/licenses/LICENSE-2.0>> or the MIT license
+//! <LICENSE-MIT or <http://opensource.org/licenses/MIT>>, at your
 //! option. This file may not be copied, modified, or distributed
 //! except according to those terms.
 
-use core::sync::atomic::spin_loop_hint as cpu_relax;
-
-use crate::Mutex;
+use crate::{mutex::Mutex, RelaxStrategy, Spin};
 
 /// A primitive that synchronizes the execution of multiple threads.
 ///
@@ -44,8 +42,8 @@
 ///     handle.join().unwrap();
 /// }
 /// ```
-pub struct Barrier {
-    lock: Mutex<BarrierState>,
+pub struct Barrier<R = Spin> {
+    lock: Mutex<BarrierState, R>,
     num_threads: usize,
 }
 
@@ -71,32 +69,7 @@
 /// ```
 pub struct BarrierWaitResult(bool);
 
-impl Barrier {
-    /// Creates a new barrier that can block a given number of threads.
-    ///
-    /// A barrier will block `n`-1 threads which call [`wait`] and then wake up
-    /// all threads at once when the `n`th thread calls [`wait`]. A Barrier created
-    /// with n = 0 will behave identically to one created with n = 1.
-    ///
-    /// [`wait`]: #method.wait
-    ///
-    /// # Examples
-    ///
-    /// ```
-    /// use spin;
-    ///
-    /// let barrier = spin::Barrier::new(10);
-    /// ```
-    pub const fn new(n: usize) -> Barrier {
-        Barrier {
-            lock: Mutex::new(BarrierState {
-                count: 0,
-                generation_id: 0,
-            }),
-            num_threads: n,
-        }
-    }
-
+impl<R: RelaxStrategy> Barrier<R> {
     /// Blocks the current thread until all threads have rendezvoused here.
     ///
     /// Barriers are re-usable after all threads have rendezvoused once, and can
@@ -145,7 +118,7 @@
             while local_gen == lock.generation_id &&
                 lock.count < self.num_threads {
                 drop(lock);
-                cpu_relax();
+                R::relax();
                 lock = self.lock.lock();
             }
             BarrierWaitResult(false)
@@ -159,6 +132,33 @@
     }
 }
 
+impl<R> Barrier<R> {
+    /// Creates a new barrier that can block a given number of threads.
+    ///
+    /// A barrier will block `n`-1 threads which call [`wait`] and then wake up
+    /// all threads at once when the `n`th thread calls [`wait`]. A Barrier created
+    /// with n = 0 will behave identically to one created with n = 1.
+    ///
+    /// [`wait`]: #method.wait
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use spin;
+    ///
+    /// let barrier = spin::Barrier::new(10);
+    /// ```
+    pub const fn new(n: usize) -> Self {
+        Self {
+            lock: Mutex::new(BarrierState {
+                count: 0,
+                generation_id: 0,
+            }),
+            num_threads: n,
+        }
+    }
+}
+
 impl BarrierWaitResult {
     /// Returns whether this thread from [`wait`] is the "leader thread".
     ///
@@ -187,7 +187,7 @@
     use std::sync::Arc;
     use std::thread;
 
-    use super::Barrier;
+    type Barrier = super::Barrier;
 
     fn use_barrier(n: usize, barrier: Arc<Barrier>) {
         let (tx, rx) = channel();
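For reference, a short usage sketch of the re-parameterised `Barrier`: the crate-root alias keeps the default `Spin` relax strategy, the `barrier` feature is assumed, and the thread count and bodies are illustrative only.

```rust
use std::sync::Arc;
use std::thread;

fn main() {
    let n = 4;
    let barrier = Arc::new(spin::Barrier::new(n));

    let handles: Vec<_> = (0..n)
        .map(|i| {
            let barrier = Arc::clone(&barrier);
            thread::spawn(move || {
                println!("thread {} waiting", i);
                // All threads block here until the n-th arrives; exactly one
                // of them is reported as the "leader".
                let result = barrier.wait();
                println!("thread {} released (leader: {})", i, result.is_leader());
            })
        })
        .collect();

    for handle in handles {
        handle.join().unwrap();
    }
}
```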
diff --git a/src/lazy.rs b/src/lazy.rs
index 619253d..1473db1 100644
--- a/src/lazy.rs
+++ b/src/lazy.rs
@@ -1,10 +1,10 @@
 //! Synchronization primitives for lazy evaluation.
 //!
 //! Implementation adapted from the `SyncLazy` type of the standard library. See:
-//! https://github.com/rust-lang/rust/blob/cae8bc1f2324e31c98cb32b8ed37032fc9cef405/library/std/src/lazy.rs
+//! <https://doc.rust-lang.org/std/lazy/struct.SyncLazy.html>
 
 use core::{cell::Cell, fmt, ops::Deref};
-use crate::Once;
+use crate::{once::Once, RelaxStrategy, Spin};
 
 /// A value which is initialized on the first access.
 ///
@@ -38,12 +38,12 @@
 ///     //   Some("Hoyten")
 /// }
 /// ```
-pub struct Lazy<T, F = fn() -> T> {
-    cell: Once<T>,
+pub struct Lazy<T, F = fn() -> T, R = Spin> {
+    cell: Once<T, R>,
     init: Cell<Option<F>>,
 }
 
-impl<T: fmt::Debug, F> fmt::Debug for Lazy<T, F> {
+impl<T: fmt::Debug, F, R> fmt::Debug for Lazy<T, F, R> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         f.debug_struct("Lazy").field("cell", &self.cell).field("init", &"..").finish()
     }
@@ -57,15 +57,23 @@
 unsafe impl<T, F: Send> Sync for Lazy<T, F> where Once<T>: Sync {}
 // auto-derived `Send` impl is OK.
 
-impl<T, F> Lazy<T, F> {
+impl<T, F, R> Lazy<T, F, R> {
     /// Creates a new lazy value with the given initializing
     /// function.
-    pub const fn new(f: F) -> Lazy<T, F> {
-        Lazy { cell: Once::new(), init: Cell::new(Some(f)) }
+    pub const fn new(f: F) -> Self {
+        Self { cell: Once::new(), init: Cell::new(Some(f)) }
+    }
+    /// Retrieves a mutable pointer to the inner data.
+    ///
+    /// This is especially useful when interfacing with low level code or FFI where the caller
+    /// explicitly knows that it has exclusive access to the inner data. Note that reading from
+    /// this pointer is UB until initialized or directly written to.
+    pub fn as_mut_ptr(&self) -> *mut T {
+        self.cell.as_mut_ptr()
     }
 }
 
-impl<T, F: FnOnce() -> T> Lazy<T, F> {
+impl<T, F: FnOnce() -> T, R: RelaxStrategy> Lazy<T, F, R> {
     /// Forces the evaluation of this lazy value and
     /// returns a reference to result. This is equivalent
     /// to the `Deref` impl, but is explicit.
@@ -80,7 +88,7 @@
     /// assert_eq!(Lazy::force(&lazy), &92);
     /// assert_eq!(&*lazy, &92);
     /// ```
-    pub fn force(this: &Lazy<T, F>) -> &T {
+    pub fn force(this: &Self) -> &T {
         this.cell.call_once(|| match this.init.take() {
             Some(f) => f(),
             None => panic!("Lazy instance has previously been poisoned"),
@@ -88,16 +96,17 @@
     }
 }
 
-impl<T, F: FnOnce() -> T> Deref for Lazy<T, F> {
+impl<T, F: FnOnce() -> T, R: RelaxStrategy> Deref for Lazy<T, F, R> {
     type Target = T;
+
     fn deref(&self) -> &T {
-        Lazy::force(self)
+        Self::force(self)
     }
 }
 
-impl<T: Default> Default for Lazy<T> {
+impl<T: Default, R> Default for Lazy<T, fn() -> T, R> {
     /// Creates a new lazy value using `Default` as the initializing function.
-    fn default() -> Lazy<T> {
-        Lazy::new(T::default)
+    fn default() -> Self {
+        Self::new(T::default)
     }
 }
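A minimal sketch of the `Lazy` pattern this file implements, assuming the `lazy` feature is enabled; `SETTINGS` and its contents are hypothetical:

```rust
use spin::Lazy;

// The initialiser runs on first dereference only; later accesses reuse the
// cached value stored in the inner `Once`.
static SETTINGS: Lazy<Vec<&'static str>> = Lazy::new(|| {
    // Imagine an expensive computation or parse step here.
    vec!["verbose", "color"]
});

fn main() {
    assert_eq!(SETTINGS.len(), 2);
    assert!(SETTINGS.contains(&"color"));
}
```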
diff --git a/src/lib.rs b/src/lib.rs
index d685ff4..92af28a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,4 +1,5 @@
 #![cfg_attr(all(not(feature = "std"), not(test)), no_std)]
+#![cfg_attr(docsrs, feature(doc_cfg))]
 #![deny(missing_docs)]
 
 //! This crate provides [spin-based](https://en.wikipedia.org/wiki/Spinlock) versions of the
@@ -19,6 +20,10 @@
 //!
 //! - Guard leaking
 //!
+//! - Ticket locks
+//!
+//! - Different strategies for dealing with contention
+//!
 //! # Relationship with `std::sync`
 //!
 //! While `spin` is not a drop-in replacement for `std::sync` (and
@@ -30,7 +35,7 @@
 //! - Because spinning does not depend on the thread-driven model of `std::sync`, guards ([`MutexGuard`],
 //!   [`RwLockReadGuard`], [`RwLockWriteGuard`], etc.) may be sent and shared between threads.
 //!
-//! - [`RwLockUpgradableGuard`] supports being upgrades into a [`RwLockWriteGuard`].
+//! - [`RwLockUpgradableGuard`] supports being upgraded into a [`RwLockWriteGuard`].
 //!
 //! - Guards support [leaking](https://doc.rust-lang.org/nomicon/leaking.html).
 //!
@@ -46,7 +51,7 @@
 //!
 //! The crate comes with a few feature flags that you may wish to use.
 //!
-//! - `lock_api` enabled support for [`lock_api`](https://crates.io/crates/lock_api)
+//! - `lock_api` enables support for [`lock_api`](https://crates.io/crates/lock_api)
 //!
 //! - `ticket_mutex` uses a ticket lock for the implementation of `Mutex`
 //!
@@ -55,43 +60,127 @@
 #[cfg(any(test, feature = "std"))]
 extern crate core;
 
-// Choose a different relaxation strategy based on whether `std` is available or not.
-#[cfg(not(feature = "std"))]
-use core::sync::atomic::spin_loop_hint as relax;
-#[cfg(feature = "std")]
-use std::thread::yield_now as relax;
-
+#[cfg(feature = "barrier")]
+#[cfg_attr(docsrs, doc(cfg(feature = "barrier")))]
 pub mod barrier;
+#[cfg(feature = "lazy")]
+#[cfg_attr(docsrs, doc(cfg(feature = "lazy")))]
 pub mod lazy;
+#[cfg(feature = "mutex")]
+#[cfg_attr(docsrs, doc(cfg(feature = "mutex")))]
 pub mod mutex;
+#[cfg(feature = "once")]
+#[cfg_attr(docsrs, doc(cfg(feature = "once")))]
 pub mod once;
-pub mod rw_lock;
+#[cfg(feature = "rwlock")]
+#[cfg_attr(docsrs, doc(cfg(feature = "rwlock")))]
+pub mod rwlock;
+pub mod relax;
 
-pub use barrier::Barrier;
-pub use lazy::Lazy;
-pub use mutex::{Mutex, MutexGuard};
-pub use once::Once;
-pub use rw_lock::{RwLock, RwLockReadGuard, RwLockWriteGuard, RwLockUpgradableGuard};
+#[cfg(feature = "mutex")]
+#[cfg_attr(docsrs, doc(cfg(feature = "mutex")))]
+pub use mutex::MutexGuard;
+#[cfg(feature = "rwlock")]
+#[cfg_attr(docsrs, doc(cfg(feature = "rwlock")))]
+pub use rwlock::RwLockReadGuard;
+pub use relax::{Spin, RelaxStrategy};
+#[cfg(feature = "std")]
+#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
+pub use relax::Yield;
+
+// Avoid confusing inference errors by aliasing away the relax strategy parameter. Users that need to use a different
+// relax strategy can do so by accessing the types through their fully-qualified path. This is a little bit horrible
+// but sadly adding a default type parameter is *still* a breaking change in Rust (for understandable reasons).
+
+/// A primitive that synchronizes the execution of multiple threads. See [`barrier::Barrier`] for documentation.
+///
+/// A note for advanced users: this alias exists to avoid subtle type inference errors due to the default relax
+/// strategy type parameter. If you need a non-default relax strategy, use the fully-qualified path.
+#[cfg(feature = "barrier")]
+#[cfg_attr(docsrs, doc(cfg(feature = "barrier")))]
+pub type Barrier = crate::barrier::Barrier;
+
+/// A value which is initialized on the first access. See [`lazy::Lazy`] for documentation.
+///
+/// A note for advanced users: this alias exists to avoid subtle type inference errors due to the default relax
+/// strategy type parameter. If you need a non-default relax strategy, use the fully-qualified path.
+#[cfg(feature = "lazy")]
+#[cfg_attr(docsrs, doc(cfg(feature = "lazy")))]
+pub type Lazy<T, F = fn() -> T> = crate::lazy::Lazy<T, F>;
+
+/// A lock that provides mutually exclusive data access. See [`mutex::Mutex`] for documentation.
+///
+/// A note for advanced users: this alias exists to avoid subtle type inference errors due to the default relax
+/// strategy type parameter. If you need a non-default relax strategy, use the fully-qualified path.
+#[cfg(feature = "mutex")]
+#[cfg_attr(docsrs, doc(cfg(feature = "mutex")))]
+pub type Mutex<T> = crate::mutex::Mutex<T>;
+
+/// A primitive that provides lazy one-time initialization. See [`once::Once`] for documentation.
+///
+/// A note for advanced users: this alias exists to avoid subtle type inference errors due to the default relax
+/// strategy type parameter. If you need a non-default relax strategy, use the fully-qualified path.
+#[cfg(feature = "once")]
+#[cfg_attr(docsrs, doc(cfg(feature = "once")))]
+pub type Once<T = ()> = crate::once::Once<T>;
+
+/// A lock that provides data access to either one writer or many readers. See [`rwlock::RwLock`] for documentation.
+///
+/// A note for advanced users: this alias exists to avoid subtle type inference errors due to the default relax
+/// strategy type parameter. If you need a non-default relax strategy, use the fully-qualified path.
+#[cfg(feature = "rwlock")]
+#[cfg_attr(docsrs, doc(cfg(feature = "rwlock")))]
+pub type RwLock<T> = crate::rwlock::RwLock<T>;
+
+/// A guard that provides immutable data access but can be upgraded to [`RwLockWriteGuard`]. See
+/// [`rwlock::RwLockUpgradableGuard`] for documentation.
+///
+/// A note for advanced users: this alias exists to avoid subtle type inference errors due to the default relax
+/// strategy type parameter. If you need a non-default relax strategy, use the fully-qualified path.
+#[cfg(feature = "rwlock")]
+#[cfg_attr(docsrs, doc(cfg(feature = "rwlock")))]
+pub type RwLockUpgradableGuard<'a, T> = crate::rwlock::RwLockUpgradableGuard<'a, T>;
+
+/// A guard that provides mutable data access. See [`rwlock::RwLockWriteGuard`] for documentation.
+///
+/// A note for advanced users: this alias exists to avoid subtle type inference errors due to the default relax
+/// strategy type parameter. If you need a non-default relax strategy, use the fully-qualified path.
+#[cfg(feature = "rwlock")]
+#[cfg_attr(docsrs, doc(cfg(feature = "rwlock")))]
+pub type RwLockWriteGuard<'a, T> = crate::rwlock::RwLockWriteGuard<'a, T>;
 
 /// Spin synchronisation primitives, but compatible with [`lock_api`](https://crates.io/crates/lock_api).
-#[cfg(feature = "lock_api1")]
+#[cfg(feature = "lock_api")]
+#[cfg_attr(docsrs, doc(cfg(feature = "lock_api")))]
 pub mod lock_api {
     /// A lock that provides mutually exclusive data access (compatible with [`lock_api`](https://crates.io/crates/lock_api)).
-    pub type Mutex<T> = lock_api::Mutex<crate::Mutex<()>, T>;
+    #[cfg(feature = "mutex")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "mutex")))]
+    pub type Mutex<T> = lock_api_crate::Mutex<crate::Mutex<()>, T>;
 
     /// A guard that provides mutable data access (compatible with [`lock_api`](https://crates.io/crates/lock_api)).
-    pub type MutexGuard<'a, T> = lock_api::MutexGuard<'a, crate::Mutex<()>, T>;
+    #[cfg(feature = "mutex")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "mutex")))]
+    pub type MutexGuard<'a, T> = lock_api_crate::MutexGuard<'a, crate::Mutex<()>, T>;
 
     /// A lock that provides data access to either one writer or many readers (compatible with [`lock_api`](https://crates.io/crates/lock_api)).
-    pub type RwLock<T> = lock_api::RwLock<crate::RwLock<()>, T>;
+    #[cfg(feature = "rwlock")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "rwlock")))]
+    pub type RwLock<T> = lock_api_crate::RwLock<crate::RwLock<()>, T>;
 
     /// A guard that provides immutable data access (compatible with [`lock_api`](https://crates.io/crates/lock_api)).
-    pub type RwLockReadGuard<'a, T> = lock_api::RwLockReadGuard<'a, crate::RwLock<()>, T>;
+    #[cfg(feature = "rwlock")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "rwlock")))]
+    pub type RwLockReadGuard<'a, T> = lock_api_crate::RwLockReadGuard<'a, crate::RwLock<()>, T>;
 
     /// A guard that provides mutable data access (compatible with [`lock_api`](https://crates.io/crates/lock_api)).
-    pub type RwLockWriteGuard<'a, T> = lock_api::RwLockWriteGuard<'a, crate::RwLock<()>, T>;
+    #[cfg(feature = "rwlock")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "rwlock")))]
+    pub type RwLockWriteGuard<'a, T> = lock_api_crate::RwLockWriteGuard<'a, crate::RwLock<()>, T>;
 
     /// A guard that provides immutable data access but can be upgraded to [`RwLockWriteGuard`] (compatible with [`lock_api`](https://crates.io/crates/lock_api)).
+    #[cfg(feature = "rwlock")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "rwlock")))]
     pub type RwLockUpgradableReadGuard<'a, T> =
-        lock_api::RwLockUpgradableReadGuard<'a, crate::RwLock<()>, T>;
+        lock_api_crate::RwLockUpgradableReadGuard<'a, crate::RwLock<()>, T>;
 }
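To illustrate the fully-qualified-path escape hatch the comment above describes, a sketch assuming the `std` and `spin_mutex` features (which make `spin::relax::Yield` available); `YieldingMutex` is a made-up alias:

```rust
use spin::relax::Yield;

// The crate-root `spin::Mutex<T>` alias is fixed to the `Spin` strategy; a
// yielding mutex has to be spelled out via the module path instead.
type YieldingMutex<T> = spin::mutex::Mutex<T, Yield>;

fn main() {
    let m: YieldingMutex<u32> = YieldingMutex::new(10);
    {
        // Contended callers of `lock()` now yield to the OS scheduler
        // instead of spinning in place.
        let mut guard = m.lock();
        *guard += 1;
    }
    assert_eq!(*m.lock(), 11);
}
```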
diff --git a/src/mutex.rs b/src/mutex.rs
index 4fa5add..2335051 100644
--- a/src/mutex.rs
+++ b/src/mutex.rs
@@ -4,39 +4,52 @@
 //! If it's enabled, [`TicketMutex`] and [`TicketMutexGuard`] will be re-exported as [`Mutex`]
 //! and [`MutexGuard`], otherwise the [`SpinMutex`] and guard will be re-exported.
 //!
-//! `ticket_mutex` is enabled by default.
+//! `ticket_mutex` is disabled by default.
 //!
 //! [`Mutex`]: ../struct.Mutex.html
 //! [`MutexGuard`]: ../struct.MutexGuard.html
 //! [`TicketMutex`]: ./struct.TicketMutex.html
 //! [`TicketMutexGuard`]: ./struct.TicketMutexGuard.html
 //! [`SpinMutex`]: ./struct.SpinMutex.html
+//! [`SpinMutexGuard`]: ./struct.SpinMutexGuard.html
 
-mod spin;
-pub use self::spin::*;
+#[cfg(feature = "spin_mutex")]
+#[cfg_attr(docsrs, doc(cfg(feature = "spin_mutex")))]
+pub mod spin;
+#[cfg(feature = "spin_mutex")]
+#[cfg_attr(docsrs, doc(cfg(feature = "spin_mutex")))]
+pub use self::spin::{SpinMutex, SpinMutexGuard};
 
-mod ticket;
-pub use self::ticket::*;
+#[cfg(feature = "ticket_mutex")]
+#[cfg_attr(docsrs, doc(cfg(feature = "ticket_mutex")))]
+pub mod ticket;
+#[cfg(feature = "ticket_mutex")]
+#[cfg_attr(docsrs, doc(cfg(feature = "ticket_mutex")))]
+pub use self::ticket::{TicketMutex, TicketMutexGuard};
 
 use core::{
     fmt,
     ops::{Deref, DerefMut},
 };
+use crate::{RelaxStrategy, Spin};
 
-#[cfg(feature = "ticket_mutex")]
-type InnerMutex<T> = TicketMutex<T>;
-#[cfg(feature = "ticket_mutex")]
-type InnerMutexGuard<'a, T> = TicketMutexGuard<'a, T>;
+#[cfg(all(not(feature = "spin_mutex"), not(feature = "use_ticket_mutex")))]
+compile_error!("The `mutex` feature flag was used (perhaps through another feature?) without either `spin_mutex` or `use_ticket_mutex`. One of these is required.");
 
-#[cfg(not(feature = "ticket_mutex"))]
-type InnerMutex<T> = SpinMutex<T>;
-#[cfg(not(feature = "ticket_mutex"))]
-type InnerMutexGuard<'a, T> = SpinMutexGuard<'a, T>;
+#[cfg(all(not(feature = "use_ticket_mutex"), feature = "spin_mutex"))]
+type InnerMutex<T, R> = self::spin::SpinMutex<T, R>;
+#[cfg(all(not(feature = "use_ticket_mutex"), feature = "spin_mutex"))]
+type InnerMutexGuard<'a, T> = self::spin::SpinMutexGuard<'a, T>;
+
+#[cfg(feature = "use_ticket_mutex")]
+type InnerMutex<T, R> = self::ticket::TicketMutex<T, R>;
+#[cfg(feature = "use_ticket_mutex")]
+type InnerMutexGuard<'a, T> = self::ticket::TicketMutexGuard<'a, T>;
 
 /// A spin-based lock providing mutually exclusive access to data.
 ///
-/// The implementation uses either a [`TicketMutex`] or a regular [`SpinMutex`] depending on whether the `ticket_mutex`
-/// feature flag is enabled.
+/// The implementation uses either a ticket mutex or a regular spin mutex depending on whether the `spin_mutex` or
+/// `use_ticket_mutex` feature flag is enabled.
 ///
 /// # Example
 ///
@@ -83,15 +96,12 @@
 /// let answer = { *spin_mutex.lock() };
 /// assert_eq!(answer, thread_count);
 /// ```
-pub struct Mutex<T: ?Sized> {
-    #[cfg(feature = "ticket_mutex")]
-    inner: TicketMutex<T>,
-    #[cfg(not(feature = "ticket_mutex"))]
-    inner: SpinMutex<T>,
+pub struct Mutex<T: ?Sized, R = Spin> {
+    inner: InnerMutex<T, R>,
 }
 
-unsafe impl<T: ?Sized + Send> Sync for Mutex<T> {}
-unsafe impl<T: ?Sized + Send> Send for Mutex<T> {}
+unsafe impl<T: ?Sized + Send, R> Sync for Mutex<T, R> {}
+unsafe impl<T: ?Sized + Send, R> Send for Mutex<T, R> {}
 
 /// A generic guard that will protect some data access and
 /// uses either a ticket lock or a normal spin mutex.
@@ -101,13 +111,10 @@
 /// [`TicketMutexGuard`]: ./struct.TicketMutexGuard.html
 /// [`SpinMutexGuard`]: ./struct.SpinMutexGuard.html
 pub struct MutexGuard<'a, T: 'a + ?Sized> {
-    #[cfg(feature = "ticket_mutex")]
-    inner: TicketMutexGuard<'a, T>,
-    #[cfg(not(feature = "ticket_mutex"))]
-    inner: SpinMutexGuard<'a, T>,
+    inner: InnerMutexGuard<'a, T>,
 }
 
-impl<T> Mutex<T> {
+impl<T, R> Mutex<T, R> {
     /// Creates a new [`Mutex`] wrapping the supplied data.
     ///
     /// # Example
@@ -142,18 +149,7 @@
     }
 }
 
-impl<T: ?Sized> Mutex<T> {
-    /// Returns `true` if the lock is currently held.
-    ///
-    /// # Safety
-    ///
-    /// This function provides no synchronization guarantees and so its result should be considered 'out of date'
-    /// the instant it is called. Do not use it for synchronization purposes. However, it may be useful as a heuristic.
-    #[inline(always)]
-    pub fn is_locked(&self) -> bool {
-        self.inner.is_locked()
-    }
-
+impl<T: ?Sized, R: RelaxStrategy> Mutex<T, R> {
     /// Locks the [`Mutex`] and returns a guard that permits access to the inner data.
     ///
     /// The returned value may be dereferenced for data access
@@ -174,6 +170,19 @@
             inner: self.inner.lock(),
         }
     }
+}
+
+impl<T: ?Sized, R> Mutex<T, R> {
+    /// Returns `true` if the lock is currently held.
+    ///
+    /// # Safety
+    ///
+    /// This function provides no synchronization guarantees and so its result should be considered 'out of date'
+    /// the instant it is called. Do not use it for synchronization purposes. However, it may be useful as a heuristic.
+    #[inline(always)]
+    pub fn is_locked(&self) -> bool {
+        self.inner.is_locked()
+    }
 
     /// Force unlock this [`Mutex`].
     ///
@@ -227,19 +236,19 @@
     }
 }
 
-impl<T: ?Sized + fmt::Debug> fmt::Debug for Mutex<T> {
+impl<T: ?Sized + fmt::Debug, R> fmt::Debug for Mutex<T, R> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         fmt::Debug::fmt(&self.inner, f)
     }
 }
 
-impl<T: ?Sized + Default> Default for Mutex<T> {
-    fn default() -> Mutex<T> {
+impl<T: ?Sized + Default, R> Default for Mutex<T, R> {
+    fn default() -> Self {
         Self::new(Default::default())
     }
 }
 
-impl<T> From<T> for Mutex<T> {
+impl<T, R> From<T> for Mutex<T, R> {
     fn from(data: T) -> Self {
         Self::new(data)
     }
@@ -289,9 +298,9 @@
     }
 }
 
-#[cfg(feature = "lock_api1")]
-unsafe impl lock_api::RawMutex for Mutex<()> {
-    type GuardMarker = lock_api::GuardSend;
+#[cfg(feature = "lock_api")]
+unsafe impl<R: RelaxStrategy> lock_api_crate::RawMutex for Mutex<(), R> {
+    type GuardMarker = lock_api_crate::GuardSend;
 
     const INIT: Self = Self::new(());
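A hedged sketch of how the `lock_api`-compatible alias builds on this `RawMutex` impl, assuming the `lock_api`, `mutex` and `spin_mutex` features are enabled:

```rust
// `spin::lock_api::Mutex<T>` is `lock_api::Mutex` with `spin::Mutex<()>` as
// the raw lock, so the guards returned here are ordinary `lock_api` guards.
use spin::lock_api::Mutex;

fn main() {
    let m = Mutex::new(5_u32);
    *m.lock() += 1;
    assert_eq!(*m.lock(), 6);
}
```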
 
diff --git a/src/mutex/spin.rs b/src/mutex/spin.rs
index 36d65fd..fce3eb9 100644
--- a/src/mutex/spin.rs
+++ b/src/mutex/spin.rs
@@ -1,9 +1,16 @@
+//! A naïve spinning mutex.
+//!
+//! Waiting threads hammer an atomic variable until it becomes available. Best-case latency is low, but worst-case
+//! latency is theoretically infinite.
+
 use core::{
     cell::UnsafeCell,
     fmt,
     ops::{Deref, DerefMut},
     sync::atomic::{AtomicBool, Ordering},
+    marker::PhantomData,
 };
+use crate::{RelaxStrategy, Spin};
 
 /// A [spin lock](https://en.m.wikipedia.org/wiki/Spinlock) providing mutually exclusive access to data.
 ///
@@ -12,7 +19,7 @@
 /// ```
 /// use spin;
 ///
-/// let lock = spin::mutex::SpinMutex::new(0);
+/// let lock = spin::mutex::SpinMutex::<_>::new(0);
 ///
 /// // Modify the data
 /// *lock.lock() = 2;
@@ -29,7 +36,7 @@
 /// use std::sync::{Arc, Barrier};
 ///
 /// let thread_count = 1000;
-/// let spin_mutex = Arc::new(spin::mutex::SpinMutex::new(0));
+/// let spin_mutex = Arc::new(spin::mutex::SpinMutex::<_>::new(0));
 ///
 /// // We use a barrier to ensure the readout happens after all writing
 /// let barrier = Arc::new(Barrier::new(thread_count + 1));
@@ -52,7 +59,8 @@
 /// let answer = { *spin_mutex.lock() };
 /// assert_eq!(answer, thread_count);
 /// ```
-pub struct SpinMutex<T: ?Sized> {
+pub struct SpinMutex<T: ?Sized, R = Spin> {
+    phantom: PhantomData<R>,
     pub(crate) lock: AtomicBool,
     data: UnsafeCell<T>,
 }
@@ -69,7 +77,7 @@
 unsafe impl<T: ?Sized + Send> Sync for SpinMutex<T> {}
 unsafe impl<T: ?Sized + Send> Send for SpinMutex<T> {}
 
-impl<T> SpinMutex<T> {
+impl<T, R> SpinMutex<T, R> {
     /// Creates a new [`SpinMutex`] wrapping the supplied data.
     ///
     /// # Example
@@ -77,7 +85,7 @@
     /// ```
     /// use spin::mutex::SpinMutex;
     ///
-    /// static MUTEX: SpinMutex<()> = SpinMutex::new(());
+    /// static MUTEX: SpinMutex<()> = SpinMutex::<_>::new(());
     ///
     /// fn demo() {
     ///     let lock = MUTEX.lock();
@@ -86,10 +94,11 @@
     /// }
     /// ```
     #[inline(always)]
-    pub const fn new(user_data: T) -> SpinMutex<T> {
+    pub const fn new(data: T) -> Self {
         SpinMutex {
             lock: AtomicBool::new(false),
-            data: UnsafeCell::new(user_data),
+            data: UnsafeCell::new(data),
+            phantom: PhantomData,
         }
     }
 
@@ -98,7 +107,7 @@
     /// # Example
     ///
     /// ```
-    /// let lock = spin::mutex::SpinMutex::new(42);
+    /// let lock = spin::mutex::SpinMutex::<_>::new(42);
     /// assert_eq!(42, lock.into_inner());
     /// ```
     #[inline(always)]
@@ -108,27 +117,42 @@
         let SpinMutex { data, .. } = self;
         data.into_inner()
     }
+
+    /// Returns a mutable pointer to the underlying data.
+    ///
+    /// This is mostly meant to be used for applications which require manual unlocking, but where
+    /// storing both the lock and the pointer to the inner data gets inefficient.
+    ///
+    /// # Example
+    /// ```
+    /// let lock = spin::mutex::SpinMutex::<_>::new(42);
+    ///
+    /// unsafe {
+    ///     core::mem::forget(lock.lock());
+    ///     
+    ///     assert_eq!(lock.as_mut_ptr().read(), 42);
+    ///     lock.as_mut_ptr().write(58);
+    ///
+    ///     lock.force_unlock();
+    /// }
+    ///
+    /// assert_eq!(*lock.lock(), 58);
+    ///
+    /// ```
+    #[inline(always)]
+    pub fn as_mut_ptr(&self) -> *mut T {
+        self.data.get()
+    }
 }
 
-impl<T: ?Sized> SpinMutex<T> {
-    /// Returns `true` if the lock is currently held.
-    ///
-    /// # Safety
-    ///
-    /// This function provides no synchronization guarantees and so its result should be considered 'out of date'
-    /// the instant it is called. Do not use it for synchronization purposes. However, it may be useful as a heuristic.
-    #[inline(always)]
-    pub fn is_locked(&self) -> bool {
-        self.lock.load(Ordering::Relaxed)
-    }
-
+impl<T: ?Sized, R: RelaxStrategy> SpinMutex<T, R> {
     /// Locks the [`SpinMutex`] and returns a guard that permits access to the inner data.
     ///
     /// The returned value may be dereferenced for data access
     /// and the lock will be dropped when the guard falls out of scope.
     ///
     /// ```
-    /// let lock = spin::mutex::SpinMutex::new(0);
+    /// let lock = spin::mutex::SpinMutex::<_>::new(0);
     /// {
     ///     let mut data = lock.lock();
     ///     // The lock is now locked and the data can be accessed
@@ -143,7 +167,7 @@
         while self.lock.compare_exchange_weak(false, true, Ordering::Acquire, Ordering::Relaxed).is_err() {
             // Wait until the lock looks unlocked before retrying
             while self.is_locked() {
-                crate::relax();
+                R::relax();
             }
         }
 
@@ -152,6 +176,19 @@
             data: unsafe { &mut *self.data.get() },
         }
     }
+}
+
+impl<T: ?Sized, R> SpinMutex<T, R> {
+    /// Returns `true` if the lock is currently held.
+    ///
+    /// # Safety
+    ///
+    /// This function provides no synchronization guarantees and so its result should be considered 'out of date'
+    /// the instant it is called. Do not use it for synchronization purposes. However, it may be useful as a heuristic.
+    #[inline(always)]
+    pub fn is_locked(&self) -> bool {
+        self.lock.load(Ordering::Relaxed)
+    }
 
     /// Force unlock this [`SpinMutex`].
     ///
@@ -170,7 +207,7 @@
     /// # Example
     ///
     /// ```
-    /// let lock = spin::mutex::SpinMutex::new(42);
+    /// let lock = spin::mutex::SpinMutex::<_>::new(42);
     ///
     /// let maybe_guard = lock.try_lock();
     /// assert!(maybe_guard.is_some());
@@ -202,7 +239,7 @@
     /// # Example
     ///
     /// ```
-    /// let mut lock = spin::mutex::SpinMutex::new(0);
+    /// let mut lock = spin::mutex::SpinMutex::<_>::new(0);
     /// *lock.get_mut() = 10;
     /// assert_eq!(*lock.lock(), 10);
     /// ```
@@ -214,7 +251,7 @@
     }
 }
 
-impl<T: ?Sized + fmt::Debug> fmt::Debug for SpinMutex<T> {
+impl<T: ?Sized + fmt::Debug, R> fmt::Debug for SpinMutex<T, R> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         match self.try_lock() {
             Some(guard) => write!(f, "Mutex {{ data: ")
@@ -225,13 +262,13 @@
     }
 }
 
-impl<T: ?Sized + Default> Default for SpinMutex<T> {
-    fn default() -> SpinMutex<T> {
-        SpinMutex::new(Default::default())
+impl<T: ?Sized + Default, R> Default for SpinMutex<T, R> {
+    fn default() -> Self {
+        Self::new(Default::default())
     }
 }
 
-impl<T> From<T> for SpinMutex<T> {
+impl<T, R> From<T> for SpinMutex<T, R> {
     fn from(data: T) -> Self {
         Self::new(data)
     }
@@ -243,7 +280,7 @@
     /// Note that this function will permanently lock the original [`SpinMutex`].
     ///
     /// ```
-    /// let mylock = spin::mutex::SpinMutex::new(0);
+    /// let mylock = spin::mutex::SpinMutex::<_>::new(0);
     ///
     /// let data: &mut i32 = spin::mutex::SpinMutexGuard::leak(mylock.lock());
     ///
@@ -290,9 +327,9 @@
     }
 }
 
-#[cfg(feature = "lock_api1")]
-unsafe impl lock_api::RawMutex for SpinMutex<()> {
-    type GuardMarker = lock_api::GuardSend;
+#[cfg(feature = "lock_api")]
+unsafe impl<R: RelaxStrategy> lock_api_crate::RawMutex for SpinMutex<(), R> {
+    type GuardMarker = lock_api_crate::GuardSend;
 
     const INIT: Self = Self::new(());
 
@@ -324,21 +361,21 @@
     use std::sync::Arc;
     use std::thread;
 
-    use super::*;
+    type SpinMutex<T> = super::SpinMutex<T>;
 
     #[derive(Eq, PartialEq, Debug)]
     struct NonCopy(i32);
 
     #[test]
     fn smoke() {
-        let m = SpinMutex::new(());
+        let m = SpinMutex::<_>::new(());
         drop(m.lock());
         drop(m.lock());
     }
 
     #[test]
     fn lots_and_lots() {
-        static M: SpinMutex<()> = SpinMutex::new(());
+        static M: SpinMutex<()> = SpinMutex::<_>::new(());
         static mut CNT: u32 = 0;
         const J: u32 = 1000;
         const K: u32 = 3;
@@ -375,7 +412,7 @@
 
     #[test]
     fn try_lock() {
-        let mutex = SpinMutex::new(42);
+        let mutex = SpinMutex::<_>::new(42);
 
         // First lock succeeds
         let a = mutex.try_lock();
@@ -393,7 +430,7 @@
 
     #[test]
     fn test_into_inner() {
-        let m = SpinMutex::new(NonCopy(10));
+        let m = SpinMutex::<_>::new(NonCopy(10));
         assert_eq!(m.into_inner(), NonCopy(10));
     }
 
@@ -406,7 +443,7 @@
             }
         }
         let num_drops = Arc::new(AtomicUsize::new(0));
-        let m = SpinMutex::new(Foo(num_drops.clone()));
+        let m = SpinMutex::<_>::new(Foo(num_drops.clone()));
         assert_eq!(num_drops.load(Ordering::SeqCst), 0);
         {
             let _inner = m.into_inner();
@@ -419,8 +456,8 @@
     fn test_mutex_arc_nested() {
         // Tests nested mutexes and access
         // to underlying data.
-        let arc = Arc::new(SpinMutex::new(1));
-        let arc2 = Arc::new(SpinMutex::new(arc));
+        let arc = Arc::new(SpinMutex::<_>::new(1));
+        let arc2 = Arc::new(SpinMutex::<_>::new(arc));
         let (tx, rx) = channel();
         let _t = thread::spawn(move || {
             let lock = arc2.lock();
@@ -434,7 +471,7 @@
     #[test]
     #[ignore = "Android uses panic_abort"]
     fn test_mutex_arc_access_in_unwind() {
-        let arc = Arc::new(SpinMutex::new(1));
+        let arc = Arc::new(SpinMutex::<_>::new(1));
         let arc2 = arc.clone();
         let _ = thread::spawn(move || -> () {
             struct Unwinder {
@@ -455,7 +492,7 @@
 
     #[test]
     fn test_mutex_unsized() {
-        let mutex: &SpinMutex<[i32]> = &SpinMutex::new([1, 2, 3]);
+        let mutex: &SpinMutex<[i32]> = &SpinMutex::<_>::new([1, 2, 3]);
         {
             let b = &mut *mutex.lock();
             b[0] = 4;
@@ -467,7 +504,7 @@
 
     #[test]
     fn test_mutex_force_lock() {
-        let lock = SpinMutex::new(());
+        let lock = SpinMutex::<_>::new(());
         ::std::mem::forget(lock.lock());
         unsafe {
             lock.force_unlock();
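To show how the new `R::relax()` hook above can be customised, a sketch with a hypothetical `BurstHint` strategy, assuming `RelaxStrategy` is the single no-argument-method trait re-exported at the crate root and the `spin_mutex` feature is enabled:

```rust
use spin::mutex::SpinMutex;
use spin::RelaxStrategy;

// A hypothetical strategy: emit the architecture's spin-loop hint a few times
// per retry instead of once.
struct BurstHint;

impl RelaxStrategy for BurstHint {
    fn relax() {
        for _ in 0..8 {
            core::hint::spin_loop();
        }
    }
}

fn main() {
    let lock: SpinMutex<u32, BurstHint> = SpinMutex::new(0);
    *lock.lock() += 1; // contended waiters would call BurstHint::relax()
    assert_eq!(*lock.lock(), 1);
}
```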
diff --git a/src/mutex/ticket.rs b/src/mutex/ticket.rs
index 4186fb8..128b434 100644
--- a/src/mutex/ticket.rs
+++ b/src/mutex/ticket.rs
@@ -1,9 +1,18 @@
+//! A ticket-based mutex.
+//!
+//! Waiting threads take a 'ticket' from the lock in the order they arrive and gain access to the lock when their
+//! ticket is next in the queue. Best-case latency is slightly worse than a regular spinning mutex, but worst-case
+//! latency is infinitely better. Waiting threads simply need to wait for all threads that come before them in the
+//! queue to finish.
+
 use core::{
     cell::UnsafeCell,
     fmt,
     ops::{Deref, DerefMut},
     sync::atomic::{AtomicUsize, Ordering},
+    marker::PhantomData,
 };
+use crate::{RelaxStrategy, Spin};
 
 /// A spin-based [ticket lock](https://en.wikipedia.org/wiki/Ticket_lock) providing mutually exclusive access to data.
 ///
@@ -19,7 +28,7 @@
 /// ```
 /// use spin;
 ///
-/// let lock = spin::mutex::TicketMutex::new(0);
+/// let lock = spin::mutex::TicketMutex::<_>::new(0);
 ///
 /// // Modify the data
 /// *lock.lock() = 2;
@@ -36,7 +45,7 @@
 /// use std::sync::{Arc, Barrier};
 ///
 /// let thread_count = 1000;
-/// let spin_mutex = Arc::new(spin::mutex::TicketMutex::new(0));
+/// let spin_mutex = Arc::new(spin::mutex::TicketMutex::<_>::new(0));
 ///
 /// // We use a barrier to ensure the readout happens after all writing
 /// let barrier = Arc::new(Barrier::new(thread_count + 1));
@@ -59,10 +68,11 @@
 /// let answer = { *spin_mutex.lock() };
 /// assert_eq!(answer, thread_count);
 /// ```
-pub struct TicketMutex<T: ?Sized> {
-    pub(crate) next_ticket: AtomicUsize,
-    pub(crate) next_serving: AtomicUsize,
-    value: UnsafeCell<T>,
+pub struct TicketMutex<T: ?Sized, R = Spin> {
+    phantom: PhantomData<R>,
+    next_ticket: AtomicUsize,
+    next_serving: AtomicUsize,
+    data: UnsafeCell<T>,
 }
 
 /// A guard that protects some data.
@@ -71,13 +81,13 @@
 pub struct TicketMutexGuard<'a, T: ?Sized + 'a> {
     next_serving: &'a AtomicUsize,
     ticket: usize,
-    value: &'a mut T,
+    data: &'a mut T,
 }
 
 unsafe impl<T: ?Sized + Send> Sync for TicketMutex<T> {}
 unsafe impl<T: ?Sized + Send> Send for TicketMutex<T> {}
 
-impl<T> TicketMutex<T> {
+impl<T, R> TicketMutex<T, R> {
     /// Creates a new [`TicketMutex`] wrapping the supplied data.
     ///
     /// # Example
@@ -85,7 +95,7 @@
     /// ```
     /// use spin::mutex::TicketMutex;
     ///
-    /// static MUTEX: TicketMutex<()> = TicketMutex::new(());
+    /// static MUTEX: TicketMutex<()> = TicketMutex::<_>::new(());
     ///
     /// fn demo() {
     ///     let lock = MUTEX.lock();
@@ -94,11 +104,12 @@
     /// }
     /// ```
     #[inline(always)]
-    pub const fn new(value: T) -> Self {
+    pub const fn new(data: T) -> Self {
         Self {
+            phantom: PhantomData,
             next_ticket: AtomicUsize::new(0),
             next_serving: AtomicUsize::new(0),
-            value: UnsafeCell::new(value),
+            data: UnsafeCell::new(data),
         }
     }
 
@@ -107,16 +118,41 @@
     /// # Example
     ///
     /// ```
-    /// let lock = spin::mutex::TicketMutex::new(42);
+    /// let lock = spin::mutex::TicketMutex::<_>::new(42);
     /// assert_eq!(42, lock.into_inner());
     /// ```
     #[inline(always)]
     pub fn into_inner(self) -> T {
-        self.value.into_inner()
+        self.data.into_inner()
+    }
+    /// Returns a mutable pointer to the underlying data.
+    ///
+    /// This is mostly meant to be used for applications which require manual unlocking, but where
+    /// storing both the lock and the pointer to the inner data gets inefficient.
+    ///
+    /// # Example
+    /// ```
+    /// let lock = spin::mutex::TicketMutex::<_>::new(42);
+    ///
+    /// unsafe {
+    ///     core::mem::forget(lock.lock());
+    ///     
+    ///     assert_eq!(lock.as_mut_ptr().read(), 42);
+    ///     lock.as_mut_ptr().write(58);
+    ///
+    ///     lock.force_unlock();
+    /// }
+    ///
+    /// assert_eq!(*lock.lock(), 58);
+    ///
+    /// ```
+    #[inline(always)]
+    pub fn as_mut_ptr(&self) -> *mut T {
+        self.data.get()
     }
 }
 
-impl<T: ?Sized + fmt::Debug> fmt::Debug for TicketMutex<T> {
+impl<T: ?Sized + fmt::Debug, R> fmt::Debug for TicketMutex<T, R> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         match self.try_lock() {
             Some(guard) => write!(f, "Mutex {{ data: ")
@@ -127,26 +163,14 @@
     }
 }
 
-impl<T: ?Sized> TicketMutex<T> {
-    /// Returns `true` if the lock is currently held.
-    ///
-    /// # Safety
-    ///
-    /// This function provides no synchronization guarantees and so its result should be considered 'out of date'
-    /// the instant it is called. Do not use it for synchronization purposes. However, it may be useful as a heuristic.
-    #[inline(always)]
-    pub fn is_locked(&self) -> bool {
-        let ticket = self.next_ticket.load(Ordering::Relaxed);
-        self.next_serving.load(Ordering::Relaxed) != ticket
-    }
-
+impl<T: ?Sized, R: RelaxStrategy> TicketMutex<T, R> {
     /// Locks the [`TicketMutex`] and returns a guard that permits access to the inner data.
     ///
-    /// The returned value may be dereferenced for data access
+    /// The returned data may be dereferenced for data access
     /// and the lock will be dropped when the guard falls out of scope.
     ///
     /// ```
-    /// let lock = spin::mutex::TicketMutex::new(0);
+    /// let lock = spin::mutex::TicketMutex::<_>::new(0);
     /// {
     ///     let mut data = lock.lock();
     ///     // The lock is now locked and the data can be accessed
@@ -159,7 +183,7 @@
         let ticket = self.next_ticket.fetch_add(1, Ordering::Relaxed);
 
         while self.next_serving.load(Ordering::Acquire) != ticket {
-            crate::relax();
+            R::relax();
         }
 
         TicketMutexGuard {
@@ -167,13 +191,27 @@
             ticket,
             // Safety
             // We know that we are the next ticket to be served,
-            // so there's no other thread accessing the value.
+            // so there's no other thread accessing the data.
             //
             // Every other thread has another ticket number so it's
             // definitely stuck in the spin loop above.
-            value: unsafe { &mut *self.value.get() },
+            data: unsafe { &mut *self.data.get() },
         }
     }
+}
+
+impl<T: ?Sized, R> TicketMutex<T, R> {
+    /// Returns `true` if the lock is currently held.
+    ///
+    /// # Safety
+    ///
+    /// This function provides no synchronization guarantees and so its result should be considered 'out of date'
+    /// the instant it is called. Do not use it for synchronization purposes. However, it may be useful as a heuristic.
+    #[inline(always)]
+    pub fn is_locked(&self) -> bool {
+        let ticket = self.next_ticket.load(Ordering::Relaxed);
+        self.next_serving.load(Ordering::Relaxed) != ticket
+    }
 
     /// Force unlock this [`TicketMutex`], by serving the next ticket.
     ///
@@ -192,7 +230,7 @@
     /// # Example
     ///
     /// ```
-    /// let lock = spin::mutex::TicketMutex::new(42);
+    /// let lock = spin::mutex::TicketMutex::<_>::new(42);
     ///
     /// let maybe_guard = lock.try_lock();
     /// assert!(maybe_guard.is_some());
@@ -219,8 +257,8 @@
             // Safety
             // We have a ticket that is equal to the next_serving ticket, so we know:
             // - that no other thread can have the same ticket id as this thread
-            // - that we are the next one to be served so we have exclusive access to the value
-            value: unsafe { &mut *self.value.get() },
+            // - that we are the next one to be served so we have exclusive access to the data
+            data: unsafe { &mut *self.data.get() },
         })
     }
 
@@ -233,7 +271,7 @@
     /// # Example
     ///
     /// ```
-    /// let mut lock = spin::mutex::TicketMutex::new(0);
+    /// let mut lock = spin::mutex::TicketMutex::<_>::new(0);
     /// *lock.get_mut() = 10;
     /// assert_eq!(*lock.lock(), 10);
     /// ```
@@ -241,20 +279,20 @@
     pub fn get_mut(&mut self) -> &mut T {
         // Safety:
         // We know that there are no other references to `self`,
-        // so it's safe to return a exclusive reference to the value.
-        unsafe { &mut *self.value.get() }
+        // so it's safe to return an exclusive reference to the data.
+        unsafe { &mut *self.data.get() }
     }
 }
 
-impl<T: ?Sized + Default> Default for TicketMutex<T> {
-    fn default() -> TicketMutex<T> {
-        TicketMutex::new(Default::default())
+impl<T: ?Sized + Default, R> Default for TicketMutex<T, R> {
+    fn default() -> Self {
+        Self::new(Default::default())
     }
 }
 
-impl<T> From<T> for TicketMutex<T> {
-    fn from(value: T) -> Self {
-        Self::new(value)
+impl<T, R> From<T> for TicketMutex<T, R> {
+    fn from(data: T) -> Self {
+        Self::new(data)
     }
 }
 
@@ -264,7 +302,7 @@
     /// Note that this function will permanently lock the original [`TicketMutex`].
     ///
     /// ```
-    /// let mylock = spin::mutex::TicketMutex::new(0);
+    /// let mylock = spin::mutex::TicketMutex::<_>::new(0);
     ///
     /// let data: &mut i32 = spin::mutex::TicketMutexGuard::leak(mylock.lock());
     ///
@@ -273,7 +311,7 @@
     /// ```
     #[inline(always)]
     pub fn leak(this: Self) -> &'a mut T {
-        let data = this.value as *mut _; // Keep it in pointer form temporarily to avoid double-aliasing
+        let data = this.data as *mut _; // Keep it in pointer form temporarily to avoid double-aliasing
         core::mem::forget(this);
         unsafe { &mut *data }
     }
@@ -294,13 +332,13 @@
 impl<'a, T: ?Sized> Deref for TicketMutexGuard<'a, T> {
     type Target = T;
     fn deref(&self) -> &T {
-        self.value
+        self.data
     }
 }
 
 impl<'a, T: ?Sized> DerefMut for TicketMutexGuard<'a, T> {
     fn deref_mut(&mut self) -> &mut T {
-        self.value
+        self.data
     }
 }
 
@@ -311,6 +349,31 @@
     }
 }
 
+#[cfg(feature = "lock_api")]
+unsafe impl<R: RelaxStrategy> lock_api_crate::RawMutex for TicketMutex<(), R> {
+    type GuardMarker = lock_api_crate::GuardSend;
+
+    const INIT: Self = Self::new(());
+
+    fn lock(&self) {
+        // Prevent guard destructor running
+        core::mem::forget(Self::lock(self));
+    }
+
+    fn try_lock(&self) -> bool {
+        // Prevent guard destructor running
+        Self::try_lock(self).map(core::mem::forget).is_some()
+    }
+
+    unsafe fn unlock(&self) {
+        self.force_unlock();
+    }
+
+    fn is_locked(&self) -> bool {
+        Self::is_locked(self)
+    }
+}
+
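// Illustrative sketch (not part of this change): with spin's `lock_api` feature enabled,
// the `RawMutex` impl above lets the external `lock_api` crate wrap a ticket lock in its
// generic `Mutex` type. The alias name `TicketLock` is ours, and this assumes `lock_api`
// is also a direct dependency of the using crate.
type TicketLock<T> = lock_api::Mutex<spin::mutex::TicketMutex<()>, T>;

fn lock_api_demo() {
    let counter: TicketLock<u32> = TicketLock::new(0);
    // `lock()` returns a `lock_api::MutexGuard` that unlocks on drop.
    *counter.lock() += 1;
    assert_eq!(*counter.lock(), 1);
}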
 #[cfg(test)]
 mod tests {
     use std::prelude::v1::*;
@@ -320,21 +383,21 @@
     use std::sync::Arc;
     use std::thread;
 
-    use super::*;
+    type TicketMutex<T> = super::TicketMutex<T>;
 
     #[derive(Eq, PartialEq, Debug)]
     struct NonCopy(i32);
 
     #[test]
     fn smoke() {
-        let m = TicketMutex::new(());
+        let m = TicketMutex::<_>::new(());
         drop(m.lock());
         drop(m.lock());
     }
 
     #[test]
     fn lots_and_lots() {
-        static M: TicketMutex<()> = TicketMutex::new(());
+        static M: TicketMutex<()> = TicketMutex::<_>::new(());
         static mut CNT: u32 = 0;
         const J: u32 = 1000;
         const K: u32 = 3;
@@ -371,7 +434,7 @@
 
     #[test]
     fn try_lock() {
-        let mutex = TicketMutex::new(42);
+        let mutex = TicketMutex::<_>::new(42);
 
         // First lock succeeds
         let a = mutex.try_lock();
@@ -389,7 +452,7 @@
 
     #[test]
     fn test_into_inner() {
-        let m = TicketMutex::new(NonCopy(10));
+        let m = TicketMutex::<_>::new(NonCopy(10));
         assert_eq!(m.into_inner(), NonCopy(10));
     }
 
@@ -402,7 +465,7 @@
             }
         }
         let num_drops = Arc::new(AtomicUsize::new(0));
-        let m = TicketMutex::new(Foo(num_drops.clone()));
+        let m = TicketMutex::<_>::new(Foo(num_drops.clone()));
         assert_eq!(num_drops.load(Ordering::SeqCst), 0);
         {
             let _inner = m.into_inner();
@@ -415,8 +478,8 @@
     fn test_mutex_arc_nested() {
         // Tests nested mutexes and access
         // to underlying data.
-        let arc = Arc::new(TicketMutex::new(1));
-        let arc2 = Arc::new(TicketMutex::new(arc));
+        let arc = Arc::new(TicketMutex::<_>::new(1));
+        let arc2 = Arc::new(TicketMutex::<_>::new(arc));
         let (tx, rx) = channel();
         let _t = thread::spawn(move || {
             let lock = arc2.lock();
@@ -430,7 +493,7 @@
     #[test]
     #[ignore = "Android uses panic_abort"]
     fn test_mutex_arc_access_in_unwind() {
-        let arc = Arc::new(TicketMutex::new(1));
+        let arc = Arc::new(TicketMutex::<_>::new(1));
         let arc2 = arc.clone();
         let _ = thread::spawn(move || -> () {
             struct Unwinder {
@@ -451,7 +514,7 @@
 
     #[test]
     fn test_mutex_unsized() {
-        let mutex: &TicketMutex<[i32]> = &TicketMutex::new([1, 2, 3]);
+        let mutex: &TicketMutex<[i32]> = &TicketMutex::<_>::new([1, 2, 3]);
         {
             let b = &mut *mutex.lock();
             b[0] = 4;
@@ -463,7 +526,7 @@
 
     #[test]
     fn is_locked() {
-        let mutex = TicketMutex::new(());
+        let mutex = TicketMutex::<_>::new(());
         assert!(!mutex.is_locked());
         let lock = mutex.lock();
         assert!(mutex.is_locked());
diff --git a/src/once.rs b/src/once.rs
index ad60405..e4aadee 100644
--- a/src/once.rs
+++ b/src/once.rs
@@ -1,17 +1,19 @@
-//! Synchronization primitives for one-time evaluation.
+    //! Synchronization primitives for one-time evaluation.
 
 use core::{
     cell::UnsafeCell,
     mem::MaybeUninit,
-    sync::atomic::{AtomicUsize, Ordering},
+    sync::atomic::{AtomicU8, Ordering},
+    marker::PhantomData,
     fmt,
 };
+use crate::{RelaxStrategy, Spin};
 
 /// A primitive that provides lazy one-time initialization.
 ///
 /// Unlike its `std::sync` equivalent, this is generalized such that the closure returns a
 /// value to be stored by the [`Once`] (`std::sync::Once` can be trivially emulated with
-/// `Once<()>`).
+/// `Once`).
 ///
 /// Because [`Once::new`] is `const`, this primitive may be used to safely initialize statics.
 ///
@@ -20,18 +22,19 @@
 /// ```
 /// use spin;
 ///
-/// static START: spin::Once<()> = spin::Once::new();
+/// static START: spin::Once = spin::Once::new();
 ///
 /// START.call_once(|| {
 ///     // run initialization here
 /// });
 /// ```
-pub struct Once<T> {
-    state: AtomicUsize,
+pub struct Once<T = (), R = Spin> {
+    phantom: PhantomData<R>,
+    status: AtomicStatus,
     data: UnsafeCell<MaybeUninit<T>>,
 }
 
-impl<T: fmt::Debug> fmt::Debug for Once<T> {
+impl<T: fmt::Debug, R> fmt::Debug for Once<T, R> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         match self.get() {
             Some(s) => write!(f, "Once {{ data: ")
@@ -44,63 +47,81 @@
 
 // Same unsafe impls as `std::sync::RwLock`, because this also allows for
 // concurrent reads.
-unsafe impl<T: Send + Sync> Sync for Once<T> {}
-unsafe impl<T: Send> Send for Once<T> {}
+unsafe impl<T: Send + Sync, R> Sync for Once<T, R> {}
+unsafe impl<T: Send, R> Send for Once<T, R> {}
 
-// Four states that a Once can be in, encoded into the lower bits of `state` in
-// the Once structure.
-const INCOMPLETE: usize = 0x0;
-const RUNNING: usize = 0x1;
-const COMPLETE: usize = 0x2;
-const PANICKED: usize = 0x3;
+mod status {
+    use super::*;
 
-use core::hint::unreachable_unchecked as unreachable;
+    // SAFETY: This structure has an invariant, namely that the inner atomic u8 must *always* have
+    // a value for which there exists a valid Status. This means that users of this API must only
+    // be allowed to load and store `Status`es.
+    #[repr(transparent)]
+    pub struct AtomicStatus(AtomicU8);
 
-impl<T> Once<T> {
-    /// Initialization constant of [`Once`].
-    #[allow(clippy::declare_interior_mutable_const)]
-    pub const INIT: Self = Self {
-        state: AtomicUsize::new(INCOMPLETE),
-        data: UnsafeCell::new(MaybeUninit::uninit()),
-    };
-
-    /// Creates a new [`Once`].
-    pub const fn new() -> Once<T> {
-        Self::INIT
+    // Four states that a Once can be in, encoded into the lower bits of `status` in
+    // the Once structure.
+    #[repr(u8)]
+    #[derive(Clone, Copy, Debug, PartialEq)]
+    pub enum Status {
+        Incomplete = 0x00,
+        Running = 0x01,
+        Complete = 0x02,
+        Panicked = 0x03,
     }
-
-    /// Creates a new initialized [`Once`].
-    pub const fn initialized(data: T) -> Once<T> {
-        Self {
-            state: AtomicUsize::new(COMPLETE),
-            data: UnsafeCell::new(MaybeUninit::new(data)),
+    impl Status {
+        // Construct a status from an inner u8 integer.
+        //
+        // # Safety
+        //
+        // For this to be safe, the inner number must have a valid corresponding enum variant.
+        unsafe fn new_unchecked(inner: u8) -> Self {
+            core::mem::transmute(inner)
         }
     }
 
-    /// Get a reference to the initialized instance. Must only be called once COMPLETE.
-    unsafe fn force_get(&self) -> &T {
-        // SAFETY:
-        // * `UnsafeCell`/inner deref: data never changes again
-        // * `MaybeUninit`/outer deref: data was initialized
-        &*(*self.data.get()).as_ptr()
-    }
+    impl AtomicStatus {
+        #[inline(always)]
+        pub const fn new(status: Status) -> Self {
+            // SAFETY: We got the value directly from status, so transmuting back is fine.
+            Self(AtomicU8::new(status as u8))
+        }
+        #[inline(always)]
+        pub fn load(&self, ordering: Ordering) -> Status {
+            // SAFETY: We know that the inner integer must have been constructed from a Status in
+            // the first place.
+            unsafe { Status::new_unchecked(self.0.load(ordering)) }
+        }
+        #[inline(always)]
+        pub fn store(&self, status: Status, ordering: Ordering) {
+            // SAFETY: While not directly unsafe, this is safe because the value was retrieved from
+            // a status, thus making transmutation safe.
+            self.0.store(status as u8, ordering);
+        }
+        #[inline(always)]
+        pub fn compare_exchange(&self, old: Status, new: Status, success: Ordering, failure: Ordering) -> Result<Status, Status> {
+            match self.0.compare_exchange(old as u8, new as u8, success, failure) {
+                // SAFETY: A compare exchange will always return a value that was later stored into
+                // the atomic u8, but due to the invariant that it must be a valid Status, we know
+                // that both Ok(_) and Err(_) will be safely transmutable.
 
-    /// Get a reference to the initialized instance. Must only be called once COMPLETE.
-    unsafe fn force_get_mut(&mut self) -> &mut T {
-        // SAFETY:
-        // * `UnsafeCell`/inner deref: data never changes again
-        // * `MaybeUninit`/outer deref: data was initialized
-        &mut *(*self.data.get()).as_mut_ptr()
+                Ok(ok) => Ok(unsafe { Status::new_unchecked(ok) }),
+                Err(err) => Err(unsafe { Status::new_unchecked(err) }),
+            }
+        }
+        #[inline(always)]
+        pub fn get_mut(&mut self) -> &mut Status {
+            // SAFETY: Since we know that the u8 inside must be a valid Status, we can safely cast
+            // it to a &mut Status.
+            unsafe { &mut *((self.0.get_mut() as *mut u8).cast::<Status>()) }
+        }
     }
+}
+use self::status::{Status, AtomicStatus};
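// Illustrative sketch (not part of this change): the same single-byte pattern as
// `AtomicStatus` above, shown standalone. A `#[repr(u8)]` enum is stored in an
// `AtomicU8`, and because the wrapper only ever stores valid discriminants, loading
// the byte and transmuting it back to the enum is sound. `Phase`/`AtomicPhase` are
// illustrative names, not part of the crate.
use core::sync::atomic::{AtomicU8 as ExampleAtomicU8, Ordering as ExampleOrdering};

#[repr(u8)]
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum Phase {
    Idle = 0,
    Busy = 1,
    Done = 2,
}

struct AtomicPhase(ExampleAtomicU8);

impl AtomicPhase {
    const fn new(p: Phase) -> Self {
        Self(ExampleAtomicU8::new(p as u8))
    }
    fn load(&self, order: ExampleOrdering) -> Phase {
        // SAFETY: every stored byte came from a `Phase`, so it is a valid discriminant.
        unsafe { core::mem::transmute(self.0.load(order)) }
    }
    fn store(&self, p: Phase, order: ExampleOrdering) {
        self.0.store(p as u8, order);
    }
}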
 
-    /// Get a reference to the initialized instance. Must only be called once COMPLETE.
-    unsafe fn force_into_inner(self) -> T {
-        // SAFETY:
-        // * `UnsafeCell`/inner deref: data never changes again
-        // * `MaybeUninit`/outer deref: data was initialized
-        (*self.data.get()).as_ptr().read()
-    }
+use core::hint::unreachable_unchecked as unreachable;
 
+impl<T, R: RelaxStrategy> Once<T, R> {
     /// Performs an initialization routine once and only once. The given closure
     /// will be executed if this is the first time `call_once` has been called,
     /// and otherwise the routine will *not* be invoked.
@@ -136,73 +157,103 @@
     /// }
     /// ```
     pub fn call_once<F: FnOnce() -> T>(&self, f: F) -> &T {
-        let mut status = self.state.load(Ordering::SeqCst);
+        // SAFETY: We perform an Acquire load because if this were to return COMPLETE, then we need
+        // the preceding stores done while initializing, to become visible after this load.
+        let mut status = self.status.load(Ordering::Acquire);
 
-        if status == INCOMPLETE {
-            status = self.state.compare_and_swap(
-                INCOMPLETE,
-                RUNNING,
-                Ordering::SeqCst,
-            );
+        if status == Status::Incomplete {
+            match self.status.compare_exchange(
+                Status::Incomplete,
+                Status::Running,
+                // SAFETY: Success ordering: We do not have to synchronize any data at all, as the
+                // value is at this point uninitialized, so Relaxed is technically sufficient. We
+                // will however have to do a Release store later. However, the success ordering
+                // must always be at least as strong as the failure ordering, so we choose Acquire
+                // here anyway.
+                Ordering::Acquire,
+                // SAFETY: Failure ordering: While we have already loaded the status initially, we
+                // know that if some other thread would have fully initialized this in between,
+                // then there will be new not-yet-synchronized accesses done during that
+                // initialization that would not have been synchronized by the earlier load. Thus
+                // we use Acquire to ensure when we later call force_get() in the last match
+                // statement, if the status was changed to COMPLETE, that those accesses will become
+                // visible to us.
+                Ordering::Acquire,
+            ) {
+                Ok(_must_be_state_incomplete) => {
+                    // The compare-exchange succeeded, so we shall initialize it.
 
-            if status == INCOMPLETE { // We init
-                // We use a guard (Finish) to catch panics caused by builder
-                let mut finish = Finish { state: &self.state, panicked: true };
-                unsafe {
-                    // SAFETY:
-                    // `UnsafeCell`/deref: currently the only accessor, mutably
-                    // and immutably by cas exclusion.
-                    // `write`: pointer comes from `MaybeUninit`.
-                    (*self.data.get()).as_mut_ptr().write(f())
-                };
-                finish.panicked = false;
+                    // We use a guard (Finish) to catch panics caused by builder
+                    let finish = Finish { status: &self.status };
+                    unsafe {
+                        // SAFETY:
+                        // `UnsafeCell`/deref: currently the only accessor, mutably
+                        // and immutably by cas exclusion.
+                        // `write`: pointer comes from `MaybeUninit`.
+                        (*self.data.get()).as_mut_ptr().write(f())
+                    };
+                    // If there were to be a panic with unwind enabled, the code would
+                    // short-circuit and never reach the point where it writes the inner data.
+                    // The destructor for Finish will run, and poison the Once to ensure that other
+                    // threads accessing it do not exhibit unwanted behavior, if there were to be
+                    // any inconsistency in data structures caused by the panicking thread.
+                    //
+                    // However, f() is expected in the general case not to panic. In that case, we
+                    // simply forget the guard, bypassing its destructor. We could theoretically
+                    // clear a flag instead, but this eliminates the call to the destructor at
+                    // compile time, and unconditionally poisons during an eventual panic, if
+                    // unwinding is enabled.
+                    core::mem::forget(finish);
 
-                status = COMPLETE;
-                self.state.store(status, Ordering::SeqCst);
+                    // SAFETY: Release is required here, so that all memory accesses done in the
+                    // closure when initializing, become visible to other threads that perform Acquire
+                    // loads.
+                    //
+                    // And, we also know that the changes this thread has done will not magically
+                    // disappear from our cache, so it does not need to be AcqRel.
+                    self.status.store(Status::Complete, Ordering::Release);
 
-                // This next line is strictly an optimization
-                return unsafe { self.force_get() };
+                    // This next line is mainly an optimization.
+                    return unsafe { self.force_get() };
+                }
+                // The compare-exchange failed, so we know for a fact that the status cannot be
+                // INCOMPLETE, or it would have succeeded.
+                Err(other_status) => status = other_status,
             }
         }
 
-        self
-            .poll()
-            .unwrap_or_else(|| unreachable!("Encountered INCOMPLETE when polling Once"))
-    }
+        match status {
+            // SAFETY: We have either checked with an Acquire load, that the status is COMPLETE, or
+            // initialized it ourselves, in which case no additional synchronization is needed.
+            Status::Complete => unsafe { self.force_get() },
+            Status::Panicked => panic!("Once panicked"),
+            Status::Running => self
+                .poll()
+                .unwrap_or_else(|| {
+                    if cfg!(debug_assertions) {
+                        unreachable!("Encountered INCOMPLETE when polling Once")
+                    } else {
+                        // SAFETY: This poll is guaranteed never to fail because the API of poll
+                        // promises spinning if initialization is in progress. We've already
+                        // checked that initialisation is in progress, and initialisation is
+                        // monotonic: once done, it cannot be undone. We also fetched the status
+                        // with Acquire semantics, thereby guaranteeing that the later-executed
+                        // poll will also agree with us that initialization is in progress. Ergo,
+                        // this poll cannot fail.
+                        unsafe {
+                            unreachable();
+                        }
+                    }
+                }),
 
-    /// Returns a reference to the inner value if the [`Once`] has been initialized.
-    pub fn get(&self) -> Option<&T> {
-        match self.state.load(Ordering::SeqCst) {
-            COMPLETE => Some(unsafe { self.force_get() }),
-            _ => None,
+            // SAFETY: The only invariant possible in addition to the aforementioned ones at the
+            // moment, is INCOMPLETE. However, the only way for this match statement to be
+            // reached, is if we lost the CAS (otherwise we would have returned early), in
+            // which case we know for a fact that the state cannot be changed back to INCOMPLETE as
+            // `Once`s are monotonic.
+            Status::Incomplete => unsafe { unreachable() },
         }
-    }
 
-    /// Returns a mutable reference to the inner value if the [`Once`] has been initialized.
-    ///
-    /// Because this method requires a mutable reference to the [`Once`], no synchronization
-    /// overhead is required to access the inner value. In effect, it is zero-cost.
-    pub fn get_mut(&mut self) -> Option<&mut T> {
-        match *self.state.get_mut() {
-            COMPLETE => Some(unsafe { self.force_get_mut() }),
-            _ => None,
-        }
-    }
-
-    /// Returns a the inner value if the [`Once`] has been initialized.
-    ///
-    /// Because this method requires ownershup of the [`Once`], no synchronization overhead
-    /// is required to access the inner value. In effect, it is zero-cost.
-    pub fn try_into_inner(mut self) -> Option<T> {
-        match *self.state.get_mut() {
-            COMPLETE => Some(unsafe { self.force_into_inner() }),
-            _ => None,
-        }
-    }
-
-    /// Returns a reference to the inner value if the [`Once`] has been initialized.
-    pub fn is_completed(&self) -> bool {
-        self.state.load(Ordering::SeqCst) == COMPLETE
     }
 
     /// Spins until the [`Once`] contains a value.
@@ -218,7 +269,7 @@
         loop {
             match self.poll() {
                 Some(x) => break x,
-                None => crate::relax(),
+                None => R::relax(),
             }
         }
     }
@@ -235,26 +286,147 @@
     /// primitives.
     pub fn poll(&self) -> Option<&T> {
         loop {
-            match self.state.load(Ordering::SeqCst) {
-                INCOMPLETE => return None,
-                RUNNING => crate::relax(), // We spin
-                COMPLETE => return Some(unsafe { self.force_get() }),
-                PANICKED => panic!("Once previously poisoned by a panicked"),
-                _ => unsafe { unreachable() },
+            // SAFETY: Acquire is safe here, because if the status is COMPLETE, then we want to make
+            // sure that all memory accessed done while initializing that value, are visible when
+            // we return a reference to the inner data after this load.
+            match self.status.load(Ordering::Acquire) {
+                Status::Incomplete => return None,
+                Status::Running => R::relax(), // We spin
+                Status::Complete => return Some(unsafe { self.force_get() }),
+                Status::Panicked => panic!("Once previously poisoned by a panicked"),
             }
         }
     }
 }
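// Illustrative sketch (not part of this change): how the `call_once`/`wait`/`poll` trio
// above is typically used. Every thread racing on `call_once` observes the same fully
// initialized value, because the winner publishes it with a Release store and everyone
// else reads the status with Acquire. Assumes a `std` environment for `thread::spawn`.
fn once_demo() {
    static CONFIG: spin::Once<u32> = spin::Once::new();

    let threads: Vec<_> = (0..4)
        .map(|_| {
            std::thread::spawn(|| {
                // Only one closure runs; all callers get a reference to the same value.
                *CONFIG.call_once(|| 42)
            })
        })
        .collect();

    for t in threads {
        assert_eq!(t.join().unwrap(), 42);
    }

    // Non-blocking check once initialization is known to have happened.
    assert_eq!(CONFIG.poll().copied(), Some(42));
}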
 
-impl<T> From<T> for Once<T> {
+impl<T, R> Once<T, R> {
+    /// Initialization constant of [`Once`].
+    #[allow(clippy::declare_interior_mutable_const)]
+    pub const INIT: Self = Self {
+        phantom: PhantomData,
+        status: AtomicStatus::new(Status::Incomplete),
+        data: UnsafeCell::new(MaybeUninit::uninit()),
+    };
+
+    /// Creates a new [`Once`].
+    pub const fn new() -> Self {
+        Self::INIT
+    }
+
+    /// Creates a new initialized [`Once`].
+    pub const fn initialized(data: T) -> Self {
+        Self {
+            phantom: PhantomData,
+            status: AtomicStatus::new(Status::Complete),
+            data: UnsafeCell::new(MaybeUninit::new(data)),
+        }
+    }
+
+    /// Retrieve a pointer to the inner data.
+    ///
+    /// While this method itself is safe, accessing the pointer before the [`Once`] has been
+    /// initialized is UB, unless the data has already been written to through a pointer
+    /// obtained from this method.
+    pub fn as_mut_ptr(&self) -> *mut T {
+        // SAFETY:
+        // * MaybeUninit<T> always has exactly the same layout as T
+        self.data.get().cast::<T>()
+    }
+
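    // Illustrative sketch (not part of this change): `as_mut_ptr` is meant for manual,
    // e.g. FFI-style, initialization where the caller manages things by hand. Note that
    // writing through the pointer does not mark the `Once` as complete:
    //
    //     let once = spin::Once::<u32>::new();
    //     let ptr = once.as_mut_ptr();
    //     unsafe {
    //         ptr.write(42);              // data written via a pointer from this method,
    //         assert_eq!(ptr.read(), 42); // so reading it back through the pointer is fine
    //     }
    //     assert!(once.get().is_none());  // but the status is still `Incomplete`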
+    /// Get a reference to the initialized instance. Must only be called once COMPLETE.
+    unsafe fn force_get(&self) -> &T {
+        // SAFETY:
+        // * `UnsafeCell`/inner deref: data never changes again
+        // * `MaybeUninit`/outer deref: data was initialized
+        &*(*self.data.get()).as_ptr()
+    }
+
+    /// Get a reference to the initialized instance. Must only be called once COMPLETE.
+    unsafe fn force_get_mut(&mut self) -> &mut T {
+        // SAFETY:
+        // * `UnsafeCell`/inner deref: data never changes again
+        // * `MaybeUninit`/outer deref: data was initialized
+        &mut *(*self.data.get()).as_mut_ptr()
+    }
+
+    /// Get a reference to the initialized instance. Must only be called once COMPLETE.
+    unsafe fn force_into_inner(self) -> T {
+        // SAFETY:
+        // * `UnsafeCell`/inner deref: data never changes again
+        // * `MaybeUninit`/outer deref: data was initialized
+        (*self.data.get()).as_ptr().read()
+    }
+
+    /// Returns a reference to the inner value if the [`Once`] has been initialized.
+    pub fn get(&self) -> Option<&T> {
+        // SAFETY: Just as with `poll`, Acquire is safe here because we want to be able to see the
+        // nonatomic stores done when initializing, once we have loaded and checked the status.
+        match self.status.load(Ordering::Acquire) {
+            Status::Complete => Some(unsafe { self.force_get() }),
+            _ => None,
+        }
+    }
+
+    /// Returns a reference to the inner value on the unchecked assumption that the [`Once`] has been initialized.
+    ///
+    /// # Safety
+    ///
+    /// This is *extremely* unsafe if the `Once` has not already been initialized because a reference to uninitialized
+    /// memory will be returned, immediately triggering undefined behaviour (even if the reference goes unused).
+    /// However, this can be useful in some instances for exposing the `Once` to FFI or when the overhead of atomically
+    /// checking initialization is unacceptable and the `Once` has already been initialized.
+    pub unsafe fn get_unchecked(&self) -> &T {
+        debug_assert_eq!(
+            self.status.load(Ordering::SeqCst),
+            Status::Complete,
+            "Attempted to access an uninitialized Once. If this was run without debug checks, this would be undefined behaviour. This is a serious bug and you must fix it.",
+        );
+        self.force_get()
+    }
+
+    /// Returns a mutable reference to the inner value if the [`Once`] has been initialized.
+    ///
+    /// Because this method requires a mutable reference to the [`Once`], no synchronization
+    /// overhead is required to access the inner value. In effect, it is zero-cost.
+    pub fn get_mut(&mut self) -> Option<&mut T> {
+        match *self.status.get_mut() {
+            Status::Complete => Some(unsafe { self.force_get_mut() }),
+            _ => None,
+        }
+    }
+
+    /// Returns the inner value if the [`Once`] has been initialized.
+    ///
+    /// Because this method requires ownership of the [`Once`], no synchronization overhead
+    /// is required to access the inner value. In effect, it is zero-cost.
+    pub fn try_into_inner(mut self) -> Option<T> {
+        match *self.status.get_mut() {
+            Status::Complete => Some(unsafe { self.force_into_inner() }),
+            _ => None,
+        }
+    }
+
+    /// Checks whether the value has been initialized.
+    ///
+    /// This is done using [`Acquire`](core::sync::atomic::Ordering::Acquire) ordering, and
+    /// therefore it is safe to access the value directly via
+    /// [`get_unchecked`](Self::get_unchecked) if this returns true.
+    pub fn is_completed(&self) -> bool {
+        // TODO: Add a similar variant for Relaxed?
+        self.status.load(Ordering::Acquire) == Status::Complete
+    }
+}
+
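// Illustrative sketch (not part of this change): pairing `is_completed` (Acquire) with
// `get_unchecked` as described above, to skip the checked branch of `get` once
// initialization is known to have happened.
fn fast_path_demo(once: &spin::Once<u32>) -> Option<u32> {
    if once.is_completed() {
        // SAFETY: `is_completed` returned true with Acquire ordering, so the value is
        // initialized and its initialization is visible to this thread.
        Some(unsafe { *once.get_unchecked() })
    } else {
        None
    }
}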
+impl<T, R> From<T> for Once<T, R> {
     fn from(data: T) -> Self {
         Self::initialized(data)
     }
 }
 
-impl<T> Drop for Once<T> {
+impl<T, R> Drop for Once<T, R> {
     fn drop(&mut self) {
-        if self.state.load(Ordering::SeqCst) == COMPLETE {
+        // No need to do any atomic access here, we have &mut!
+        if *self.status.get_mut() == Status::Complete {
             unsafe {
                 //TODO: Use MaybeUninit::assume_init_drop once stabilised
                 core::ptr::drop_in_place((*self.data.get()).as_mut_ptr());
@@ -264,15 +436,18 @@
 }
 
 struct Finish<'a> {
-    state: &'a AtomicUsize,
-    panicked: bool,
+    status: &'a AtomicStatus,
 }
 
 impl<'a> Drop for Finish<'a> {
     fn drop(&mut self) {
-        if self.panicked {
-            self.state.store(PANICKED, Ordering::SeqCst);
-        }
+        // While using Relaxed here would most likely not be an issue, we use SeqCst anyway.
+        // This is mainly because panics are not meant to be fast at all, but also because if
+        // there were to be a compiler bug which reorders accesses within the same thread,
+        // where it should not, we want to be sure that the panic really is handled, and does
+        // not cause additional problems. SeqCst will therefore help guard against such
+        // bugs.
+        self.status.store(Status::Panicked, Ordering::SeqCst);
     }
 }
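// Illustrative sketch (not part of this change): the `Finish` guard above is the usual
// "poison on unwind" drop-guard pattern. The guard is created before the fallible work
// and forgotten on success, so its destructor only runs if the closure panics. Shown
// here standalone with illustrative names.
struct PoisonOnUnwind<'a> {
    poisoned: &'a std::sync::atomic::AtomicBool,
}

impl Drop for PoisonOnUnwind<'_> {
    fn drop(&mut self) {
        // Only reached if the caller did not `forget` us, i.e. on panic.
        self.poisoned.store(true, std::sync::atomic::Ordering::SeqCst);
    }
}

fn run_guarded(poisoned: &std::sync::atomic::AtomicBool, f: impl FnOnce()) {
    let guard = PoisonOnUnwind { poisoned };
    f(); // if this unwinds, `guard` is dropped and the flag is set
    core::mem::forget(guard); // success: skip the destructor entirely
}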
 
@@ -282,11 +457,12 @@
 
     use std::sync::mpsc::channel;
     use std::thread;
-    use super::Once;
+
+    use super::*;
 
     #[test]
     fn smoke_once() {
-        static O: Once<()> = Once::new();
+        static O: Once = Once::new();
         let mut a = 0;
         O.call_once(|| a += 1);
         assert_eq!(a, 1);
@@ -305,7 +481,7 @@
 
     #[test]
     fn stampede_once() {
-        static O: Once<()> = Once::new();
+        static O: Once = Once::new();
         static mut RUN: bool = false;
 
         let (tx, rx) = channel();
@@ -388,7 +564,7 @@
     fn panic() {
         use ::std::panic;
 
-        static INIT: Once<()> = Once::new();
+        static INIT: Once = Once::new();
 
         // poison the once
         let t = panic::catch_unwind(|| {
@@ -405,7 +581,7 @@
 
     #[test]
     fn init_constant() {
-        static O: Once<()> = Once::INIT;
+        static O: Once = Once::INIT;
         let mut a = 0;
         O.call_once(|| a += 1);
         assert_eq!(a, 1);
@@ -426,13 +602,13 @@
     }
 
     #[test]
-    fn drop() {
+    fn drop_occurs() {
         unsafe {
             CALLED = false;
         }
 
         {
-            let once = Once::new();
+            let once = Once::<_>::new();
             once.call_once(|| DropTest {});
         }
 
@@ -447,9 +623,8 @@
             CALLED = false;
         }
 
-        {
-            let once = Once::<DropTest>::new();
-        }
+        let once = Once::<DropTest>::new();
+        drop(once);
 
         assert!(unsafe {
             !CALLED
diff --git a/src/relax.rs b/src/relax.rs
new file mode 100644
index 0000000..6d9a690
--- /dev/null
+++ b/src/relax.rs
@@ -0,0 +1,58 @@
+//! Strategies that determine the behaviour of locks when encountering contention.
+
+/// A trait implemented by spinning relax strategies.
+pub trait RelaxStrategy {
+    /// Perform the relaxing operation during a period of contention.
+    fn relax();
+}
+
+/// A strategy that rapidly spins while informing the CPU that it should power down non-essential components via
+/// [`core::hint::spin_loop`].
+///
+/// Note that spinning is a 'dumb' strategy and most schedulers cannot correctly differentiate it from useful work,
+/// thereby misallocating even more CPU time to the spinning process. This is known as
+/// ['priority inversion'](https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html).
+///
+/// If you see signs that priority inversion is occurring, consider switching to [`Yield`] or, even better, not using a
+/// spinlock at all and opting for a proper scheduler-aware lock. Remember also that different targets, operating
+/// systems, schedulers, and even the same scheduler with different workloads will exhibit different behaviour. Just
+/// because priority inversion isn't occurring in your tests does not mean that it will not occur. Use a scheduler-
+/// aware lock if at all possible.
+pub struct Spin;
+
+impl RelaxStrategy for Spin {
+    #[inline(always)]
+    fn relax() {
+        core::hint::spin_loop();
+    }
+}
+
+/// A strategy that yields the current time slice to the scheduler in favour of other threads or processes.
+///
+/// This is generally used as a strategy for minimising power consumption and priority inversion on targets that have a
+/// standard library available. Note that such targets have scheduler-integrated concurrency primitives available, and
+/// you should generally use these instead, except in rare circumstances.
+#[cfg(feature = "std")]
+#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
+pub struct Yield;
+
+#[cfg(feature = "std")]
+#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
+impl RelaxStrategy for Yield {
+    #[inline(always)]
+    fn relax() {
+        std::thread::yield_now();
+    }
+}
+
+/// A strategy that rapidly spins, without telling the CPU to do any powering down.
+///
+/// You almost certainly do not want to use this. Use [`Spin`] instead. It exists for completeness and for targets
+/// that, for some reason, miscompile or do not support spin hint intrinsics despite attempting to generate code for
+/// them (i.e: this is a workaround for possible compiler bugs).
+pub struct Loop;
+
+impl RelaxStrategy for Loop {
+    #[inline(always)]
+    fn relax() {}
+}
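// Illustrative sketch (not part of this change): a user-defined strategy built on the
// `RelaxStrategy` trait above, plugged into the locks via their `R` type parameter. This
// assumes the trait is reachable as `spin::RelaxStrategy` (it is imported as
// `crate::RelaxStrategy` elsewhere in this diff) and that the using crate has `std`
// available for `yield_now`.
pub struct SpinThenYield;

impl spin::RelaxStrategy for SpinThenYield {
    #[inline]
    fn relax() {
        // Spin briefly with the CPU hint, then hand the time slice back to the scheduler.
        for _ in 0..16 {
            core::hint::spin_loop();
        }
        std::thread::yield_now();
    }
}

// Hypothetical alias showing where the strategy slots in:
// type MyMutex<T> = spin::mutex::TicketMutex<T, SpinThenYield>;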
diff --git a/src/rw_lock.rs b/src/rwlock.rs
similarity index 87%
rename from src/rw_lock.rs
rename to src/rwlock.rs
index ed50407..28602c9 100644
--- a/src/rw_lock.rs
+++ b/src/rwlock.rs
@@ -4,9 +4,11 @@
     cell::UnsafeCell,
     ops::{Deref, DerefMut},
     sync::atomic::{AtomicUsize, Ordering},
+    marker::PhantomData,
     fmt,
     mem,
 };
+use crate::{RelaxStrategy, Spin};
 
 /// A lock that provides data access to either one writer or many readers.
 ///
@@ -61,7 +63,8 @@
 ///     assert_eq!(*w, 6);
 /// } // write lock is dropped here
 /// ```
-pub struct RwLock<T: ?Sized> {
+pub struct RwLock<T: ?Sized, R = Spin> {
+    phantom: PhantomData<R>,
     lock: AtomicUsize,
     data: UnsafeCell<T>,
 }
@@ -75,36 +78,37 @@
 /// When the guard falls out of scope it will decrement the read count,
 /// potentially releasing the lock.
 pub struct RwLockReadGuard<'a, T: 'a + ?Sized> {
-    inner: &'a RwLock<T>,
+    lock: &'a AtomicUsize,
     data: &'a T,
 }
 
 /// A guard that provides mutable data access.
 ///
 /// When the guard falls out of scope it will release the lock.
-pub struct RwLockWriteGuard<'a, T: 'a + ?Sized> {
-    inner: &'a RwLock<T>,
+pub struct RwLockWriteGuard<'a, T: 'a + ?Sized, R = Spin> {
+    phantom: PhantomData<R>,
+    inner: &'a RwLock<T, R>,
     data: &'a mut T,
 }
 
-/// A guard that provides immutable data access but can be upgraded
-/// to [`RwLockWriteGuard`].
+/// A guard that provides immutable data access but can be upgraded to [`RwLockWriteGuard`].
 ///
 /// No writers or other upgradeable guards can exist while this is in scope. New reader
 /// creation is prevented (to alleviate writer starvation) but there may be existing readers
 /// when the lock is acquired.
 ///
 /// When the guard falls out of scope it will release the lock.
-pub struct RwLockUpgradableGuard<'a, T: 'a + ?Sized> {
-    inner: &'a RwLock<T>,
+pub struct RwLockUpgradableGuard<'a, T: 'a + ?Sized, R = Spin> {
+    phantom: PhantomData<R>,
+    inner: &'a RwLock<T, R>,
     data: &'a T,
 }
 
 // Same unsafe impls as `std::sync::RwLock`
-unsafe impl<T: ?Sized + Send> Send for RwLock<T> {}
-unsafe impl<T: ?Sized + Send + Sync> Sync for RwLock<T> {}
+unsafe impl<T: ?Sized + Send, R> Send for RwLock<T, R> {}
+unsafe impl<T: ?Sized + Send + Sync, R> Sync for RwLock<T, R> {}
 
-impl<T> RwLock<T> {
+impl<T, R> RwLock<T, R> {
     /// Creates a new spinlock wrapping the supplied data.
     ///
     /// May be used statically:
@@ -121,10 +125,11 @@
     /// }
     /// ```
     #[inline]
-    pub const fn new(user_data: T) -> RwLock<T> {
+    pub const fn new(data: T) -> Self {
         RwLock {
+            phantom: PhantomData,
             lock: AtomicUsize::new(0),
-            data: UnsafeCell::new(user_data),
+            data: UnsafeCell::new(data),
         }
     }
 
@@ -136,9 +141,37 @@
         let RwLock { data, .. } = self;
         data.into_inner()
     }
+    /// Returns a mutable pointer to the underlying data.
+    ///
+    /// This is mostly meant to be used for applications which require manual unlocking, but where
+    /// storing both the lock and the pointer to the inner data gets inefficient.
+    ///
+    /// While this is safe, writing to the data is undefined behavior unless the current thread has
+    /// acquired a write lock, and reading requires either a read or write lock.
+    ///
+    /// # Example
+    /// ```
+    /// let lock = spin::RwLock::new(42);
+    ///
+    /// unsafe {
+    ///     core::mem::forget(lock.write());
+    ///     
+    ///     assert_eq!(lock.as_mut_ptr().read(), 42);
+    ///     lock.as_mut_ptr().write(58);
+    ///
+    ///     lock.force_write_unlock();
+    /// }
+    ///
+    /// assert_eq!(*lock.read(), 58);
+    ///
+    /// ```
+    #[inline(always)]
+    pub fn as_mut_ptr(&self) -> *mut T {
+        self.data.get()
+    }
 }
 
-impl<T: ?Sized> RwLock<T> {
+impl<T: ?Sized, R: RelaxStrategy> RwLock<T, R> {
     /// Locks this rwlock with shared read access, blocking the current thread
     /// until it can be acquired.
     ///
@@ -165,11 +198,53 @@
         loop {
             match self.try_read() {
                 Some(guard) => return guard,
-                None => crate::relax(),
+                None => R::relax(),
             }
         }
     }
 
+    /// Lock this rwlock with exclusive write access, blocking the current
+    /// thread until it can be acquired.
+    ///
+    /// This function will not return while other writers or other readers
+    /// currently have access to the lock.
+    ///
+    /// Returns an RAII guard which will drop the write access of this rwlock
+    /// when dropped.
+    ///
+    /// ```
+    /// let mylock = spin::RwLock::new(0);
+    /// {
+    ///     let mut data = mylock.write();
+    ///     // The lock is now locked and the data can be written
+    ///     *data += 1;
+    ///     // The lock is dropped
+    /// }
+    /// ```
+    #[inline]
+    pub fn write(&self) -> RwLockWriteGuard<T, R> {
+        loop {
+            match self.try_write_internal(false) {
+                Some(guard) => return guard,
+                None => R::relax(),
+            }
+        }
+    }
+
+    /// Obtain a readable lock guard that can later be upgraded to a writable lock guard.
+    /// Upgrades can be done through the [`RwLockUpgradableGuard::upgrade`](RwLockUpgradableGuard::upgrade) method.
+    #[inline]
+    pub fn upgradeable_read(&self) -> RwLockUpgradableGuard<T, R> {
+        loop {
+            match self.try_upgradeable_read() {
+                Some(guard) => return guard,
+                None => R::relax(),
+            }
+        }
+    }
+}
+
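// Illustrative sketch (not part of this change): the read-check-then-upgrade pattern
// enabled by `upgradeable_read` above. Only one upgradeable guard can exist at a time,
// so two upgraders cannot deadlock each other while waiting to upgrade.
fn insert_if_missing(lock: &spin::RwLock<Vec<u32>>, value: u32) {
    let readable = lock.upgradeable_read();
    if !readable.contains(&value) {
        // Upgrade to exclusive access only when a write is actually needed.
        let mut writable = readable.upgrade();
        writable.push(value);
    }
}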
+impl<T: ?Sized, R> RwLock<T, R> {
     /// Attempt to acquire this lock with shared read access.
     ///
     /// This function will never block and will return immediately if `read`
@@ -204,7 +279,7 @@
             None
         } else {
             Some(RwLockReadGuard {
-                inner: self,
+                lock: &self.lock,
                 data: unsafe { &*self.data.get() },
             })
         }
@@ -262,7 +337,7 @@
     }
 
     #[inline(always)]
-    fn try_write_internal(&self, strong: bool) -> Option<RwLockWriteGuard<T>> {
+    fn try_write_internal(&self, strong: bool) -> Option<RwLockWriteGuard<T, R>> {
         if compare_exchange(
             &self.lock,
             0,
@@ -274,6 +349,7 @@
         .is_ok()
         {
             Some(RwLockWriteGuard {
+                phantom: PhantomData,
                 inner: self,
                 data: unsafe { &mut *self.data.get() },
             })
@@ -282,34 +358,6 @@
         }
     }
 
-    /// Lock this rwlock with exclusive write access, blocking the current
-    /// thread until it can be acquired.
-    ///
-    /// This function will not return while other writers or other readers
-    /// currently have access to the lock.
-    ///
-    /// Returns an RAII guard which will drop the write access of this rwlock
-    /// when dropped.
-    ///
-    /// ```
-    /// let mylock = spin::RwLock::new(0);
-    /// {
-    ///     let mut data = mylock.write();
-    ///     // The lock is now locked and the data can be written
-    ///     *data += 1;
-    ///     // The lock is dropped
-    /// }
-    /// ```
-    #[inline]
-    pub fn write(&self) -> RwLockWriteGuard<T> {
-        loop {
-            match self.try_write_internal(false) {
-                Some(guard) => return guard,
-                None => crate::relax(),
-            }
-        }
-    }
-
     /// Attempt to lock this rwlock with exclusive write access.
     ///
     /// This function does not ever block, and it will return `None` if a call
@@ -330,27 +378,16 @@
     /// }
     /// ```
     #[inline]
-    pub fn try_write(&self) -> Option<RwLockWriteGuard<T>> {
+    pub fn try_write(&self) -> Option<RwLockWriteGuard<T, R>> {
         self.try_write_internal(true)
     }
 
-    /// Obtain a readable lock guard that can later be upgraded to a writable lock guard.
-    /// Upgrades can be done through the [`RwLockUpgradableGuard::upgrade`](RwLockUpgradableGuard::upgrade) method.
-    #[inline]
-    pub fn upgradeable_read(&self) -> RwLockUpgradableGuard<T> {
-        loop {
-            match self.try_upgradeable_read() {
-                Some(guard) => return guard,
-                None => crate::relax(),
-            }
-        }
-    }
-
     /// Tries to obtain an upgradeable lock guard.
     #[inline]
-    pub fn try_upgradeable_read(&self) -> Option<RwLockUpgradableGuard<T>> {
+    pub fn try_upgradeable_read(&self) -> Option<RwLockUpgradableGuard<T, R>> {
         if self.lock.fetch_or(UPGRADED, Ordering::Acquire) & (WRITER | UPGRADED) == 0 {
             Some(RwLockUpgradableGuard {
+                phantom: PhantomData,
                 inner: self,
                 data: unsafe { &*self.data.get() },
             })
@@ -380,7 +417,7 @@
     }
 }
 
-impl<T: ?Sized + fmt::Debug> fmt::Debug for RwLock<T> {
+impl<T: ?Sized + fmt::Debug, R> fmt::Debug for RwLock<T, R> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         match self.try_read() {
             Some(guard) => write!(f, "RwLock {{ data: ")
@@ -391,13 +428,13 @@
     }
 }
 
-impl<T: ?Sized + Default> Default for RwLock<T> {
-    fn default() -> RwLock<T> {
+impl<T: ?Sized + Default, R> Default for RwLock<T, R> {
+    fn default() -> Self {
         Self::new(Default::default())
     }
 }
 
-impl<T> From<T> for RwLock<T> {
+impl<T, R> From<T> for RwLock<T, R> {
     fn from(data: T) -> Self {
         Self::new(data)
     }
@@ -434,9 +471,31 @@
     }
 }
 
-impl<'rwlock, T: ?Sized> RwLockUpgradableGuard<'rwlock, T> {
+impl<'rwlock, T: ?Sized, R: RelaxStrategy> RwLockUpgradableGuard<'rwlock, T, R> {
+    /// Upgrades an upgradeable lock guard to a writable lock guard.
+    ///
+    /// ```
+    /// let mylock = spin::RwLock::new(0);
+    ///
+    /// let upgradeable = mylock.upgradeable_read(); // Readable, but not yet writable
+    /// let writable = upgradeable.upgrade();
+    /// ```
+    #[inline]
+    pub fn upgrade(mut self) -> RwLockWriteGuard<'rwlock, T, R> {
+        loop {
+            self = match self.try_upgrade_internal(false) {
+                Ok(guard) => return guard,
+                Err(e) => e,
+            };
+
+            R::relax();
+        }
+    }
+}
+
+impl<'rwlock, T: ?Sized, R> RwLockUpgradableGuard<'rwlock, T, R> {
     #[inline(always)]
-    fn try_upgrade_internal(self, strong: bool) -> Result<RwLockWriteGuard<'rwlock, T>, Self> {
+    fn try_upgrade_internal(self, strong: bool) -> Result<RwLockWriteGuard<'rwlock, T, R>, Self> {
         if compare_exchange(
             &self.inner.lock,
             UPGRADED,
@@ -454,6 +513,7 @@
 
             // Upgrade successful
             Ok(RwLockWriteGuard {
+                phantom: PhantomData,
                 inner,
                 data: unsafe { &mut *inner.data.get() },
             })
@@ -462,26 +522,6 @@
         }
     }
 
-    /// Upgrades an upgradeable lock guard to a writable lock guard.
-    ///
-    /// ```
-    /// let mylock = spin::RwLock::new(0);
-    ///
-    /// let upgradeable = mylock.upgradeable_read(); // Readable, but not yet writable
-    /// let writable = upgradeable.upgrade();
-    /// ```
-    #[inline]
-    pub fn upgrade(mut self) -> RwLockWriteGuard<'rwlock, T> {
-        loop {
-            self = match self.try_upgrade_internal(false) {
-                Ok(guard) => return guard,
-                Err(e) => e,
-            };
-
-            crate::relax();
-        }
-    }
-
     /// Tries to upgrade an upgradeable lock guard to a writable lock guard.
     ///
     /// ```
@@ -494,7 +534,7 @@
     /// };
     /// ```
     #[inline]
-    pub fn try_upgrade(self) -> Result<RwLockWriteGuard<'rwlock, T>, Self> {
+    pub fn try_upgrade(self) -> Result<RwLockWriteGuard<'rwlock, T, R>, Self> {
         self.try_upgrade_internal(true)
     }
 
@@ -522,7 +562,7 @@
         mem::drop(self);
 
         RwLockReadGuard {
-            inner,
+            lock: &inner.lock,
             data: unsafe { &*inner.data.get() },
         }
     }
@@ -545,19 +585,19 @@
     }
 }
 
-impl<'rwlock, T: ?Sized + fmt::Debug> fmt::Debug for RwLockUpgradableGuard<'rwlock, T> {
+impl<'rwlock, T: ?Sized + fmt::Debug, R> fmt::Debug for RwLockUpgradableGuard<'rwlock, T, R> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         fmt::Debug::fmt(&**self, f)
     }
 }
 
-impl<'rwlock, T: ?Sized + fmt::Display> fmt::Display for RwLockUpgradableGuard<'rwlock, T> {
+impl<'rwlock, T: ?Sized + fmt::Display, R> fmt::Display for RwLockUpgradableGuard<'rwlock, T, R> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         fmt::Display::fmt(&**self, f)
     }
 }
 
-impl<'rwlock, T: ?Sized> RwLockWriteGuard<'rwlock, T> {
+impl<'rwlock, T: ?Sized, R> RwLockWriteGuard<'rwlock, T, R> {
     /// Downgrades the writable lock guard to a readable, shared lock guard. Cannot fail and is guaranteed not to spin.
     ///
     /// ```
@@ -581,7 +621,7 @@
         mem::drop(self);
 
         RwLockReadGuard {
-            inner,
+            lock: &inner.lock,
             data: unsafe { &*inner.data.get() },
         }
     }
@@ -598,7 +638,7 @@
     /// assert_eq!(*readable, 1);
     /// ```
     #[inline]
-    pub fn downgrade_to_upgradeable(self) -> RwLockUpgradableGuard<'rwlock, T> {
+    pub fn downgrade_to_upgradeable(self) -> RwLockUpgradableGuard<'rwlock, T, R> {
         debug_assert_eq!(self.inner.lock.load(Ordering::Acquire) & (WRITER | UPGRADED), WRITER);
 
         // Reserve the read guard for ourselves
@@ -610,6 +650,7 @@
         mem::forget(self);
 
         RwLockUpgradableGuard {
+            phantom: PhantomData,
             inner,
             data: unsafe { &*inner.data.get() },
         }
@@ -635,13 +676,13 @@
     }
 }
 
-impl<'rwlock, T: ?Sized + fmt::Debug> fmt::Debug for RwLockWriteGuard<'rwlock, T> {
+impl<'rwlock, T: ?Sized + fmt::Debug, R> fmt::Debug for RwLockWriteGuard<'rwlock, T, R> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         fmt::Debug::fmt(&**self, f)
     }
 }
 
-impl<'rwlock, T: ?Sized + fmt::Display> fmt::Display for RwLockWriteGuard<'rwlock, T> {
+impl<'rwlock, T: ?Sized + fmt::Display, R> fmt::Display for RwLockWriteGuard<'rwlock, T, R> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         fmt::Display::fmt(&**self, f)
     }
@@ -655,7 +696,7 @@
     }
 }
 
-impl<'rwlock, T: ?Sized> Deref for RwLockUpgradableGuard<'rwlock, T> {
+impl<'rwlock, T: ?Sized, R> Deref for RwLockUpgradableGuard<'rwlock, T, R> {
     type Target = T;
 
     fn deref(&self) -> &T {
@@ -663,7 +704,7 @@
     }
 }
 
-impl<'rwlock, T: ?Sized> Deref for RwLockWriteGuard<'rwlock, T> {
+impl<'rwlock, T: ?Sized, R> Deref for RwLockWriteGuard<'rwlock, T, R> {
     type Target = T;
 
     fn deref(&self) -> &T {
@@ -671,7 +712,7 @@
     }
 }
 
-impl<'rwlock, T: ?Sized> DerefMut for RwLockWriteGuard<'rwlock, T> {
+impl<'rwlock, T: ?Sized, R> DerefMut for RwLockWriteGuard<'rwlock, T, R> {
     fn deref_mut(&mut self) -> &mut T {
         self.data
     }
@@ -679,12 +720,12 @@
 
 impl<'rwlock, T: ?Sized> Drop for RwLockReadGuard<'rwlock, T> {
     fn drop(&mut self) {
-        debug_assert!(self.inner.lock.load(Ordering::Relaxed) & !(WRITER | UPGRADED) > 0);
-        self.inner.lock.fetch_sub(READER, Ordering::Release);
+        debug_assert!(self.lock.load(Ordering::Relaxed) & !(WRITER | UPGRADED) > 0);
+        self.lock.fetch_sub(READER, Ordering::Release);
     }
 }
 
-impl<'rwlock, T: ?Sized> Drop for RwLockUpgradableGuard<'rwlock, T> {
+impl<'rwlock, T: ?Sized, R> Drop for RwLockUpgradableGuard<'rwlock, T, R> {
     fn drop(&mut self) {
         debug_assert_eq!(
             self.inner.lock.load(Ordering::Relaxed) & (WRITER | UPGRADED),
@@ -694,7 +735,7 @@
     }
 }
 
-impl<'rwlock, T: ?Sized> Drop for RwLockWriteGuard<'rwlock, T> {
+impl<'rwlock, T: ?Sized, R> Drop for RwLockWriteGuard<'rwlock, T, R> {
     fn drop(&mut self) {
         debug_assert_eq!(self.inner.lock.load(Ordering::Relaxed) & WRITER, WRITER);
 
@@ -720,9 +761,9 @@
     }
 }
 
-#[cfg(feature = "lock_api1")]
-unsafe impl lock_api::RawRwLock for RwLock<()> {
-    type GuardMarker = lock_api::GuardSend;
+#[cfg(feature = "lock_api")]
+unsafe impl<R: RelaxStrategy> lock_api_crate::RawRwLock for RwLock<(), R> {
+    type GuardMarker = lock_api_crate::GuardSend;
 
     const INIT: Self = Self::new(());
 
@@ -743,6 +784,7 @@
         drop(RwLockWriteGuard {
             inner: self,
             data: &mut (),
+            phantom: PhantomData,
         });
     }
 
@@ -761,7 +803,7 @@
     #[inline(always)]
     unsafe fn unlock_shared(&self) {
         drop(RwLockReadGuard {
-            inner: self,
+            lock: &self.lock,
             data: &(),
         });
     }
@@ -772,8 +814,8 @@
     }
 }
 
-#[cfg(feature = "lock_api1")]
-unsafe impl lock_api::RawRwLockUpgrade for RwLock<()> {
+#[cfg(feature = "lock_api")]
+unsafe impl<R: RelaxStrategy> lock_api_crate::RawRwLockUpgrade for RwLock<(), R> {
     #[inline(always)]
     fn lock_upgradable(&self) {
         // Prevent guard destructor running
@@ -791,6 +833,7 @@
         drop(RwLockUpgradableGuard {
             inner: self,
             data: &(),
+            phantom: PhantomData,
         });
     }
 
@@ -799,6 +842,7 @@
         let tmp_guard = RwLockUpgradableGuard {
             inner: self,
             data: &(),
+            phantom: PhantomData,
         };
         core::mem::forget(tmp_guard.upgrade());
     }
@@ -808,17 +852,19 @@
         let tmp_guard = RwLockUpgradableGuard {
             inner: self,
             data: &(),
+            phantom: PhantomData,
         };
         tmp_guard.try_upgrade().map(|g| core::mem::forget(g)).is_ok()
     }
 }
 
-#[cfg(feature = "lock_api1")]
-unsafe impl lock_api::RawRwLockDowngrade for RwLock<()> {
+#[cfg(feature = "lock_api")]
+unsafe impl<R: RelaxStrategy> lock_api_crate::RawRwLockDowngrade for RwLock<(), R> {
     unsafe fn downgrade(&self) {
         let tmp_guard = RwLockWriteGuard {
             inner: self,
             data: &mut (),
+            phantom: PhantomData,
         };
         core::mem::forget(tmp_guard.downgrade());
     }
@@ -830,6 +876,7 @@
         let tmp_guard = RwLockUpgradableGuard {
             inner: self,
             data: &(),
+            phantom: PhantomData,
         };
         core::mem::forget(tmp_guard.downgrade());
     }
@@ -838,6 +885,7 @@
         let tmp_guard = RwLockWriteGuard {
             inner: self,
             data: &mut (),
+            phantom: PhantomData,
         };
         core::mem::forget(tmp_guard.downgrade_to_upgradeable());
     }
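// Illustrative sketch (not part of this change): with the `lock_api` feature, the
// `RawRwLock`/`RawRwLockUpgrade`/`RawRwLockDowngrade` impls above let the external
// `lock_api` crate wrap spin's rwlock in its generic `RwLock` type. The alias name is
// ours, and this assumes `lock_api` is also a direct dependency of the using crate.
type ApiRwLock<T> = lock_api::RwLock<spin::RwLock<()>, T>;

fn rwlock_api_demo() {
    let lock: ApiRwLock<u32> = ApiRwLock::new(5);
    {
        let mut w = lock.write();
        *w += 1;
    }
    assert_eq!(*lock.read(), 6);
}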
@@ -852,7 +900,7 @@
     use std::sync::Arc;
     use std::thread;
 
-    use super::*;
+    type RwLock<T> = super::RwLock<T>;
 
     #[derive(Eq, PartialEq, Debug)]
     struct NonCopy(i32);
@@ -988,7 +1036,7 @@
     #[test]
     fn test_rw_try_read() {
         let m = RwLock::new(0);
-        mem::forget(m.write());
+        ::std::mem::forget(m.write());
         assert!(m.try_read().is_none());
     }