Mark ab/7061308 as merged in stage.

Bug: 180401296
Merged-In: I3a32e308046dfd304dec678a62cae7999c05c8a1
Change-Id: Id2aaf6a702e27cc09237074ad560b6acd8617032
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
index 0cd0fb1..adb1fc4 100644
--- a/.cargo_vcs_info.json
+++ b/.cargo_vcs_info.json
@@ -1,5 +1,5 @@
 {
   "git": {
-    "sha1": "99df5dba5fa2471d51f58e3691621e5cd22d464d"
+    "sha1": "3012849c2d9c50228a780031e7c200b193a6b4fa"
   }
 }
diff --git a/Android.bp b/Android.bp
index 5a1adfe..fe8ffb5 100644
--- a/Android.bp
+++ b/Android.bp
@@ -1,8 +1,43 @@
 // This file is generated by cargo2android.py --device --run --dependencies --tests.
 
+package {
+    default_applicable_licenses: ["external_rust_crates_ppv-lite86_license"],
+}
+
+// Added automatically by a large-scale-change that took the approach of
+// 'apply every license found to every target'. While this makes sure we respect
+// every license restriction, it may not be entirely correct.
+//
+// e.g. GPL in an MIT project might only apply to the contrib/ directory.
+//
+// Please consider splitting the single license below into multiple licenses,
+// taking care not to lose any license_kind information, and overriding the
+// default license using the 'licenses: [...]' property on targets as needed.
+//
+// For unused files, consider creating a 'fileGroup' with "//visibility:private"
+// to attach the license to, and including a comment whether the files may be
+// used in the current project.
+//
+// large-scale-change included anything that looked like it might be a license
+// text as a license_text. e.g. LICENSE, NOTICE, COPYING etc.
+//
+// Please consider removing redundant or irrelevant files from 'license_text:'.
+// See: http://go/android-license-faq
+license {
+    name: "external_rust_crates_ppv-lite86_license",
+    visibility: [":__subpackages__"],
+    license_kinds: [
+        "SPDX-license-identifier-Apache-2.0",
+        "SPDX-license-identifier-MIT",
+    ],
+    license_text: [
+        "LICENSE-APACHE",
+        "LICENSE-MIT",
+    ],
+}
+
 rust_library {
     name: "libppv_lite86",
-    // has rustc warnings
     host_supported: true,
     crate_name: "ppv_lite86",
     srcs: ["src/lib.rs"],
@@ -16,7 +51,6 @@
 rust_defaults {
     name: "ppv-lite86_defaults",
     crate_name: "ppv_lite86",
-    // has rustc warnings
     srcs: ["src/lib.rs"],
     test_suites: ["general-tests"],
     auto_gen_config: true,
diff --git a/Cargo.toml b/Cargo.toml
index 6b87ae6..6ffa7ba 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,7 +13,7 @@
 [package]
 edition = "2018"
 name = "ppv-lite86"
-version = "0.2.9"
+version = "0.2.10"
 authors = ["The CryptoCorrosion Contributors"]
 description = "Implementation of the crypto-simd API for x86"
 keywords = ["crypto", "simd", "x86"]
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
index 84a59ad..8f3fb52 100644
--- a/Cargo.toml.orig
+++ b/Cargo.toml.orig
@@ -1,6 +1,6 @@
 [package]
 name = "ppv-lite86"
-version = "0.2.9"
+version = "0.2.10"
 authors = ["The CryptoCorrosion Contributors"]
 edition = "2018"
 license = "MIT/Apache-2.0"
diff --git a/METADATA b/METADATA
index 6fc2138..1ac3aef 100644
--- a/METADATA
+++ b/METADATA
@@ -7,13 +7,13 @@
   }
   url {
     type: ARCHIVE
-    value: "https://static.crates.io/crates/ppv-lite86/ppv-lite86-0.2.9.crate"
+    value: "https://static.crates.io/crates/ppv-lite86/ppv-lite86-0.2.10.crate"
   }
-  version: "0.2.9"
+  version: "0.2.10"
   license_type: NOTICE
   last_upgrade_date {
     year: 2020
-    month: 8
-    day: 21
+    month: 11
+    day: 2
   }
 }
diff --git a/TEST_MAPPING b/TEST_MAPPING
index 04b640f..a45c687 100644
--- a/TEST_MAPPING
+++ b/TEST_MAPPING
@@ -2,10 +2,6 @@
 {
   "presubmit": [
     {
-      "host": true,
-      "name": "ppv-lite86_host_test_src_lib"
-    },
-    {
       "name": "ppv-lite86_device_test_src_lib"
     }
   ]
diff --git a/src/generic.rs b/src/generic.rs
index d26266c..f0e83d9 100644
--- a/src/generic.rs
+++ b/src/generic.rs
@@ -1,8 +1,8 @@
 #![allow(non_camel_case_types)]
 
-use core::ops::*;
 use crate::soft::{x2, x4};
 use crate::types::*;
+use core::ops::*;
 
 #[repr(C)]
 #[derive(Clone, Copy)]
@@ -61,12 +61,6 @@
         self.v128
     }
 }
-impl From<[u64; 4]> for vec256_storage {
-    #[inline]
-    fn from(q: [u64; 4]) -> Self {
-        Self { v128: [[0, 1].into(), [2, 3].into()] }
-    }
-}
 impl From<vec256_storage> for [u64; 4] {
     #[inline]
     fn from(q: vec256_storage) -> Self {
diff --git a/src/lib.rs b/src/lib.rs
index 2f32286..a4fbabe 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -12,14 +12,14 @@
 mod types;
 pub use self::types::*;
 
-#[cfg(all(feature = "simd", target_arch = "x86_64", not(miri)))]
+#[cfg(all(target_arch = "x86_64", not(feature = "no_simd"), not(miri)))]
 pub mod x86_64;
-#[cfg(all(feature = "simd", target_arch = "x86_64", not(miri)))]
+#[cfg(all(target_arch = "x86_64", not(feature = "no_simd"), not(miri)))]
 use self::x86_64 as arch;
 
-#[cfg(any(miri, not(all(feature = "simd", any(target_arch = "x86_64")))))]
+#[cfg(any(feature = "no_simd", miri, not(target_arch = "x86_64")))]
 pub mod generic;
-#[cfg(any(miri, not(all(feature = "simd", any(target_arch = "x86_64")))))]
+#[cfg(any(feature = "no_simd", miri, not(target_arch = "x86_64")))]
 use self::generic as arch;
 
 pub use self::arch::{vec128_storage, vec256_storage, vec512_storage};
diff --git a/src/soft.rs b/src/soft.rs
index d12dac5..8976c48 100644
--- a/src/soft.rs
+++ b/src/soft.rs
@@ -1,9 +1,9 @@
 //! Implement 256- and 512- bit in terms of 128-bit, for machines without native wide SIMD.
 
-use core::marker::PhantomData;
-use core::ops::*;
 use crate::types::*;
 use crate::{vec128_storage, vec256_storage, vec512_storage};
+use core::marker::PhantomData;
+use core::ops::*;
 
 #[derive(Copy, Clone, Default)]
 #[allow(non_camel_case_types)]
@@ -238,7 +238,12 @@
     ($fn:ident) => {
         #[inline(always)]
         fn $fn(self) -> Self {
-            x4([self.0[0].$fn(), self.0[1].$fn(), self.0[2].$fn(), self.0[3].$fn()])
+            x4([
+                self.0[0].$fn(),
+                self.0[1].$fn(),
+                self.0[2].$fn(),
+                self.0[3].$fn(),
+            ])
         }
     };
 }
diff --git a/src/types.rs b/src/types.rs
index 119b6bb..a282670 100644
--- a/src/types.rs
+++ b/src/types.rs
@@ -1,3 +1,4 @@
+#![allow(non_camel_case_types)]
 use core::ops::{Add, AddAssign, BitAnd, BitOr, BitXor, BitXorAssign, Not};
 
 pub trait AndNot {
@@ -44,182 +45,178 @@
 
 pub trait RotateEachWord128 {}
 
-#[allow(non_camel_case_types)]
-mod types {
-    //! Vector type naming scheme:
-    //! uN[xP]xL
-    //! Unsigned; N-bit words * P bits per lane * L lanes
-    //!
-    //! A lane is always 128-bits, chosen because common SIMD architectures treat 128-bit units of
-    //! wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and
-    //! slow inter-lane operations.
+// Vector type naming scheme:
+// uN[xP]xL
+// Unsigned; N-bit words * P bits per lane * L lanes
+//
+// A lane is always 128-bits, chosen because common SIMD architectures treat 128-bit units of
+// wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and
+// slow inter-lane operations.
 
-    use crate::arch::{vec128_storage, vec256_storage, vec512_storage};
-    use crate::{ArithOps, BitOps128, BitOps32, BitOps64, Machine, Store, StoreBytes};
+use crate::arch::{vec128_storage, vec256_storage, vec512_storage};
 
-    pub trait UnsafeFrom<T> {
-        unsafe fn unsafe_from(t: T) -> Self;
-    }
-
-    /// A vector composed of two elements, which may be words or themselves vectors.
-    pub trait Vec2<W> {
-        fn extract(self, i: u32) -> W;
-        fn insert(self, w: W, i: u32) -> Self;
-    }
-
-    /// A vector composed of four elements, which may be words or themselves vectors.
-    pub trait Vec4<W> {
-        fn extract(self, i: u32) -> W;
-        fn insert(self, w: W, i: u32) -> Self;
-    }
-
-    // TODO: multiples of 4 should inherit this
-    /// A vector composed of four words; depending on their size, operations may cross lanes.
-    pub trait Words4 {
-        fn shuffle1230(self) -> Self;
-        fn shuffle2301(self) -> Self;
-        fn shuffle3012(self) -> Self;
-    }
-
-    /// A vector composed one or more lanes each composed of four words.
-    pub trait LaneWords4 {
-        fn shuffle_lane_words1230(self) -> Self;
-        fn shuffle_lane_words2301(self) -> Self;
-        fn shuffle_lane_words3012(self) -> Self;
-    }
-
-    // TODO: make this a part of BitOps
-    /// Exchange neigboring ranges of bits of the specified size
-    pub trait Swap64 {
-        fn swap1(self) -> Self;
-        fn swap2(self) -> Self;
-        fn swap4(self) -> Self;
-        fn swap8(self) -> Self;
-        fn swap16(self) -> Self;
-        fn swap32(self) -> Self;
-        fn swap64(self) -> Self;
-    }
-
-    pub trait u32x4<M: Machine>:
-        BitOps32
-        + Store<vec128_storage>
-        + ArithOps
-        + Vec4<u32>
-        + Words4
-        + LaneWords4
-        + StoreBytes
-        + MultiLane<[u32; 4]>
-        + Into<vec128_storage>
-    {
-}
-    pub trait u64x2<M: Machine>:
-        BitOps64
-        + Store<vec128_storage>
-        + ArithOps
-        + Vec2<u64>
-        + MultiLane<[u64; 2]>
-        + Into<vec128_storage>
-    {
-}
-    pub trait u128x1<M: Machine>:
-        BitOps128 + Store<vec128_storage> + Swap64 + MultiLane<[u128; 1]> + Into<vec128_storage>
-    {
+#[allow(clippy::missing_safety_doc)]
+pub trait UnsafeFrom<T> {
+    unsafe fn unsafe_from(t: T) -> Self;
 }
 
-    pub trait u32x4x2<M: Machine>:
-        BitOps32
-        + Store<vec256_storage>
-        + Vec2<M::u32x4>
-        + MultiLane<[M::u32x4; 2]>
-        + ArithOps
-        + Into<vec256_storage>
-    {
-}
-    pub trait u64x2x2<M: Machine>:
-        BitOps64
-        + Store<vec256_storage>
-        + Vec2<M::u64x2>
-        + MultiLane<[M::u64x2; 2]>
-        + ArithOps
-        + StoreBytes
-        + Into<vec256_storage>
-    {
-}
-    pub trait u64x4<M: Machine>:
-        BitOps64
-        + Store<vec256_storage>
-        + Vec4<u64>
-        + MultiLane<[u64; 4]>
-        + ArithOps
-        + Words4
-        + StoreBytes
-        + Into<vec256_storage>
-    {
-}
-    pub trait u128x2<M: Machine>:
-        BitOps128
-        + Store<vec256_storage>
-        + Vec2<M::u128x1>
-        + MultiLane<[M::u128x1; 2]>
-        + Swap64
-        + Into<vec256_storage>
-    {
+/// A vector composed of two elements, which may be words or themselves vectors.
+pub trait Vec2<W> {
+    fn extract(self, i: u32) -> W;
+    fn insert(self, w: W, i: u32) -> Self;
 }
 
-    pub trait u32x4x4<M: Machine>:
-        BitOps32
-        + Store<vec512_storage>
-        + Vec4<M::u32x4>
-        + MultiLane<[M::u32x4; 4]>
-        + ArithOps
-        + LaneWords4
-        + Into<vec512_storage>
-    {
-}
-    pub trait u64x2x4<M: Machine>:
-        BitOps64
-        + Store<vec512_storage>
-        + Vec4<M::u64x2>
-        + MultiLane<[M::u64x2; 4]>
-        + ArithOps
-        + Into<vec512_storage>
-    {
-}
-    // TODO: Words4
-    pub trait u128x4<M: Machine>:
-        BitOps128
-        + Store<vec512_storage>
-        + Vec4<M::u128x1>
-        + MultiLane<[M::u128x1; 4]>
-        + Swap64
-        + Into<vec512_storage>
-    {
+/// A vector composed of four elements, which may be words or themselves vectors.
+pub trait Vec4<W> {
+    fn extract(self, i: u32) -> W;
+    fn insert(self, w: W, i: u32) -> Self;
 }
 
-    /// A vector composed of multiple 128-bit lanes.
-    pub trait MultiLane<Lanes> {
-        /// Split a multi-lane vector into single-lane vectors.
-        fn to_lanes(self) -> Lanes;
-        /// Build a multi-lane vector from individual lanes.
-        fn from_lanes(lanes: Lanes) -> Self;
-    }
+// TODO: multiples of 4 should inherit this
+/// A vector composed of four words; depending on their size, operations may cross lanes.
+pub trait Words4 {
+    fn shuffle1230(self) -> Self;
+    fn shuffle2301(self) -> Self;
+    fn shuffle3012(self) -> Self;
+}
 
-    /// Combine single vectors into a multi-lane vector.
-    pub trait VZip<V> {
-        fn vzip(self) -> V;
-    }
+/// A vector composed of one or more lanes, each composed of four words.
+pub trait LaneWords4 {
+    fn shuffle_lane_words1230(self) -> Self;
+    fn shuffle_lane_words2301(self) -> Self;
+    fn shuffle_lane_words3012(self) -> Self;
+}
 
-    impl<V, T> VZip<V> for T
-    where
-        V: MultiLane<T>,
-    {
-        #[inline(always)]
-        fn vzip(self) -> V {
-            V::from_lanes(self)
-        }
+// TODO: make this a part of BitOps
+/// Exchange neighboring ranges of bits of the specified size
+pub trait Swap64 {
+    fn swap1(self) -> Self;
+    fn swap2(self) -> Self;
+    fn swap4(self) -> Self;
+    fn swap8(self) -> Self;
+    fn swap16(self) -> Self;
+    fn swap32(self) -> Self;
+    fn swap64(self) -> Self;
+}
+
+pub trait u32x4<M: Machine>:
+    BitOps32
+    + Store<vec128_storage>
+    + ArithOps
+    + Vec4<u32>
+    + Words4
+    + LaneWords4
+    + StoreBytes
+    + MultiLane<[u32; 4]>
+    + Into<vec128_storage>
+{
+}
+pub trait u64x2<M: Machine>:
+    BitOps64
+    + Store<vec128_storage>
+    + ArithOps
+    + Vec2<u64>
+    + MultiLane<[u64; 2]>
+    + Into<vec128_storage>
+{
+}
+pub trait u128x1<M: Machine>:
+    BitOps128 + Store<vec128_storage> + Swap64 + MultiLane<[u128; 1]> + Into<vec128_storage>
+{
+}
+
+pub trait u32x4x2<M: Machine>:
+    BitOps32
+    + Store<vec256_storage>
+    + Vec2<M::u32x4>
+    + MultiLane<[M::u32x4; 2]>
+    + ArithOps
+    + Into<vec256_storage>
+{
+}
+pub trait u64x2x2<M: Machine>:
+    BitOps64
+    + Store<vec256_storage>
+    + Vec2<M::u64x2>
+    + MultiLane<[M::u64x2; 2]>
+    + ArithOps
+    + StoreBytes
+    + Into<vec256_storage>
+{
+}
+pub trait u64x4<M: Machine>:
+    BitOps64
+    + Store<vec256_storage>
+    + Vec4<u64>
+    + MultiLane<[u64; 4]>
+    + ArithOps
+    + Words4
+    + StoreBytes
+    + Into<vec256_storage>
+{
+}
+pub trait u128x2<M: Machine>:
+    BitOps128
+    + Store<vec256_storage>
+    + Vec2<M::u128x1>
+    + MultiLane<[M::u128x1; 2]>
+    + Swap64
+    + Into<vec256_storage>
+{
+}
+
+pub trait u32x4x4<M: Machine>:
+    BitOps32
+    + Store<vec512_storage>
+    + Vec4<M::u32x4>
+    + MultiLane<[M::u32x4; 4]>
+    + ArithOps
+    + LaneWords4
+    + Into<vec512_storage>
+{
+}
+pub trait u64x2x4<M: Machine>:
+    BitOps64
+    + Store<vec512_storage>
+    + Vec4<M::u64x2>
+    + MultiLane<[M::u64x2; 4]>
+    + ArithOps
+    + Into<vec512_storage>
+{
+}
+// TODO: Words4
+pub trait u128x4<M: Machine>:
+    BitOps128
+    + Store<vec512_storage>
+    + Vec4<M::u128x1>
+    + MultiLane<[M::u128x1; 4]>
+    + Swap64
+    + Into<vec512_storage>
+{
+}
+
+/// A vector composed of multiple 128-bit lanes.
+pub trait MultiLane<Lanes> {
+    /// Split a multi-lane vector into single-lane vectors.
+    fn to_lanes(self) -> Lanes;
+    /// Build a multi-lane vector from individual lanes.
+    fn from_lanes(lanes: Lanes) -> Self;
+}
+
+/// Combine single vectors into a multi-lane vector.
+pub trait VZip<V> {
+    fn vzip(self) -> V;
+}
+
+impl<V, T> VZip<V> for T
+where
+    V: MultiLane<T>,
+{
+    #[inline(always)]
+    fn vzip(self) -> V {
+        V::from_lanes(self)
     }
 }
-pub use self::types::*;
 
 pub trait Machine: Sized + Copy {
     type u32x4: u32x4<Self>;
@@ -264,15 +261,27 @@
         unsafe { V::unsafe_read_be(input) }
     }
 
+    /// # Safety
+    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
+    /// environment.
     unsafe fn instance() -> Self;
 }
 
 pub trait Store<S> {
+    /// # Safety
+    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
+    /// environment.
     unsafe fn unpack(p: S) -> Self;
 }
 
 pub trait StoreBytes {
+    /// # Safety
+    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
+    /// environment.
     unsafe fn unsafe_read_le(input: &[u8]) -> Self;
+    /// # Safety
+    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
+    /// environment.
     unsafe fn unsafe_read_be(input: &[u8]) -> Self;
     fn write_le(self, out: &mut [u8]);
     fn write_be(self, out: &mut [u8]);
diff --git a/src/x86_64/mod.rs b/src/x86_64/mod.rs
index ecf184f..d7455d0 100644
--- a/src/x86_64/mod.rs
+++ b/src/x86_64/mod.rs
@@ -1,7 +1,7 @@
 // crate minimums: sse2, x86_64
 
-use core::arch::x86_64::{__m128i, __m256i};
 use crate::types::*;
+use core::arch::x86_64::{__m128i, __m256i};
 
 mod sse2;
 
diff --git a/src/x86_64/sse2.rs b/src/x86_64/sse2.rs
index 60e7681..bf0063f 100644
--- a/src/x86_64/sse2.rs
+++ b/src/x86_64/sse2.rs
@@ -166,28 +166,23 @@
 
 macro_rules! rotr_32_s3 {
     ($name:ident, $k0:expr, $k1:expr) => {
-    #[inline(always)]
-    fn $name(self) -> Self {
-        Self::new(unsafe {
-                _mm_shuffle_epi8(
-                    self.x,
-                    _mm_set_epi64x($k0, $k1),
-                )
-            })
+        #[inline(always)]
+        fn $name(self) -> Self {
+            Self::new(unsafe { _mm_shuffle_epi8(self.x, _mm_set_epi64x($k0, $k1)) })
         }
     };
 }
 macro_rules! rotr_32 {
     ($name:ident, $i:expr) => {
-    #[inline(always)]
-    fn $name(self) -> Self {
-        Self::new(unsafe {
-            _mm_or_si128(
-                _mm_srli_epi32(self.x, $i as i32),
-                _mm_slli_epi32(self.x, 32 - $i as i32),
-            )
-        })
-    }
+        #[inline(always)]
+        fn $name(self) -> Self {
+            Self::new(unsafe {
+                _mm_or_si128(
+                    _mm_srli_epi32(self.x, $i as i32),
+                    _mm_slli_epi32(self.x, 32 - $i as i32),
+                )
+            })
+        }
     };
 }
 impl<S4: Copy, NI: Copy> RotateEachWord32 for u32x4_sse2<YesS3, S4, NI> {
@@ -228,28 +223,23 @@
 
 macro_rules! rotr_64_s3 {
     ($name:ident, $k0:expr, $k1:expr) => {
-    #[inline(always)]
-    fn $name(self) -> Self {
-        Self::new(unsafe {
-                _mm_shuffle_epi8(
-                    self.x,
-                    _mm_set_epi64x($k0, $k1),
-                )
-            })
+        #[inline(always)]
+        fn $name(self) -> Self {
+            Self::new(unsafe { _mm_shuffle_epi8(self.x, _mm_set_epi64x($k0, $k1)) })
         }
     };
 }
 macro_rules! rotr_64 {
     ($name:ident, $i:expr) => {
-    #[inline(always)]
-    fn $name(self) -> Self {
-        Self::new(unsafe {
-            _mm_or_si128(
-                _mm_srli_epi64(self.x, $i as i32),
-                _mm_slli_epi64(self.x, 64 - $i as i32),
-            )
-        })
-    }
+        #[inline(always)]
+        fn $name(self) -> Self {
+            Self::new(unsafe {
+                _mm_or_si128(
+                    _mm_srli_epi64(self.x, $i as i32),
+                    _mm_slli_epi64(self.x, 64 - $i as i32),
+                )
+            })
+        }
     };
 }
 impl<S4: Copy, NI: Copy> RotateEachWord32 for u64x2_sse2<YesS3, S4, NI> {
@@ -296,15 +286,15 @@
 
 macro_rules! rotr_128 {
     ($name:ident, $i:expr) => {
-    #[inline(always)]
-    fn $name(self) -> Self {
-        Self::new(unsafe {
-            _mm_or_si128(
-                _mm_srli_si128(self.x, $i as i32),
-                _mm_slli_si128(self.x, 128 - $i as i32),
-            )
-        })
-    }
+        #[inline(always)]
+        fn $name(self) -> Self {
+            Self::new(unsafe {
+                _mm_or_si128(
+                    _mm_srli_si128(self.x, $i as i32),
+                    _mm_slli_si128(self.x, 128 - $i as i32),
+                )
+            })
+        }
     };
 }
 // TODO: completely unoptimized
@@ -411,7 +401,7 @@
     }
     #[inline(always)]
     fn from_lanes(xs: [u128; 1]) -> Self {
-        unimplemented!()
+        unimplemented!("{:?}", xs)
     }
 }
 
@@ -780,7 +770,7 @@
 impl<S4, NI> BSwap for u128x1_sse2<NoS3, S4, NI> {
     #[inline(always)]
     fn bswap(self) -> Self {
-        Self::new(unsafe { unimplemented!() })
+        unimplemented!()
     }
 }
 
@@ -1078,6 +1068,7 @@
     }
 }
 
+#[allow(unused)]
 #[inline(always)]
 unsafe fn eq128_s4(x: __m128i, y: __m128i) -> bool {
     let q = _mm_shuffle_epi32(_mm_cmpeq_epi64(x, y), 0b1100_0110);
@@ -1492,19 +1483,13 @@
     impl<NI> ArithOps for u32x4x4_avx2<NI> where NI: Copy {}
     macro_rules! shuf_lane_bytes {
         ($name:ident, $k0:expr, $k1:expr) => {
-        #[inline(always)]
-        fn $name(self) -> Self {
-            Self::new(unsafe {
-                [
-                    _mm256_shuffle_epi8(
-                        self.x[0],
-                        _mm256_set_epi64x($k0, $k1, $k0, $k1),
-                    ),
-                    _mm256_shuffle_epi8(
-                        self.x[1],
-                        _mm256_set_epi64x($k0, $k1, $k0, $k1),
-                    )
-                ]
+            #[inline(always)]
+            fn $name(self) -> Self {
+                Self::new(unsafe {
+                    [
+                        _mm256_shuffle_epi8(self.x[0], _mm256_set_epi64x($k0, $k1, $k0, $k1)),
+                        _mm256_shuffle_epi8(self.x[1], _mm256_set_epi64x($k0, $k1, $k0, $k1)),
+                    ]
                 })
             }
         };
@@ -1522,7 +1507,7 @@
                         _mm256_or_si256(
                             _mm256_srli_epi32(self.x[1], $i as i32),
                             _mm256_slli_epi32(self.x[1], 32 - $i as i32),
-                        )
+                        ),
                     ]
                 })
             }