Update twox-hash to 2.1.0
Test: m rust
Change-Id: I219c5535fb0d3c931384dc886be273ed4ebbae0e
diff --git a/crates/twox-hash/.android-checksum.json b/crates/twox-hash/.android-checksum.json
index 943fbf0..9fe2f94 100644
--- a/crates/twox-hash/.android-checksum.json
+++ b/crates/twox-hash/.android-checksum.json
@@ -1 +1 @@
-{"package":null,"files":{".cargo-checksum.json":"f42149f4c80c877fbf4efab2015abaa87665fe72e2524ba32a34ef4e7259dd01","Android.bp":"962f743cf02d7ddf20632c8edcb5a4b521a29dfe7184c0655a1a729a744ec4d6","Cargo.lock":"71c86cf641dc92cf57d8d640be62fd2fe821e80629b62b9604b72bfa63f61daa","Cargo.toml":"51620af517dc97c59c1b22c7e76a68ceb1ac5a5fa91e646ecf7b9cdf26eb1b40","LICENSE":"fc031453123dd708eaa3701676a3bd4499e74210ed08681ceac606a36eed31b5","LICENSE.txt":"fc031453123dd708eaa3701676a3bd4499e74210ed08681ceac606a36eed31b5","METADATA":"f8c4855fe586be644fda88fe94c5218b1fd1704dd48949a4c5cf5a3956a69859","MODULE_LICENSE_MIT":"0d6f8afa3940b7f06bebee651376d43bc8b0d5b437337be2696d30377451e93a","README.md":"7caa8668bf52bbad9d74804bb6f41be0fbd17a4ccd41818eb3c901d44295172a","TEST_MAPPING":"bd09946d0ac6db7c8df83cad2aa5f9d1f8a14bc5d1c5317849cdd484fd87bbe4","cargo_embargo.json":"b622d968e90d8059fff193d85eab060c0530a0cf64db68475e4374dcd73e1ac2","src/bin/hash_file.rs":"bcf36ee78152bb1e8531ca14c4bf930949203a5ba9fc219d9e5998e83849d9e1","src/digest_0_10_support.rs":"b55203f545ccd33a933f030b71833509ae489b7a5539531a22ecdda2a4a31621","src/digest_0_9_support.rs":"fdfcf7ec654e095fa1fc1d9c6c2a0d62c1a5e05af8de523efb47971c80596a7c","src/digest_support.rs":"51ebace5d0786eba04f086f6b797f2e462551c00d6fe9ce4f61e06cd09b4c464","src/lib.rs":"b8113cf72c2e336172a61493bea93f19c3eb1a1db579719602518154ab457c14","src/sixty_four.rs":"7efb91b5c031850d908f6e153829909c8c04fa331b0cbbf6159e9a0eb9c4bb9f","src/std_support.rs":"67062ccb64f1e3174f7d3f91952f566b73b01c1309d2a99027980d27d8d3b51b","src/thirty_two.rs":"95e36f159297732779e6ffa33df618bbb3aba81fa169d70c688393316079d676","src/xxh3.rs":"9593e6e07a38391a4e3885449bdfe71e67f0597af47550ba6d240f6b2e0cef72"}}
\ No newline at end of file
+{"package":null,"files":{".cargo-checksum.json":"ac4b6846550b5de97f894aebf0b5f0c200aaec5cec59543b18837d5021200191","Android.bp":"9169c42476cb14813f86c0e8138185cb050f26bfa5831120c245ba89f2e71153","CHANGELOG.md":"ab441a59cfbb4d393ec50f8a20cc0ef26e029babb613f22d68029dfa00efd4da","Cargo.toml":"6d45d2aff43535358170842447aa612046739d1c75ed857ba0291bc950e36478","LICENSE":"fc031453123dd708eaa3701676a3bd4499e74210ed08681ceac606a36eed31b5","LICENSE.txt":"fc031453123dd708eaa3701676a3bd4499e74210ed08681ceac606a36eed31b5","METADATA":"d74ab9a9986f448224b519091c775ca2392b02d57e8c3b4ebe754efcdc26407e","MODULE_LICENSE_MIT":"0d6f8afa3940b7f06bebee651376d43bc8b0d5b437337be2696d30377451e93a","README.md":"ab3c241279453ec28ec9f984402f448eb987ba018b644229c99ca6d6a2ccac41","TEST_MAPPING":"bd09946d0ac6db7c8df83cad2aa5f9d1f8a14bc5d1c5317849cdd484fd87bbe4","cargo_embargo.json":"b622d968e90d8059fff193d85eab060c0530a0cf64db68475e4374dcd73e1ac2","clippy.toml":"68c4e9e2fdd2226efe63a39fb9eddaff52940c1ce24012ec5e8f11cf6d2a4ca5","src/lib.rs":"91e33b4f47c53f0fb7c5d0698a7b6b41f8558c1c45d39c3c4441166aa820dbe1","src/xxhash3.rs":"293bce13663248c63d2c8424f3f0d4c8abed6446ea846b7849181965bcf1e555","src/xxhash3/large.rs":"114429b4a1e2d770bd91ab20656ee2c29a8053be5c7d2a6966b3176e4eb7c12b","src/xxhash3/large/avx2.rs":"dd070bcf170fdbef84316f0054e78a17f61f3beabaed97011b4ef2a7a0bc517c","src/xxhash3/large/neon.rs":"489e46b49b4eb53357510f16260c1ec932041124a16d11b0524be76fbe5e8fe5","src/xxhash3/large/scalar.rs":"268b2b790a29b8d2f793273b7cc29f0efb3e4dd8d5cdad66d11a4c5ff38494fd","src/xxhash3/large/sse2.rs":"d90893cd94f85980861b78884f3313d61f76ea1505f61dddefd4b132d6e18fde","src/xxhash3/secret.rs":"df943f90f1f0a2aee9e979585c51d83f2811d6ef456cf05e8887f72a4b99cbb5","src/xxhash3/streaming.rs":"f92409e486c7133df893f227153e703e8837fd0b5df27d35e50b81bf224be06c","src/xxhash32.rs":"8d6ae63689fe9329418ae0616cb70af3d18f7486cbf245cf8fedba8f26fa6d66","src/xxhash3_128.rs":"c00d59ced64d3d002a2a32c2650def08cd8f19b9cce40d975c19115c56b1
7607","src/xxhash3_64.rs":"ebee709850d19f52288345e0e4c7c8ebd3aed9cac03a6309c47e29a74de136ea","src/xxhash64.rs":"38ea94f1b9b577dec5da5f383138a14ac3a148f97d566003906528984199af79"}}
\ No newline at end of file
diff --git a/crates/twox-hash/.cargo-checksum.json b/crates/twox-hash/.cargo-checksum.json
index 01b9de5..05db0b5 100644
--- a/crates/twox-hash/.cargo-checksum.json
+++ b/crates/twox-hash/.cargo-checksum.json
@@ -1 +1 @@
-{"files":{"Cargo.lock":"cd118ec94064fb986c7addc898d735a438c7b0f59971b4c15e1a5b4405e33824","Cargo.toml":"997419a42f25493d35917bb952faa883a7dbfe449b8351b7055181f4bc827dc9","LICENSE.txt":"f59bcb1c02654665f78a5f290bae76b6358137675f6459f51e6d474a4623d8ea","README.md":"e5b5bf9df5791c8d595384ce6c783e3b7de7a1ae0d486edaf086e33742f9edee","src/bin/hash_file.rs":"dd56cf4a061b399adf39ad65a32f3b9c66806bc69d70c7f8cf30df41f487af62","src/digest_0_10_support.rs":"d375a17242abd244ffb258c61e8b460942d8404c38e20c2a9899f89291f5ec42","src/digest_0_9_support.rs":"d64d83b8ff47ffb2f6cfd4b6a9a8b8119a04825cfe04bfad67b6d6b65f3b7cc8","src/digest_support.rs":"4cca163c09c7148ef8a15012a692effe0b612e0046317a902e074cbd1973f54c","src/lib.rs":"8a5e584e184cd9a7b7fb4ac0670ffa61965e030d0d75b42d0b04802f7db39b94","src/sixty_four.rs":"06fe2c1057f37981434072209be60b6e13a987b2931bb57d1ad4cebf32e5a23d","src/std_support.rs":"dadfcdd38b51d4c06010336230d561d0b5ade9724b212afe705d2672f467a816","src/thirty_two.rs":"9793615c3b9ef9364b5017a13e6b2a7bb43a66754c3d3225301d65c6de1173e6","src/xxh3.rs":"1c1489aab48f78ccca0330f11e76d14c703a06910d2482c6d93ce88ac6541686"},"package":"97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675"}
\ No newline at end of file
+{"files":{"CHANGELOG.md":"1c1b1fd8fa05c02d7f9553379f764e69721640aa29d1cb3263d4e036b8d49758","Cargo.toml":"f3e4786f64c5f420ab6b3d1b8be8c7964a120bb323c712045f0830973924c615","LICENSE.txt":"f59bcb1c02654665f78a5f290bae76b6358137675f6459f51e6d474a4623d8ea","README.md":"08bf665663d85177f74b9d35b3bd5a7e57683cdd4a2843e92affa7f9320eb684","clippy.toml":"18a86102cc10237068b3370ac5710a285330f70096d584631bafd518cf9e05b9","src/lib.rs":"87808adcb7dbb3ef84d622ffb54a5b3e3ab5757db4acb8d8ea70a609f8ebc431","src/xxhash3.rs":"cef97ce27d7863c7f3d546cb13bd74539216ec8a2dc93ef3141adb870c1a8e5e","src/xxhash3/large.rs":"a35cab6d5b3ce572cf08305cac79177dd5e5928bde8dc299746dbcd4ceaca3a8","src/xxhash3/large/avx2.rs":"57a600190205e6397489f45c19bbc5fb70bb9cd8117f8e9899fd20b193ad1e7d","src/xxhash3/large/neon.rs":"ca479b24e64e911e8cbd3b868bb04256b7f3dfd33c754aed36bb14c010dca7d0","src/xxhash3/large/scalar.rs":"9dbca8e105302a1de286d6f6fd74bf735709bf2602d08659599d52ea540bc14c","src/xxhash3/large/sse2.rs":"024a7271d19fd57acc3b0f030b3318fa6923af175cc1528d260681685b400f04","src/xxhash3/secret.rs":"e4fedccd921255b7db2d4ff0aafb800046e6270fcbaca71599d631b0b8d30afb","src/xxhash3/streaming.rs":"175cb6d875043c90a5e4b4102825c9d3fc6a3ebc57064d5825a030ab10786d82","src/xxhash32.rs":"7d8acbd8f3d1a0a32fa297de3b6b44f14b2d135403d6d45f0ce66f1f0444cd52","src/xxhash3_128.rs":"97db17349ea7f91b8a416c70c3cc2c5373e8b0befc613e4fadc058ee98defbfb","src/xxhash3_64.rs":"b9b050eec3db19af71d61131ee4604d423bc00974cdcd8f1f1672b21c9799094","src/xxhash64.rs":"45bf5a53c130d572028785eae585e999b8f2bb7743ac5eedef958391e322b5b1"},"package":"e7b17f197b3050ba473acf9181f7b1d3b66d1cf7356c6cc57886662276e65908"}
\ No newline at end of file
diff --git a/crates/twox-hash/Android.bp b/crates/twox-hash/Android.bp
index 1953b81..371e6b1 100644
--- a/crates/twox-hash/Android.bp
+++ b/crates/twox-hash/Android.bp
@@ -18,19 +18,20 @@
host_supported: true,
crate_name: "twox_hash",
cargo_env_compat: true,
- cargo_pkg_version: "1.6.3",
+ cargo_pkg_version: "2.1.0",
crate_root: "src/lib.rs",
- edition: "2018",
+ edition: "2021",
features: [
+ "alloc",
"default",
- "rand",
+ "random",
"std",
+ "xxhash32",
+ "xxhash3_128",
+ "xxhash3_64",
+ "xxhash64",
],
- rustlibs: [
- "libcfg_if",
- "librand",
- "libstatic_assertions",
- ],
+ rustlibs: ["librand"],
apex_available: [
"//apex_available:platform",
"//apex_available:anyapex",
@@ -44,23 +45,26 @@
host_supported: true,
crate_name: "twox_hash",
cargo_env_compat: true,
- cargo_pkg_version: "1.6.3",
+ cargo_pkg_version: "2.1.0",
crate_root: "src/lib.rs",
test_suites: ["general-tests"],
auto_gen_config: true,
test_options: {
unit_test: true,
},
- edition: "2018",
+ edition: "2021",
features: [
+ "alloc",
"default",
- "rand",
+ "random",
"std",
+ "xxhash32",
+ "xxhash3_128",
+ "xxhash3_64",
+ "xxhash64",
],
rustlibs: [
- "libcfg_if",
"librand",
"libserde_json",
- "libstatic_assertions",
],
}
diff --git a/crates/twox-hash/CHANGELOG.md b/crates/twox-hash/CHANGELOG.md
new file mode 100644
index 0000000..18cbc4c
--- /dev/null
+++ b/crates/twox-hash/CHANGELOG.md
@@ -0,0 +1,104 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [2.1.0] - 2024-12-09
+
+[2.1.0]: https://github.com/shepmaster/twox-hash/tree/v2.1.0
+
+### Added
+
+- The XXH3 128-bit algorithm is implemented via `XxHash3_128` and the
+ `xxhash3_128` module.
+
+## [2.0.1] - 2024-11-04
+
+[2.0.1]: https://github.com/shepmaster/twox-hash/tree/v2.0.1
+
+### Fixed
+
+- Removed a panic that could occur when using `XxHash3_64` to hash 1
+ to 3 bytes of data in debug mode. Release mode and different lengths
+ of data are unaffected.
+
+## [2.0.0] - 2024-10-18
+
+[2.0.0]: https://github.com/shepmaster/twox-hash/tree/v2.0.0
+
+This release is a complete rewrite of the crate, including
+reorganization of the code. The XXH3 algorithm now matches the 0.8
+release of the reference C xxHash implementation.
+
+### Added
+
+- `XxHash32::oneshot` and `XxHash64::oneshot` can perform hashing with
+ zero allocation and generally improved performance. If you have code
+ that creates a hasher and hashes a slice of bytes exactly once, you
+ are strongly encouraged to use the new functions. This might look
+ like:
+
+ ```rust
+ // Before
+ let mut hasher = XxHash64::new(); // or XxHash32, or with seeds
+ some_bytes.hash(&mut hasher);
+ let hash = hasher.finish();
+
+ // After
+ let hash = XxHash64::oneshot(some_bytes);
+ ```
+
+- There is a feature flag for each hashing implementation. It is
+ recommended that you opt-out of the crate's default features and
+ only select the implementations you need to improve compile speed.
+
+### Changed
+
+- The crates minimum supported Rust version (MSRV) is now 1.81.
+
+- Functional and performance comparisons are made against the
+ reference C xxHash library version 0.8.2, which includes a stable
+ XXH3 algorithm.
+
+- Support for randomly-generated hasher instances is now behind the
+ `random` feature flag. It was previously combined with the `std`
+ feature flag.
+
+### Removed
+
+- The deprecated type aliases `XxHash` and `RandomXxHashBuilder` have
+ been removed. Replace them with `XxHash64` and
+ `xxhash64::RandomState` respectively.
+
+- `RandomXxHashBuilder32` and `RandomXxHashBuilder64` are no longer
+ available at the top-level of the crate. Replace them with
+ `xxhash32::RandomState` and ``xxhash64::RandomState` respectively.
+
+- `Xxh3Hash64` and `xx3::Hash64` have been renamed to `XxHash3_64` and
+ `xxhash3_64::Hasher` respectively.
+
+- The free functions `xxh3::hash64`, `xxh3::hash64_with_seed`, and
+ `xxh3::hash64_with_secret` are now associated functions of
+ `xxhash3_64::Hasher`: `oneshot`, `oneshot_with_seed` and
+ `oneshot_with_secret`. Note that the argument order has changed.
+
+- Support for the [digest][] crate has been removed. The digest crate
+ is for **cryptographic** hash functions and xxHash is
+ **non-cryptographic**.
+
+- `XxHash32` and `XxHash64` no longer implement `Copy`. This prevents
+ accidentally mutating a duplicate instance of the state instead of
+ the original state. `Clone` is still implemented so you can make
+ deliberate duplicates.
+
+- The XXH3 128-bit variant is not yet re-written. Work is in progress
+ for this.
+
+- We no longer provide support for randomly-generated instances of the
+ XXH3 64-bit variant. The XXH3 algorithm takes both a seed and a
+ secret as input and deciding what to randomize is non-trivial and
+ can have negative impacts on performance.
+
+[digest]: https://docs.rs/digest/latest/digest/
diff --git a/crates/twox-hash/Cargo.lock b/crates/twox-hash/Cargo.lock
deleted file mode 100644
index e6070db..0000000
--- a/crates/twox-hash/Cargo.lock
+++ /dev/null
@@ -1,234 +0,0 @@
-# This file is automatically @generated by Cargo.
-# It is not intended for manual editing.
-version = 3
-
-[[package]]
-name = "cfg-if"
-version = "1.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
-
-[[package]]
-name = "crypto-common"
-version = "0.1.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "57952ca27b5e3606ff4dd79b0020231aaf9d6aa76dc05fd30137538c50bd3ce8"
-dependencies = [
- "generic-array 0.14.5",
- "typenum",
-]
-
-[[package]]
-name = "digest"
-version = "0.8.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5"
-dependencies = [
- "generic-array 0.12.4",
-]
-
-[[package]]
-name = "digest"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066"
-dependencies = [
- "generic-array 0.14.5",
-]
-
-[[package]]
-name = "digest"
-version = "0.10.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f2fb860ca6fafa5552fb6d0e816a69c8e49f0908bf524e30a90d97c85892d506"
-dependencies = [
- "crypto-common",
-]
-
-[[package]]
-name = "generic-array"
-version = "0.12.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ffdf9f34f1447443d37393cc6c2b8313aebddcd96906caf34e54c68d8e57d7bd"
-dependencies = [
- "typenum",
-]
-
-[[package]]
-name = "generic-array"
-version = "0.14.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803"
-dependencies = [
- "typenum",
- "version_check",
-]
-
-[[package]]
-name = "getrandom"
-version = "0.2.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad"
-dependencies = [
- "cfg-if",
- "libc",
- "wasi",
-]
-
-[[package]]
-name = "itoa"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35"
-
-[[package]]
-name = "libc"
-version = "0.2.125"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5916d2ae698f6de9bfb891ad7a8d65c09d232dc58cc4ac433c7da3b2fd84bc2b"
-
-[[package]]
-name = "ppv-lite86"
-version = "0.2.16"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872"
-
-[[package]]
-name = "proc-macro2"
-version = "1.0.37"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ec757218438d5fda206afc041538b2f6d889286160d649a86a24d37e1235afd1"
-dependencies = [
- "unicode-xid",
-]
-
-[[package]]
-name = "quote"
-version = "1.0.18"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1"
-dependencies = [
- "proc-macro2",
-]
-
-[[package]]
-name = "rand"
-version = "0.8.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
-dependencies = [
- "libc",
- "rand_chacha",
- "rand_core",
-]
-
-[[package]]
-name = "rand_chacha"
-version = "0.3.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
-dependencies = [
- "ppv-lite86",
- "rand_core",
-]
-
-[[package]]
-name = "rand_core"
-version = "0.6.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
-dependencies = [
- "getrandom",
-]
-
-[[package]]
-name = "ryu"
-version = "1.0.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f"
-
-[[package]]
-name = "serde"
-version = "1.0.137"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1"
-dependencies = [
- "serde_derive",
-]
-
-[[package]]
-name = "serde_derive"
-version = "1.0.137"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
-]
-
-[[package]]
-name = "serde_json"
-version = "1.0.81"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b7ce2b32a1aed03c558dc61a5cd328f15aff2dbc17daad8fb8af04d2100e15c"
-dependencies = [
- "itoa",
- "ryu",
- "serde",
-]
-
-[[package]]
-name = "static_assertions"
-version = "1.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
-
-[[package]]
-name = "syn"
-version = "1.0.92"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7ff7c592601f11445996a06f8ad0c27f094a58857c2f89e97974ab9235b92c52"
-dependencies = [
- "proc-macro2",
- "quote",
- "unicode-xid",
-]
-
-[[package]]
-name = "twox-hash"
-version = "1.6.3"
-dependencies = [
- "cfg-if",
- "digest 0.10.3",
- "digest 0.8.1",
- "digest 0.9.0",
- "rand",
- "serde",
- "serde_json",
- "static_assertions",
-]
-
-[[package]]
-name = "typenum"
-version = "1.15.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987"
-
-[[package]]
-name = "unicode-xid"
-version = "0.2.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "957e51f3646910546462e67d5f7599b9e4fb8acdd304b087a6494730f9eebf04"
-
-[[package]]
-name = "version_check"
-version = "0.9.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
-
-[[package]]
-name = "wasi"
-version = "0.10.2+wasi-snapshot-preview1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
diff --git a/crates/twox-hash/Cargo.toml b/crates/twox-hash/Cargo.toml
index 16aa1f4..8b4a07c 100644
--- a/crates/twox-hash/Cargo.toml
+++ b/crates/twox-hash/Cargo.toml
@@ -10,10 +10,17 @@
# See Cargo.toml.orig for the original contents.
[package]
-edition = "2018"
+edition = "2021"
+rust-version = "1.81"
name = "twox-hash"
-version = "1.6.3"
+version = "2.1.0"
authors = ["Jake Goulding <jake.goulding@gmail.com>"]
+build = false
+autolib = false
+autobins = false
+autoexamples = false
+autotests = false
+autobenches = false
description = "A Rust implementation of the XXHash and XXH3 algorithms"
documentation = "https://docs.rs/twox-hash/"
readme = "README.md"
@@ -27,45 +34,55 @@
license = "MIT"
repository = "https://github.com/shepmaster/twox-hash"
-[dependencies.cfg-if]
-version = ">= 0.1, < 2"
-default-features = false
+[package.metadata.docs.rs]
+all-features = true
-[dependencies.digest]
-version = "0.8"
-optional = true
-default-features = false
-package = "digest"
-
-[dependencies.digest_0_10]
-version = "0.10"
-optional = true
-default-features = false
-package = "digest"
-
-[dependencies.digest_0_9]
-version = "0.9"
-optional = true
-default-features = false
-package = "digest"
+[lib]
+name = "twox_hash"
+path = "src/lib.rs"
[dependencies.rand]
-version = ">= 0.3.10, < 0.9"
+version = "0.8.0"
+features = [
+ "std",
+ "std_rng",
+]
optional = true
+default-features = false
[dependencies.serde]
-version = "1.0"
+version = "1.0.0"
features = ["derive"]
optional = true
-
-[dependencies.static_assertions]
-version = "1.0"
default-features = false
[dev-dependencies.serde_json]
-version = "1.0"
+version = "1.0.117"
[features]
-default = ["std"]
-serialize = ["serde"]
-std = ["rand"]
+alloc = []
+default = [
+ "random",
+ "xxhash32",
+ "xxhash64",
+ "xxhash3_64",
+ "xxhash3_128",
+ "std",
+]
+random = ["dep:rand"]
+serialize = ["dep:serde"]
+std = ["alloc"]
+xxhash32 = []
+xxhash3_128 = []
+xxhash3_64 = []
+xxhash64 = []
+
+[lints.rust.unexpected_cfgs]
+level = "warn"
+priority = 0
+check-cfg = [
+ "cfg(_internal_xxhash3_force_scalar)",
+ "cfg(_internal_xxhash3_force_neon)",
+ "cfg(_internal_xxhash3_force_sse2)",
+ "cfg(_internal_xxhash3_force_avx2)",
+]
diff --git a/crates/twox-hash/METADATA b/crates/twox-hash/METADATA
index 894dc0b..9d24e2d 100644
--- a/crates/twox-hash/METADATA
+++ b/crates/twox-hash/METADATA
@@ -1,17 +1,17 @@
name: "twox-hash"
description: "A Rust implementation of the XXHash and XXH3 algorithms"
third_party {
- version: "1.6.3"
+ version: "2.1.0"
license_type: NOTICE
last_upgrade_date {
- year: 2024
- month: 1
- day: 17
+ year: 2025
+ month: 2
+ day: 6
}
homepage: "https://crates.io/crates/twox-hash"
identifier {
type: "Archive"
- value: "https://static.crates.io/crates/twox-hash/twox-hash-1.6.3.crate"
- version: "1.6.3"
+ value: "https://static.crates.io/crates/twox-hash/twox-hash-2.1.0.crate"
+ version: "2.1.0"
}
}
diff --git a/crates/twox-hash/README.md b/crates/twox-hash/README.md
index d8656f3..6939cf4 100644
--- a/crates/twox-hash/README.md
+++ b/crates/twox-hash/README.md
@@ -1,23 +1,59 @@
-# TwoX-Hash
+A Rust implementation of the [xxHash] algorithm.
-A Rust implementation of the [XXHash] algorithm.
+[![Crates.io][crates-badge]][crates-url]
+[![Documentation][docs-badge]][docs-url]
+[![Build Status][actions-badge]][actions-url]
-[](https://travis-ci.org/shepmaster/twox-hash) [](https://crates.io/crates/twox-hash)
+[xxHash]: https://github.com/Cyan4973/xxHash
-[Documentation](https://docs.rs/twox-hash/)
+[crates-badge]: https://img.shields.io/crates/v/twox-hash.svg
+[crates-url]: https://crates.io/crates/twox-hash
+[docs-badge]: https://img.shields.io/docsrs/twox-hash
+[docs-url]: https://docs.rs/twox-hash/
+[actions-badge]: https://github.com/shepmaster/twox-hash/actions/workflows/ci.yml/badge.svg?branch=main
+[actions-url]: https://github.com/shepmaster/twox-hash/actions/workflows/ci.yml?query=branch%3Amain
-[XXHash]: https://github.com/Cyan4973/xxHash
+# Examples
-## Examples
+These examples use [`XxHash64`][] but the same ideas can be
+used for [`XxHash32`][] or [`XxHash3_64`][].
-### With a fixed seed
+## Hashing arbitrary data
+
+### When all the data is available at once
```rust
-use std::hash::BuildHasherDefault;
-use std::collections::HashMap;
use twox_hash::XxHash64;
-let mut hash: HashMap<_, _, BuildHasherDefault<XxHash64>> = Default::default();
+let seed = 1234;
+let hash = XxHash64::oneshot(seed, b"some bytes");
+assert_eq!(0xeab5_5659_a496_d78b, hash);
+```
+
+### When the data is streaming
+
+```rust
+use std::hash::Hasher as _;
+use twox_hash::XxHash64;
+
+let seed = 1234;
+let mut hasher = XxHash64::with_seed(seed);
+hasher.write(b"some");
+hasher.write(b" ");
+hasher.write(b"bytes");
+let hash = hasher.finish();
+assert_eq!(0xeab5_5659_a496_d78b, hash);
+```
+
+## In a [`HashMap`][]
+
+### With a default seed
+
+```rust
+use std::{collections::HashMap, hash::BuildHasherDefault};
+use twox_hash::XxHash64;
+
+let mut hash = HashMap::<_, _, BuildHasherDefault<XxHash64>>::default();
hash.insert(42, "the answer");
assert_eq!(hash.get(&42), Some(&"the answer"));
```
@@ -26,73 +62,45 @@
```rust
use std::collections::HashMap;
-use twox_hash::RandomXxHashBuilder64;
+use twox_hash::xxhash64;
-let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default();
+let mut hash = HashMap::<_, _, xxhash64::RandomState>::default();
hash.insert(42, "the answer");
assert_eq!(hash.get(&42), Some(&"the answer"));
```
-## Benchmarks
+### With a fixed seed
-### 64-bit
+```rust
+use std::collections::HashMap;
+use twox_hash::xxhash64;
-| Bytes | SipHasher (MB/s) | XXHash (MB/s) | Ratio |
-|---------|------------------|---------------|-------|
-| 1 | 52 | 38 | 73% |
-| 4 | 210 | 148 | 70% |
-| 16 | 615 | 615 | 100% |
-| 32 | 914 | 1391 | 152% |
-| 128 | 1347 | 3657 | 271% |
-| 256 | 1414 | 5019 | 355% |
-| 512 | 1546 | 6168 | 399% |
-| 1024 | 1565 | 6206 | 397% |
-| 1048576 | 1592 | 7564 | 475% |
+let mut hash = HashMap::with_hasher(xxhash64::State::with_seed(0xdead_cafe));
+hash.insert(42, "the answer");
+assert_eq!(hash.get(&42), Some(&"the answer"));
+```
-| Bytes | [FnvHasher][fnv] (MB/s) | XXHash (MB/s) | Ratio |
-|---------|-------------------------|---------------|-------|
-| 1 | 1000 | 38 | 4% |
-| 4 | 800 | 148 | 19% |
-| 16 | 761 | 615 | 81% |
-| 32 | 761 | 1391 | 183% |
-| 128 | 727 | 3657 | 503% |
-| 256 | 759 | 5019 | 661% |
-| 512 | 745 | 6168 | 828% |
-| 1024 | 741 | 6206 | 838% |
-| 1048576 | 745 | 7564 | 1015% |
+# Feature Flags
-### 32-bit
+| name | description |
+|------------|---------------------------------------------------------------------------------------------------------|
+| xxhash32 | Include the [`XxHash32`][] algorithm |
+| xxhash64 | Include the [`XxHash64`][] algorithm |
+| xxhash3_64 | Include the [`XxHash3_64`][] algorithm |
+| random | Create random instances of the hashers |
+| serialize | Serialize and deserialize hasher state with Serde |
+| std | Use the Rust standard library. Enable this if you want SIMD support in [`XxHash3_64`][] |
+| alloc | Use the Rust allocator library. Enable this if you want to create [`XxHash3_64`][] with dynamic secrets |
-| Bytes | SipHasher (MB/s) | XXHash32 (MB/s) | Ratio |
-|---------|------------------|-----------------|-------|
-| 1 | 52 | 55 | 106% |
-| 4 | 210 | 210 | 100% |
-| 16 | 615 | 1230 | 200% |
-| 32 | 914 | 1882 | 206% |
-| 128 | 1347 | 3282 | 244% |
-| 256 | 1414 | 3459 | 245% |
-| 512 | 1546 | 3792 | 245% |
-| 1024 | 1565 | 3938 | 252% |
-| 1048576 | 1592 | 4127 | 259% |
+# Benchmarks
-| Bytes | [FnvHasher][fnv] (MB/s) | XXHash32 (MB/s) | Ratio |
-|---------|-------------------------|-----------------|-------|
-| 1 | 1000 | 55 | 6% |
-| 4 | 800 | 210 | 26% |
-| 16 | 761 | 1230 | 162% |
-| 32 | 761 | 1882 | 247% |
-| 128 | 727 | 3282 | 451% |
-| 256 | 759 | 3459 | 456% |
-| 512 | 745 | 3792 | 509% |
-| 1024 | 741 | 3938 | 531% |
-| 1048576 | 745 | 4127 | 554% |
+See benchmarks in the [comparison][] README.
+[comparison]: https://github.com/shepmaster/twox-hash/tree/main/comparison
-[fnv]: https://github.com/servo/rust-fnv
+# Contributing
-## Contributing
-
-1. Fork it ( https://github.com/shepmaster/twox-hash/fork )
+1. Fork it (<https://github.com/shepmaster/twox-hash/fork>)
2. Create your feature branch (`git checkout -b my-new-feature`)
3. Add a failing test.
4. Add code to pass the test.
@@ -100,3 +108,9 @@
6. Ensure tests pass.
7. Push to the branch (`git push origin my-new-feature`)
8. Create a new Pull Request
+
+
+[`Hashmap`]: std::collections::HashMap
+[`XxHash32`]: crate::XxHash32
+[`XxHash64`]: crate::XxHash64
+[`XxHash3_64`]: crate::XxHash3_64
diff --git a/crates/twox-hash/clippy.toml b/crates/twox-hash/clippy.toml
new file mode 100644
index 0000000..8483b87
--- /dev/null
+++ b/crates/twox-hash/clippy.toml
@@ -0,0 +1 @@
+check-private-items = true
diff --git a/crates/twox-hash/src/bin/hash_file.rs b/crates/twox-hash/src/bin/hash_file.rs
deleted file mode 100644
index 509b48d..0000000
--- a/crates/twox-hash/src/bin/hash_file.rs
+++ /dev/null
@@ -1,28 +0,0 @@
-use std::env;
-use std::fs::File;
-use std::hash::Hasher;
-use std::io::{BufRead, BufReader};
-use twox_hash::XxHash64;
-
-fn main() {
- for arg in env::args().skip(1) {
- let f = File::open(&arg).unwrap();
- let mut f = BufReader::new(f);
-
- let mut hasher = XxHash64::with_seed(0);
-
- loop {
- let consumed = {
- let bytes = f.fill_buf().unwrap();
- if bytes.is_empty() {
- break;
- }
- hasher.write(bytes);
- bytes.len()
- };
- f.consume(consumed);
- }
-
- println!("{:16x} {}", hasher.finish(), arg);
- }
-}
diff --git a/crates/twox-hash/src/digest_0_10_support.rs b/crates/twox-hash/src/digest_0_10_support.rs
deleted file mode 100644
index 935c096..0000000
--- a/crates/twox-hash/src/digest_0_10_support.rs
+++ /dev/null
@@ -1,92 +0,0 @@
-use core::hash::Hasher;
-
-use digest_0_10::{
- generic_array::typenum::consts::{U16, U4, U8},
- FixedOutput, HashMarker, Output, OutputSizeUser, Update,
-};
-
-use crate::{xxh3, XxHash32, XxHash64};
-
-// ----------
-
-impl Update for XxHash32 {
- fn update(&mut self, data: &[u8]) {
- self.write(data);
- }
-}
-
-impl OutputSizeUser for XxHash32 {
- type OutputSize = U4;
-}
-
-impl FixedOutput for XxHash32 {
- fn finalize_into(self, out: &mut Output<Self>) {
- let tmp: &mut [u8; 4] = out.as_mut();
- *tmp = self.finish().to_be_bytes();
- }
-}
-
-impl HashMarker for XxHash32 {}
-
-// ----------
-
-impl Update for XxHash64 {
- fn update(&mut self, data: &[u8]) {
- self.write(data);
- }
-}
-
-impl OutputSizeUser for XxHash64 {
- type OutputSize = U8;
-}
-
-impl FixedOutput for XxHash64 {
- fn finalize_into(self, out: &mut Output<Self>) {
- let tmp: &mut [u8; 8] = out.as_mut();
- *tmp = self.finish().to_be_bytes();
- }
-}
-
-impl HashMarker for XxHash64 {}
-
-// ----------
-
-impl Update for xxh3::Hash64 {
- fn update(&mut self, data: &[u8]) {
- self.write(data);
- }
-}
-
-impl OutputSizeUser for xxh3::Hash64 {
- type OutputSize = U8;
-}
-
-impl FixedOutput for xxh3::Hash64 {
- fn finalize_into(self, out: &mut Output<Self>) {
- let tmp: &mut [u8; 8] = out.as_mut();
- *tmp = self.finish().to_be_bytes();
- }
-}
-
-impl HashMarker for xxh3::Hash64 {}
-
-// ----------
-
-impl Update for xxh3::Hash128 {
- fn update(&mut self, data: &[u8]) {
- self.write(data);
- }
-}
-
-impl OutputSizeUser for xxh3::Hash128 {
- type OutputSize = U16;
-}
-
-impl FixedOutput for xxh3::Hash128 {
- fn finalize_into(self, out: &mut Output<Self>) {
- let tmp: &mut [u8; 16] = out.as_mut();
- *tmp = xxh3::HasherExt::finish_ext(&self).to_be_bytes();
- }
-}
-
-impl HashMarker for xxh3::Hash128 {}
diff --git a/crates/twox-hash/src/digest_0_9_support.rs b/crates/twox-hash/src/digest_0_9_support.rs
deleted file mode 100644
index 67788cd..0000000
--- a/crates/twox-hash/src/digest_0_9_support.rs
+++ /dev/null
@@ -1,179 +0,0 @@
-use core::hash::Hasher;
-
-use digest_0_9::{
- generic_array::{
- typenum::consts::{U16, U4, U8},
- GenericArray,
- },
- Digest,
-};
-
-use crate::{xxh3, XxHash32, XxHash64};
-
-impl Digest for XxHash32 {
- type OutputSize = U4;
-
- fn new() -> Self {
- Self::default()
- }
-
- fn update(&mut self, data: impl AsRef<[u8]>) {
- self.write(data.as_ref());
- }
-
- fn chain(mut self, data: impl AsRef<[u8]>) -> Self
- where
- Self: Sized,
- {
- self.update(data);
- self
- }
-
- fn finalize(self) -> GenericArray<u8, Self::OutputSize> {
- self.finish().to_be_bytes().into()
- }
-
- fn finalize_reset(&mut self) -> GenericArray<u8, Self::OutputSize> {
- let result = self.finalize();
- self.reset();
- result
- }
-
- fn reset(&mut self) {
- *self = Self::default();
- }
-
- fn output_size() -> usize {
- 4
- }
-
- fn digest(data: &[u8]) -> GenericArray<u8, Self::OutputSize> {
- Self::new().chain(data).finalize()
- }
-}
-
-impl Digest for XxHash64 {
- type OutputSize = U8;
-
- fn new() -> Self {
- Self::default()
- }
-
- fn update(&mut self, data: impl AsRef<[u8]>) {
- self.write(data.as_ref());
- }
-
- fn chain(mut self, data: impl AsRef<[u8]>) -> Self
- where
- Self: Sized,
- {
- self.update(data);
- self
- }
-
- fn finalize(self) -> GenericArray<u8, Self::OutputSize> {
- self.finish().to_be_bytes().into()
- }
-
- fn finalize_reset(&mut self) -> GenericArray<u8, Self::OutputSize> {
- let result = self.finalize();
- self.reset();
- result
- }
-
- fn reset(&mut self) {
- *self = Self::default();
- }
-
- fn output_size() -> usize {
- 8
- }
-
- fn digest(data: &[u8]) -> GenericArray<u8, Self::OutputSize> {
- Self::new().chain(data).finalize()
- }
-}
-
-impl Digest for xxh3::Hash64 {
- type OutputSize = U8;
-
- fn new() -> Self {
- Self::default()
- }
-
- fn update(&mut self, data: impl AsRef<[u8]>) {
- self.write(data.as_ref());
- }
-
- fn chain(mut self, data: impl AsRef<[u8]>) -> Self
- where
- Self: Sized,
- {
- self.update(data);
- self
- }
-
- fn finalize(self) -> GenericArray<u8, Self::OutputSize> {
- self.finish().to_be_bytes().into()
- }
-
- fn finalize_reset(&mut self) -> GenericArray<u8, Self::OutputSize> {
- let result = self.clone().finalize();
- self.reset();
- result
- }
-
- fn reset(&mut self) {
- *self = Self::default();
- }
-
- fn output_size() -> usize {
- 8
- }
-
- fn digest(data: &[u8]) -> GenericArray<u8, Self::OutputSize> {
- Self::new().chain(data).finalize()
- }
-}
-
-impl Digest for xxh3::Hash128 {
- type OutputSize = U16;
-
- fn new() -> Self {
- Self::default()
- }
-
- fn update(&mut self, data: impl AsRef<[u8]>) {
- self.write(data.as_ref());
- }
-
- fn chain(mut self, data: impl AsRef<[u8]>) -> Self
- where
- Self: Sized,
- {
- self.update(data);
- self
- }
-
- fn finalize(self) -> GenericArray<u8, Self::OutputSize> {
- xxh3::HasherExt::finish_ext(&self).to_be_bytes().into()
- }
-
- fn finalize_reset(&mut self) -> GenericArray<u8, Self::OutputSize> {
- let result = self.clone().finalize();
- self.reset();
- result
- }
-
- fn reset(&mut self) {
- *self = Self::default();
- }
-
- fn output_size() -> usize {
- 8
- }
-
- fn digest(data: &[u8]) -> GenericArray<u8, Self::OutputSize> {
- Self::new().chain(data).finalize()
- }
-}
diff --git a/crates/twox-hash/src/digest_support.rs b/crates/twox-hash/src/digest_support.rs
deleted file mode 100644
index 7b00b9d..0000000
--- a/crates/twox-hash/src/digest_support.rs
+++ /dev/null
@@ -1,179 +0,0 @@
-use core::hash::Hasher;
-
-use digest::{
- generic_array::{
- typenum::consts::{U16, U4, U8},
- GenericArray,
- },
- Digest,
-};
-
-use crate::{xxh3, XxHash32, XxHash64};
-
-impl Digest for XxHash32 {
- type OutputSize = U4;
-
- fn new() -> Self {
- Self::default()
- }
-
- fn input<B: AsRef<[u8]>>(&mut self, data: B) {
- self.write(data.as_ref());
- }
-
- fn chain<B: AsRef<[u8]>>(mut self, data: B) -> Self
- where
- Self: Sized,
- {
- self.input(data);
- self
- }
-
- fn result(self) -> GenericArray<u8, Self::OutputSize> {
- self.finish().to_be_bytes().into()
- }
-
- fn result_reset(&mut self) -> GenericArray<u8, Self::OutputSize> {
- let result = self.result();
- self.reset();
- result
- }
-
- fn reset(&mut self) {
- *self = Self::default();
- }
-
- fn output_size() -> usize {
- 4
- }
-
- fn digest(data: &[u8]) -> GenericArray<u8, Self::OutputSize> {
- Self::new().chain(data).result()
- }
-}
-
-impl Digest for XxHash64 {
- type OutputSize = U8;
-
- fn new() -> Self {
- Self::default()
- }
-
- fn input<B: AsRef<[u8]>>(&mut self, data: B) {
- self.write(data.as_ref());
- }
-
- fn chain<B: AsRef<[u8]>>(mut self, data: B) -> Self
- where
- Self: Sized,
- {
- self.input(data);
- self
- }
-
- fn result(self) -> GenericArray<u8, Self::OutputSize> {
- self.finish().to_be_bytes().into()
- }
-
- fn result_reset(&mut self) -> GenericArray<u8, Self::OutputSize> {
- let result = self.result();
- self.reset();
- result
- }
-
- fn reset(&mut self) {
- *self = Self::default();
- }
-
- fn output_size() -> usize {
- 8
- }
-
- fn digest(data: &[u8]) -> GenericArray<u8, Self::OutputSize> {
- Self::new().chain(data).result()
- }
-}
-
-impl Digest for xxh3::Hash64 {
- type OutputSize = U8;
-
- fn new() -> Self {
- Self::default()
- }
-
- fn input<B: AsRef<[u8]>>(&mut self, data: B) {
- self.write(data.as_ref());
- }
-
- fn chain<B: AsRef<[u8]>>(mut self, data: B) -> Self
- where
- Self: Sized,
- {
- self.input(data);
- self
- }
-
- fn result(self) -> GenericArray<u8, Self::OutputSize> {
- self.finish().to_be_bytes().into()
- }
-
- fn result_reset(&mut self) -> GenericArray<u8, Self::OutputSize> {
- let result = self.clone().result();
- self.reset();
- result
- }
-
- fn reset(&mut self) {
- *self = Self::default();
- }
-
- fn output_size() -> usize {
- 8
- }
-
- fn digest(data: &[u8]) -> GenericArray<u8, Self::OutputSize> {
- Self::new().chain(data).result()
- }
-}
-
-impl Digest for xxh3::Hash128 {
- type OutputSize = U16;
-
- fn new() -> Self {
- Self::default()
- }
-
- fn input<B: AsRef<[u8]>>(&mut self, data: B) {
- self.write(data.as_ref());
- }
-
- fn chain<B: AsRef<[u8]>>(mut self, data: B) -> Self
- where
- Self: Sized,
- {
- self.input(data);
- self
- }
-
- fn result(self) -> GenericArray<u8, Self::OutputSize> {
- xxh3::HasherExt::finish_ext(&self).to_be_bytes().into()
- }
-
- fn result_reset(&mut self) -> GenericArray<u8, Self::OutputSize> {
- let result = self.clone().result();
- self.reset();
- result
- }
-
- fn reset(&mut self) {
- *self = Self::default();
- }
-
- fn output_size() -> usize {
- 8
- }
-
- fn digest(data: &[u8]) -> GenericArray<u8, Self::OutputSize> {
- Self::new().chain(data).result()
- }
-}
diff --git a/crates/twox-hash/src/lib.rs b/crates/twox-hash/src/lib.rs
index 414dc8d..9d94ce1 100644
--- a/crates/twox-hash/src/lib.rs
+++ b/crates/twox-hash/src/lib.rs
@@ -1,121 +1,96 @@
-//! A Rust implementation of the [XXHash] algorithm.
-//!
-//! [XXHash]: https://github.com/Cyan4973/xxHash
-//!
-//! ### With a fixed seed
-//!
-//! ```rust
-//! use std::hash::BuildHasherDefault;
-//! use std::collections::HashMap;
-//! use twox_hash::XxHash64;
-//!
-//! let mut hash: HashMap<_, _, BuildHasherDefault<XxHash64>> = Default::default();
-//! hash.insert(42, "the answer");
-//! assert_eq!(hash.get(&42), Some(&"the answer"));
-//! ```
-//!
-//! ### With a random seed
-//!
-//! ```rust
-//! use std::collections::HashMap;
-//! use twox_hash::RandomXxHashBuilder64;
-//!
-//! let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default();
-//! hash.insert(42, "the answer");
-//! assert_eq!(hash.get(&42), Some(&"the answer"));
-//! ```
+#![doc = include_str!("../README.md")]
+#![deny(rust_2018_idioms)]
+#![deny(missing_docs)]
+#![deny(unnameable_types)]
+#![cfg_attr(not(feature = "std"), no_std)]
+#![cfg_attr(docsrs, feature(doc_cfg))]
-#![no_std]
-
+#[cfg(all(
+ feature = "alloc",
+ any(feature = "xxhash3_64", feature = "xxhash3_128")
+))]
extern crate alloc;
-#[cfg(test)]
+#[cfg(any(feature = "std", doc, test))]
extern crate std;
-use core::{marker::PhantomData, mem};
+#[cfg(feature = "xxhash32")]
+#[cfg_attr(docsrs, doc(cfg(feature = "xxhash32")))]
+pub mod xxhash32;
-mod sixty_four;
-mod thirty_two;
-pub mod xxh3;
+#[cfg(feature = "xxhash32")]
+#[cfg_attr(docsrs, doc(cfg(feature = "xxhash32")))]
+pub use xxhash32::Hasher as XxHash32;
-#[cfg(feature = "std")]
-mod std_support;
-#[cfg(feature = "std")]
-pub use std_support::sixty_four::RandomXxHashBuilder64;
-#[cfg(feature = "std")]
-pub use std_support::thirty_two::RandomXxHashBuilder32;
-#[cfg(feature = "std")]
-pub use std_support::xxh3::{
- RandomHashBuilder128 as RandomXxh3HashBuilder128,
- RandomHashBuilder64 as RandomXxh3HashBuilder64,
-};
+#[cfg(feature = "xxhash64")]
+#[cfg_attr(docsrs, doc(cfg(feature = "xxhash64")))]
+pub mod xxhash64;
-#[cfg(feature = "digest")]
-mod digest_support;
+#[cfg(feature = "xxhash64")]
+#[cfg_attr(docsrs, doc(cfg(feature = "xxhash64")))]
+pub use xxhash64::Hasher as XxHash64;
-#[cfg(feature = "digest_0_9")]
-mod digest_0_9_support;
+#[cfg(any(feature = "xxhash3_64", feature = "xxhash3_128"))]
+mod xxhash3;
-#[cfg(feature = "digest_0_10")]
-mod digest_0_10_support;
+#[cfg(feature = "xxhash3_64")]
+#[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_64")))]
+pub mod xxhash3_64;
-pub use crate::sixty_four::XxHash64;
-pub use crate::thirty_two::XxHash32;
-pub use crate::xxh3::{Hash128 as Xxh3Hash128, Hash64 as Xxh3Hash64};
+#[cfg(feature = "xxhash3_64")]
+#[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_64")))]
+pub use xxhash3_64::Hasher as XxHash3_64;
-/// A backwards compatibility type alias. Consider directly using
-/// `XxHash64` instead.
-pub type XxHash = XxHash64;
+#[cfg(feature = "xxhash3_128")]
+#[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_128")))]
+pub mod xxhash3_128;
-#[cfg(feature = "std")]
-/// A backwards compatibility type alias. Consider directly using
-/// `RandomXxHashBuilder64` instead.
-pub type RandomXxHashBuilder = RandomXxHashBuilder64;
+#[cfg(feature = "xxhash3_128")]
+#[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_128")))]
+pub use xxhash3_128::Hasher as XxHash3_128;
-/// An unaligned buffer with iteration support for `UnalignedItem`.
-struct UnalignedBuffer<'a, T> {
- buf: &'a [u8],
- phantom: PhantomData<T>,
+#[allow(dead_code, reason = "Too lazy to cfg-gate these")]
+trait IntoU32 {
+ fn into_u32(self) -> u32;
}
-/// Types implementing this trait must be transmutable from a `*const
-/// u8` to `*const Self` at any possible alignment.
-///
-/// The intent is to use this with only primitive integer types (and
-/// tightly-packed arrays of those integers).
-#[allow(clippy::missing_safety_doc)]
-unsafe trait UnalignedItem {}
-
-unsafe impl UnalignedItem for [u64; 4] {}
-unsafe impl UnalignedItem for [u32; 4] {}
-unsafe impl UnalignedItem for u64 {}
-unsafe impl UnalignedItem for u32 {}
-
-impl<'a, T: UnalignedItem> UnalignedBuffer<'a, T> {
- #[inline]
- fn new(buf: &'a [u8]) -> Self {
- Self {
- buf,
- phantom: PhantomData,
- }
- }
-
- #[inline]
- fn remaining(&self) -> &[u8] {
- self.buf
+impl IntoU32 for u8 {
+ fn into_u32(self) -> u32 {
+ self.into()
}
}
-impl<'a, T: UnalignedItem> Iterator for UnalignedBuffer<'a, T> {
- type Item = T;
+#[allow(dead_code, reason = "Too lazy to cfg-gate these")]
+trait IntoU64 {
+ fn into_u64(self) -> u64;
+}
- fn next(&mut self) -> Option<Self::Item> {
- let size = mem::size_of::<T>();
- self.buf.get(size..).map(|remaining| {
- // `self.buf` has at least `size` bytes that can be read as `T`.
- let result = unsafe { (self.buf.as_ptr() as *const T).read_unaligned() };
- self.buf = remaining;
- result
- })
+impl IntoU64 for u8 {
+ fn into_u64(self) -> u64 {
+ self.into()
+ }
+}
+
+impl IntoU64 for u32 {
+ fn into_u64(self) -> u64 {
+ self.into()
+ }
+}
+
+#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
+impl IntoU64 for usize {
+ fn into_u64(self) -> u64 {
+ self as u64
+ }
+}
+
+#[allow(dead_code, reason = "Too lazy to cfg-gate these")]
+trait IntoU128 {
+ fn into_u128(self) -> u128;
+}
+
+impl IntoU128 for u64 {
+ fn into_u128(self) -> u128 {
+ u128::from(self)
}
}
diff --git a/crates/twox-hash/src/sixty_four.rs b/crates/twox-hash/src/sixty_four.rs
deleted file mode 100644
index c151586..0000000
--- a/crates/twox-hash/src/sixty_four.rs
+++ /dev/null
@@ -1,413 +0,0 @@
-use crate::UnalignedBuffer;
-use core::{cmp, hash::Hasher};
-
-#[cfg(feature = "serialize")]
-use serde::{Deserialize, Serialize};
-
-const CHUNK_SIZE: usize = 32;
-
-pub const PRIME_1: u64 = 11_400_714_785_074_694_791;
-pub const PRIME_2: u64 = 14_029_467_366_897_019_727;
-pub const PRIME_3: u64 = 1_609_587_929_392_839_161;
-pub const PRIME_4: u64 = 9_650_029_242_287_828_579;
-pub const PRIME_5: u64 = 2_870_177_450_012_600_261;
-
-#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))]
-#[derive(Copy, Clone, PartialEq)]
-struct XxCore {
- v1: u64,
- v2: u64,
- v3: u64,
- v4: u64,
-}
-
-/// Calculates the 64-bit hash.
-#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))]
-#[derive(Debug, Copy, Clone, PartialEq)]
-pub struct XxHash64 {
- total_len: u64,
- seed: u64,
- core: XxCore,
- #[cfg_attr(feature = "serialize", serde(flatten))]
- buffer: Buffer,
-}
-
-impl XxCore {
- fn with_seed(seed: u64) -> XxCore {
- XxCore {
- v1: seed.wrapping_add(PRIME_1).wrapping_add(PRIME_2),
- v2: seed.wrapping_add(PRIME_2),
- v3: seed,
- v4: seed.wrapping_sub(PRIME_1),
- }
- }
-
- #[inline(always)]
- fn ingest_chunks<I>(&mut self, values: I)
- where
- I: IntoIterator<Item = [u64; 4]>,
- {
- #[inline(always)]
- fn ingest_one_number(mut current_value: u64, mut value: u64) -> u64 {
- value = value.wrapping_mul(PRIME_2);
- current_value = current_value.wrapping_add(value);
- current_value = current_value.rotate_left(31);
- current_value.wrapping_mul(PRIME_1)
- }
-
- // By drawing these out, we can avoid going back and forth to
- // memory. It only really helps for large files, when we need
- // to iterate multiple times here.
-
- let mut v1 = self.v1;
- let mut v2 = self.v2;
- let mut v3 = self.v3;
- let mut v4 = self.v4;
-
- for [n1, n2, n3, n4] in values {
- v1 = ingest_one_number(v1, n1.to_le());
- v2 = ingest_one_number(v2, n2.to_le());
- v3 = ingest_one_number(v3, n3.to_le());
- v4 = ingest_one_number(v4, n4.to_le());
- }
-
- self.v1 = v1;
- self.v2 = v2;
- self.v3 = v3;
- self.v4 = v4;
- }
-
- #[inline(always)]
- fn finish(&self) -> u64 {
- // The original code pulls out local vars for v[1234]
- // here. Performance tests did not show that to be effective
- // here, presumably because this method is not called in a
- // tight loop.
-
- #[allow(unknown_lints, clippy::needless_late_init)] // keeping things parallel
- let mut hash;
-
- hash = self.v1.rotate_left(1);
- hash = hash.wrapping_add(self.v2.rotate_left(7));
- hash = hash.wrapping_add(self.v3.rotate_left(12));
- hash = hash.wrapping_add(self.v4.rotate_left(18));
-
- #[inline(always)]
- fn mix_one(mut hash: u64, mut value: u64) -> u64 {
- value = value.wrapping_mul(PRIME_2);
- value = value.rotate_left(31);
- value = value.wrapping_mul(PRIME_1);
- hash ^= value;
- hash = hash.wrapping_mul(PRIME_1);
- hash.wrapping_add(PRIME_4)
- }
-
- hash = mix_one(hash, self.v1);
- hash = mix_one(hash, self.v2);
- hash = mix_one(hash, self.v3);
- hash = mix_one(hash, self.v4);
-
- hash
- }
-}
-
-impl core::fmt::Debug for XxCore {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> {
- write!(
- f,
- "XxCore {{ {:016x} {:016x} {:016x} {:016x} }}",
- self.v1, self.v2, self.v3, self.v4
- )
- }
-}
-
-#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))]
-#[derive(Debug, Copy, Clone, Default, PartialEq)]
-#[repr(align(8))]
-#[cfg_attr(feature = "serialize", serde(transparent))]
-struct AlignToU64<T>(T);
-
-#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))]
-#[derive(Debug, Copy, Clone, Default, PartialEq)]
-struct Buffer {
- #[cfg_attr(feature = "serialize", serde(rename = "buffer"))]
- data: AlignToU64<[u8; CHUNK_SIZE]>,
- #[cfg_attr(feature = "serialize", serde(rename = "buffer_usage"))]
- len: usize,
-}
-
-impl Buffer {
- fn data(&self) -> &[u8] {
- &self.data.0[..self.len]
- }
-
- /// Consumes as much of the parameter as it can, returning the unused part.
- fn consume<'a>(&mut self, data: &'a [u8]) -> &'a [u8] {
- let to_use = cmp::min(self.available(), data.len());
- let (data, remaining) = data.split_at(to_use);
- self.data.0[self.len..][..to_use].copy_from_slice(data);
- self.len += to_use;
- remaining
- }
-
- fn set_data(&mut self, data: &[u8]) {
- debug_assert!(self.is_empty());
- debug_assert!(data.len() < CHUNK_SIZE);
- self.data.0[..data.len()].copy_from_slice(data);
- self.len = data.len();
- }
-
- fn available(&self) -> usize {
- CHUNK_SIZE - self.len
- }
-
- fn is_empty(&self) -> bool {
- self.len == 0
- }
-
- fn is_full(&self) -> bool {
- self.len == CHUNK_SIZE
- }
-}
-
-impl XxHash64 {
- /// Constructs the hash with an initial seed
- pub fn with_seed(seed: u64) -> XxHash64 {
- XxHash64 {
- total_len: 0,
- seed,
- core: XxCore::with_seed(seed),
- buffer: Buffer::default(),
- }
- }
-
- pub(crate) fn write(&mut self, bytes: &[u8]) {
- let remaining = self.maybe_consume_bytes(bytes);
- if !remaining.is_empty() {
- let mut remaining = UnalignedBuffer::new(remaining);
- self.core.ingest_chunks(&mut remaining);
- self.buffer.set_data(remaining.remaining());
- }
- self.total_len += bytes.len() as u64;
- }
-
- // Consume bytes and try to make `self.buffer` empty.
- // If there are not enough bytes, `self.buffer` can be non-empty, and this
- // function returns an empty slice.
- fn maybe_consume_bytes<'a>(&mut self, data: &'a [u8]) -> &'a [u8] {
- if self.buffer.is_empty() {
- data
- } else {
- let data = self.buffer.consume(data);
- if self.buffer.is_full() {
- let mut u64s = UnalignedBuffer::new(self.buffer.data());
- self.core.ingest_chunks(&mut u64s);
- debug_assert!(u64s.remaining().is_empty());
- self.buffer.len = 0;
- }
- data
- }
- }
-
- pub(crate) fn finish(&self) -> u64 {
- let mut hash = if self.total_len >= CHUNK_SIZE as u64 {
- // We have processed at least one full chunk
- self.core.finish()
- } else {
- self.seed.wrapping_add(PRIME_5)
- };
-
- hash = hash.wrapping_add(self.total_len);
-
- let mut buffered_u64s = UnalignedBuffer::<u64>::new(self.buffer.data());
- for buffered_u64 in &mut buffered_u64s {
- let mut k1 = buffered_u64.to_le().wrapping_mul(PRIME_2);
- k1 = k1.rotate_left(31);
- k1 = k1.wrapping_mul(PRIME_1);
- hash ^= k1;
- hash = hash.rotate_left(27);
- hash = hash.wrapping_mul(PRIME_1);
- hash = hash.wrapping_add(PRIME_4);
- }
-
- let mut buffered_u32s = UnalignedBuffer::<u32>::new(buffered_u64s.remaining());
- for buffered_u32 in &mut buffered_u32s {
- let k1 = u64::from(buffered_u32.to_le()).wrapping_mul(PRIME_1);
- hash ^= k1;
- hash = hash.rotate_left(23);
- hash = hash.wrapping_mul(PRIME_2);
- hash = hash.wrapping_add(PRIME_3);
- }
-
- let buffered_u8s = buffered_u32s.remaining();
- for &buffered_u8 in buffered_u8s {
- let k1 = u64::from(buffered_u8).wrapping_mul(PRIME_5);
- hash ^= k1;
- hash = hash.rotate_left(11);
- hash = hash.wrapping_mul(PRIME_1);
- }
-
- // The final intermixing
- hash ^= hash >> 33;
- hash = hash.wrapping_mul(PRIME_2);
- hash ^= hash >> 29;
- hash = hash.wrapping_mul(PRIME_3);
- hash ^= hash >> 32;
-
- hash
- }
-
- pub fn seed(&self) -> u64 {
- self.seed
- }
-
- pub fn total_len(&self) -> u64 {
- self.total_len
- }
-}
-
-impl Default for XxHash64 {
- fn default() -> XxHash64 {
- XxHash64::with_seed(0)
- }
-}
-
-impl Hasher for XxHash64 {
- fn finish(&self) -> u64 {
- XxHash64::finish(self)
- }
-
- fn write(&mut self, bytes: &[u8]) {
- XxHash64::write(self, bytes)
- }
-}
-
-#[cfg(feature = "std")]
-pub use crate::std_support::sixty_four::RandomXxHashBuilder64;
-
-#[cfg(test)]
-mod test {
- use super::{RandomXxHashBuilder64, XxHash64};
- use std::collections::HashMap;
- use std::hash::BuildHasherDefault;
- use std::prelude::v1::*;
-
- #[test]
- fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() {
- let bytes: Vec<_> = (0..32).map(|_| 0).collect();
-
- let mut byte_by_byte = XxHash64::with_seed(0);
- for byte in bytes.chunks(1) {
- byte_by_byte.write(byte);
- }
-
- let mut one_chunk = XxHash64::with_seed(0);
- one_chunk.write(&bytes);
-
- assert_eq!(byte_by_byte.core, one_chunk.core);
- }
-
- #[test]
- fn hash_of_nothing_matches_c_implementation() {
- let mut hasher = XxHash64::with_seed(0);
- hasher.write(&[]);
- assert_eq!(hasher.finish(), 0xef46_db37_51d8_e999);
- }
-
- #[test]
- fn hash_of_single_byte_matches_c_implementation() {
- let mut hasher = XxHash64::with_seed(0);
- hasher.write(&[42]);
- assert_eq!(hasher.finish(), 0x0a9e_dece_beb0_3ae4);
- }
-
- #[test]
- fn hash_of_multiple_bytes_matches_c_implementation() {
- let mut hasher = XxHash64::with_seed(0);
- hasher.write(b"Hello, world!\0");
- assert_eq!(hasher.finish(), 0x7b06_c531_ea43_e89f);
- }
-
- #[test]
- fn hash_of_multiple_chunks_matches_c_implementation() {
- let bytes: Vec<_> = (0..100).collect();
- let mut hasher = XxHash64::with_seed(0);
- hasher.write(&bytes);
- assert_eq!(hasher.finish(), 0x6ac1_e580_3216_6597);
- }
-
- #[test]
- fn hash_with_different_seed_matches_c_implementation() {
- let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91);
- hasher.write(&[]);
- assert_eq!(hasher.finish(), 0x4b6a_04fc_df7a_4672);
- }
-
- #[test]
- fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() {
- let bytes: Vec<_> = (0..100).collect();
- let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91);
- hasher.write(&bytes);
- assert_eq!(hasher.finish(), 0x567e_355e_0682_e1f1);
- }
-
- #[test]
- fn can_be_used_in_a_hashmap_with_a_default_seed() {
- let mut hash: HashMap<_, _, BuildHasherDefault<XxHash64>> = Default::default();
- hash.insert(42, "the answer");
- assert_eq!(hash.get(&42), Some(&"the answer"));
- }
-
- #[test]
- fn can_be_used_in_a_hashmap_with_a_random_seed() {
- let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default();
- hash.insert(42, "the answer");
- assert_eq!(hash.get(&42), Some(&"the answer"));
- }
-
- #[cfg(feature = "serialize")]
- type TestResult<T = ()> = Result<T, Box<dyn std::error::Error>>;
-
- #[cfg(feature = "serialize")]
- #[test]
- fn test_serialization_cycle() -> TestResult {
- let mut hasher = XxHash64::with_seed(0);
- hasher.write(b"Hello, world!\0");
- hasher.finish();
-
- let serialized = serde_json::to_string(&hasher)?;
- let unserialized: XxHash64 = serde_json::from_str(&serialized)?;
- assert_eq!(hasher, unserialized);
- Ok(())
- }
-
- #[cfg(feature = "serialize")]
- #[test]
- fn test_serialization_stability() -> TestResult {
- let mut hasher = XxHash64::with_seed(0);
- hasher.write(b"Hello, world!\0");
- hasher.finish();
-
- let serialized = r#"{
- "total_len": 14,
- "seed": 0,
- "core": {
- "v1": 6983438078262162902,
- "v2": 14029467366897019727,
- "v3": 0,
- "v4": 7046029288634856825
- },
- "buffer": [
- 72, 101, 108, 108, 111, 44, 32, 119,
- 111, 114, 108, 100, 33, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0
- ],
- "buffer_usage": 14
- }"#;
-
- let unserialized: XxHash64 = serde_json::from_str(serialized).unwrap();
- assert_eq!(hasher, unserialized);
- Ok(())
- }
-}
diff --git a/crates/twox-hash/src/std_support.rs b/crates/twox-hash/src/std_support.rs
deleted file mode 100644
index d79085e..0000000
--- a/crates/twox-hash/src/std_support.rs
+++ /dev/null
@@ -1,113 +0,0 @@
-pub mod sixty_four {
- use crate::XxHash64;
- use core::hash::BuildHasher;
- use rand::{self, Rng};
-
- #[derive(Clone)]
- /// Constructs a randomized seed and reuses it for multiple hasher instances.
- pub struct RandomXxHashBuilder64(u64);
-
- impl RandomXxHashBuilder64 {
- fn new() -> RandomXxHashBuilder64 {
- RandomXxHashBuilder64(rand::thread_rng().gen())
- }
- }
-
- impl Default for RandomXxHashBuilder64 {
- fn default() -> RandomXxHashBuilder64 {
- RandomXxHashBuilder64::new()
- }
- }
-
- impl BuildHasher for RandomXxHashBuilder64 {
- type Hasher = XxHash64;
-
- fn build_hasher(&self) -> XxHash64 {
- XxHash64::with_seed(self.0)
- }
- }
-}
-
-pub mod thirty_two {
- use crate::XxHash32;
- use core::hash::BuildHasher;
- use rand::{self, Rng};
-
- #[derive(Clone)]
- /// Constructs a randomized seed and reuses it for multiple hasher instances. See the usage warning on `XxHash32`.
- pub struct RandomXxHashBuilder32(u32);
-
- impl RandomXxHashBuilder32 {
- fn new() -> RandomXxHashBuilder32 {
- RandomXxHashBuilder32(rand::thread_rng().gen())
- }
- }
-
- impl Default for RandomXxHashBuilder32 {
- fn default() -> RandomXxHashBuilder32 {
- RandomXxHashBuilder32::new()
- }
- }
-
- impl BuildHasher for RandomXxHashBuilder32 {
- type Hasher = XxHash32;
-
- fn build_hasher(&self) -> XxHash32 {
- XxHash32::with_seed(self.0)
- }
- }
-}
-
-pub mod xxh3 {
- use crate::xxh3::{Hash128, Hash64};
- use core::hash::BuildHasher;
- use rand::{self, Rng};
-
- #[derive(Clone)]
- /// Constructs a randomized seed and reuses it for multiple hasher instances.
- pub struct RandomHashBuilder64(u64);
-
- impl RandomHashBuilder64 {
- fn new() -> RandomHashBuilder64 {
- RandomHashBuilder64(rand::thread_rng().gen())
- }
- }
-
- impl Default for RandomHashBuilder64 {
- fn default() -> RandomHashBuilder64 {
- RandomHashBuilder64::new()
- }
- }
-
- impl BuildHasher for RandomHashBuilder64 {
- type Hasher = Hash64;
-
- fn build_hasher(&self) -> Hash64 {
- Hash64::with_seed(self.0)
- }
- }
-
- #[derive(Clone)]
- /// Constructs a randomized seed and reuses it for multiple hasher instances.
- pub struct RandomHashBuilder128(u64);
-
- impl RandomHashBuilder128 {
- fn new() -> RandomHashBuilder128 {
- RandomHashBuilder128(rand::thread_rng().gen())
- }
- }
-
- impl Default for RandomHashBuilder128 {
- fn default() -> RandomHashBuilder128 {
- RandomHashBuilder128::new()
- }
- }
-
- impl BuildHasher for RandomHashBuilder128 {
- type Hasher = Hash128;
-
- fn build_hasher(&self) -> Hash128 {
- Hash128::with_seed(self.0)
- }
- }
-}
diff --git a/crates/twox-hash/src/thirty_two.rs b/crates/twox-hash/src/thirty_two.rs
deleted file mode 100644
index cfa44cd..0000000
--- a/crates/twox-hash/src/thirty_two.rs
+++ /dev/null
@@ -1,416 +0,0 @@
-use crate::UnalignedBuffer;
-use core::{cmp, hash::Hasher};
-
-#[cfg(feature = "serialize")]
-use serde::{Deserialize, Serialize};
-
-const CHUNK_SIZE: usize = 16;
-
-pub const PRIME_1: u32 = 2_654_435_761;
-pub const PRIME_2: u32 = 2_246_822_519;
-pub const PRIME_3: u32 = 3_266_489_917;
-pub const PRIME_4: u32 = 668_265_263;
-pub const PRIME_5: u32 = 374_761_393;
-
-#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))]
-#[derive(Copy, Clone, PartialEq)]
-struct XxCore {
- v1: u32,
- v2: u32,
- v3: u32,
- v4: u32,
-}
-
-/// Calculates the 32-bit hash. Care should be taken when using this
-/// hash.
-///
-/// Although this struct implements `Hasher`, it only calculates a
-/// 32-bit number, leaving the upper bits as 0. This means it is
-/// unlikely to be correct to use this in places like a `HashMap`.
-#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))]
-#[derive(Debug, Copy, Clone, PartialEq)]
-pub struct XxHash32 {
- total_len: u64,
- seed: u32,
- core: XxCore,
- #[cfg_attr(feature = "serialize", serde(flatten))]
- buffer: Buffer,
-}
-
-impl XxCore {
- fn with_seed(seed: u32) -> XxCore {
- XxCore {
- v1: seed.wrapping_add(PRIME_1).wrapping_add(PRIME_2),
- v2: seed.wrapping_add(PRIME_2),
- v3: seed,
- v4: seed.wrapping_sub(PRIME_1),
- }
- }
-
- #[inline(always)]
- fn ingest_chunks<I>(&mut self, values: I)
- where
- I: IntoIterator<Item = [u32; 4]>,
- {
- #[inline(always)]
- fn ingest_one_number(mut current_value: u32, mut value: u32) -> u32 {
- value = value.wrapping_mul(PRIME_2);
- current_value = current_value.wrapping_add(value);
- current_value = current_value.rotate_left(13);
- current_value.wrapping_mul(PRIME_1)
- }
-
- // By drawing these out, we can avoid going back and forth to
- // memory. It only really helps for large files, when we need
- // to iterate multiple times here.
-
- let mut v1 = self.v1;
- let mut v2 = self.v2;
- let mut v3 = self.v3;
- let mut v4 = self.v4;
-
- for [n1, n2, n3, n4] in values {
- v1 = ingest_one_number(v1, n1.to_le());
- v2 = ingest_one_number(v2, n2.to_le());
- v3 = ingest_one_number(v3, n3.to_le());
- v4 = ingest_one_number(v4, n4.to_le());
- }
-
- self.v1 = v1;
- self.v2 = v2;
- self.v3 = v3;
- self.v4 = v4;
- }
-
- #[inline(always)]
- fn finish(&self) -> u32 {
- // The original code pulls out local vars for v[1234]
- // here. Performance tests did not show that to be effective
- // here, presumably because this method is not called in a
- // tight loop.
-
- #[allow(unknown_lints, clippy::needless_late_init)] // keeping things parallel
- let mut hash;
-
- hash = self.v1.rotate_left(1);
- hash = hash.wrapping_add(self.v2.rotate_left(7));
- hash = hash.wrapping_add(self.v3.rotate_left(12));
- hash = hash.wrapping_add(self.v4.rotate_left(18));
-
- hash
- }
-}
-
-impl core::fmt::Debug for XxCore {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> {
- write!(
- f,
- "XxCore {{ {:016x} {:016x} {:016x} {:016x} }}",
- self.v1, self.v2, self.v3, self.v4
- )
- }
-}
-
-#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))]
-#[derive(Debug, Copy, Clone, Default, PartialEq)]
-#[repr(align(4))]
-#[cfg_attr(feature = "serialize", serde(transparent))]
-struct AlignToU32<T>(T);
-
-#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))]
-#[derive(Debug, Copy, Clone, Default, PartialEq)]
-struct Buffer {
- #[cfg_attr(feature = "serialize", serde(rename = "buffer"))]
- data: AlignToU32<[u8; CHUNK_SIZE]>,
- #[cfg_attr(feature = "serialize", serde(rename = "buffer_usage"))]
- len: usize,
-}
-
-impl Buffer {
- fn data(&self) -> &[u8] {
- &self.data.0[..self.len]
- }
-
- /// Consumes as much of the parameter as it can, returning the unused part.
- fn consume<'a>(&mut self, data: &'a [u8]) -> &'a [u8] {
- let to_use = cmp::min(self.available(), data.len());
- let (data, remaining) = data.split_at(to_use);
- self.data.0[self.len..][..to_use].copy_from_slice(data);
- self.len += to_use;
- remaining
- }
-
- fn set_data(&mut self, data: &[u8]) {
- debug_assert!(self.is_empty());
- debug_assert!(data.len() < CHUNK_SIZE);
- self.data.0[..data.len()].copy_from_slice(data);
- self.len = data.len();
- }
-
- fn available(&self) -> usize {
- CHUNK_SIZE - self.len
- }
-
- fn is_empty(&self) -> bool {
- self.len == 0
- }
-
- fn is_full(&self) -> bool {
- self.len == CHUNK_SIZE
- }
-}
-
-impl XxHash32 {
- /// Constructs the hash with an initial seed
- pub fn with_seed(seed: u32) -> XxHash32 {
- XxHash32 {
- total_len: 0,
- seed,
- core: XxCore::with_seed(seed),
- buffer: Buffer::default(),
- }
- }
-
- pub(crate) fn write(&mut self, bytes: &[u8]) {
- let remaining = self.maybe_consume_bytes(bytes);
- if !remaining.is_empty() {
- let mut remaining = UnalignedBuffer::new(remaining);
- self.core.ingest_chunks(&mut remaining);
- self.buffer.set_data(remaining.remaining());
- }
- self.total_len += bytes.len() as u64;
- }
-
- // Consume bytes and try to make `self.buffer` empty.
- // If there are not enough bytes, `self.buffer` can be non-empty, and this
- // function returns an empty slice.
- fn maybe_consume_bytes<'a>(&mut self, data: &'a [u8]) -> &'a [u8] {
- if self.buffer.is_empty() {
- data
- } else {
- let data = self.buffer.consume(data);
- if self.buffer.is_full() {
- let mut u32s = UnalignedBuffer::new(self.buffer.data());
- self.core.ingest_chunks(&mut u32s);
- debug_assert!(u32s.remaining().is_empty());
- self.buffer.len = 0;
- }
- data
- }
- }
-
- pub(crate) fn finish(&self) -> u32 {
- let mut hash = if self.total_len >= CHUNK_SIZE as u64 {
- // We have processed at least one full chunk
- self.core.finish()
- } else {
- self.seed.wrapping_add(PRIME_5)
- };
-
- hash = hash.wrapping_add(self.total_len as u32);
-
- let mut buffered_u32s = UnalignedBuffer::<u32>::new(self.buffer.data());
- for buffered_u32 in &mut buffered_u32s {
- let k1 = buffered_u32.to_le().wrapping_mul(PRIME_3);
- hash = hash.wrapping_add(k1);
- hash = hash.rotate_left(17);
- hash = hash.wrapping_mul(PRIME_4);
- }
-
- let buffered_u8s = buffered_u32s.remaining();
- for &buffered_u8 in buffered_u8s {
- let k1 = u32::from(buffered_u8).wrapping_mul(PRIME_5);
- hash = hash.wrapping_add(k1);
- hash = hash.rotate_left(11);
- hash = hash.wrapping_mul(PRIME_1);
- }
-
- // The final intermixing
- hash ^= hash >> 15;
- hash = hash.wrapping_mul(PRIME_2);
- hash ^= hash >> 13;
- hash = hash.wrapping_mul(PRIME_3);
- hash ^= hash >> 16;
-
- hash
- }
-
- pub fn seed(&self) -> u32 {
- self.seed
- }
-
- /// Get the total number of bytes hashed, truncated to 32 bits.
- /// For the full 64-bit byte count, use `total_len_64`
- pub fn total_len(&self) -> u32 {
- self.total_len as u32
- }
-
- /// Get the total number of bytes hashed.
- pub fn total_len_64(&self) -> u64 {
- self.total_len
- }
-}
-
-impl Default for XxHash32 {
- fn default() -> XxHash32 {
- XxHash32::with_seed(0)
- }
-}
-
-impl Hasher for XxHash32 {
- fn finish(&self) -> u64 {
- u64::from(XxHash32::finish(self))
- }
-
- fn write(&mut self, bytes: &[u8]) {
- XxHash32::write(self, bytes)
- }
-}
-
-#[cfg(feature = "std")]
-pub use crate::std_support::thirty_two::RandomXxHashBuilder32;
-
-#[cfg(test)]
-mod test {
- use super::{RandomXxHashBuilder32, XxHash32};
- use std::collections::HashMap;
- use std::hash::BuildHasherDefault;
- use std::prelude::v1::*;
-
- #[test]
- fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() {
- let bytes: Vec<_> = (0..32).map(|_| 0).collect();
-
- let mut byte_by_byte = XxHash32::with_seed(0);
- for byte in bytes.chunks(1) {
- byte_by_byte.write(byte);
- }
-
- let mut one_chunk = XxHash32::with_seed(0);
- one_chunk.write(&bytes);
-
- assert_eq!(byte_by_byte.core, one_chunk.core);
- }
-
- #[test]
- fn hash_of_nothing_matches_c_implementation() {
- let mut hasher = XxHash32::with_seed(0);
- hasher.write(&[]);
- assert_eq!(hasher.finish(), 0x02cc_5d05);
- }
-
- #[test]
- fn hash_of_single_byte_matches_c_implementation() {
- let mut hasher = XxHash32::with_seed(0);
- hasher.write(&[42]);
- assert_eq!(hasher.finish(), 0xe0fe_705f);
- }
-
- #[test]
- fn hash_of_multiple_bytes_matches_c_implementation() {
- let mut hasher = XxHash32::with_seed(0);
- hasher.write(b"Hello, world!\0");
- assert_eq!(hasher.finish(), 0x9e5e_7e93);
- }
-
- #[test]
- fn hash_of_multiple_chunks_matches_c_implementation() {
- let bytes: Vec<_> = (0..100).collect();
- let mut hasher = XxHash32::with_seed(0);
- hasher.write(&bytes);
- assert_eq!(hasher.finish(), 0x7f89_ba44);
- }
-
- #[test]
- fn hash_with_different_seed_matches_c_implementation() {
- let mut hasher = XxHash32::with_seed(0x42c9_1977);
- hasher.write(&[]);
- assert_eq!(hasher.finish(), 0xd6bf_8459);
- }
-
- #[test]
- fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() {
- let bytes: Vec<_> = (0..100).collect();
- let mut hasher = XxHash32::with_seed(0x42c9_1977);
- hasher.write(&bytes);
- assert_eq!(hasher.finish(), 0x6d2f_6c17);
- }
-
- #[test]
- fn can_be_used_in_a_hashmap_with_a_default_seed() {
- let mut hash: HashMap<_, _, BuildHasherDefault<XxHash32>> = Default::default();
- hash.insert(42, "the answer");
- assert_eq!(hash.get(&42), Some(&"the answer"));
- }
-
- #[test]
- fn can_be_used_in_a_hashmap_with_a_random_seed() {
- let mut hash: HashMap<_, _, RandomXxHashBuilder32> = Default::default();
- hash.insert(42, "the answer");
- assert_eq!(hash.get(&42), Some(&"the answer"));
- }
-
- #[cfg(feature = "serialize")]
- type TestResult<T = ()> = Result<T, Box<dyn std::error::Error>>;
-
- #[cfg(feature = "serialize")]
- #[test]
- fn test_serialization_cycle() -> TestResult {
- let mut hasher = XxHash32::with_seed(0);
- hasher.write(b"Hello, world!\0");
- hasher.finish();
-
- let serialized = serde_json::to_string(&hasher)?;
- let unserialized: XxHash32 = serde_json::from_str(&serialized)?;
- assert_eq!(hasher, unserialized);
- Ok(())
- }
-
- #[cfg(feature = "serialize")]
- #[test]
- fn test_serialization_stability() -> TestResult {
- let mut hasher = XxHash32::with_seed(0);
- hasher.write(b"Hello, world!\0");
- hasher.finish();
-
- let serialized = r#"{
- "total_len": 14,
- "seed": 0,
- "core": {
- "v1": 606290984,
- "v2": 2246822519,
- "v3": 0,
- "v4": 1640531535
- },
- "buffer": [
- 72, 101, 108, 108, 111, 44, 32, 119,
- 111, 114, 108, 100, 33, 0, 0, 0
- ],
- "buffer_usage": 14
- }"#;
-
- let unserialized: XxHash32 = serde_json::from_str(serialized).unwrap();
- assert_eq!(hasher, unserialized);
- Ok(())
- }
-
- // This test validates wraparound/truncation behavior for very large inputs
- // of a 32-bit hash, but runs very slowly in the normal "cargo test"
- // build config since it hashes 4.3GB of data. It runs reasonably quick
- // under "cargo test --release".
- /*
- #[test]
- fn len_overflow_32bit() {
- // Hash 4.3 billion (4_300_000_000) bytes, which overflows a u32.
- let bytes200: Vec<u8> = (0..200).collect();
- let mut hasher = XxHash32::with_seed(0);
- for _ in 0..(4_300_000_000u64 / 200u64) {
- hasher.write(&bytes200);
- }
- assert_eq!(hasher.total_len_64(), 0x0000_0001_004c_cb00);
- assert_eq!(hasher.total_len(), 0x004c_cb00);
- // retult is tested against the C implementation
- assert_eq!(hasher.finish(), 0x1522_4ca7);
- }
- */
-}
diff --git a/crates/twox-hash/src/xxh3.rs b/crates/twox-hash/src/xxh3.rs
deleted file mode 100644
index 0ffc541..0000000
--- a/crates/twox-hash/src/xxh3.rs
+++ /dev/null
@@ -1,1666 +0,0 @@
-//! The in-progress XXH3 algorithm.
-//!
-//! Please read [the notes in original implementation][warning] to
-//! learn about when to use these algorithms. Specifically, the
-//! version of code this crate reproduces says:
-//!
-//! > The algorithm is currently in development, meaning its return
-//! values might still change in future versions. However, the API
-//! is stable, and can be used in production, typically for
-//! generation of ephemeral hashes (produced and consumed in same
-//! session).
-//!
-//! [warning]: https://github.com/Cyan4973/xxHash#new-hash-algorithms
-
-use alloc::vec::Vec;
-
-use core::convert::TryInto;
-use core::hash::Hasher;
-use core::mem;
-use core::ops::{Deref, DerefMut};
-use core::slice;
-
-#[cfg(target_arch = "x86")]
-use core::arch::x86::*;
-#[cfg(target_arch = "x86_64")]
-use core::arch::x86_64::*;
-
-use cfg_if::cfg_if;
-use static_assertions::{const_assert, const_assert_eq};
-
-#[cfg(feature = "serialize")]
-use serde::{Deserialize, Serialize};
-
-use crate::sixty_four::{
- PRIME_1 as PRIME64_1, PRIME_2 as PRIME64_2, PRIME_3 as PRIME64_3, PRIME_4 as PRIME64_4,
- PRIME_5 as PRIME64_5,
-};
-use crate::thirty_two::{PRIME_1 as PRIME32_1, PRIME_2 as PRIME32_2, PRIME_3 as PRIME32_3};
-
-#[cfg(feature = "std")]
-pub use crate::std_support::xxh3::{RandomHashBuilder128, RandomHashBuilder64};
-
-#[inline(always)]
-pub fn hash64(data: &[u8]) -> u64 {
- hash64_with_seed(data, 0)
-}
-
-#[inline(always)]
-pub fn hash64_with_seed(data: &[u8], seed: u64) -> u64 {
- let len = data.len();
-
- if len <= 16 {
- hash_len_0to16_64bits(data, len, &SECRET, seed)
- } else if len <= 128 {
- hash_len_17to128_64bits(data, len, &SECRET, seed)
- } else if len <= MIDSIZE_MAX {
- hash_len_129to240_64bits(data, len, &SECRET, seed)
- } else {
- hash_long_64bits_with_seed(data, len, seed)
- }
-}
-
-#[inline(always)]
-pub fn hash64_with_secret(data: &[u8], secret: &[u8]) -> u64 {
- debug_assert!(secret.len() >= SECRET_SIZE_MIN);
-
- let len = data.len();
-
- if len <= 16 {
- hash_len_0to16_64bits(data, len, secret, 0)
- } else if len <= 128 {
- hash_len_17to128_64bits(data, len, secret, 0)
- } else if len <= MIDSIZE_MAX {
- hash_len_129to240_64bits(data, len, secret, 0)
- } else {
- hash_long_64bits_with_secret(data, len, secret)
- }
-}
-
-#[inline(always)]
-pub fn hash128(data: &[u8]) -> u128 {
- hash128_with_seed(data, 0)
-}
-
-#[inline(always)]
-pub fn hash128_with_seed(data: &[u8], seed: u64) -> u128 {
- let len = data.len();
-
- if len <= 16 {
- hash_len_0to16_128bits(data, len, &SECRET, seed)
- } else if len <= 128 {
- hash_len_17to128_128bits(data, len, &SECRET, seed)
- } else if len <= MIDSIZE_MAX {
- hash_len_129to240_128bits(data, len, &SECRET, seed)
- } else {
- hash_long_128bits_with_seed(data, len, seed)
- }
-}
-
-#[inline(always)]
-pub fn hash128_with_secret(data: &[u8], secret: &[u8]) -> u128 {
- debug_assert!(secret.len() >= SECRET_SIZE_MIN);
-
- let len = data.len();
-
- if len <= 16 {
- hash_len_0to16_128bits(data, len, secret, 0)
- } else if len <= 128 {
- hash_len_17to128_128bits(data, len, secret, 0)
- } else if len <= MIDSIZE_MAX {
- hash_len_129to240_128bits(data, len, secret, 0)
- } else {
- hash_long_128bits_with_secret(data, len, secret)
- }
-}
-
-/// Calculates the 64-bit hash.
-#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))]
-#[derive(Clone, Default)]
-pub struct Hash64(State);
-
-impl Hash64 {
- pub fn with_seed(seed: u64) -> Self {
- Self(State::with_seed(seed))
- }
-
- pub fn with_secret<S: Into<Vec<u8>>>(secret: S) -> Self {
- Self(State::with_secret(secret))
- }
-}
-
-impl Hasher for Hash64 {
- #[inline(always)]
- fn finish(&self) -> u64 {
- self.0.digest64()
- }
-
- #[inline(always)]
- fn write(&mut self, bytes: &[u8]) {
- self.0.update(bytes, AccWidth::Acc64Bits)
- }
-}
-
-/// Calculates the 128-bit hash.
-#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))]
-#[derive(Clone, Default)]
-pub struct Hash128(State);
-
-impl Hash128 {
- pub fn with_seed(seed: u64) -> Self {
- Self(State::with_seed(seed))
- }
-
- pub fn with_secret<S: Into<Vec<u8>>>(secret: S) -> Self {
- Self(State::with_secret(secret))
- }
-}
-
-impl Hasher for Hash128 {
- #[inline(always)]
- fn finish(&self) -> u64 {
- self.0.digest128() as u64
- }
-
- #[inline(always)]
- fn write(&mut self, bytes: &[u8]) {
- self.0.update(bytes, AccWidth::Acc128Bits)
- }
-}
-
-pub trait HasherExt: Hasher {
- fn finish_ext(&self) -> u128;
-}
-
-impl HasherExt for Hash128 {
- #[inline(always)]
- fn finish_ext(&self) -> u128 {
- self.0.digest128()
- }
-}
-
-/* ==========================================
- * XXH3 default settings
- * ========================================== */
-
-const SECRET_DEFAULT_SIZE: usize = 192;
-const SECRET_SIZE_MIN: usize = 136;
-
-const SECRET: Secret = Secret([
- 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
- 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
- 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
- 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
- 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
- 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
- 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
- 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
- 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
- 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
- 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
- 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
-]);
-
-#[repr(align(64))]
-#[derive(Clone)]
-struct Secret([u8; SECRET_DEFAULT_SIZE]);
-
-const_assert_eq!(mem::size_of::<Secret>() % 16, 0);
-
-impl Default for Secret {
- #[inline(always)]
- fn default() -> Self {
- SECRET
- }
-}
-
-impl Deref for Secret {
- type Target = [u8];
-
- #[inline(always)]
- fn deref(&self) -> &Self::Target {
- &self.0[..]
- }
-}
-
-cfg_if! {
- if #[cfg(feature = "serialize")] {
- impl Serialize for Secret {
- fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
- where
- S: serde::Serializer,
- {
- serializer.serialize_bytes(self)
- }
- }
-
- impl<'de> Deserialize<'de> for Secret {
- fn deserialize<D>(deserializer: D) -> Result<Secret, D::Error>
- where
- D: serde::Deserializer<'de>,
- {
- deserializer.deserialize_bytes(SecretVisitor)
- }
- }
-
- struct SecretVisitor;
-
- impl<'de> serde::de::Visitor<'de> for SecretVisitor {
- type Value = Secret;
-
- fn expecting(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result {
- formatter.write_str("secret with a bytes array")
- }
-
- fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
- where
- E: serde::de::Error,
- {
- if v.len() == SECRET_DEFAULT_SIZE {
- let mut secret = [0; SECRET_DEFAULT_SIZE];
-
- secret.copy_from_slice(v);
-
- Ok(Secret(secret))
- } else {
- Err(E::custom("incomplete secret data"))
- }
- }
- }
- }
-}
-
-impl Secret {
- #[inline(always)]
- pub fn with_seed(seed: u64) -> Self {
- let mut secret = [0; SECRET_DEFAULT_SIZE];
-
- for off in (0..SECRET_DEFAULT_SIZE).step_by(16) {
- secret[off..].write_u64_le(SECRET[off..].read_u64_le().wrapping_add(seed));
- secret[off + 8..].write_u64_le(SECRET[off + 8..].read_u64_le().wrapping_sub(seed));
- }
-
- Secret(secret)
- }
-}
-
-cfg_if! {
- if #[cfg(target_feature = "avx2")] {
- #[repr(align(32))]
- #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))]
- #[derive(Clone)]
- struct Acc([u64; ACC_NB]);
- } else if #[cfg(target_feature = "sse2")] {
- #[repr(align(16))]
- #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))]
- #[derive(Clone)]
- struct Acc([u64; ACC_NB]);
- } else {
- #[repr(align(8))]
- #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))]
- #[derive(Clone)]
- struct Acc([u64; ACC_NB]);
- }
-}
-
-const ACC_SIZE: usize = mem::size_of::<Acc>();
-
-const_assert_eq!(ACC_SIZE, 64);
-
-impl Default for Acc {
- #[inline(always)]
- fn default() -> Self {
- Acc([
- u64::from(PRIME32_3),
- PRIME64_1,
- PRIME64_2,
- PRIME64_3,
- PRIME64_4,
- u64::from(PRIME32_2),
- PRIME64_5,
- u64::from(PRIME32_1),
- ])
- }
-}
-
-impl Deref for Acc {
- type Target = [u64];
-
- #[inline(always)]
- fn deref(&self) -> &Self::Target {
- &self.0
- }
-}
-
-impl DerefMut for Acc {
- #[inline(always)]
- fn deref_mut(&mut self) -> &mut Self::Target {
- &mut self.0
- }
-}
-
-trait Buf {
- fn read_u32_le(&self) -> u32;
-
- fn read_u64_le(&self) -> u64;
-}
-
-trait BufMut {
- fn write_u32_le(&mut self, n: u32);
-
- fn write_u64_le(&mut self, n: u64);
-}
-
-impl Buf for [u8] {
- #[inline(always)]
- fn read_u32_le(&self) -> u32 {
- let buf = &self[..mem::size_of::<u32>()];
- u32::from_le_bytes(buf.try_into().unwrap())
- }
-
- #[inline(always)]
- fn read_u64_le(&self) -> u64 {
- let buf = &self[..mem::size_of::<u64>()];
- u64::from_le_bytes(buf.try_into().unwrap())
- }
-}
-
-impl BufMut for [u8] {
- #[inline(always)]
- fn write_u32_le(&mut self, n: u32) {
- self[..mem::size_of::<u32>()].copy_from_slice(&n.to_le_bytes()[..]);
- }
-
- #[inline(always)]
- fn write_u64_le(&mut self, n: u64) {
- self[..mem::size_of::<u64>()].copy_from_slice(&n.to_le_bytes()[..]);
- }
-}
-
-/* ==========================================
- * Short keys
- * ========================================== */
-
-#[inline(always)]
-fn hash_len_0to16_64bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u64 {
- debug_assert!(len <= 16);
-
- if len > 8 {
- hash_len_9to16_64bits(data, len, key, seed)
- } else if len >= 4 {
- hash_len_4to8_64bits(data, len, key, seed)
- } else if len > 0 {
- hash_len_1to3_64bits(data, len, key, seed)
- } else {
- 0
- }
-}
-
-#[inline(always)]
-fn hash_len_9to16_64bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u64 {
- debug_assert!((9..=16).contains(&len));
-
- let ll1 = data.read_u64_le() ^ key.read_u64_le().wrapping_add(seed);
- let ll2 = data[len - 8..].read_u64_le() ^ key[8..].read_u64_le().wrapping_sub(seed);
- let acc = (len as u64)
- .wrapping_add(ll1)
- .wrapping_add(ll2)
- .wrapping_add(mul128_fold64(ll1, ll2));
-
- avalanche(acc)
-}
-
-#[inline(always)]
-fn hash_len_4to8_64bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u64 {
- debug_assert!((4..=8).contains(&len));
-
- let in1 = u64::from(data.read_u32_le());
- let in2 = u64::from(data[len - 4..].read_u32_le());
- let in64 = in1.wrapping_add(in2 << 32);
- let keyed = in64 ^ key.read_u64_le().wrapping_add(seed);
- let mix64 =
- (len as u64).wrapping_add((keyed ^ (keyed >> 51)).wrapping_mul(u64::from(PRIME32_1)));
-
- avalanche((mix64 ^ (mix64 >> 47)).wrapping_mul(PRIME64_2))
-}
-
-#[inline(always)]
-fn hash_len_1to3_64bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u64 {
- debug_assert!((1..=3).contains(&len));
-
- let c1 = u32::from(data[0]);
- let c2 = u32::from(data[len >> 1]);
- let c3 = u32::from(data[len - 1]);
- let combined = c1 + (c2 << 8) + (c3 << 16) + ((len as u32) << 24);
- let keyed = u64::from(combined) ^ u64::from(key.read_u32_le()).wrapping_add(seed);
- let mixed = keyed.wrapping_mul(PRIME64_1);
-
- avalanche(mixed)
-}
-
-#[inline(always)]
-fn hash_len_17to128_64bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u64 {
- debug_assert!((17..=128).contains(&len));
- debug_assert!(secret.len() >= SECRET_SIZE_MIN);
-
- let mut acc = PRIME64_1.wrapping_mul(len as u64);
-
- if len > 32 {
- if len > 64 {
- if len > 96 {
- acc = acc
- .wrapping_add(mix_16bytes(&data[48..], &secret[96..], seed))
- .wrapping_add(mix_16bytes(&data[len - 64..], &secret[112..], seed));
- }
- acc = acc
- .wrapping_add(mix_16bytes(&data[32..], &secret[64..], seed))
- .wrapping_add(mix_16bytes(&data[len - 48..], &secret[80..], seed));
- }
-
- acc = acc
- .wrapping_add(mix_16bytes(&data[16..], &secret[32..], seed))
- .wrapping_add(mix_16bytes(&data[len - 32..], &secret[48..], seed));
- }
-
- acc = acc
- .wrapping_add(mix_16bytes(data, secret, seed))
- .wrapping_add(mix_16bytes(&data[len - 16..], &secret[16..], seed));
-
- avalanche(acc)
-}
-
-const MIDSIZE_MAX: usize = 240;
-const MIDSIZE_STARTOFFSET: usize = 3;
-const MIDSIZE_LASTOFFSET: usize = 17;
-
-#[inline(always)]
-fn hash_len_129to240_64bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u64 {
- debug_assert!((129..=MIDSIZE_MAX).contains(&len));
- debug_assert!(secret.len() >= SECRET_SIZE_MIN);
-
- let acc = (len as u64).wrapping_mul(PRIME64_1);
- let acc = (0..8).fold(acc, |acc, i| {
- acc.wrapping_add(mix_16bytes(&data[16 * i..], &secret[16 * i..], seed))
- });
- let acc = avalanche(acc);
-
- let nb_rounds = len / 16;
- debug_assert!(nb_rounds >= 8);
-
- let acc = (8..nb_rounds).fold(acc, |acc, i| {
- acc.wrapping_add(mix_16bytes(
- &data[16 * i..],
- &secret[16 * (i - 8) + MIDSIZE_STARTOFFSET..],
- seed,
- ))
- });
-
- avalanche(acc.wrapping_add(mix_16bytes(
- &data[len - 16..],
- &secret[SECRET_SIZE_MIN - MIDSIZE_LASTOFFSET..],
- seed,
- )))
-}
-
-/* ==========================================
- * Long keys
- * ========================================== */
-
-const STRIPE_LEN: usize = 64;
-const SECRET_CONSUME_RATE: usize = 8; // nb of secret bytes consumed at each accumulation
-const SECRET_MERGEACCS_START: usize = 11; // do not align on 8, so that secret is different from accumulator
-const SECRET_LASTACC_START: usize = 7; // do not align on 8, so that secret is different from scrambler
-const ACC_NB: usize = STRIPE_LEN / mem::size_of::<u64>();
-
-#[derive(Debug, Clone, Copy, PartialEq)]
-pub(crate) enum AccWidth {
- Acc64Bits,
- Acc128Bits,
-}
-
-#[inline(always)]
-fn hash_long_64bits_with_default_secret(data: &[u8], len: usize) -> u64 {
- hash_long_internal(data, len, &SECRET)
-}
-
-#[inline(always)]
-fn hash_long_64bits_with_secret(data: &[u8], len: usize, secret: &[u8]) -> u64 {
- hash_long_internal(data, len, secret)
-}
-
-/// Generate a custom key, based on alteration of default kSecret with the seed,
-/// and then use this key for long mode hashing.
-///
-/// This operation is decently fast but nonetheless costs a little bit of time.
-/// Try to avoid it whenever possible (typically when `seed.is_none()`).
-#[inline(always)]
-fn hash_long_64bits_with_seed(data: &[u8], len: usize, seed: u64) -> u64 {
- if seed == 0 {
- hash_long_64bits_with_default_secret(data, len)
- } else {
- let secret = Secret::with_seed(seed);
-
- hash_long_internal(data, len, &secret)
- }
-}
-
-#[inline(always)]
-fn hash_long_internal(data: &[u8], len: usize, secret: &[u8]) -> u64 {
- let mut acc = Acc::default();
-
- hash_long_internal_loop(&mut acc, data, len, secret, AccWidth::Acc64Bits);
-
- merge_accs(
- &acc,
- &secret[SECRET_MERGEACCS_START..],
- (len as u64).wrapping_mul(PRIME64_1),
- )
-}
-
-#[inline(always)]
-fn hash_long_internal_loop(
- acc: &mut [u64],
- data: &[u8],
- len: usize,
- secret: &[u8],
- acc_width: AccWidth,
-) {
- let secret_len = secret.len();
- let nb_rounds = (secret_len - STRIPE_LEN) / SECRET_CONSUME_RATE;
- let block_len = STRIPE_LEN * nb_rounds;
-
- debug_assert!(secret_len >= SECRET_SIZE_MIN);
-
- let mut chunks = data.chunks_exact(block_len);
-
- for chunk in &mut chunks {
- accumulate(acc, chunk, secret, nb_rounds, acc_width);
- unsafe {
- scramble_acc(acc, &secret[secret_len - STRIPE_LEN..]);
- }
- }
-
- /* last partial block */
- debug_assert!(len > STRIPE_LEN);
-
- let nb_stripes = (len % block_len) / STRIPE_LEN;
-
- debug_assert!(nb_stripes < (secret_len / SECRET_CONSUME_RATE));
-
- accumulate(acc, chunks.remainder(), secret, nb_stripes, acc_width);
-
- /* last stripe */
- if (len & (STRIPE_LEN - 1)) != 0 {
- unsafe {
- accumulate512(
- acc,
- &data[len - STRIPE_LEN..],
- &secret[secret_len - STRIPE_LEN - SECRET_LASTACC_START..],
- acc_width,
- );
- }
- }
-}
-
-#[inline(always)]
-fn accumulate(acc: &mut [u64], data: &[u8], secret: &[u8], nb_stripes: usize, acc_width: AccWidth) {
- for n in 0..nb_stripes {
- unsafe {
- accumulate512(
- acc,
- &data[n * STRIPE_LEN..],
- &secret[n * SECRET_CONSUME_RATE..],
- acc_width,
- );
- }
- }
-}
-
-#[inline(always)]
-const fn _mm_shuffle(z: u32, y: u32, x: u32, w: u32) -> i32 {
- ((z << 6) | (y << 4) | (x << 2) | w) as i32
-}
-
-#[cfg(target_feature = "avx2")]
-mod avx2 {
- use super::*;
-
- #[target_feature(enable = "avx2")]
- pub(crate) unsafe fn accumulate512(
- acc: &mut [u64],
- data: &[u8],
- keys: &[u8],
- acc_width: AccWidth,
- ) {
- let xacc = acc.as_mut_ptr() as *mut __m256i;
- let xdata = data.as_ptr() as *const __m256i;
- let xkey = keys.as_ptr() as *const __m256i;
-
- for i in 0..STRIPE_LEN / mem::size_of::<__m256i>() {
- let d = _mm256_loadu_si256(xdata.add(i));
- let k = _mm256_loadu_si256(xkey.add(i));
- let dk = _mm256_xor_si256(d, k); // uint32 dk[8] = {d0+k0, d1+k1, d2+k2, d3+k3, ...}
- let mul = _mm256_mul_epu32(dk, _mm256_shuffle_epi32(dk, 0x31)); // uint64 res[4] = {dk0*dk1, dk2*dk3, ...}
-
- xacc.add(i).write(if acc_width == AccWidth::Acc128Bits {
- let dswap = _mm256_shuffle_epi32(d, _mm_shuffle(1, 0, 3, 2));
- let add = _mm256_add_epi64(xacc.add(i).read(), dswap);
- _mm256_add_epi64(mul, add)
- } else {
- let add = _mm256_add_epi64(xacc.add(i).read(), d);
- _mm256_add_epi64(mul, add)
- })
- }
- }
-
- #[target_feature(enable = "avx2")]
- pub unsafe fn scramble_acc(acc: &mut [u64], key: &[u8]) {
- let xacc = acc.as_mut_ptr() as *mut __m256i;
- let xkey = key.as_ptr() as *const __m256i;
- let prime32 = _mm256_set1_epi32(PRIME32_1 as i32);
-
- for i in 0..STRIPE_LEN / mem::size_of::<__m256i>() {
- let data = xacc.add(i).read();
- let shifted = _mm256_srli_epi64(data, 47);
- let data = _mm256_xor_si256(data, shifted);
-
- let k = _mm256_loadu_si256(xkey.add(i));
- let dk = _mm256_xor_si256(data, k); /* U32 dk[4] = {d0+k0, d1+k1, d2+k2, d3+k3} */
- let dk1 = _mm256_mul_epu32(dk, prime32);
-
- let d2 = _mm256_shuffle_epi32(dk, 0x31);
- let dk2 = _mm256_mul_epu32(d2, prime32);
- let dk2h = _mm256_slli_epi64(dk2, 32);
-
- xacc.add(i).write(_mm256_add_epi64(dk1, dk2h));
- }
- }
-}
-
-#[cfg(all(target_feature = "sse2", not(target_feature = "avx2")))]
-mod sse2 {
- use super::*;
-
- #[target_feature(enable = "sse2")]
- #[allow(clippy::cast_ptr_alignment)]
- pub(crate) unsafe fn accumulate512(
- acc: &mut [u64],
- data: &[u8],
- keys: &[u8],
- acc_width: AccWidth,
- ) {
- let xacc = acc.as_mut_ptr() as *mut __m128i;
- let xdata = data.as_ptr() as *const __m128i;
- let xkey = keys.as_ptr() as *const __m128i;
-
- for i in 0..STRIPE_LEN / mem::size_of::<__m128i>() {
- let d = _mm_loadu_si128(xdata.add(i));
- let k = _mm_loadu_si128(xkey.add(i));
- let dk = _mm_xor_si128(d, k); // uint32 dk[4] = {d0+k0, d1+k1, d2+k2, d3+k3} */
- let mul = _mm_mul_epu32(dk, _mm_shuffle_epi32(dk, 0x31)); // uint64 res[4] = {dk0*dk1, dk2*dk3, ...} */
- xacc.add(i).write(if acc_width == AccWidth::Acc128Bits {
- let dswap = _mm_shuffle_epi32(d, _mm_shuffle(1, 0, 3, 2));
- let add = _mm_add_epi64(xacc.add(i).read(), dswap);
- _mm_add_epi64(mul, add)
- } else {
- let add = _mm_add_epi64(xacc.add(i).read(), d);
- _mm_add_epi64(mul, add)
- })
- }
- }
-
- #[target_feature(enable = "sse2")]
- #[allow(clippy::cast_ptr_alignment)]
- pub unsafe fn scramble_acc(acc: &mut [u64], key: &[u8]) {
- let xacc = acc.as_mut_ptr() as *mut __m128i;
- let xkey = key.as_ptr() as *const __m128i;
- let prime32 = _mm_set1_epi32(PRIME32_1 as i32);
-
- for i in 0..STRIPE_LEN / mem::size_of::<__m128i>() {
- let data = xacc.add(i).read();
- let shifted = _mm_srli_epi64(data, 47);
- let data = _mm_xor_si128(data, shifted);
-
- let k = _mm_loadu_si128(xkey.add(i));
- let dk = _mm_xor_si128(data, k);
-
- let dk1 = _mm_mul_epu32(dk, prime32);
-
- let d2 = _mm_shuffle_epi32(dk, 0x31);
- let dk2 = _mm_mul_epu32(d2, prime32);
- let dk2h = _mm_slli_epi64(dk2, 32);
-
- xacc.add(i).write(_mm_add_epi64(dk1, dk2h));
- }
- }
-}
-
-#[cfg(not(any(target_feature = "avx2", target_feature = "sse2")))]
-mod generic {
- use super::*;
-
- #[inline(always)]
- pub(crate) unsafe fn accumulate512(
- acc: &mut [u64],
- data: &[u8],
- key: &[u8],
- acc_width: AccWidth,
- ) {
- for i in (0..ACC_NB).step_by(2) {
- let in1 = data[8 * i..].read_u64_le();
- let in2 = data[8 * (i + 1)..].read_u64_le();
- let key1 = key[8 * i..].read_u64_le();
- let key2 = key[8 * (i + 1)..].read_u64_le();
- let data_key1 = key1 ^ in1;
- let data_key2 = key2 ^ in2;
- acc[i] = acc[i].wrapping_add(mul32_to64(data_key1, data_key1 >> 32));
- acc[i + 1] = acc[i + 1].wrapping_add(mul32_to64(data_key2, data_key2 >> 32));
-
- if acc_width == AccWidth::Acc128Bits {
- acc[i] = acc[i].wrapping_add(in2);
- acc[i + 1] = acc[i + 1].wrapping_add(in1);
- } else {
- acc[i] = acc[i].wrapping_add(in1);
- acc[i + 1] = acc[i + 1].wrapping_add(in2);
- }
- }
- }
-
- #[inline(always)]
- fn mul32_to64(a: u64, b: u64) -> u64 {
- (a & 0xFFFFFFFF).wrapping_mul(b & 0xFFFFFFFF)
- }
-
- #[inline(always)]
- pub unsafe fn scramble_acc(acc: &mut [u64], key: &[u8]) {
- for i in 0..ACC_NB {
- let key64 = key[8 * i..].read_u64_le();
- let mut acc64 = acc[i];
- acc64 ^= acc64 >> 47;
- acc64 ^= key64;
- acc64 = acc64.wrapping_mul(u64::from(PRIME32_1));
- acc[i] = acc64;
- }
- }
-}
-
-cfg_if! {
- if #[cfg(target_feature = "avx2")] {
- use avx2::{accumulate512, scramble_acc};
- } else if #[cfg(target_feature = "sse2")] {
- use sse2::{accumulate512, scramble_acc};
- } else {
- use generic::{accumulate512, scramble_acc};
- }
-}
-
-#[inline(always)]
-fn merge_accs(acc: &[u64], secret: &[u8], start: u64) -> u64 {
- avalanche(
- start
- .wrapping_add(mix2accs(acc, secret))
- .wrapping_add(mix2accs(&acc[2..], &secret[16..]))
- .wrapping_add(mix2accs(&acc[4..], &secret[32..]))
- .wrapping_add(mix2accs(&acc[6..], &secret[48..])),
- )
-}
-
-#[inline(always)]
-fn mix2accs(acc: &[u64], secret: &[u8]) -> u64 {
- mul128_fold64(
- acc[0] ^ secret.read_u64_le(),
- acc[1] ^ secret[8..].read_u64_le(),
- )
-}
-
-#[inline(always)]
-fn mix_16bytes(data: &[u8], key: &[u8], seed: u64) -> u64 {
- let ll1 = data.read_u64_le();
- let ll2 = data[8..].read_u64_le();
-
- mul128_fold64(
- ll1 ^ key.read_u64_le().wrapping_add(seed),
- ll2 ^ key[8..].read_u64_le().wrapping_sub(seed),
- )
-}
-
-#[inline(always)]
-fn mul128_fold64(ll1: u64, ll2: u64) -> u64 {
- let lll = u128::from(ll1).wrapping_mul(u128::from(ll2));
-
- (lll as u64) ^ ((lll >> 64) as u64)
-}
-
-#[inline(always)]
-fn avalanche(mut h64: u64) -> u64 {
- h64 ^= h64 >> 37;
- h64 = h64.wrapping_mul(PRIME64_3);
- h64 ^ (h64 >> 32)
-}
-
-/* === XXH3 streaming === */
-
-const INTERNAL_BUFFER_SIZE: usize = 256;
-const INTERNAL_BUFFER_STRIPES: usize = INTERNAL_BUFFER_SIZE / STRIPE_LEN;
-
-const_assert!(INTERNAL_BUFFER_SIZE >= MIDSIZE_MAX);
-const_assert_eq!(INTERNAL_BUFFER_SIZE % STRIPE_LEN, 0);
-
-#[repr(align(64))]
-#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))]
-#[derive(Clone)]
-struct State {
- acc: Acc,
- secret: With,
- buf: Vec<u8>,
- seed: u64,
- total_len: usize,
- nb_stripes_so_far: usize,
-}
-
-#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))]
-#[derive(Clone)]
-enum With {
- Default(Secret),
- Custom(Secret),
- Ref(Vec<u8>),
-}
-
-impl Deref for With {
- type Target = [u8];
-
- fn deref(&self) -> &Self::Target {
- match self {
- With::Default(secret) | With::Custom(secret) => &secret.0[..],
- With::Ref(secret) => secret,
- }
- }
-}
-
-impl Default for State {
- fn default() -> Self {
- Self::new(0, With::Default(Secret::default()))
- }
-}
-
-impl State {
- fn new(seed: u64, secret: With) -> Self {
- State {
- acc: Acc::default(),
- secret,
- buf: Vec::with_capacity(INTERNAL_BUFFER_SIZE),
- seed,
- total_len: 0,
- nb_stripes_so_far: 0,
- }
- }
-
- fn with_seed(seed: u64) -> Self {
- Self::new(seed, With::Custom(Secret::with_seed(seed)))
- }
-
- fn with_secret<S: Into<Vec<u8>>>(secret: S) -> State {
- let secret = secret.into();
-
- debug_assert!(secret.len() >= SECRET_SIZE_MIN);
-
- Self::new(0, With::Ref(secret))
- }
-
- #[inline(always)]
- fn secret_limit(&self) -> usize {
- self.secret.len() - STRIPE_LEN
- }
-
- #[inline(always)]
- fn nb_stripes_per_block(&self) -> usize {
- self.secret_limit() / SECRET_CONSUME_RATE
- }
-
- #[inline(always)]
- fn update(&mut self, mut input: &[u8], acc_width: AccWidth) {
- let len = input.len();
-
- if len == 0 {
- return;
- }
-
- self.total_len += len;
-
- if self.buf.len() + len <= self.buf.capacity() {
- self.buf.extend_from_slice(input);
- return;
- }
-
- let nb_stripes_per_block = self.nb_stripes_per_block();
- let secret_limit = self.secret_limit();
-
- if !self.buf.is_empty() {
- // some data within internal buffer: fill then consume it
- let (load, rest) = input.split_at(self.buf.capacity() - self.buf.len());
- self.buf.extend_from_slice(load);
- input = rest;
- self.nb_stripes_so_far = consume_stripes(
- &mut self.acc,
- self.nb_stripes_so_far,
- nb_stripes_per_block,
- &self.buf,
- INTERNAL_BUFFER_STRIPES,
- &self.secret,
- secret_limit,
- acc_width,
- );
- self.buf.clear();
- }
-
- // consume input by full buffer quantities
- let mut chunks = input.chunks_exact(INTERNAL_BUFFER_SIZE);
-
- for chunk in &mut chunks {
- self.nb_stripes_so_far = consume_stripes(
- &mut self.acc,
- self.nb_stripes_so_far,
- nb_stripes_per_block,
- chunk,
- INTERNAL_BUFFER_STRIPES,
- &self.secret,
- secret_limit,
- acc_width,
- );
- }
-
- // some remaining input data : buffer it
- self.buf.extend_from_slice(chunks.remainder())
- }
-
- #[inline(always)]
- fn digest_long(&self, acc_width: AccWidth) -> Acc {
- let mut acc = self.acc.clone();
- let secret_limit = self.secret_limit();
-
- if self.buf.len() >= STRIPE_LEN {
- // digest locally, state remains unaltered, and can continue ingesting more data afterwards
- let total_nb_stripes = self.buf.len() / STRIPE_LEN;
- let _nb_stripes_so_far = consume_stripes(
- &mut acc,
- self.nb_stripes_so_far,
- self.nb_stripes_per_block(),
- &self.buf,
- total_nb_stripes,
- &self.secret,
- secret_limit,
- acc_width,
- );
- if (self.buf.len() % STRIPE_LEN) != 0 {
- unsafe {
- accumulate512(
- &mut acc,
- &self.buf[self.buf.len() - STRIPE_LEN..],
- &self.secret[secret_limit - SECRET_LASTACC_START..],
- acc_width,
- );
- }
- }
- } else if !self.buf.is_empty() {
- // one last stripe
- let mut last_stripe = [0u8; STRIPE_LEN];
- let catchup_size = STRIPE_LEN - self.buf.len();
-
- last_stripe[..catchup_size].copy_from_slice(unsafe {
- slice::from_raw_parts(
- self.buf.as_ptr().add(self.buf.capacity() - catchup_size),
- catchup_size,
- )
- });
- last_stripe[catchup_size..].copy_from_slice(&self.buf);
-
- unsafe {
- accumulate512(
- &mut acc,
- &last_stripe[..],
- &self.secret[secret_limit - SECRET_LASTACC_START..],
- acc_width,
- );
- }
- }
-
- acc
- }
-
- #[inline(always)]
- fn digest64(&self) -> u64 {
- if self.total_len > MIDSIZE_MAX {
- let acc = self.digest_long(AccWidth::Acc64Bits);
-
- merge_accs(
- &acc,
- &self.secret[SECRET_MERGEACCS_START..],
- (self.total_len as u64).wrapping_mul(PRIME64_1),
- )
- } else if self.seed != 0 {
- hash64_with_seed(&self.buf, self.seed)
- } else {
- hash64_with_secret(&self.buf, &self.secret[..self.secret_limit() + STRIPE_LEN])
- }
- }
-
- #[inline(always)]
- fn digest128(&self) -> u128 {
- let secret_limit = self.secret_limit();
-
- if self.total_len > MIDSIZE_MAX {
- let acc = self.digest_long(AccWidth::Acc128Bits);
-
- debug_assert!(secret_limit + STRIPE_LEN >= ACC_SIZE + SECRET_MERGEACCS_START);
-
- let total_len = self.total_len as u64;
-
- let low64 = merge_accs(
- &acc,
- &self.secret[SECRET_MERGEACCS_START..],
- total_len.wrapping_mul(PRIME64_1),
- );
- let high64 = merge_accs(
- &acc,
- &self.secret[secret_limit + STRIPE_LEN - ACC_SIZE - SECRET_MERGEACCS_START..],
- !total_len.wrapping_mul(PRIME64_2),
- );
-
- u128::from(low64) + (u128::from(high64) << 64)
- } else if self.seed != 0 {
- hash128_with_seed(&self.buf, self.seed)
- } else {
- hash128_with_secret(&self.buf, &self.secret[..secret_limit + STRIPE_LEN])
- }
- }
-}
-
-#[inline(always)]
-#[allow(clippy::too_many_arguments)]
-fn consume_stripes(
- acc: &mut [u64],
- nb_stripes_so_far: usize,
- nb_stripes_per_block: usize,
- data: &[u8],
- total_stripes: usize,
- secret: &[u8],
- secret_limit: usize,
- acc_width: AccWidth,
-) -> usize {
- debug_assert!(nb_stripes_so_far < nb_stripes_per_block);
-
- if nb_stripes_per_block - nb_stripes_so_far <= total_stripes {
- let nb_stripes = nb_stripes_per_block - nb_stripes_so_far;
-
- accumulate(
- acc,
- data,
- &secret[nb_stripes_so_far * SECRET_CONSUME_RATE..],
- nb_stripes,
- acc_width,
- );
- unsafe {
- scramble_acc(acc, &secret[secret_limit..]);
- }
- accumulate(
- acc,
- &data[nb_stripes * STRIPE_LEN..],
- secret,
- total_stripes - nb_stripes,
- acc_width,
- );
-
- total_stripes - nb_stripes
- } else {
- accumulate(
- acc,
- data,
- &secret[nb_stripes_so_far * SECRET_CONSUME_RATE..],
- total_stripes,
- acc_width,
- );
-
- nb_stripes_so_far + total_stripes
- }
-}
-
-/* ==========================================
- * XXH3 128 bits (=> XXH128)
- * ========================================== */
-
-#[inline(always)]
-fn hash_len_0to16_128bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u128 {
- debug_assert!(len <= 16);
-
- if len > 8 {
- hash_len_9to16_128bits(data, len, secret, seed)
- } else if len >= 4 {
- hash_len_4to8_128bits(data, len, secret, seed)
- } else if len > 0 {
- hash_len_1to3_128bits(data, len, secret, seed)
- } else {
- 0
- }
-}
-
-#[inline(always)]
-fn hash_len_1to3_128bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u128 {
- debug_assert!((1..=3).contains(&len));
-
- let c1 = u32::from(data[0]);
- let c2 = u32::from(data[len >> 1]);
- let c3 = u32::from(data[len - 1]);
- let combinedl = c1 + (c2 << 8) + (c3 << 16) + ((len as u32) << 24);
- let combinedh = combinedl.swap_bytes();
- let keyedl = u64::from(combinedl) ^ u64::from(key.read_u32_le()).wrapping_add(seed);
- let keyedh = u64::from(combinedh) ^ u64::from(key[4..].read_u32_le()).wrapping_sub(seed);
- let mixedl = keyedl.wrapping_mul(PRIME64_1);
- let mixedh = keyedh.wrapping_mul(PRIME64_2);
-
- u128::from(avalanche(mixedl)) + (u128::from(avalanche(mixedh)) << 64)
-}
-
-#[inline(always)]
-fn hash_len_4to8_128bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u128 {
- debug_assert!((4..=8).contains(&len));
-
- let in1 = u64::from(data.read_u32_le());
- let in2 = u64::from(data[len - 4..].read_u32_le());
- let in64l = in1.wrapping_add(in2 << 32);
- let in64h = in64l.swap_bytes();
- let keyedl = in64l ^ key.read_u64_le().wrapping_add(seed);
- let keyedh = in64h ^ key[8..].read_u64_le().wrapping_sub(seed);
- let mix64l1 =
- (len as u64).wrapping_add((keyedl ^ (keyedl >> 51)).wrapping_mul(u64::from(PRIME32_1)));
- let mix64l2 = (mix64l1 ^ (mix64l1 >> 47)).wrapping_mul(PRIME64_2);
- let mix64h1 = (keyedh ^ (keyedh >> 47))
- .wrapping_mul(PRIME64_1)
- .wrapping_sub(len as u64);
- let mix64h2 = (mix64h1 ^ (mix64h1 >> 43)).wrapping_mul(PRIME64_4);
-
- u128::from(avalanche(mix64l2)) + (u128::from(avalanche(mix64h2)) << 64)
-}
-
-#[inline(always)]
-fn hash_len_9to16_128bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u128 {
- debug_assert!((9..=16).contains(&len));
-
- let ll1 = data.read_u64_le() ^ key.read_u64_le().wrapping_add(seed);
- let ll2 = data[len - 8..].read_u64_le() ^ key[8..].read_u64_le().wrapping_sub(seed);
- let inlow = ll1 ^ ll2;
-
- let m128 = u128::from(inlow).wrapping_mul(u128::from(PRIME64_1));
- let high64 = ((m128 >> 64) as u64).wrapping_add(ll2.wrapping_mul(PRIME64_1));
- let low64 = (m128 as u64) ^ (high64 >> 32);
-
- let h128 = u128::from(low64).wrapping_mul(u128::from(PRIME64_2));
- let high64 = ((h128 >> 64) as u64).wrapping_add(high64.wrapping_mul(PRIME64_2));
- let low64 = h128 as u64;
-
- u128::from(avalanche(low64)) + (u128::from(avalanche(high64)) << 64)
-}
-
-#[inline(always)]
-fn hash_len_17to128_128bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u128 {
- debug_assert!((17..=128).contains(&len));
- debug_assert!(secret.len() >= SECRET_SIZE_MIN);
-
- let mut acc1 = PRIME64_1.wrapping_mul(len as u64);
- let mut acc2 = 0u64;
-
- if len > 32 {
- if len > 64 {
- if len > 96 {
- acc1 = acc1.wrapping_add(mix_16bytes(&data[48..], &secret[96..], seed));
- acc2 = acc2.wrapping_add(mix_16bytes(&data[len - 64..], &secret[112..], seed));
- }
- acc1 = acc1.wrapping_add(mix_16bytes(&data[32..], &secret[64..], seed));
- acc2 = acc2.wrapping_add(mix_16bytes(&data[len - 48..], &secret[80..], seed));
- }
-
- acc1 = acc1.wrapping_add(mix_16bytes(&data[16..], &secret[32..], seed));
- acc2 = acc2.wrapping_add(mix_16bytes(&data[len - 32..], &secret[48..], seed));
- }
-
- acc1 = acc1.wrapping_add(mix_16bytes(data, secret, seed));
- acc2 = acc2.wrapping_add(mix_16bytes(&data[len - 16..], &secret[16..], seed));
-
- let low64 = acc1.wrapping_add(acc2);
- let high64 = acc1
- .wrapping_mul(PRIME64_1)
- .wrapping_add(acc2.wrapping_mul(PRIME64_4))
- .wrapping_add((len as u64).wrapping_sub(seed).wrapping_mul(PRIME64_2));
-
- u128::from(avalanche(low64)) + (u128::from(0u64.wrapping_sub(avalanche(high64))) << 64)
-}
-
-#[inline(always)]
-fn hash_len_129to240_128bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u128 {
- debug_assert!((129..=MIDSIZE_MAX).contains(&len));
- debug_assert!(secret.len() >= SECRET_SIZE_MIN);
-
- let acc1 = (len as u64).wrapping_mul(PRIME64_1);
- let acc2 = 0u64;
-
- let (acc1, acc2) = (0..4).fold((acc1, acc2), |(acc1, acc2), i| {
- (
- acc1.wrapping_add(mix_16bytes(&data[32 * i..], &secret[32 * i..], seed)),
- acc2.wrapping_add(mix_16bytes(
- &data[32 * i + 16..],
- &secret[32 * i + 16..],
- 0u64.wrapping_sub(seed),
- )),
- )
- });
- let acc1 = avalanche(acc1);
- let acc2 = avalanche(acc2);
-
- let nb_rounds = len / 32;
- debug_assert!(nb_rounds >= 4);
-
- let (acc1, acc2) = (4..nb_rounds).fold((acc1, acc2), |(acc1, acc2), i| {
- (
- acc1.wrapping_add(mix_16bytes(
- &data[32 * i..],
- &secret[32 * (i - 4) + MIDSIZE_STARTOFFSET..],
- seed,
- )),
- acc2.wrapping_add(mix_16bytes(
- &data[32 * i + 16..],
- &secret[32 * (i - 4) + 16 + MIDSIZE_STARTOFFSET..],
- 0u64.wrapping_sub(seed),
- )),
- )
- });
-
- // last bytes
- let acc1 = acc1.wrapping_add(mix_16bytes(
- &data[len - 16..],
- &secret[SECRET_SIZE_MIN - MIDSIZE_LASTOFFSET..],
- seed,
- ));
- let acc2 = acc2.wrapping_add(mix_16bytes(
- &data[len - 32..],
- &secret[SECRET_SIZE_MIN - MIDSIZE_LASTOFFSET - 16..],
- 0u64.wrapping_sub(seed),
- ));
-
- let low64 = acc1.wrapping_add(acc2);
- let high64 = acc1
- .wrapping_mul(PRIME64_1)
- .wrapping_add(acc2.wrapping_mul(PRIME64_4))
- .wrapping_add((len as u64).wrapping_sub(seed).wrapping_mul(PRIME64_2));
-
- u128::from(avalanche(low64)) + (u128::from(0u64.wrapping_sub(avalanche(high64))) << 64)
-}
-
-#[inline]
-fn hash_long_128bits_with_default_secret(data: &[u8], len: usize) -> u128 {
- hash_long_128bits_internal(data, len, &SECRET)
-}
-
-#[inline]
-fn hash_long_128bits_with_secret(data: &[u8], len: usize, secret: &[u8]) -> u128 {
- hash_long_128bits_internal(data, len, secret)
-}
-
-#[inline]
-fn hash_long_128bits_with_seed(data: &[u8], len: usize, seed: u64) -> u128 {
- if seed == 0 {
- hash_long_128bits_with_default_secret(data, len)
- } else {
- let secret = Secret::with_seed(seed);
-
- hash_long_128bits_internal(data, len, &secret)
- }
-}
-
-#[inline(always)]
-fn hash_long_128bits_internal(data: &[u8], len: usize, secret: &[u8]) -> u128 {
- let mut acc = Acc::default();
-
- hash_long_internal_loop(&mut acc, data, len, secret, AccWidth::Acc128Bits);
-
- debug_assert!(secret.len() >= acc.len() + SECRET_MERGEACCS_START);
-
- let low64 = merge_accs(
- &acc,
- &secret[SECRET_MERGEACCS_START..],
- (len as u64).wrapping_mul(PRIME64_1),
- );
- let high64 = merge_accs(
- &acc,
- &secret[secret.len() - ACC_SIZE - SECRET_MERGEACCS_START..],
- !(len as u64).wrapping_mul(PRIME64_2),
- );
-
- u128::from(low64) + (u128::from(high64) << 64)
-}
-
-/* === XXH3 128-bit streaming === */
-
-/* all the functions are actually the same as for 64-bit streaming variant,
-just the reset one is different (different initial acc values for 0,5,6,7),
-and near the end of the digest function */
-
-#[cfg(test)]
-mod tests {
- use alloc::vec;
-
- use super::*;
-
- const PRIME: u64 = 2654435761;
- const PRIME64: u64 = 11400714785074694797;
- const SANITY_BUFFER_SIZE: usize = 2243;
-
- fn sanity_buffer() -> [u8; SANITY_BUFFER_SIZE] {
- let mut buf = [0; SANITY_BUFFER_SIZE];
- let mut byte_gen: u64 = PRIME;
-
- for b in buf.iter_mut() {
- *b = (byte_gen >> 56) as u8;
- byte_gen = byte_gen.wrapping_mul(PRIME64);
- }
-
- buf
- }
-
- #[test]
- fn hash_64bits_sanity_check() {
- let buf = sanity_buffer();
-
- let test_cases = vec![
- (&[][..], 0, 0), /* zero-length hash is always 0 */
- (&[][..], PRIME64, 0),
- (&buf[..1], 0, 0x7198D737CFE7F386), /* 1 - 3 */
- (&buf[..1], PRIME64, 0xB70252DB7161C2BD), /* 1 - 3 */
- (&buf[..6], 0, 0x22CBF5F3E1F6257C), /* 4 - 8 */
- (&buf[..6], PRIME64, 0x6398631C12AB94CE), /* 4 - 8 */
- (&buf[..12], 0, 0xD5361CCEEBB5A0CC), /* 9 - 16 */
- (&buf[..12], PRIME64, 0xC4C125E75A808C3D), /* 9 - 16 */
- (&buf[..24], 0, 0x46796F3F78B20F6B), /* 17 - 32 */
- (&buf[..24], PRIME64, 0x60171A7CD0A44C10), /* 17 - 32 */
- (&buf[..48], 0, 0xD8D4D3590D136E11), /* 33 - 64 */
- (&buf[..48], PRIME64, 0x05441F2AEC2A1296), /* 33 - 64 */
- (&buf[..80], 0, 0xA1DC8ADB3145B86A), /* 65 - 96 */
- (&buf[..80], PRIME64, 0xC9D55256965B7093), /* 65 - 96 */
- (&buf[..112], 0, 0xE43E5717A61D3759), /* 97 -128 */
- (&buf[..112], PRIME64, 0x5A5F89A3FECE44A5), /* 97 -128 */
- (&buf[..195], 0, 0x6F747739CBAC22A5), /* 129-240 */
- (&buf[..195], PRIME64, 0x33368E23C7F95810), /* 129-240 */
- (&buf[..403], 0, 0x4834389B15D981E8), /* one block, last stripe is overlapping */
- (&buf[..403], PRIME64, 0x85CE5DFFC7B07C87), /* one block, last stripe is overlapping */
- (&buf[..512], 0, 0x6A1B982631F059A8), /* one block, finishing at stripe boundary */
- (&buf[..512], PRIME64, 0x10086868CF0ADC99), /* one block, finishing at stripe boundary */
- (&buf[..2048], 0, 0xEFEFD4449323CDD4), /* 2 blocks, finishing at block boundary */
- (&buf[..2048], PRIME64, 0x01C85E405ECA3F6E), /* 2 blocks, finishing at block boundary */
- (&buf[..2240], 0, 0x998C0437486672C7), /* 3 blocks, finishing at stripe boundary */
- (&buf[..2240], PRIME64, 0x4ED38056B87ABC7F), /* 3 blocks, finishing at stripe boundary */
- (&buf[..2243], 0, 0xA559D20581D742D3), /* 3 blocks, last stripe is overlapping */
- (&buf[..2243], PRIME64, 0x96E051AB57F21FC8), /* 3 blocks, last stripe is overlapping */
- ];
-
- for (buf, seed, result) in test_cases {
- {
- let hash = hash64_with_seed(buf, seed);
-
- assert_eq!(
- hash,
- result,
- "hash64_with_seed(&buf[..{}], seed={}) failed, got 0x{:X}, expected 0x{:X}",
- buf.len(),
- seed,
- hash,
- result
- );
- }
-
- // streaming API test
-
- // single ingestio
- {
- let mut hasher = Hash64::with_seed(seed);
- hasher.write(buf);
- let hash = hasher.finish();
-
- assert_eq!(
- hash,
- result,
- "Hash64::update(&buf[..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}",
- buf.len(),
- seed,
- hash,
- result
- );
- }
-
- if buf.len() > 3 {
- // 2 ingestions
- let mut hasher = Hash64::with_seed(seed);
- hasher.write(&buf[..3]);
- hasher.write(&buf[3..]);
- let hash = hasher.finish();
-
- assert_eq!(
- hash,
- result,
- "Hash64::update(&buf[..3], &buf[3..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}",
- buf.len(),
- seed,
- hash,
- result
- );
- }
-
- // byte by byte ingestion
- {
- let mut hasher = Hash64::with_seed(seed);
-
- for chunk in buf.chunks(1) {
- hasher.write(chunk);
- }
-
- let hash = hasher.finish();
-
- assert_eq!(
- hash,
- result,
- "Hash64::update(&buf[..{}].chunks(1)) with seed={} failed, got 0x{:X}, expected 0x{:X}",
- buf.len(),
- seed,
- hash,
- result
- );
- }
- }
- }
-
- #[test]
- fn hash_64bits_with_secret_sanity_check() {
- let buf = sanity_buffer();
- let secret = &buf[7..7 + SECRET_SIZE_MIN + 11];
-
- let test_cases = vec![
- (&[][..], secret, 0), /* zero-length hash is always 0 */
- (&buf[..1], secret, 0x7F69735D618DB3F0), /* 1 - 3 */
- (&buf[..6], secret, 0xBFCC7CB1B3554DCE), /* 6 - 8 */
- (&buf[..12], secret, 0x8C50DC90AC9206FC), /* 9 - 16 */
- (&buf[..24], secret, 0x1CD2C2EE9B9A0928), /* 17 - 32 */
- (&buf[..48], secret, 0xA785256D9D65D514), /* 33 - 64 */
- (&buf[..80], secret, 0x6F3053360D21BBB7), /* 65 - 96 */
- (&buf[..112], secret, 0x560E82D25684154C), /* 97 -128 */
- (&buf[..195], secret, 0xBA5BDDBC5A767B11), /* 129-240 */
- (&buf[..403], secret, 0xFC3911BBA656DB58), /* one block, last stripe is overlapping */
- (&buf[..512], secret, 0x306137DD875741F1), /* one block, finishing at stripe boundary */
- (&buf[..2048], secret, 0x2836B83880AD3C0C), /* > one block, at least one scrambling */
- (&buf[..2243], secret, 0x3446E248A00CB44A), /* > one block, at least one scrambling, last stripe unaligned */
- ];
-
- for (buf, secret, result) in test_cases {
- {
- let hash = hash64_with_secret(buf, secret);
-
- assert_eq!(
- hash,
- result,
- "hash64_with_secret(&buf[..{}], secret) failed, got 0x{:X}, expected 0x{:X}",
- buf.len(),
- hash,
- result
- );
- }
-
- // streaming API test
-
- // single ingestio
- {
- let mut hasher = Hash64::with_secret(secret);
- hasher.write(buf);
- let hash = hasher.finish();
-
- assert_eq!(
- hash,
- result,
- "Hash64::update(&buf[..{}]) with secret failed, got 0x{:X}, expected 0x{:X}",
- buf.len(),
- hash,
- result
- );
- }
-
- // byte by byte ingestion
- {
- let mut hasher = Hash64::with_secret(secret);
-
- for chunk in buf.chunks(1) {
- hasher.write(chunk);
- }
-
- let hash = hasher.finish();
-
- assert_eq!(
- hash,
- result,
- "Hash64::update(&buf[..{}].chunks(1)) with secret failed, got 0x{:X}, expected 0x{:X}",
- buf.len(),
- hash,
- result
- );
- }
- }
- }
-
- #[test]
- fn hash_128bits_sanity_check() {
- let buf = sanity_buffer();
-
- let test_cases = vec![
- (&[][..], 0, 0u64, 0u64), /* zero-length hash is { seed, -seed } by default */
- (&[][..], PRIME, 0, 0),
- (&buf[..1], 0, 0x7198D737CFE7F386, 0x3EE70EA338F3F1E8), /* 1-3 */
- (&buf[..1], PRIME, 0x8E05996EC27C0F46, 0x90DFC659A8BDCC0C), /* 1-3 */
- (&buf[..6], 0, 0x22CBF5F3E1F6257C, 0xD4E6C2B94FFC3BFA), /* 4-8 */
- (&buf[..6], PRIME, 0x97B28D3079F8541F, 0xEFC0B954298E6555), /* 4-8 */
- (&buf[..12], 0, 0x0E0CD01F05AC2F0D, 0x2B55C95951070D4B), /* 9-16 */
- (&buf[..12], PRIME, 0xA9DE561CA04CDF37, 0x609E31FDC00A43C9), /* 9-16 */
- (&buf[..24], 0, 0x46796F3F78B20F6B, 0x58FF55C3926C13FA), /* 17-32 */
- (&buf[..24], PRIME, 0x30D5C4E9EB415C55, 0x8868344B3A4645D0), /* 17-32 */
- (&buf[..48], 0, 0xD8D4D3590D136E11, 0x5527A42843020A62), /* 33-64 */
- (&buf[..48], PRIME, 0x1D8834E1A5407A1C, 0x44375B9FB060F541), /* 33-64 */
- (&buf[..81], 0, 0x4B9B448ED8DFD3DD, 0xE805A6D1A43D70E5), /* 65-96 */
- (&buf[..81], PRIME, 0xD2D6B075945617BA, 0xE58BE5736F6E7550), /* 65-96 */
- (&buf[..103], 0, 0xC5A9F97B29EFA44E, 0x254DB7BE881E125C), /* 97-128 */
- (&buf[..103], PRIME, 0xFA2086367CDB177F, 0x0AEDEA68C988B0C0), /* 97-128 */
- (&buf[..192], 0, 0xC3142FDDD9102A3F, 0x06F1747E77185F97), /* 129-240 */
- (&buf[..192], PRIME, 0xA89F07B35987540F, 0xCF1B35FB2C557F54), /* 129-240 */
- (&buf[..222], 0, 0xA61AC4EB3295F86B, 0x33FA7B7598C28A07), /* 129-240 */
- (&buf[..222], PRIME, 0x54135EB88AD8B75E, 0xBC45CE6AE50BCF53), /* 129-240 */
- (&buf[..403], 0, 0xB0C48E6D18E9D084, 0xB16FC17E992FF45D), /* one block, last stripe is overlapping */
- (&buf[..403], PRIME64, 0x0A1D320C9520871D, 0xCE11CB376EC93252), /* one block, last stripe is overlapping */
- (&buf[..512], 0, 0xA03428558AC97327, 0x4ECF51281BA406F7), /* one block, finishing at stripe boundary */
- (&buf[..512], PRIME64, 0xAF67A482D6C893F2, 0x1382D92F25B84D90), /* one block, finishing at stripe boundary */
- (&buf[..2048], 0, 0x21901B416B3B9863, 0x212AF8E6326F01E0), /* two blocks, finishing at block boundary */
- (&buf[..2048], PRIME, 0xBDBB2282577DADEC, 0xF78CDDC2C9A9A692), /* two blocks, finishing at block boundary */
- (&buf[..2240], 0, 0x00AD52FA9385B6FE, 0xC705BAD3356CE302), /* two blocks, ends at stripe boundary */
- (&buf[..2240], PRIME, 0x10FD0072EC68BFAA, 0xE1312F3458817F15), /* two blocks, ends at stripe boundary */
- (&buf[..2237], 0, 0x970C91411533862C, 0x4BBD06FF7BFF0AB1), /* two blocks, ends at stripe boundary */
- (&buf[..2237], PRIME, 0xD80282846D814431, 0x14EBB157B84D9785), /* two blocks, ends at stripe boundary */
- ];
-
- for (buf, seed, lo, hi) in test_cases {
- let result = u128::from(lo) + (u128::from(hi) << 64);
-
- {
- let hash = hash128_with_seed(buf, seed);
-
- assert_eq!(
- hash,
- result,
- "hash128_with_seed(&buf[..{}], seed={}) failed, got 0x{:X}, expected 0x{:X}",
- buf.len(),
- seed,
- hash,
- result
- );
- }
-
- // streaming API test
-
- // single ingestio
- {
- let mut hasher = Hash128::with_seed(seed);
- hasher.write(buf);
- let hash = hasher.finish_ext();
-
- assert_eq!(
- hash,
- result,
- "Hash128::update(&buf[..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}",
- buf.len(),
- seed,
- hash,
- result
- );
- }
-
- if buf.len() > 3 {
- // 2 ingestions
- let mut hasher = Hash128::with_seed(seed);
- hasher.write(&buf[..3]);
- hasher.write(&buf[3..]);
- let hash = hasher.finish_ext();
-
- assert_eq!(
- hash,
- result,
- "Hash64::update(&buf[..3], &buf[3..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}",
- buf.len(),
- seed,
- hash,
- result
- );
- }
-
- // byte by byte ingestion
- {
- let mut hasher = Hash128::with_seed(seed);
-
- for chunk in buf.chunks(1) {
- hasher.write(chunk);
- }
-
- let hash = hasher.finish_ext();
-
- assert_eq!(
- hash,
- result,
- "Hash64::update(&buf[..{}].chunks(1)) with seed={} failed, got 0x{:X}, expected 0x{:X}",
- buf.len(),
- seed,
- hash,
- result
- );
- }
- }
- }
-}
diff --git a/crates/twox-hash/src/xxhash3.rs b/crates/twox-hash/src/xxhash3.rs
new file mode 100644
index 0000000..0067abe
--- /dev/null
+++ b/crates/twox-hash/src/xxhash3.rs
@@ -0,0 +1,425 @@
+use core::slice;
+
+use crate::{IntoU128 as _, IntoU32 as _};
+
+pub mod large;
+
+pub(crate) use large::dispatch;
+pub use large::{Algorithm, Vector};
+
+pub mod secret;
+
+pub use secret::{Secret, SECRET_MINIMUM_LENGTH};
+
+mod streaming;
+
+pub use streaming::{
+ Finalize, FixedBuffer, FixedMutBuffer, RawHasherCore, SecretBuffer, SecretTooShortError,
+ SecretWithSeedError,
+};
+
+#[cfg(feature = "alloc")]
+pub use streaming::AllocRawHasher;
+
+pub mod primes {
+ pub const PRIME32_1: u64 = 0x9E3779B1;
+ pub const PRIME32_2: u64 = 0x85EBCA77;
+ pub const PRIME32_3: u64 = 0xC2B2AE3D;
+ pub const PRIME64_1: u64 = 0x9E3779B185EBCA87;
+ pub const PRIME64_2: u64 = 0xC2B2AE3D27D4EB4F;
+ pub const PRIME64_3: u64 = 0x165667B19E3779F9;
+ pub const PRIME64_4: u64 = 0x85EBCA77C2B2AE63;
+ pub const PRIME64_5: u64 = 0x27D4EB2F165667C5;
+ pub const PRIME_MX1: u64 = 0x165667919E3779F9;
+ pub const PRIME_MX2: u64 = 0x9FB21C651E98DF25;
+}
+
+pub const CUTOFF: usize = 240;
+
+pub const DEFAULT_SEED: u64 = 0;
+
+/// The length of the default secret.
+pub const DEFAULT_SECRET_LENGTH: usize = 192;
+
+pub type DefaultSecret = [u8; DEFAULT_SECRET_LENGTH];
+
+pub const DEFAULT_SECRET_RAW: DefaultSecret = [
+ 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
+ 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
+ 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
+ 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
+ 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
+ 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
+ 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
+ 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
+ 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
+ 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
+ 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
+ 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
+];
+
+// Safety: The default secret is long enough
+pub const DEFAULT_SECRET: &Secret = unsafe { Secret::new_unchecked(&DEFAULT_SECRET_RAW) };
+
+/// # Correctness
+///
+/// This function assumes that the incoming buffer has been populated
+/// with the default secret.
+#[inline]
+pub fn derive_secret(seed: u64, secret: &mut DefaultSecret) {
+ if seed == DEFAULT_SEED {
+ return;
+ }
+
+ let (words, _) = secret.bp_as_chunks_mut();
+ let (pairs, _) = words.bp_as_chunks_mut();
+
+ for [a_p, b_p] in pairs {
+ let a = u64::from_le_bytes(*a_p);
+ let b = u64::from_le_bytes(*b_p);
+
+ let a = a.wrapping_add(seed);
+ let b = b.wrapping_sub(seed);
+
+ *a_p = a.to_le_bytes();
+ *b_p = b.to_le_bytes();
+ }
+}
+
+/// The provided secret was not at least [`SECRET_MINIMUM_LENGTH`][]
+/// bytes.
+#[derive(Debug)]
+pub struct OneshotWithSecretError(pub(crate) secret::Error);
+
+impl core::error::Error for OneshotWithSecretError {}
+
+impl core::fmt::Display for OneshotWithSecretError {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ self.0.fmt(f)
+ }
+}
+
+macro_rules! assert_input_range {
+ ($min:literal.., $len:expr) => {
+ assert!($min <= $len);
+ };
+ ($min:literal..=$max:literal, $len:expr) => {
+ assert!($min <= $len);
+ assert!($len <= $max);
+ };
+}
+pub(crate) use assert_input_range;
+
+#[inline(always)]
+pub fn impl_1_to_3_bytes_combined(input: &[u8]) -> u32 {
+ assert_input_range!(1..=3, input.len());
+ let input_length = input.len() as u8; // OK as we checked that the length fits
+
+ input[input.len() - 1].into_u32()
+ | input_length.into_u32() << 8
+ | input[0].into_u32() << 16
+ | input[input.len() >> 1].into_u32() << 24
+}
+
+#[inline]
+pub fn impl_17_to_128_bytes_iter(
+ secret: &Secret,
+ input: &[u8],
+ mut f: impl FnMut(&[u8; 16], &[u8; 16], &[[u8; 16]; 2]),
+) {
+ let secret = secret.words_for_17_to_128();
+ let (secret, _) = secret.bp_as_chunks::<2>();
+ let (fwd, _) = input.bp_as_chunks();
+ let (_, bwd) = input.bp_as_rchunks();
+
+ let q = bwd.len();
+
+ if input.len() > 32 {
+ if input.len() > 64 {
+ if input.len() > 96 {
+ f(&fwd[3], &bwd[q - 4], &secret[3]);
+ }
+
+ f(&fwd[2], &bwd[q - 3], &secret[2]);
+ }
+
+ f(&fwd[1], &bwd[q - 2], &secret[1]);
+ }
+
+ f(&fwd[0], &bwd[q - 1], &secret[0]);
+}
+
+#[inline]
+pub fn mix_step(data: &[u8; 16], secret: &[u8; 16], seed: u64) -> u64 {
+ let data_words = to_u64s(data);
+ let secret_words = to_u64s(secret);
+
+ let mul_result = {
+ let a = (data_words[0] ^ secret_words[0].wrapping_add(seed)).into_u128();
+ let b = (data_words[1] ^ secret_words[1].wrapping_sub(seed)).into_u128();
+
+ a.wrapping_mul(b)
+ };
+
+ mul_result.lower_half() ^ mul_result.upper_half()
+}
+
+#[inline]
+pub fn to_u64s(bytes: &[u8; 16]) -> [u64; 2] {
+ let (pair, _) = bytes.bp_as_chunks::<8>();
+ [pair[0], pair[1]].map(u64::from_le_bytes)
+}
+
+#[inline]
+#[cfg(feature = "xxhash3_128")]
+pub fn pairs_of_u64_bytes(bytes: &[u8]) -> &[[[u8; 16]; 2]] {
+ let (u64_bytes, _) = bytes.bp_as_chunks::<16>();
+ let (pairs, _) = u64_bytes.bp_as_chunks::<2>();
+ pairs
+}
+
+#[inline]
+pub fn avalanche(mut x: u64) -> u64 {
+ x ^= x >> 37;
+ x = x.wrapping_mul(primes::PRIME_MX1);
+ x ^= x >> 32;
+ x
+}
+
+#[inline]
+pub fn avalanche_xxh64(mut x: u64) -> u64 {
+ x ^= x >> 33;
+ x = x.wrapping_mul(primes::PRIME64_2);
+ x ^= x >> 29;
+ x = x.wrapping_mul(primes::PRIME64_3);
+ x ^= x >> 32;
+ x
+}
+
+#[inline]
+pub fn stripes_with_tail(block: &[u8]) -> (&[[u8; 64]], &[u8]) {
+ match block.bp_as_chunks() {
+ ([stripes @ .., last], []) => (stripes, last),
+ (stripes, last) => (stripes, last),
+ }
+}
+
+/// THis exists just to easily map the XXH3 algorithm to Rust as the
+/// algorithm describes 128-bit results as a pair of high and low u64
+/// values.
+#[derive(Copy, Clone)]
+pub(crate) struct X128 {
+ pub low: u64,
+ pub high: u64,
+}
+
+impl From<X128> for u128 {
+ fn from(value: X128) -> Self {
+ value.high.into_u128() << 64 | value.low.into_u128()
+ }
+}
+
+impl crate::IntoU128 for X128 {
+ fn into_u128(self) -> u128 {
+ self.into()
+ }
+}
+
+pub trait Halves {
+ type Output;
+
+ fn upper_half(self) -> Self::Output;
+ fn lower_half(self) -> Self::Output;
+}
+
+impl Halves for u64 {
+ type Output = u32;
+
+ #[inline]
+ fn upper_half(self) -> Self::Output {
+ (self >> 32) as _
+ }
+
+ #[inline]
+ fn lower_half(self) -> Self::Output {
+ self as _
+ }
+}
+
+impl Halves for u128 {
+ type Output = u64;
+
+ #[inline]
+ fn upper_half(self) -> Self::Output {
+ (self >> 64) as _
+ }
+
+ #[inline]
+ fn lower_half(self) -> Self::Output {
+ self as _
+ }
+}
+
+pub trait U8SliceExt {
+ fn first_u32(&self) -> Option<u32>;
+
+ fn last_u32(&self) -> Option<u32>;
+
+ fn first_u64(&self) -> Option<u64>;
+
+ fn last_u64(&self) -> Option<u64>;
+}
+
+impl U8SliceExt for [u8] {
+ #[inline]
+ fn first_u32(&self) -> Option<u32> {
+ self.first_chunk().copied().map(u32::from_le_bytes)
+ }
+
+ #[inline]
+ fn last_u32(&self) -> Option<u32> {
+ self.last_chunk().copied().map(u32::from_le_bytes)
+ }
+
+ #[inline]
+ fn first_u64(&self) -> Option<u64> {
+ self.first_chunk().copied().map(u64::from_le_bytes)
+ }
+
+ #[inline]
+ fn last_u64(&self) -> Option<u64> {
+ self.last_chunk().copied().map(u64::from_le_bytes)
+ }
+}
+
+pub trait SliceBackport<T> {
+ fn bp_as_chunks<const N: usize>(&self) -> (&[[T; N]], &[T]);
+
+ fn bp_as_chunks_mut<const N: usize>(&mut self) -> (&mut [[T; N]], &mut [T]);
+
+ fn bp_as_rchunks<const N: usize>(&self) -> (&[T], &[[T; N]]);
+}
+
+impl<T> SliceBackport<T> for [T] {
+ fn bp_as_chunks<const N: usize>(&self) -> (&[[T; N]], &[T]) {
+ assert_ne!(N, 0);
+ let len = self.len() / N;
+ // Safety: `(len / N) * N` has to be less-than-or-equal to `len`
+ let (head, tail) = unsafe { self.split_at_unchecked(len * N) };
+ // Safety: (1) `head` points to valid data, (2) the alignment
+ // of an array and the individual type are the same, (3) the
+ // valid elements are less-than-or-equal to the original
+ // slice.
+ let head = unsafe { slice::from_raw_parts(head.as_ptr().cast(), len) };
+ (head, tail)
+ }
+
+ fn bp_as_chunks_mut<const N: usize>(&mut self) -> (&mut [[T; N]], &mut [T]) {
+ assert_ne!(N, 0);
+ let len = self.len() / N;
+ // Safety: `(len / N) * N` has to be less than or equal to `len`
+ let (head, tail) = unsafe { self.split_at_mut_unchecked(len * N) };
+ // Safety: (1) `head` points to valid data, (2) the alignment
+ // of an array and the individual type are the same, (3) the
+ // valid elements are less-than-or-equal to the original
+ // slice.
+ let head = unsafe { slice::from_raw_parts_mut(head.as_mut_ptr().cast(), len) };
+ (head, tail)
+ }
+
+ fn bp_as_rchunks<const N: usize>(&self) -> (&[T], &[[T; N]]) {
+ assert_ne!(N, 0);
+ let len = self.len() / N;
+ // Safety: `(len / N) * N` has to be less than or equal to `len`
+ let (head, tail) = unsafe { self.split_at_unchecked(self.len() - len * N) };
+ // Safety: (1) `tail` points to valid data, (2) the alignment
+ // of an array and the individual type are the same, (3) the
+ // valid elements are less-than-or-equal to the original
+ // slice.
+ let tail = unsafe { slice::from_raw_parts(tail.as_ptr().cast(), len) };
+ (head, tail)
+ }
+}
+
+#[cfg(test)]
+pub mod test {
+ use std::array;
+
+ use super::*;
+
+ macro_rules! bytes {
+ ($($n: literal),* $(,)?) => {
+ &[$(&crate::xxhash3::test::gen_bytes::<$n>() as &[u8],)*] as &[&[u8]]
+ };
+ }
+ pub(crate) use bytes;
+
+ pub fn gen_bytes<const N: usize>() -> [u8; N] {
+ // Picking 251 as it's a prime number, which will hopefully
+ // help avoid incidental power-of-two alignment.
+ array::from_fn(|i| (i % 251) as u8)
+ }
+
+ #[test]
+ fn default_secret_is_valid() {
+ assert!(DEFAULT_SECRET.is_valid())
+ }
+
+ #[test]
+ fn backported_as_chunks() {
+ let x = [1, 2, 3, 4, 5];
+
+ let (a, b) = x.bp_as_chunks::<1>();
+ assert_eq!(a, &[[1], [2], [3], [4], [5]]);
+ assert_eq!(b, &[] as &[i32]);
+
+ let (a, b) = x.bp_as_chunks::<2>();
+ assert_eq!(a, &[[1, 2], [3, 4]]);
+ assert_eq!(b, &[5]);
+
+ let (a, b) = x.bp_as_chunks::<3>();
+ assert_eq!(a, &[[1, 2, 3]]);
+ assert_eq!(b, &[4, 5]);
+
+ let (a, b) = x.bp_as_chunks::<4>();
+ assert_eq!(a, &[[1, 2, 3, 4]]);
+ assert_eq!(b, &[5]);
+
+ let (a, b) = x.bp_as_chunks::<5>();
+ assert_eq!(a, &[[1, 2, 3, 4, 5]]);
+ assert_eq!(b, &[] as &[i32]);
+
+ let (a, b) = x.bp_as_chunks::<6>();
+ assert_eq!(a, &[] as &[[i32; 6]]);
+ assert_eq!(b, &[1, 2, 3, 4, 5]);
+ }
+
+ #[test]
+ fn backported_as_rchunks() {
+ let x = [1, 2, 3, 4, 5];
+
+ let (a, b) = x.bp_as_rchunks::<1>();
+ assert_eq!(a, &[] as &[i32]);
+ assert_eq!(b, &[[1], [2], [3], [4], [5]]);
+
+ let (a, b) = x.bp_as_rchunks::<2>();
+ assert_eq!(a, &[1]);
+ assert_eq!(b, &[[2, 3], [4, 5]]);
+
+ let (a, b) = x.bp_as_rchunks::<3>();
+ assert_eq!(a, &[1, 2]);
+ assert_eq!(b, &[[3, 4, 5]]);
+
+ let (a, b) = x.bp_as_rchunks::<4>();
+ assert_eq!(a, &[1]);
+ assert_eq!(b, &[[2, 3, 4, 5]]);
+
+ let (a, b) = x.bp_as_rchunks::<5>();
+ assert_eq!(a, &[] as &[i32]);
+ assert_eq!(b, &[[1, 2, 3, 4, 5]]);
+
+ let (a, b) = x.bp_as_rchunks::<6>();
+ assert_eq!(a, &[1, 2, 3, 4, 5]);
+ assert_eq!(b, &[] as &[[i32; 6]]);
+ }
+}
diff --git a/crates/twox-hash/src/xxhash3/large.rs b/crates/twox-hash/src/xxhash3/large.rs
new file mode 100644
index 0000000..df1a7d0
--- /dev/null
+++ b/crates/twox-hash/src/xxhash3/large.rs
@@ -0,0 +1,295 @@
+use super::{
+ assert_input_range, avalanche, primes::*, stripes_with_tail, Halves, Secret, SliceBackport as _,
+};
+
+#[cfg(feature = "xxhash3_128")]
+use super::X128;
+
+use crate::{IntoU128, IntoU64};
+
+// This module is not `cfg`-gated because it is used by some of the
+// SIMD implementations.
+pub mod scalar;
+
+#[cfg(target_arch = "aarch64")]
+pub mod neon;
+
+#[cfg(target_arch = "x86_64")]
+pub mod avx2;
+
+#[cfg(target_arch = "x86_64")]
+pub mod sse2;
+
+macro_rules! dispatch {
+ (
+ fn $fn_name:ident<$($gen:ident),*>($($arg_name:ident : $arg_ty:ty),*) $(-> $ret_ty:ty)?
+ [$($wheres:tt)*]
+ ) => {
+ #[inline]
+ fn do_scalar<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)?
+ where
+ $($wheres)*
+ {
+ $fn_name($crate::xxhash3::large::scalar::Impl, $($arg_name),*)
+ }
+
+ /// # Safety
+ ///
+ /// You must ensure that the CPU has the NEON feature
+ #[inline]
+ #[target_feature(enable = "neon")]
+ #[cfg(all(target_arch = "aarch64", feature = "std"))]
+ unsafe fn do_neon<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)?
+ where
+ $($wheres)*
+ {
+ // Safety: The caller has ensured we have the NEON feature
+ unsafe {
+ $fn_name($crate::xxhash3::large::neon::Impl::new_unchecked(), $($arg_name),*)
+ }
+ }
+
+ /// # Safety
+ ///
+ /// You must ensure that the CPU has the AVX2 feature
+ #[inline]
+ #[target_feature(enable = "avx2")]
+ #[cfg(all(target_arch = "x86_64", feature = "std"))]
+ unsafe fn do_avx2<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)?
+ where
+ $($wheres)*
+ {
+ // Safety: The caller has ensured we have the AVX2 feature
+ unsafe {
+ $fn_name($crate::xxhash3::large::avx2::Impl::new_unchecked(), $($arg_name),*)
+ }
+ }
+
+ /// # Safety
+ ///
+ /// You must ensure that the CPU has the SSE2 feature
+ #[inline]
+ #[target_feature(enable = "sse2")]
+ #[cfg(all(target_arch = "x86_64", feature = "std"))]
+ unsafe fn do_sse2<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)?
+ where
+ $($wheres)*
+ {
+ // Safety: The caller has ensured we have the SSE2 feature
+ unsafe {
+ $fn_name($crate::xxhash3::large::sse2::Impl::new_unchecked(), $($arg_name),*)
+ }
+ }
+
+ // Now we invoke the right function
+
+ #[cfg(_internal_xxhash3_force_neon)]
+ return unsafe { do_neon($($arg_name),*) };
+
+ #[cfg(_internal_xxhash3_force_avx2)]
+ return unsafe { do_avx2($($arg_name),*) };
+
+ #[cfg(_internal_xxhash3_force_sse2)]
+ return unsafe { do_sse2($($arg_name),*) };
+
+ #[cfg(_internal_xxhash3_force_scalar)]
+ return do_scalar($($arg_name),*);
+
+ // This code can be unreachable if one of the `*_force_*` cfgs
+ // are set above, but that's the point.
+ #[allow(unreachable_code)]
+ {
+ #[cfg(all(target_arch = "aarch64", feature = "std"))]
+ {
+ if std::arch::is_aarch64_feature_detected!("neon") {
+ // Safety: We just ensured we have the NEON feature
+ return unsafe { do_neon($($arg_name),*) };
+ }
+ }
+
+ #[cfg(all(target_arch = "x86_64", feature = "std"))]
+ {
+ if is_x86_feature_detected!("avx2") {
+ // Safety: We just ensured we have the AVX2 feature
+ return unsafe { do_avx2($($arg_name),*) };
+ } else if is_x86_feature_detected!("sse2") {
+ // Safety: We just ensured we have the SSE2 feature
+ return unsafe { do_sse2($($arg_name),*) };
+ }
+ }
+ do_scalar($($arg_name),*)
+ }
+ };
+}
+pub(crate) use dispatch;
+
+pub trait Vector: Copy {
+ fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]);
+
+ fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]);
+}
+
+#[rustfmt::skip]
+pub const INITIAL_ACCUMULATORS: [u64; 8] = [
+ PRIME32_3, PRIME64_1, PRIME64_2, PRIME64_3,
+ PRIME64_4, PRIME32_2, PRIME64_5, PRIME32_1,
+];
+
+pub struct Algorithm<V>(pub V);
+
+impl<V> Algorithm<V>
+where
+ V: Vector,
+{
+ #[inline]
+ pub fn oneshot<F>(&self, secret: &Secret, input: &[u8], finalize: F) -> F::Output
+ where
+ F: super::Finalize,
+ {
+ assert_input_range!(241.., input.len());
+ let mut acc = INITIAL_ACCUMULATORS;
+
+ let stripes_per_block = (secret.len() - 64) / 8;
+ let block_size = 64 * stripes_per_block;
+
+ let mut blocks = input.chunks_exact(block_size);
+
+ let last_block = if blocks.remainder().is_empty() {
+ // Safety: We know that `input` is non-empty, which means
+ // that either there will be a remainder or one or more
+ // full blocks. That info isn't flowing to the optimizer,
+ // so we use `unwrap_unchecked`.
+ unsafe { blocks.next_back().unwrap_unchecked() }
+ } else {
+ blocks.remainder()
+ };
+
+ self.rounds(&mut acc, blocks, secret);
+
+ let len = input.len();
+
+ let last_stripe = input.last_chunk().unwrap();
+ finalize.large(self.0, acc, last_block, last_stripe, secret, len)
+ }
+
+ #[inline]
+ fn rounds<'a>(
+ &self,
+ acc: &mut [u64; 8],
+ blocks: impl IntoIterator<Item = &'a [u8]>,
+ secret: &Secret,
+ ) {
+ for block in blocks {
+ let (stripes, _) = block.bp_as_chunks();
+
+ self.round(acc, stripes, secret);
+ }
+ }
+
+ #[inline]
+ fn round(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &Secret) {
+ let secret_end = secret.last_stripe();
+
+ self.round_accumulate(acc, stripes, secret);
+ self.0.round_scramble(acc, secret_end);
+ }
+
+ #[inline]
+ fn round_accumulate(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &Secret) {
+ let secrets = (0..stripes.len()).map(|i| {
+ // Safety: The number of stripes is determined by the
+ // block size, which is determined by the secret size.
+ unsafe { secret.stripe(i) }
+ });
+
+ for (stripe, secret) in stripes.iter().zip(secrets) {
+ self.0.accumulate(acc, stripe, secret);
+ }
+ }
+
+ #[inline(always)]
+ #[cfg(feature = "xxhash3_64")]
+ pub fn finalize_64(
+ &self,
+ mut acc: [u64; 8],
+ last_block: &[u8],
+ last_stripe: &[u8; 64],
+ secret: &Secret,
+ len: usize,
+ ) -> u64 {
+ debug_assert!(!last_block.is_empty());
+ self.last_round(&mut acc, last_block, last_stripe, secret);
+
+ let low = len.into_u64().wrapping_mul(PRIME64_1);
+ self.final_merge(&acc, low, secret.final_secret())
+ }
+
+ #[inline]
+ #[cfg(feature = "xxhash3_128")]
+ pub fn finalize_128(
+ &self,
+ mut acc: [u64; 8],
+ last_block: &[u8],
+ last_stripe: &[u8; 64],
+ secret: &Secret,
+ len: usize,
+ ) -> u128 {
+ debug_assert!(!last_block.is_empty());
+ self.last_round(&mut acc, last_block, last_stripe, secret);
+
+ let len = len.into_u64();
+
+ let low = len.wrapping_mul(PRIME64_1);
+ let low = self.final_merge(&acc, low, secret.final_secret());
+
+ let high = !len.wrapping_mul(PRIME64_2);
+ let high = self.final_merge(&acc, high, secret.for_128().final_secret());
+
+ X128 { low, high }.into()
+ }
+
+ #[inline]
+ fn last_round(
+ &self,
+ acc: &mut [u64; 8],
+ block: &[u8],
+ last_stripe: &[u8; 64],
+ secret: &Secret,
+ ) {
+ // Accumulation steps are run for the stripes in the last block,
+ // except for the last stripe (whether it is full or not)
+ let (stripes, _) = stripes_with_tail(block);
+
+ let secrets = (0..stripes.len()).map(|i| {
+ // Safety: The number of stripes is determined by the
+ // block size, which is determined by the secret size.
+ unsafe { secret.stripe(i) }
+ });
+
+ for (stripe, secret) in stripes.iter().zip(secrets) {
+ self.0.accumulate(acc, stripe, secret);
+ }
+
+ let last_stripe_secret = secret.last_stripe_secret_better_name();
+ self.0.accumulate(acc, last_stripe, last_stripe_secret);
+ }
+
+ #[inline]
+ fn final_merge(&self, acc: &[u64; 8], init_value: u64, secret: &[u8; 64]) -> u64 {
+ let (secrets, _) = secret.bp_as_chunks();
+ let mut result = init_value;
+ for i in 0..4 {
+ // 64-bit by 64-bit multiplication to 128-bit full result
+ let mul_result = {
+ let sa = u64::from_le_bytes(secrets[i * 2]);
+ let sb = u64::from_le_bytes(secrets[i * 2 + 1]);
+
+ let a = (acc[i * 2] ^ sa).into_u128();
+ let b = (acc[i * 2 + 1] ^ sb).into_u128();
+ a.wrapping_mul(b)
+ };
+ result = result.wrapping_add(mul_result.lower_half() ^ mul_result.upper_half());
+ }
+ avalanche(result)
+ }
+}
diff --git a/crates/twox-hash/src/xxhash3/large/avx2.rs b/crates/twox-hash/src/xxhash3/large/avx2.rs
new file mode 100644
index 0000000..752d7aa
--- /dev/null
+++ b/crates/twox-hash/src/xxhash3/large/avx2.rs
@@ -0,0 +1,88 @@
+use core::arch::x86_64::*;
+
+use super::{scalar, Vector};
+
+#[derive(Copy, Clone)]
+pub struct Impl(());
+
+impl Impl {
+ /// # Safety
+ ///
+ /// You must ensure that the CPU has the AVX2 feature
+ #[inline]
+ #[cfg(feature = "std")]
+ pub unsafe fn new_unchecked() -> Impl {
+ Impl(())
+ }
+}
+
+impl Vector for Impl {
+ #[inline]
+ fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) {
+ // Safety: Type can only be constructed when AVX2 feature is present
+ unsafe { round_scramble_avx2(acc, secret_end) }
+ }
+
+ #[inline]
+ fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) {
+ // Safety: Type can only be constructed when AVX2 feature is present
+ unsafe { accumulate_avx2(acc, stripe, secret) }
+ }
+}
+
+/// # Safety
+///
+/// You must ensure that the CPU has the AVX2 feature
+#[inline]
+#[target_feature(enable = "avx2")]
+unsafe fn round_scramble_avx2(acc: &mut [u64; 8], secret_end: &[u8; 64]) {
+ // The scalar implementation is autovectorized nicely enough
+ scalar::Impl.round_scramble(acc, secret_end)
+}
+
+/// # Safety
+///
+/// You must ensure that the CPU has the AVX2 feature
+#[inline]
+#[target_feature(enable = "avx2")]
+unsafe fn accumulate_avx2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) {
+ let acc = acc.as_mut_ptr().cast::<__m256i>();
+ let stripe = stripe.as_ptr().cast::<__m256i>();
+ let secret = secret.as_ptr().cast::<__m256i>();
+
+ // Safety: The caller has ensured we have the AVX2
+ // feature. We load from and store to references so we
+ // know that data is valid. We use unaligned loads /
+ // stores. Data manipulation is otherwise done on
+ // intermediate values.
+ unsafe {
+ for i in 0..2 {
+ // [align-acc]: The C code aligns the accumulator to avoid
+ // the unaligned load and store here, but that doesn't
+ // seem to be a big performance loss.
+ let mut acc_0 = _mm256_loadu_si256(acc.add(i));
+ let stripe_0 = _mm256_loadu_si256(stripe.add(i));
+ let secret_0 = _mm256_loadu_si256(secret.add(i));
+
+ // let value[i] = stripe[i] ^ secret[i];
+ let value_0 = _mm256_xor_si256(stripe_0, secret_0);
+
+ // stripe_swap[i] = stripe[i ^ 1]
+ let stripe_swap_0 = _mm256_shuffle_epi32::<0b01_00_11_10>(stripe_0);
+
+ // acc[i] += stripe_swap[i]
+ acc_0 = _mm256_add_epi64(acc_0, stripe_swap_0);
+
+ // value_shift[i] = value[i] >> 32
+ let value_shift_0 = _mm256_srli_epi64::<32>(value_0);
+
+ // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_shift[i])
+ let product_0 = _mm256_mul_epu32(value_0, value_shift_0);
+
+ // acc[i] += product[i]
+ acc_0 = _mm256_add_epi64(acc_0, product_0);
+
+ _mm256_storeu_si256(acc.add(i), acc_0);
+ }
+ }
+}
diff --git a/crates/twox-hash/src/xxhash3/large/neon.rs b/crates/twox-hash/src/xxhash3/large/neon.rs
new file mode 100644
index 0000000..fc49726
--- /dev/null
+++ b/crates/twox-hash/src/xxhash3/large/neon.rs
@@ -0,0 +1,210 @@
+use core::arch::aarch64::*;
+
+use super::Vector;
+use crate::xxhash3::{primes::PRIME32_1, SliceBackport as _};
+
+#[derive(Copy, Clone)]
+pub struct Impl(());
+
+impl Impl {
+ /// # Safety
+ ///
+ /// You must ensure that the CPU has the NEON feature
+ #[inline]
+ #[cfg(feature = "std")]
+ pub unsafe fn new_unchecked() -> Self {
+ Self(())
+ }
+}
+
+impl Vector for Impl {
+ #[inline]
+ fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) {
+ // Safety: Type can only be constructed when NEON feature is present
+ unsafe { round_scramble_neon(acc, secret_end) }
+ }
+
+ #[inline]
+ fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) {
+ // Safety: Type can only be constructed when NEON feature is present
+ unsafe { accumulate_neon(acc, stripe, secret) }
+ }
+}
+
+/// # Safety
+///
+/// You must ensure that the CPU has the NEON feature
+#[target_feature(enable = "neon")]
+#[inline]
+unsafe fn round_scramble_neon(acc: &mut [u64; 8], secret_end: &[u8; 64]) {
+ let secret_base = secret_end.as_ptr().cast::<u64>();
+ let (acc, _) = acc.bp_as_chunks_mut::<2>();
+
+ for (i, acc) in acc.iter_mut().enumerate() {
+ // Safety: The caller has ensured we have the NEON
+ // feature. We load from and store to references so we
+ // know that data is valid. We use unaligned loads /
+ // stores. Data manipulation is otherwise done on
+ // intermediate values.
+ unsafe {
+ let mut accv = vld1q_u64(acc.as_ptr());
+ let secret = vld1q_u64(secret_base.add(i * 2));
+
+ // tmp[i] = acc[i] >> 47
+ let shifted = vshrq_n_u64::<47>(accv);
+
+ // acc[i] ^= tmp[i]
+ accv = veorq_u64(accv, shifted);
+
+ // acc[i] ^= secret[i]
+ accv = veorq_u64(accv, secret);
+
+ // acc[i] *= PRIME32_1
+ accv = xx_vmulq_u32_u64(accv, PRIME32_1 as u32);
+
+ vst1q_u64(acc.as_mut_ptr(), accv);
+ }
+ }
+}
+
+/// We process 4x u64 at a time as that allows us to completely
+/// fill a `uint64x2_t` with useful values when performing the
+/// multiplication.
+///
+/// # Safety
+///
+/// You must ensure that the CPU has the NEON feature
+#[target_feature(enable = "neon")]
+#[inline]
+unsafe fn accumulate_neon(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) {
+ let (acc2, _) = acc.bp_as_chunks_mut::<4>();
+ for (i, acc) in acc2.iter_mut().enumerate() {
+ // Safety: The caller has ensured we have the NEON
+ // feature. We load from and store to references so we
+ // know that data is valid. We use unaligned loads /
+ // stores. Data manipulation is otherwise done on
+ // intermediate values.
+ unsafe {
+ let mut accv_0 = vld1q_u64(acc.as_ptr().cast::<u64>());
+ let mut accv_1 = vld1q_u64(acc.as_ptr().cast::<u64>().add(2));
+ let stripe_0 = vld1q_u64(stripe.as_ptr().cast::<u64>().add(i * 4));
+ let stripe_1 = vld1q_u64(stripe.as_ptr().cast::<u64>().add(i * 4 + 2));
+ let secret_0 = vld1q_u64(secret.as_ptr().cast::<u64>().add(i * 4));
+ let secret_1 = vld1q_u64(secret.as_ptr().cast::<u64>().add(i * 4 + 2));
+
+ // stripe_rot[i ^ 1] = stripe[i];
+ let stripe_rot_0 = vextq_u64::<1>(stripe_0, stripe_0);
+ let stripe_rot_1 = vextq_u64::<1>(stripe_1, stripe_1);
+
+ // value[i] = stripe[i] ^ secret[i];
+ let value_0 = veorq_u64(stripe_0, secret_0);
+ let value_1 = veorq_u64(stripe_1, secret_1);
+
+ // sum[i] = value[i] * (value[i] >> 32) + stripe_rot[i]
+ //
+ // Each vector has 64-bit values, but we treat them as
+ // 32-bit and then unzip them. This naturally splits
+ // the upper and lower 32 bits.
+ let parts_0 = vreinterpretq_u32_u64(value_0);
+ let parts_1 = vreinterpretq_u32_u64(value_1);
+
+ let hi = vuzp1q_u32(parts_0, parts_1);
+ let lo = vuzp2q_u32(parts_0, parts_1);
+
+ let sum_0 = vmlal_u32(stripe_rot_0, vget_low_u32(hi), vget_low_u32(lo));
+ let sum_1 = vmlal_high_u32(stripe_rot_1, hi, lo);
+
+ reordering_barrier(sum_0);
+ reordering_barrier(sum_1);
+
+ // acc[i] += sum[i]
+ accv_0 = vaddq_u64(accv_0, sum_0);
+ accv_1 = vaddq_u64(accv_1, sum_1);
+
+ vst1q_u64(acc.as_mut_ptr().cast::<u64>(), accv_0);
+ vst1q_u64(acc.as_mut_ptr().cast::<u64>().add(2), accv_1);
+ };
+ }
+}
+
+// There is no `vmulq_u64` (multiply 64-bit by 64-bit, keeping the
+// lower 64 bits of the result) operation, so we have to make our
+// own out of 32-bit operations . We can simplify by realizing
+// that we are always multiplying by a 32-bit number.
+//
+// The basic algorithm is traditional long multiplication. `[]`
+// denotes groups of 32 bits.
+//
+// [AAAA][BBBB]
+// x [CCCC]
+// --------------------
+// [BCBC][BCBC]
+// + [ACAC][ACAC]
+// --------------------
+// [ACBC][BCBC] // 64-bit truncation occurs
+//
+// This can be written in NEON as a vectorwise wrapping
+// multiplication of the high-order chunk of the input (`A`)
+// against the constant and then a multiply-widen-and-accumulate
+// of the low-order chunk of the input and the constant:
+//
+// 1. High-order, vectorwise
+//
+// [AAAA][BBBB]
+// x [CCCC][0000]
+// --------------------
+// [ACAC][0000]
+//
+// 2. Low-order, widening
+//
+// [BBBB]
+// x [CCCC] // widening
+// --------------------
+// [BCBC][BCBC]
+//
+// 3. Accumulation
+//
+// [ACAC][0000]
+// + [BCBC][BCBC] // vectorwise
+// --------------------
+// [ACBC][BCBC]
+//
+// Thankfully, NEON has a single multiply-widen-and-accumulate
+// operation.
+#[inline]
+pub fn xx_vmulq_u32_u64(input: uint64x2_t, og_factor: u32) -> uint64x2_t {
+ // Safety: We only compute using our argument values and do
+ // not change memory.
+ unsafe {
+ let input_as_u32 = vreinterpretq_u32_u64(input);
+ let factor = vmov_n_u32(og_factor);
+ let factor_striped = vmovq_n_u64(u64::from(og_factor) << 32);
+ let factor_striped = vreinterpretq_u32_u64(factor_striped);
+
+ let high_shifted_as_32 = vmulq_u32(input_as_u32, factor_striped);
+ let high_shifted = vreinterpretq_u64_u32(high_shifted_as_32);
+
+ let input_lo = vmovn_u64(input);
+ vmlal_u32(high_shifted, input_lo, factor)
+ }
+}
+
+/// # Safety
+///
+/// You must ensure that the CPU has the NEON feature
+//
+// https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5312-L5323
+#[inline]
+#[target_feature(enable = "neon")]
+unsafe fn reordering_barrier(r: uint64x2_t) {
+ // Safety: The caller has ensured we have the NEON feature. We
+ // aren't doing anything with the argument, so we shouldn't be
+ // able to cause unsafety!
+ unsafe {
+ core::arch::asm!(
+ "/* {r:v} */",
+ r = in(vreg) r,
+ options(nomem, nostack),
+ )
+ }
+}
diff --git a/crates/twox-hash/src/xxhash3/large/scalar.rs b/crates/twox-hash/src/xxhash3/large/scalar.rs
new file mode 100644
index 0000000..77da53a
--- /dev/null
+++ b/crates/twox-hash/src/xxhash3/large/scalar.rs
@@ -0,0 +1,69 @@
+use super::Vector;
+use crate::xxhash3::{primes::PRIME32_1, SliceBackport as _};
+
+#[derive(Copy, Clone)]
+pub struct Impl;
+
+impl Vector for Impl {
+ #[inline]
+ fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) {
+ let (last, _) = secret_end.bp_as_chunks();
+ let last = last.iter().copied().map(u64::from_le_bytes);
+
+ for (acc, secret) in acc.iter_mut().zip(last) {
+ *acc ^= *acc >> 47;
+ *acc ^= secret;
+ *acc = acc.wrapping_mul(PRIME32_1);
+ }
+ }
+
+ #[inline]
+ fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) {
+ let (stripe, _) = stripe.bp_as_chunks();
+ let (secret, _) = secret.bp_as_chunks();
+
+ for i in 0..8 {
+ let stripe = u64::from_le_bytes(stripe[i]);
+ let secret = u64::from_le_bytes(secret[i]);
+
+ let value = stripe ^ secret;
+ acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe);
+ acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]);
+ }
+ }
+}
+
+#[inline]
+#[cfg(any(miri, not(target_arch = "aarch64")))]
+fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 {
+ use super::IntoU64;
+
+ let lhs = (lhs as u32).into_u64();
+ let rhs = (rhs as u32).into_u64();
+
+ let product = lhs.wrapping_mul(rhs);
+ acc.wrapping_add(product)
+}
+
+#[inline]
+// https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5595-L5610
+// https://github.com/llvm/llvm-project/issues/98481
+#[cfg(all(not(miri), target_arch = "aarch64"))]
+fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 {
+ let res;
+
+ // Safety: We only compute using our argument values and do
+ // not change memory.
+ unsafe {
+ core::arch::asm!(
+ "umaddl {res}, {lhs:w}, {rhs:w}, {acc}",
+ lhs = in(reg) lhs,
+ rhs = in(reg) rhs,
+ acc = in(reg) acc,
+ res = out(reg) res,
+ options(pure, nomem, nostack),
+ )
+ }
+
+ res
+}
diff --git a/crates/twox-hash/src/xxhash3/large/sse2.rs b/crates/twox-hash/src/xxhash3/large/sse2.rs
new file mode 100644
index 0000000..29a9c2a
--- /dev/null
+++ b/crates/twox-hash/src/xxhash3/large/sse2.rs
@@ -0,0 +1,86 @@
+use core::arch::x86_64::*;
+
+use super::{scalar, Vector};
+
+#[derive(Copy, Clone)]
+pub struct Impl(());
+
+impl Impl {
+ /// # Safety
+ ///
+ /// You must ensure that the CPU has the SSE2 feature
+ #[inline]
+ #[cfg(feature = "std")]
+ pub unsafe fn new_unchecked() -> Impl {
+ Impl(())
+ }
+}
+
+impl Vector for Impl {
+ #[inline]
+ fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) {
+ // Safety: Type can only be constructed when SSE2 feature is present
+ unsafe { round_scramble_sse2(acc, secret_end) }
+ }
+
+ #[inline]
+ fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) {
+ // Safety: Type can only be constructed when SSE2 feature is present
+ unsafe { accumulate_sse2(acc, stripe, secret) }
+ }
+}
+
+/// # Safety
+///
+/// You must ensure that the CPU has the SSE2 feature
+#[inline]
+#[target_feature(enable = "sse2")]
+unsafe fn round_scramble_sse2(acc: &mut [u64; 8], secret_end: &[u8; 64]) {
+ // The scalar implementation is autovectorized nicely enough
+ scalar::Impl.round_scramble(acc, secret_end)
+}
+
+/// # Safety
+///
+/// You must ensure that the CPU has the SSE2 feature
+#[inline]
+#[target_feature(enable = "sse2")]
+unsafe fn accumulate_sse2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) {
+ let acc = acc.as_mut_ptr().cast::<__m128i>();
+ let stripe = stripe.as_ptr().cast::<__m128i>();
+ let secret = secret.as_ptr().cast::<__m128i>();
+
+ // Safety: The caller has ensured we have the SSE2
+ // feature. We load from and store to references so we
+ // know that data is valid. We use unaligned loads /
+ // stores. Data manipulation is otherwise done on
+ // intermediate values.
+ unsafe {
+ for i in 0..4 {
+ // See [align-acc].
+ let mut acc_0 = _mm_loadu_si128(acc.add(i));
+ let stripe_0 = _mm_loadu_si128(stripe.add(i));
+ let secret_0 = _mm_loadu_si128(secret.add(i));
+
+ // let value[i] = stripe[i] ^ secret[i];
+ let value_0 = _mm_xor_si128(stripe_0, secret_0);
+
+ // stripe_swap[i] = stripe[i ^ 1]
+ let stripe_swap_0 = _mm_shuffle_epi32::<0b01_00_11_10>(stripe_0);
+
+ // acc[i] += stripe_swap[i]
+ acc_0 = _mm_add_epi64(acc_0, stripe_swap_0);
+
+ // value_shift[i] = value[i] >> 32
+ let value_shift_0 = _mm_srli_epi64::<32>(value_0);
+
+ // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_shift[i])
+ let product_0 = _mm_mul_epu32(value_0, value_shift_0);
+
+ // acc[i] += product[i]
+ acc_0 = _mm_add_epi64(acc_0, product_0);
+
+ _mm_storeu_si128(acc.add(i), acc_0);
+ }
+ }
+}
diff --git a/crates/twox-hash/src/xxhash3/secret.rs b/crates/twox-hash/src/xxhash3/secret.rs
new file mode 100644
index 0000000..63e55b2
--- /dev/null
+++ b/crates/twox-hash/src/xxhash3/secret.rs
@@ -0,0 +1,273 @@
+use core::{hint::assert_unchecked, mem};
+
+use super::SliceBackport as _;
+
+#[cfg(feature = "xxhash3_128")]
+use super::pairs_of_u64_bytes;
+
+/// The minimum length of a secret.
+pub const SECRET_MINIMUM_LENGTH: usize = 136;
+
+#[repr(transparent)]
+pub struct Secret([u8]);
+
+impl Secret {
+ #[inline]
+ pub fn new(bytes: &[u8]) -> Result<&Self, Error> {
+ // Safety: We check for validity before returning.
+ unsafe {
+ let this = Self::new_unchecked(bytes);
+ if this.is_valid() {
+ Ok(this)
+ } else {
+ Err(Error(()))
+ }
+ }
+ }
+
+ /// # Safety
+ ///
+ /// You must ensure that the secret byte length is >=
+ /// SECRET_MINIMUM_LENGTH.
+ #[inline]
+ pub const unsafe fn new_unchecked(bytes: &[u8]) -> &Self {
+ // Safety: We are `#[repr(transparent)]`. It's up to the
+ // caller to ensure the length
+ unsafe { mem::transmute(bytes) }
+ }
+
+ #[inline]
+ #[cfg(feature = "xxhash3_64")]
+ pub fn for_64(&self) -> Secret64BitView<'_> {
+ Secret64BitView(self)
+ }
+
+ #[inline]
+ #[cfg(feature = "xxhash3_128")]
+ pub fn for_128(&self) -> Secret128BitView<'_> {
+ Secret128BitView(self)
+ }
+
+ #[inline]
+ pub fn words_for_17_to_128(&self) -> &[[u8; 16]] {
+ self.reassert_preconditions();
+
+ let (words, _) = self.0.bp_as_chunks();
+ words
+ }
+
+ /// # Safety
+ ///
+ /// `i` must be less than the number of stripes in the secret
+ /// ([`Self::n_stripes`][]).
+ #[inline]
+ pub unsafe fn stripe(&self, i: usize) -> &[u8; 64] {
+ self.reassert_preconditions();
+
+ // Safety: The caller has ensured that `i` is
+ // in-bounds. `&[u8]` and `&[u8; 64]` have the same alignment.
+ unsafe {
+ debug_assert!(i < self.n_stripes());
+ &*self.0.get_unchecked(i * 8..).as_ptr().cast()
+ }
+ }
+
+ #[inline]
+ pub fn last_stripe(&self) -> &[u8; 64] {
+ self.reassert_preconditions();
+
+ self.0.last_chunk().unwrap()
+ }
+
+ #[inline]
+ pub fn last_stripe_secret_better_name(&self) -> &[u8; 64] {
+ self.reassert_preconditions();
+
+ self.0[self.0.len() - 71..].first_chunk().unwrap()
+ }
+
+ #[inline]
+ pub fn final_secret(&self) -> &[u8; 64] {
+ self.reassert_preconditions();
+
+ self.0[11..].first_chunk().unwrap()
+ }
+
+ #[inline]
+ pub fn len(&self) -> usize {
+ self.0.len()
+ }
+
+ #[inline]
+ pub fn n_stripes(&self) -> usize {
+ // stripes_per_block
+ (self.len() - 64) / 8
+ }
+
+ #[inline(always)]
+ fn reassert_preconditions(&self) {
+ // Safety: The length of the bytes was checked at value
+ // construction time.
+ unsafe {
+ debug_assert!(self.is_valid());
+ assert_unchecked(self.is_valid());
+ }
+ }
+
+ #[inline(always)]
+ pub fn is_valid(&self) -> bool {
+ self.0.len() >= SECRET_MINIMUM_LENGTH
+ }
+}
+
+#[derive(Copy, Clone)]
+#[cfg(feature = "xxhash3_64")]
+pub struct Secret64BitView<'a>(&'a Secret);
+
+#[cfg(feature = "xxhash3_64")]
+impl<'a> Secret64BitView<'a> {
+ #[inline]
+ pub fn words_for_0(self) -> [u64; 2] {
+ self.0.reassert_preconditions();
+
+ let (q, _) = self.b()[56..].bp_as_chunks();
+ [q[0], q[1]].map(u64::from_le_bytes)
+ }
+
+ #[inline]
+ pub fn words_for_1_to_3(self) -> [u32; 2] {
+ self.0.reassert_preconditions();
+
+ let (q, _) = self.b().bp_as_chunks();
+ [q[0], q[1]].map(u32::from_le_bytes)
+ }
+
+ #[inline]
+ pub fn words_for_4_to_8(self) -> [u64; 2] {
+ self.0.reassert_preconditions();
+
+ let (q, _) = self.b()[8..].bp_as_chunks();
+ [q[0], q[1]].map(u64::from_le_bytes)
+ }
+
+ #[inline]
+ pub fn words_for_9_to_16(self) -> [u64; 4] {
+ self.0.reassert_preconditions();
+
+ let (q, _) = self.b()[24..].bp_as_chunks();
+ [q[0], q[1], q[2], q[3]].map(u64::from_le_bytes)
+ }
+
+ #[inline]
+ pub fn words_for_127_to_240_part1(self) -> &'a [[u8; 16]] {
+ self.0.reassert_preconditions();
+
+ let (ss, _) = self.b().bp_as_chunks();
+ ss
+ }
+
+ #[inline]
+ pub fn words_for_127_to_240_part2(self) -> &'a [[u8; 16]] {
+ self.0.reassert_preconditions();
+
+ let (ss, _) = self.b()[3..].bp_as_chunks();
+ ss
+ }
+
+ #[inline]
+ pub fn words_for_127_to_240_part3(self) -> &'a [u8; 16] {
+ self.0.reassert_preconditions();
+
+ self.b()[119..].first_chunk().unwrap()
+ }
+
+ fn b(self) -> &'a [u8] {
+ &(self.0).0
+ }
+}
+
+#[derive(Copy, Clone)]
+#[cfg(feature = "xxhash3_128")]
+pub struct Secret128BitView<'a>(&'a Secret);
+
+#[cfg(feature = "xxhash3_128")]
+impl<'a> Secret128BitView<'a> {
+ #[inline]
+ pub fn words_for_0(self) -> [u64; 4] {
+ self.0.reassert_preconditions();
+
+ let (q, _) = self.b()[64..].bp_as_chunks();
+ [q[0], q[1], q[2], q[3]].map(u64::from_le_bytes)
+ }
+
+ #[inline]
+ pub fn words_for_1_to_3(self) -> [u32; 4] {
+ self.0.reassert_preconditions();
+
+ let (q, _) = self.b().bp_as_chunks();
+ [q[0], q[1], q[2], q[3]].map(u32::from_le_bytes)
+ }
+
+ #[inline]
+ pub fn words_for_4_to_8(self) -> [u64; 2] {
+ self.0.reassert_preconditions();
+
+ let (q, _) = self.b()[16..].bp_as_chunks();
+ [q[0], q[1]].map(u64::from_le_bytes)
+ }
+
+ #[inline]
+ pub fn words_for_9_to_16(self) -> [u64; 4] {
+ self.0.reassert_preconditions();
+
+ let (q, _) = self.b()[32..].bp_as_chunks();
+ [q[0], q[1], q[2], q[3]].map(u64::from_le_bytes)
+ }
+
+ #[inline]
+ pub fn words_for_127_to_240_part1(self) -> &'a [[[u8; 16]; 2]] {
+ self.0.reassert_preconditions();
+
+ pairs_of_u64_bytes(self.b())
+ }
+
+ #[inline]
+ pub fn words_for_127_to_240_part2(self) -> &'a [[[u8; 16]; 2]] {
+ self.0.reassert_preconditions();
+
+ pairs_of_u64_bytes(&self.b()[3..])
+ }
+
+ #[inline]
+ pub fn words_for_127_to_240_part3(self) -> &'a [[u8; 16]; 2] {
+ self.0.reassert_preconditions();
+
+ pairs_of_u64_bytes(&self.b()[103..]).first().unwrap()
+ }
+
+ #[inline]
+ pub fn final_secret(self) -> &'a [u8; 64] {
+ self.0.reassert_preconditions();
+
+ let b = self.b();
+ b[b.len() - 75..].first_chunk().unwrap()
+ }
+
+ fn b(self) -> &'a [u8] {
+ &(self.0).0
+ }
+}
+
+#[derive(Debug)]
+pub struct Error(());
+
+impl core::error::Error for Error {}
+
+impl core::fmt::Display for Error {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ write!(
+ f,
+ "The secret must have at least {SECRET_MINIMUM_LENGTH} bytes"
+ )
+ }
+}
diff --git a/crates/twox-hash/src/xxhash3/streaming.rs b/crates/twox-hash/src/xxhash3/streaming.rs
new file mode 100644
index 0000000..f8e484b
--- /dev/null
+++ b/crates/twox-hash/src/xxhash3/streaming.rs
@@ -0,0 +1,561 @@
+use core::hint::assert_unchecked;
+
+use super::{large::INITIAL_ACCUMULATORS, *};
+
/// A buffer containing the secret bytes.
///
/// # Safety
///
/// Must always return a slice with the same number of elements.
pub unsafe trait FixedBuffer: AsRef<[u8]> {}

/// A mutable buffer to contain the secret bytes.
///
/// # Safety
///
/// Must always return a slice with the same number of elements. The
/// slice must always be the same as that returned from
/// [`AsRef::as_ref`][].
pub unsafe trait FixedMutBuffer: FixedBuffer + AsMut<[u8]> {}

// Safety: An array will never change size.
unsafe impl<const N: usize> FixedBuffer for [u8; N] {}

// Safety: An array will never change size.
unsafe impl<const N: usize> FixedMutBuffer for [u8; N] {}

// Safety: An array will never change size.
unsafe impl<const N: usize> FixedBuffer for &[u8; N] {}

// Safety: An array will never change size.
unsafe impl<const N: usize> FixedBuffer for &mut [u8; N] {}

// Safety: An array will never change size.
unsafe impl<const N: usize> FixedMutBuffer for &mut [u8; N] {}

// The streaming scratch buffer holds four 64-byte stripes of
// not-yet-processed input.
const STRIPE_BYTES: usize = 64;
const BUFFERED_STRIPES: usize = 4;
const BUFFERED_BYTES: usize = STRIPE_BYTES * BUFFERED_STRIPES;
type Buffer = [u8; BUFFERED_BYTES];

// Ensure that a full buffer always implies we are in the 241+ byte case.
const _: () = assert!(BUFFERED_BYTES > CUTOFF);
+
/// Holds secret and temporary buffers that are ensured to be
/// appropriately sized.
#[derive(Clone)]
pub struct SecretBuffer<S> {
    // The seed this buffer was constructed with.
    seed: u64,
    // The secret bytes; length validated at construction.
    secret: S,
    // Scratch space for input that has not yet filled a stripe.
    buffer: Buffer,
}

impl<S> SecretBuffer<S> {
    /// Returns the secret.
    pub fn into_secret(self) -> S {
        self.secret
    }
}
+
impl<S> SecretBuffer<S>
where
    S: FixedBuffer,
{
    /// Takes the seed, secret, and buffer and performs no
    /// modifications to them, only validating that the sizes are
    /// appropriate.
    pub fn new(seed: u64, secret: S) -> Result<Self, SecretTooShortError<S>> {
        match Secret::new(secret.as_ref()) {
            Ok(_) => Ok(Self {
                seed,
                secret,
                buffer: [0; BUFFERED_BYTES],
            }),
            // Hand the rejected secret back to the caller alongside
            // the underlying length error.
            Err(e) => Err(SecretTooShortError(e, secret)),
        }
    }

    /// Test-only re-check of the length invariant established by the
    /// constructors.
    #[inline(always)]
    #[cfg(test)]
    fn is_valid(&self) -> bool {
        let secret = self.secret.as_ref();

        secret.len() >= SECRET_MINIMUM_LENGTH
    }

    /// The number of stripes per block for this secret's length.
    #[inline]
    fn n_stripes(&self) -> usize {
        Self::secret(&self.secret).n_stripes()
    }

    /// Borrows the seed, the validated secret, and the scratch buffer.
    #[inline]
    fn parts(&self) -> (u64, &Secret, &Buffer) {
        (self.seed, Self::secret(&self.secret), &self.buffer)
    }

    /// Like [`parts`](Self::parts), with a mutable scratch buffer.
    #[inline]
    fn parts_mut(&mut self) -> (u64, &Secret, &mut Buffer) {
        (self.seed, Self::secret(&self.secret), &mut self.buffer)
    }

    fn secret(secret: &S) -> &Secret {
        let secret = secret.as_ref();
        // Safety: We established the length at construction and the
        // length is not allowed to change.
        unsafe { Secret::new_unchecked(secret) }
    }
}
+
impl<S> SecretBuffer<S>
where
    S: FixedMutBuffer,
{
    /// Fills the secret buffer with a secret derived from the seed
    /// and the default secret. The secret must be exactly
    /// [`DEFAULT_SECRET_LENGTH`][] bytes long.
    pub fn with_seed(seed: u64, mut secret: S) -> Result<Self, SecretWithSeedError<S>> {
        match <&mut DefaultSecret>::try_from(secret.as_mut()) {
            Ok(secret_slice) => {
                // Overwrite the caller's bytes with the default
                // secret, then scramble them with the seed.
                *secret_slice = DEFAULT_SECRET_RAW;
                derive_secret(seed, secret_slice);

                Ok(Self {
                    seed,
                    secret,
                    buffer: [0; BUFFERED_BYTES],
                })
            }
            // The buffer was not exactly the default secret length.
            Err(_) => Err(SecretWithSeedError(secret)),
        }
    }
}

impl SecretBuffer<&'static [u8; DEFAULT_SECRET_LENGTH]> {
    /// Use the default seed and secret values while allocating nothing.
    #[inline]
    pub const fn default() -> Self {
        SecretBuffer {
            seed: DEFAULT_SEED,
            secret: &DEFAULT_SECRET_RAW,
            buffer: [0; BUFFERED_BYTES],
        }
    }
}
+
/// Seed/secret-generic core of the streaming hasher: buffers input,
/// folds it into the accumulator a stripe at a time, and tracks how
/// many bytes have been seen.
#[derive(Clone)]
pub struct RawHasherCore<S> {
    secret_buffer: SecretBuffer<S>,
    // How many bytes at the front of the scratch buffer are filled.
    buffer_usage: usize,
    stripe_accumulator: StripeAccumulator,
    // Total bytes written; selects the finalization path.
    total_bytes: usize,
}

impl<S> RawHasherCore<S> {
    /// Constructs the hasher with an empty input buffer.
    pub fn new(secret_buffer: SecretBuffer<S>) -> Self {
        Self {
            secret_buffer,
            buffer_usage: 0,
            stripe_accumulator: StripeAccumulator::new(),
            total_bytes: 0,
        }
    }

    /// Returns the secret.
    pub fn into_secret(self) -> S {
        self.secret_buffer.into_secret()
    }
}
+
impl<S> RawHasherCore<S>
where
    S: FixedBuffer,
{
    /// Feeds `input` into the hasher; `dispatch!` selects the
    /// vector implementation of `write_impl` to run.
    #[inline]
    pub fn write(&mut self, input: &[u8]) {
        let this = self;
        dispatch! {
            fn write_impl<S>(this: &mut RawHasherCore<S>, input: &[u8])
            [S: FixedBuffer]
        }
    }

    /// Computes the hash of everything written so far; `dispatch!`
    /// selects the vector implementation of `finish_impl` to run.
    #[inline]
    pub fn finish<F>(&self, finalize: F) -> F::Output
    where
        F: Finalize,
    {
        let this = self;
        dispatch! {
            fn finish_impl<S, F>(this: &RawHasherCore<S>, finalize: F) -> F::Output
            [S: FixedBuffer, F: Finalize]
        }
    }
}
+
/// Generic implementation of [`RawHasherCore::write`],
/// monomorphized once per SIMD `vector` implementation.
#[inline(always)]
fn write_impl<S>(vector: impl Vector, this: &mut RawHasherCore<S>, mut input: &[u8])
where
    S: FixedBuffer,
{
    if input.is_empty() {
        return;
    }

    let RawHasherCore {
        secret_buffer,
        buffer_usage,
        stripe_accumulator,
        total_bytes,
        ..
    } = this;

    let n_stripes = secret_buffer.n_stripes();
    let (_, secret, buffer) = secret_buffer.parts_mut();

    *total_bytes += input.len();

    // Safety: This is an invariant of the buffer.
    unsafe {
        debug_assert!(*buffer_usage <= buffer.len());
        assert_unchecked(*buffer_usage <= buffer.len())
    };

    // We have some previous data saved; try to fill it up and process it first
    if !buffer.is_empty() {
        let remaining = &mut buffer[*buffer_usage..];
        let n_to_copy = usize::min(remaining.len(), input.len());

        let (remaining_head, remaining_tail) = remaining.split_at_mut(n_to_copy);
        let (input_head, input_tail) = input.split_at(n_to_copy);

        remaining_head.copy_from_slice(input_head);
        *buffer_usage += n_to_copy;

        input = input_tail;

        // We did not fill up the buffer
        if !remaining_tail.is_empty() {
            return;
        }

        // We don't know this isn't the last of the data
        if input.is_empty() {
            return;
        }

        // The buffer is full and more input remains, so the buffered
        // stripes can be folded into the accumulator now.
        let (stripes, _) = buffer.bp_as_chunks();
        for stripe in stripes {
            stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret);
        }
        *buffer_usage = 0;
    }

    debug_assert!(*buffer_usage == 0);

    // Process as much of the input data in-place as possible,
    // while leaving at least one full stripe for the
    // finalization.
    if let Some(len) = input.len().checked_sub(STRIPE_BYTES) {
        let full_block_point = (len / STRIPE_BYTES) * STRIPE_BYTES;
        // Safety: We know that `full_block_point` must be less than
        // `input.len()` as we subtracted and then integer-divided
        // (which rounds down) and then multiplied back. That's not
        // evident to the compiler and `split_at` results in a
        // potential panic.
        //
        // https://github.com/llvm/llvm-project/issues/104827
        let (stripes, remainder) = unsafe { input.split_at_unchecked(full_block_point) };
        let (stripes, _) = stripes.bp_as_chunks();

        for stripe in stripes {
            stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret)
        }
        input = remainder;
    }

    // Any remaining data has to be less than the buffer, and the
    // buffer is empty so just fill up the buffer.
    debug_assert!(*buffer_usage == 0);
    debug_assert!(!input.is_empty());

    // Safety: We have parsed all the full blocks of input except one
    // and potentially a full block minus one byte. That amount of
    // data must be less than the buffer.
    let buffer_head = unsafe {
        debug_assert!(input.len() < 2 * STRIPE_BYTES);
        debug_assert!(2 * STRIPE_BYTES < buffer.len());
        buffer.get_unchecked_mut(..input.len())
    };

    buffer_head.copy_from_slice(input);
    *buffer_usage = input.len();
}
+
/// Generic implementation of [`RawHasherCore::finish`],
/// monomorphized once per SIMD `vector` implementation.
///
/// Takes the hasher by reference and copies the stripe accumulator,
/// so more data may still be written after finishing.
#[inline(always)]
fn finish_impl<S, F>(vector: impl Vector, this: &RawHasherCore<S>, finalize: F) -> F::Output
where
    S: FixedBuffer,
    F: Finalize,
{
    let RawHasherCore {
        ref secret_buffer,
        buffer_usage,
        mut stripe_accumulator,
        total_bytes,
    } = *this;

    let n_stripes = secret_buffer.n_stripes();
    let (seed, secret, buffer) = secret_buffer.parts();

    // Safety: This is an invariant of the buffer.
    unsafe {
        debug_assert!(buffer_usage <= buffer.len());
        assert_unchecked(buffer_usage <= buffer.len())
    };

    if total_bytes > CUTOFF {
        let input = &buffer[..buffer_usage];

        // Ingest final stripes
        let (stripes, remainder) = stripes_with_tail(input);
        for stripe in stripes {
            stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret);
        }

        let mut temp = [0; 64];

        let last_stripe = match input.last_chunk() {
            Some(chunk) => chunk,
            None => {
                // Fewer than 64 unprocessed bytes remain, so rebuild
                // the last 64 bytes seen by borrowing the tail of
                // previously-buffered bytes still sitting at the end
                // of the buffer — presumably mirroring the reference
                // C implementation's wrap-around; confirm there.
                let n_to_reuse = 64 - input.len();
                let to_reuse = buffer.len() - n_to_reuse;

                let (temp_head, temp_tail) = temp.split_at_mut(n_to_reuse);
                temp_head.copy_from_slice(&buffer[to_reuse..]);
                temp_tail.copy_from_slice(input);

                &temp
            }
        };

        finalize.large(
            vector,
            stripe_accumulator.accumulator,
            remainder,
            last_stripe,
            secret,
            total_bytes,
        )
    } else {
        // Short inputs always use the default secret plus the seed,
        // matching the small-input oneshot path.
        finalize.small(DEFAULT_SECRET, seed, &buffer[..total_bytes])
    }
}
+
/// Abstracts the finalization step so the streaming core can be
/// shared between the hash-width-specific front ends.
pub trait Finalize {
    type Output;

    /// Finalization for inputs at or below the streaming cutoff.
    fn small(&self, secret: &Secret, seed: u64, input: &[u8]) -> Self::Output;

    /// Finalization for inputs above the streaming cutoff.
    fn large(
        &self,
        vector: impl Vector,
        acc: [u64; 8],
        last_block: &[u8],
        last_stripe: &[u8; 64],
        secret: &Secret,
        len: usize,
    ) -> Self::Output;
}
+
#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
pub mod with_alloc {
    use ::alloc::boxed::Box;

    use super::*;

    // Safety: A plain slice will never change size.
    unsafe impl FixedBuffer for Box<[u8]> {}

    // Safety: A plain slice will never change size.
    unsafe impl FixedMutBuffer for Box<[u8]> {}

    type AllocSecretBuffer = SecretBuffer<Box<[u8]>>;

    impl AllocSecretBuffer {
        /// Allocates the secret and temporary buffers and fills them
        /// with the default seed and secret values.
        pub fn allocate_default() -> Self {
            Self {
                seed: DEFAULT_SEED,
                secret: DEFAULT_SECRET_RAW.to_vec().into(),
                buffer: [0; BUFFERED_BYTES],
            }
        }

        /// Allocates the secret and temporary buffers and uses the
        /// provided seed to construct the secret value.
        pub fn allocate_with_seed(seed: u64) -> Self {
            // Derive on the stack first, then move to the heap.
            let mut secret = DEFAULT_SECRET_RAW;
            derive_secret(seed, &mut secret);

            Self {
                seed,
                secret: secret.to_vec().into(),
                buffer: [0; BUFFERED_BYTES],
            }
        }

        /// Allocates the temporary buffer and uses the provided seed
        /// and secret buffer.
        pub fn allocate_with_seed_and_secret(
            seed: u64,
            secret: impl Into<Box<[u8]>>,
        ) -> Result<Self, SecretTooShortError<Box<[u8]>>> {
            Self::new(seed, secret.into())
        }
    }

    /// A [`RawHasherCore`] whose secret lives on the heap.
    pub type AllocRawHasher = RawHasherCore<Box<[u8]>>;

    impl AllocRawHasher {
        /// Constructs the hasher with the default seed and secret.
        pub fn allocate_default() -> Self {
            Self::new(SecretBuffer::allocate_default())
        }

        /// Constructs the hasher with a secret derived from `seed`.
        pub fn allocate_with_seed(seed: u64) -> Self {
            Self::new(SecretBuffer::allocate_with_seed(seed))
        }

        /// Constructs the hasher from the given seed and secret,
        /// validating the secret's length.
        pub fn allocate_with_seed_and_secret(
            seed: u64,
            secret: impl Into<Box<[u8]>>,
        ) -> Result<Self, SecretTooShortError<Box<[u8]>>> {
            SecretBuffer::allocate_with_seed_and_secret(seed, secret).map(Self::new)
        }
    }
}
+
+#[cfg(feature = "alloc")]
+pub use with_alloc::AllocRawHasher;
+
/// Tracks which stripe we are currently on to know which part of the
/// secret we should be using.
#[derive(Copy, Clone)]
pub struct StripeAccumulator {
    pub accumulator: [u64; 8],
    // Index of the next secret stripe to use, in `0..n_stripes`.
    current_stripe: usize,
}

impl StripeAccumulator {
    pub fn new() -> Self {
        Self {
            accumulator: INITIAL_ACCUMULATORS,
            current_stripe: 0,
        }
    }

    /// Folds one 64-byte input stripe into the accumulator,
    /// scrambling the accumulator after every `n_stripes` stripes
    /// (i.e. once per block).
    #[inline]
    pub fn process_stripe(
        &mut self,
        vector: impl Vector,
        stripe: &[u8; 64],
        n_stripes: usize,
        secret: &Secret,
    ) {
        let Self {
            accumulator,
            current_stripe,
            ..
        } = self;

        // For each stripe

        // Safety: The number of stripes is determined by the
        // block size, which is determined by the secret size.
        let secret_stripe = unsafe { secret.stripe(*current_stripe) };
        vector.accumulate(accumulator, stripe, secret_stripe);

        *current_stripe += 1;

        // After a full block's worth
        if *current_stripe == n_stripes {
            let secret_end = secret.last_stripe();
            vector.round_scramble(accumulator, secret_end);

            *current_stripe = 0;
        }
    }
}
+
/// The provided secret was not exactly [`DEFAULT_SECRET_LENGTH`][]
/// bytes.
pub struct SecretWithSeedError<S>(S);

impl<S> SecretWithSeedError<S> {
    /// Returns the secret.
    pub fn into_secret(self) -> S {
        self.0
    }
}

impl<S> core::error::Error for SecretWithSeedError<S> {}

// Manual impl: deriving `Debug` would require `S: Debug` and would
// print the secret bytes.
impl<S> core::fmt::Debug for SecretWithSeedError<S> {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.debug_tuple("SecretWithSeedError").finish()
    }
}

impl<S> core::fmt::Display for SecretWithSeedError<S> {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        write!(
            f,
            "The secret must be exactly {DEFAULT_SECRET_LENGTH} bytes"
        )
    }
}
+
/// The provided secret was not at least [`SECRET_MINIMUM_LENGTH`][]
/// bytes.
pub struct SecretTooShortError<S>(secret::Error, S);

impl<S> SecretTooShortError<S> {
    /// Returns the secret.
    pub fn into_secret(self) -> S {
        self.1
    }
}

impl<S> core::error::Error for SecretTooShortError<S> {}

// Manual impl: deriving `Debug` would require `S: Debug` and would
// print the secret bytes.
impl<S> core::fmt::Debug for SecretTooShortError<S> {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.debug_tuple("SecretTooShortError").finish()
    }
}

impl<S> core::fmt::Display for SecretTooShortError<S> {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Delegate to the underlying length error's message.
        self.0.fmt(f)
    }
}
+
#[cfg(test)]
mod test {
    use super::*;

    // Each test exercises one `SecretBuffer` constructor against the
    // test-only `is_valid` length-invariant check.

    #[test]
    fn secret_buffer_default_is_valid() {
        assert!(SecretBuffer::default().is_valid());
    }

    #[test]
    fn secret_buffer_allocate_default_is_valid() {
        assert!(SecretBuffer::allocate_default().is_valid())
    }

    #[test]
    fn secret_buffer_allocate_with_seed_is_valid() {
        assert!(SecretBuffer::allocate_with_seed(0xdead_beef).is_valid())
    }
}
diff --git a/crates/twox-hash/src/xxhash32.rs b/crates/twox-hash/src/xxhash32.rs
new file mode 100644
index 0000000..42ff7a1
--- /dev/null
+++ b/crates/twox-hash/src/xxhash32.rs
@@ -0,0 +1,697 @@
+//! The implementation of XXH32.
+
+use core::{
+ fmt,
+ hash::{self, BuildHasher},
+ mem,
+};
+
+use crate::{IntoU32, IntoU64};
+
// Keeping these constants in this form to match the C code.
const PRIME32_1: u32 = 0x9E3779B1;
const PRIME32_2: u32 = 0x85EBCA77;
const PRIME32_3: u32 = 0xC2B2AE3D;
const PRIME32_4: u32 = 0x27D4EB2F;
const PRIME32_5: u32 = 0x165667B1;

// XXH32 consumes input as four 32-bit lanes (16 bytes) at a time.
type Lane = u32;
type Lanes = [Lane; 4];
type Bytes = [u8; 16];

const BYTES_IN_LANE: usize = mem::size_of::<Bytes>();
+
/// 16 bytes of pending input, stored as `u32` lanes so the backing
/// storage is `u32`-aligned.
#[derive(Clone, PartialEq)]
struct BufferData(Lanes);

impl BufferData {
    const fn new() -> Self {
        Self([0; 4])
    }

    /// Views the lanes as raw bytes.
    const fn bytes(&self) -> &Bytes {
        const _: () = assert!(mem::align_of::<u8>() <= mem::align_of::<Lane>());
        // SAFETY[bytes]: The alignment of `u32` is at least that of
        // `u8` and all the values are initialized.
        unsafe { &*self.0.as_ptr().cast() }
    }

    /// Views the lanes as raw, mutable bytes.
    fn bytes_mut(&mut self) -> &mut Bytes {
        // SAFETY: See SAFETY[bytes]
        unsafe { &mut *self.0.as_mut_ptr().cast() }
    }
}
+
+impl fmt::Debug for BufferData {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.debug_list().entries(self.0.iter()).finish()
+ }
+}
+
/// Staging area for input that has not yet filled a 16-byte chunk.
#[derive(Debug, Clone, PartialEq)]
struct Buffer {
    // Number of valid bytes at the front of `data`.
    offset: usize,
    data: BufferData,
}

impl Buffer {
    const fn new() -> Self {
        Self {
            offset: 0,
            data: BufferData::new(),
        }
    }

    /// Tops the buffer up from `data`, returning the full set of
    /// lanes (if the buffer was filled) and the unconsumed input.
    // RATIONALE: See RATIONALE[inline]
    #[inline]
    fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&Lanes>, &'d [u8]) {
        // Most of the slice methods we use here have `_unchecked` variants, but
        //
        // 1. this method is called one time per `Hasher::write` call
        // 2. this method early exits if we don't have anything in the buffer
        //
        // Because of this, removing the panics via `unsafe` doesn't
        // have much benefit other than reducing code size by a tiny
        // bit.

        if self.offset == 0 {
            return (None, data);
        };

        let bytes = self.data.bytes_mut();
        debug_assert!(self.offset <= bytes.len());

        let empty = &mut bytes[self.offset..];
        let n_to_copy = usize::min(empty.len(), data.len());

        let dst = &mut empty[..n_to_copy];

        let (src, rest) = data.split_at(n_to_copy);

        dst.copy_from_slice(src);
        self.offset += n_to_copy;

        debug_assert!(self.offset <= bytes.len());

        if self.offset == bytes.len() {
            // The buffer is now full; hand out the lanes and reset.
            self.offset = 0;
            (Some(&self.data.0), rest)
        } else {
            (None, rest)
        }
    }

    /// Stores leftover input (less than 16 bytes) in the buffer; must
    /// only be called when the buffer is empty.
    // RATIONALE: See RATIONALE[inline]
    #[inline]
    fn set(&mut self, data: &[u8]) {
        if data.is_empty() {
            return;
        }

        debug_assert_eq!(self.offset, 0);

        let n_to_copy = data.len();

        let bytes = self.data.bytes_mut();
        debug_assert!(n_to_copy < bytes.len());

        bytes[..n_to_copy].copy_from_slice(data);
        self.offset = data.len();
    }

    /// The bytes currently waiting in the buffer.
    // RATIONALE: See RATIONALE[inline]
    #[inline]
    fn remaining(&self) -> &[u8] {
        &self.data.bytes()[..self.offset]
    }
}
+
/// The four rolling lane accumulators of the XXH32 algorithm.
#[derive(Clone, PartialEq)]
struct Accumulators(Lanes);

impl Accumulators {
    // Initial accumulator values prescribed by the algorithm.
    const fn new(seed: u32) -> Self {
        Self([
            seed.wrapping_add(PRIME32_1).wrapping_add(PRIME32_2),
            seed.wrapping_add(PRIME32_2),
            seed,
            seed.wrapping_sub(PRIME32_1),
        ])
    }

    /// Folds one 16-byte chunk (four lanes) into the accumulators.
    // RATIONALE: See RATIONALE[inline]
    #[inline]
    fn write(&mut self, lanes: Lanes) {
        let [acc1, acc2, acc3, acc4] = &mut self.0;
        let [lane1, lane2, lane3, lane4] = lanes;

        // Lanes were read with native endianness; `to_le` yields the
        // little-endian interpretation the algorithm specifies.
        *acc1 = round(*acc1, lane1.to_le());
        *acc2 = round(*acc2, lane2.to_le());
        *acc3 = round(*acc3, lane3.to_le());
        *acc4 = round(*acc4, lane4.to_le());
    }

    /// Consumes as many full 16-byte chunks from `data` as possible,
    /// returning the unconsumed tail.
    // RATIONALE: See RATIONALE[inline]
    #[inline]
    fn write_many<'d>(&mut self, mut data: &'d [u8]) -> &'d [u8] {
        while let Some((chunk, rest)) = data.split_first_chunk::<BYTES_IN_LANE>() {
            // SAFETY: We have the right number of bytes and are
            // handling the unaligned case.
            let lanes = unsafe { chunk.as_ptr().cast::<Lanes>().read_unaligned() };
            self.write(lanes);
            data = rest;
        }
        data
    }

    /// Converges the four accumulators into a single value.
    // RATIONALE: See RATIONALE[inline]
    #[inline]
    const fn finish(&self) -> u32 {
        let [acc1, acc2, acc3, acc4] = self.0;

        let acc1 = acc1.rotate_left(1);
        let acc2 = acc2.rotate_left(7);
        let acc3 = acc3.rotate_left(12);
        let acc4 = acc4.rotate_left(18);

        acc1.wrapping_add(acc2)
            .wrapping_add(acc3)
            .wrapping_add(acc4)
    }
}
+
+impl fmt::Debug for Accumulators {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ let [acc1, acc2, acc3, acc4] = self.0;
+ f.debug_struct("Accumulators")
+ .field("acc1", &acc1)
+ .field("acc2", &acc2)
+ .field("acc3", &acc3)
+ .field("acc4", &acc4)
+ .finish()
+ }
+}
+
/// Calculates the 32-bit hash.
///
/// ### Caution
///
/// Although this struct implements [`hash::Hasher`][], it only calculates a
/// 32-bit number, leaving the upper bits as 0. This means it is
/// unlikely to be correct to use this in places like a [`HashMap`][std::collections::HashMap].
#[derive(Debug, Clone, PartialEq)]
pub struct Hasher {
    seed: u32,
    accumulators: Accumulators,
    // Input that has not yet filled a complete 16-byte chunk.
    buffer: Buffer,
    // Total number of bytes written so far.
    length: u64,
}

impl Default for Hasher {
    /// Constructs the hasher with a seed of zero.
    fn default() -> Self {
        Self::with_seed(0)
    }
}
+
+impl Hasher {
+ /// Hash all data at once. If you can use this function, you may
+ /// see noticable speed gains for certain types of input.
+ #[must_use]
+ // RATIONALE[inline]: Keeping parallel to the 64-bit
+ // implementation, even though the performance gains for the
+ // 32-bit version haven't been tested.
+ #[inline]
+ pub fn oneshot(seed: u32, data: &[u8]) -> u32 {
+ let len = data.len();
+
+ // Since we know that there's no more data coming, we don't
+ // need to construct the intermediate buffers or copy data to
+ // or from the buffers.
+
+ let mut accumulators = Accumulators::new(seed);
+
+ let data = accumulators.write_many(data);
+
+ Self::finish_with(seed, len.into_u64(), &accumulators, data)
+ }
+
+ /// Constructs the hasher with an initial seed.
+ #[must_use]
+ pub const fn with_seed(seed: u32) -> Self {
+ // Step 1. Initialize internal accumulators
+ Self {
+ seed,
+ accumulators: Accumulators::new(seed),
+ buffer: Buffer::new(),
+ length: 0,
+ }
+ }
+
+ /// The seed this hasher was created with.
+ pub const fn seed(&self) -> u32 {
+ self.seed
+ }
+
+ /// The total number of bytes hashed.
+ pub const fn total_len(&self) -> u64 {
+ self.length
+ }
+
+ /// The total number of bytes hashed, truncated to 32 bits.
+ ///
+ /// For the full 64-bit byte count, use [`total_len`](Self::total_len).
+ pub const fn total_len_32(&self) -> u32 {
+ self.length as u32
+ }
+
+ /// Returns the hash value for the values written so far. Unlike
+ /// [`hash::Hasher::finish`][], this method returns the actual 32-bit
+ /// value calculated, not a 64-bit value.
+ #[must_use]
+ // RATIONALE: See RATIONALE[inline]
+ #[inline]
+ pub fn finish_32(&self) -> u32 {
+ Self::finish_with(
+ self.seed,
+ self.length,
+ &self.accumulators,
+ self.buffer.remaining(),
+ )
+ }
+
+ #[must_use]
+ // RATIONALE: See RATIONALE[inline]
+ #[inline]
+ fn finish_with(seed: u32, len: u64, accumulators: &Accumulators, mut remaining: &[u8]) -> u32 {
+ // Step 3. Accumulator convergence
+ let mut acc = if len < BYTES_IN_LANE.into_u64() {
+ seed.wrapping_add(PRIME32_5)
+ } else {
+ accumulators.finish()
+ };
+
+ // Step 4. Add input length
+ //
+ // "Note that, if input length is so large that it requires
+ // more than 32-bits, only the lower 32-bits are added to the
+ // accumulator."
+ acc += len as u32;
+
+ // Step 5. Consume remaining input
+ while let Some((chunk, rest)) = remaining.split_first_chunk() {
+ let lane = u32::from_ne_bytes(*chunk).to_le();
+
+ acc = acc.wrapping_add(lane.wrapping_mul(PRIME32_3));
+ acc = acc.rotate_left(17).wrapping_mul(PRIME32_4);
+
+ remaining = rest;
+ }
+
+ for &byte in remaining {
+ let lane = byte.into_u32();
+
+ acc = acc.wrapping_add(lane.wrapping_mul(PRIME32_5));
+ acc = acc.rotate_left(11).wrapping_mul(PRIME32_1);
+ }
+
+ // Step 6. Final mix (avalanche)
+ acc ^= acc >> 15;
+ acc = acc.wrapping_mul(PRIME32_2);
+ acc ^= acc >> 13;
+ acc = acc.wrapping_mul(PRIME32_3);
+ acc ^= acc >> 16;
+
+ acc
+ }
+}
+
impl hash::Hasher for Hasher {
    // RATIONALE: See RATIONALE[inline]
    #[inline]
    fn write(&mut self, data: &[u8]) {
        let len = data.len();

        // Step 2. Process stripes
        //
        // Drain any partially filled buffer first, then hash full
        // 16-byte chunks directly, then stash the leftovers.
        let (buffered_lanes, data) = self.buffer.extend(data);

        if let Some(&lanes) = buffered_lanes {
            self.accumulators.write(lanes);
        }

        let data = self.accumulators.write_many(data);

        self.buffer.set(data);

        self.length += len.into_u64();
    }

    /// Returns the 32-bit hash zero-extended to 64 bits; see the
    /// [caution][Hasher#caution] about `HashMap` usage.
    // RATIONALE: See RATIONALE[inline]
    #[inline]
    fn finish(&self) -> u64 {
        Hasher::finish_32(self).into()
    }
}
+
+// RATIONALE: See RATIONALE[inline]
+#[inline]
+const fn round(mut acc: u32, lane: u32) -> u32 {
+ acc = acc.wrapping_add(lane.wrapping_mul(PRIME32_2));
+ acc = acc.rotate_left(13);
+ acc.wrapping_mul(PRIME32_1)
+}
+
/// Constructs [`Hasher`][] for multiple hasher instances. See
/// the [usage warning][Hasher#caution].
#[derive(Clone)]
pub struct State(u32);

impl State {
    /// Constructs the hasher with an initial seed.
    pub fn with_seed(seed: u32) -> Self {
        Self(seed)
    }
}

impl BuildHasher for State {
    type Hasher = Hasher;

    // Every hasher built from this state shares the same seed.
    fn build_hasher(&self) -> Self::Hasher {
        Hasher::with_seed(self.0)
    }
}
+
#[cfg(test)]
mod test {
    use core::{
        array,
        hash::{BuildHasherDefault, Hasher as _},
    };
    use std::collections::HashMap;

    use super::*;

    // Compile-time check that the public types stay `Clone`.
    const _TRAITS: () = {
        const fn is_clone<T: Clone>() {}
        is_clone::<Hasher>();
        is_clone::<State>();
    };

    const EMPTY_BYTES: [u8; 0] = [];

    #[test]
    fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() {
        let bytes = [0; 32];

        let mut byte_by_byte = Hasher::with_seed(0);
        for byte in bytes.chunks(1) {
            byte_by_byte.write(byte);
        }
        let byte_by_byte = byte_by_byte.finish();

        let mut one_chunk = Hasher::with_seed(0);
        one_chunk.write(&bytes);
        let one_chunk = one_chunk.finish();

        assert_eq!(byte_by_byte, one_chunk);
    }

    // The following known-answer tests compare against values
    // produced by the C implementation.

    #[test]
    fn hash_of_nothing_matches_c_implementation() {
        let mut hasher = Hasher::with_seed(0);
        hasher.write(&EMPTY_BYTES);
        assert_eq!(hasher.finish(), 0x02cc_5d05);
    }

    #[test]
    fn hash_of_single_byte_matches_c_implementation() {
        let mut hasher = Hasher::with_seed(0);
        hasher.write(&[42]);
        assert_eq!(hasher.finish(), 0xe0fe_705f);
    }

    #[test]
    fn hash_of_multiple_bytes_matches_c_implementation() {
        let mut hasher = Hasher::with_seed(0);
        hasher.write(b"Hello, world!\0");
        assert_eq!(hasher.finish(), 0x9e5e_7e93);
    }

    #[test]
    fn hash_of_multiple_chunks_matches_c_implementation() {
        let bytes: [u8; 100] = array::from_fn(|i| i as u8);
        let mut hasher = Hasher::with_seed(0);
        hasher.write(&bytes);
        assert_eq!(hasher.finish(), 0x7f89_ba44);
    }

    #[test]
    fn hash_with_different_seed_matches_c_implementation() {
        let mut hasher = Hasher::with_seed(0x42c9_1977);
        hasher.write(&EMPTY_BYTES);
        assert_eq!(hasher.finish(), 0xd6bf_8459);
    }

    #[test]
    fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() {
        let bytes: [u8; 100] = array::from_fn(|i| i as u8);
        let mut hasher = Hasher::with_seed(0x42c9_1977);
        hasher.write(&bytes);
        assert_eq!(hasher.finish(), 0x6d2f_6c17);
    }

    #[test]
    fn hashes_with_different_offsets_are_the_same() {
        let bytes = [0x7c; 4096];
        let expected = Hasher::oneshot(0, &[0x7c; 64]);

        let the_same = bytes
            .windows(64)
            .map(|w| {
                let mut hasher = Hasher::with_seed(0);
                hasher.write(w);
                hasher.finish_32()
            })
            .all(|h| h == expected);
        assert!(the_same);
    }

    // This test validates wraparound/truncation behavior for very
    // large inputs of a 32-bit hash, but runs very slowly in the
    // normal "cargo test" build config since it hashes 4.3GB of
    // data. It runs reasonably quick under "cargo test --release".
    #[ignore]
    #[test]
    fn length_overflows_32bit() {
        // Hash 4.3 billion (4_300_000_000) bytes, which overflows a u32.
        let bytes200: [u8; 200] = array::from_fn(|i| i as _);

        let mut hasher = Hasher::with_seed(0);
        for _ in 0..(4_300_000_000 / bytes200.len()) {
            hasher.write(&bytes200);
        }

        assert_eq!(hasher.total_len(), 0x0000_0001_004c_cb00);
        assert_eq!(hasher.total_len_32(), 0x004c_cb00);

        // compared against the C implementation
        assert_eq!(hasher.finish(), 0x1522_4ca7);
    }

    #[test]
    fn can_be_used_in_a_hashmap_with_a_default_seed() {
        let mut hash: HashMap<_, _, BuildHasherDefault<Hasher>> = Default::default();
        hash.insert(42, "the answer");
        assert_eq!(hash.get(&42), Some(&"the answer"));
    }
}
+
#[cfg(feature = "random")]
#[cfg_attr(docsrs, doc(cfg(feature = "random")))]
mod random_impl {
    use super::*;

    /// Constructs a randomized seed and reuses it for multiple hasher
    /// instances. See the [usage warning][Hasher#caution].
    #[derive(Clone)]
    pub struct RandomState(State);

    impl Default for RandomState {
        fn default() -> Self {
            Self::new()
        }
    }

    impl RandomState {
        // The seed is drawn once; all hashers built from this state
        // share it.
        fn new() -> Self {
            Self(State::with_seed(rand::random()))
        }
    }

    impl BuildHasher for RandomState {
        type Hasher = Hasher;

        fn build_hasher(&self) -> Self::Hasher {
            self.0.build_hasher()
        }
    }

    #[cfg(test)]
    mod test {
        use std::collections::HashMap;

        use super::*;

        // Compile-time check that the public types stay `Clone`.
        const _: () = {
            const fn is_clone<T: Clone>() {}
            is_clone::<Hasher>();
            is_clone::<RandomState>();
        };

        #[test]
        fn can_be_used_in_a_hashmap_with_a_random_seed() {
            let mut hash: HashMap<_, _, RandomState> = Default::default();
            hash.insert(42, "the answer");
            assert_eq!(hash.get(&42), Some(&"the answer"));
        }
    }
}
+
+#[cfg(feature = "random")]
+#[cfg_attr(docsrs, doc(cfg(feature = "random")))]
+pub use random_impl::*;
+
/// Serde support: the hasher round-trips through the flat [`Shim`]
/// representation to keep the serialized format stable.
#[cfg(feature = "serialize")]
#[cfg_attr(docsrs, doc(cfg(feature = "serialize")))]
mod serialize_impl {
    use serde::{Deserialize, Serialize};

    use super::*;

    impl<'de> Deserialize<'de> for Hasher {
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: serde::Deserializer<'de>,
        {
            let shim = Deserialize::deserialize(deserializer)?;

            let Shim {
                total_len,
                seed,
                core,
                buffer,
                buffer_usage,
            } = shim;
            let Core { v1, v2, v3, v4 } = core;

            let mut buffer_data = BufferData::new();
            buffer_data.bytes_mut().copy_from_slice(&buffer);

            Ok(Hasher {
                seed,
                accumulators: Accumulators([v1, v2, v3, v4]),
                buffer: Buffer {
                    offset: buffer_usage,
                    data: buffer_data,
                },
                length: total_len,
            })
        }
    }

    impl Serialize for Hasher {
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: serde::Serializer,
        {
            let Hasher {
                seed,
                ref accumulators,
                ref buffer,
                length,
            } = *self;
            let [v1, v2, v3, v4] = accumulators.0;
            let Buffer { offset, ref data } = *buffer;
            let buffer = *data.bytes();

            let shim = Shim {
                total_len: length,
                seed,
                core: Core { v1, v2, v3, v4 },
                buffer,
                buffer_usage: offset,
            };

            shim.serialize(serializer)
        }
    }

    // Flat mirror of `Hasher`'s state; the field names define the
    // on-the-wire format, so do not rename them.
    #[derive(Serialize, Deserialize)]
    struct Shim {
        total_len: u64,
        seed: u32,
        core: Core,
        buffer: [u8; 16],
        buffer_usage: usize,
    }

    // The four lane accumulators, named to match the C state struct.
    #[derive(Serialize, Deserialize)]
    struct Core {
        v1: u32,
        v2: u32,
        v3: u32,
        v4: u32,
    }

    #[cfg(test)]
    mod test {
        use std::hash::Hasher as _;

        use super::*;

        type Result<T = (), E = serde_json::Error> = core::result::Result<T, E>;

        #[test]
        fn test_serialization_cycle() -> Result {
            let mut hasher = Hasher::with_seed(0);
            hasher.write(b"Hello, world!\0");
            let _ = hasher.finish();

            let serialized = serde_json::to_string(&hasher)?;
            let unserialized: Hasher = serde_json::from_str(&serialized)?;
            assert_eq!(hasher, unserialized);
            Ok(())
        }

        #[test]
        fn test_serialization_stability() -> Result {
            let mut hasher = Hasher::with_seed(0);
            hasher.write(b"Hello, world!\0");
            let _ = hasher.finish();

            // Pinned wire format; changing `Shim` breaks this.
            let expected_serialized = r#"{
                "total_len": 14,
                "seed": 0,
                "core": {
                    "v1": 606290984,
                    "v2": 2246822519,
                    "v3": 0,
                    "v4": 1640531535
                },
                "buffer": [
                    72, 101, 108, 108, 111, 44, 32, 119,
                    111, 114, 108, 100, 33, 0, 0, 0
                ],
                "buffer_usage": 14
            }"#;

            let unserialized: Hasher = serde_json::from_str(expected_serialized)?;
            assert_eq!(hasher, unserialized);

            let expected_value: serde_json::Value = serde_json::from_str(expected_serialized)?;
            let actual_value = serde_json::to_value(&hasher)?;
            assert_eq!(expected_value, actual_value);

            Ok(())
        }
    }
}
diff --git a/crates/twox-hash/src/xxhash3_128.rs b/crates/twox-hash/src/xxhash3_128.rs
new file mode 100644
index 0000000..d762437
--- /dev/null
+++ b/crates/twox-hash/src/xxhash3_128.rs
@@ -0,0 +1,648 @@
+//! The implementation of XXH3_128.
+
+#![deny(
+ clippy::missing_safety_doc,
+ clippy::undocumented_unsafe_blocks,
+ unsafe_op_in_unsafe_fn
+)]
+
+use crate::{
+ xxhash3::{primes::*, *},
+ IntoU128 as _, IntoU64 as _,
+};
+
+pub use crate::xxhash3::{
+ FixedBuffer, FixedMutBuffer, OneshotWithSecretError, SecretBuffer, SecretTooShortError,
+ SecretWithSeedError, DEFAULT_SECRET_LENGTH, SECRET_MINIMUM_LENGTH,
+};
+
+/// Calculates the 128-bit hash.
+#[derive(Clone)]
+/// TODO: does not implement hash.
+pub struct Hasher {
+ #[cfg(feature = "alloc")]
+ inner: AllocRawHasher,
+ _private: (),
+}
+
+impl Hasher {
+ /// Hash all data at once. If you can use this function, you may
+ /// see noticable speed gains for certain types of input.
+ #[must_use]
+ #[inline]
+ pub fn oneshot(input: &[u8]) -> u128 {
+ impl_oneshot(DEFAULT_SECRET, DEFAULT_SEED, input)
+ }
+
+ /// Hash all data at once using the provided seed and a secret
+ /// derived from the seed. If you can use this function, you may
+ /// see noticable speed gains for certain types of input.
+ #[must_use]
+ #[inline]
+ pub fn oneshot_with_seed(seed: u64, input: &[u8]) -> u128 {
+ let mut secret = DEFAULT_SECRET_RAW;
+
+ // We know that the secret will only be used if we have more
+ // than 240 bytes, so don't waste time computing it otherwise.
+ if input.len() > CUTOFF {
+ derive_secret(seed, &mut secret);
+ }
+
+ let secret = Secret::new(&secret).expect("The default secret length is invalid");
+
+ impl_oneshot(secret, seed, input)
+ }
+
+ /// Hash all data at once using the provided secret and the
+ /// default seed. If you can use this function, you may see
+ /// noticable speed gains for certain types of input.
+ #[inline]
+ pub fn oneshot_with_secret(
+ secret: &[u8],
+ input: &[u8],
+ ) -> Result<u128, OneshotWithSecretError> {
+ let secret = Secret::new(secret).map_err(OneshotWithSecretError)?;
+ Ok(impl_oneshot(secret, DEFAULT_SEED, input))
+ }
+
+ /// Hash all data at once using the provided seed and secret. If
+ /// you can use this function, you may see noticable speed gains
+ /// for certain types of input.
+ #[inline]
+ pub fn oneshot_with_seed_and_secret(
+ seed: u64,
+ secret: &[u8],
+ input: &[u8],
+ ) -> Result<u128, OneshotWithSecretError> {
+ let secret = if input.len() > CUTOFF {
+ Secret::new(secret).map_err(OneshotWithSecretError)?
+ } else {
+ DEFAULT_SECRET
+ };
+
+ Ok(impl_oneshot(secret, seed, input))
+ }
+}
+#[cfg(feature = "alloc")]
+#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
+mod with_alloc {
+ use ::alloc::boxed::Box;
+
+ use super::*;
+
+ impl Hasher {
+ /// Constructs the hasher using the default seed and secret values.
+ pub fn new() -> Self {
+ Self {
+ inner: RawHasherCore::allocate_default(),
+ _private: (),
+ }
+ }
+
+ /// Constructs the hasher using the provided seed and a secret
+ /// derived from the seed.
+ pub fn with_seed(seed: u64) -> Self {
+ Self {
+ inner: RawHasherCore::allocate_with_seed(seed),
+ _private: (),
+ }
+ }
+
+ /// Constructs the hasher using the provided seed and secret.
+ pub fn with_seed_and_secret(
+ seed: u64,
+ secret: impl Into<Box<[u8]>>,
+ ) -> Result<Self, SecretTooShortError<Box<[u8]>>> {
+ Ok(Self {
+ inner: RawHasherCore::allocate_with_seed_and_secret(seed, secret)?,
+ _private: (),
+ })
+ }
+
+ /// Returns the secret.
+ pub fn into_secret(self) -> Box<[u8]> {
+ self.inner.into_secret()
+ }
+
+ /// Writes some data into this `Hasher`.
+ #[inline]
+ pub fn write(&mut self, input: &[u8]) {
+ self.inner.write(input);
+ }
+
+ /// Returns the hash value for the values written so
+ /// far. Unlike [`std::hash::Hasher::finish`][], this method
+ /// returns the complete 128-bit value calculated, not a
+ /// 64-bit value.
+ #[inline]
+ pub fn finish_128(&self) -> u128 {
+ self.inner.finish(Finalize128)
+ }
+ }
+
+ impl Default for Hasher {
+ fn default() -> Self {
+ Self::new()
+ }
+ }
+}
+
+#[derive(Clone)]
+/// A lower-level interface for computing a hash from streaming data.
+///
+/// The algorithm requires a secret which can be a reasonably large
+/// piece of data. [`Hasher`][] makes one concrete implementation
+/// decision that uses dynamic memory allocation, but specialized
+/// usages may desire more flexibility. This type, combined with
+/// [`SecretBuffer`][], offer that flexibility at the cost of a
+/// generic type.
+pub struct RawHasher<S>(RawHasherCore<S>);
+
+impl<S> RawHasher<S> {
+ /// Construct the hasher with the provided seed, secret, and
+ /// temporary buffer.
+ pub fn new(secret_buffer: SecretBuffer<S>) -> Self {
+ Self(RawHasherCore::new(secret_buffer))
+ }
+
+ /// Returns the secret.
+ pub fn into_secret(self) -> S {
+ self.0.into_secret()
+ }
+}
+
+impl<S> RawHasher<S>
+where
+ S: FixedBuffer,
+{
+ /// Writes some data into this `Hasher`.
+ #[inline]
+ pub fn write(&mut self, input: &[u8]) {
+ self.0.write(input);
+ }
+
+ /// Returns the hash value for the values written so
+ /// far. Unlike [`std::hash::Hasher::finish`][], this method
+ /// returns the complete 128-bit value calculated, not a
+ /// 64-bit value.
+ #[inline]
+ pub fn finish_128(&self) -> u128 {
+ self.0.finish(Finalize128)
+ }
+}
+
+struct Finalize128;
+
+impl Finalize for Finalize128 {
+ type Output = u128;
+
+ #[inline]
+ fn small(&self, secret: &Secret, seed: u64, input: &[u8]) -> Self::Output {
+ impl_oneshot(secret, seed, input)
+ }
+
+ #[inline]
+ fn large(
+ &self,
+ vector: impl Vector,
+ acc: [u64; 8],
+ last_block: &[u8],
+ last_stripe: &[u8; 64],
+ secret: &Secret,
+ len: usize,
+ ) -> Self::Output {
+ Algorithm(vector).finalize_128(acc, last_block, last_stripe, secret, len)
+ }
+}
+
+#[inline(always)]
+fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u128 {
+ match input.len() {
+ 241.. => impl_241_plus_bytes(secret, input),
+
+ 129..=240 => impl_129_to_240_bytes(secret, seed, input),
+
+ 17..=128 => impl_17_to_128_bytes(secret, seed, input),
+
+ 9..=16 => impl_9_to_16_bytes(secret, seed, input),
+
+ 4..=8 => impl_4_to_8_bytes(secret, seed, input),
+
+ 1..=3 => impl_1_to_3_bytes(secret, seed, input),
+
+ 0 => impl_0_bytes(secret, seed),
+ }
+}
+
+#[inline(always)]
+fn impl_0_bytes(secret: &Secret, seed: u64) -> u128 {
+ let secret_words = secret.for_128().words_for_0();
+
+ let low = avalanche_xxh64(seed ^ secret_words[0] ^ secret_words[1]);
+ let high = avalanche_xxh64(seed ^ secret_words[2] ^ secret_words[3]);
+
+ X128 { low, high }.into()
+}
+
+#[inline(always)]
+fn impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u128 {
+ assert_input_range!(1..=3, input.len());
+
+ let combined = impl_1_to_3_bytes_combined(input);
+ let secret_words = secret.for_128().words_for_1_to_3();
+
+ let low = {
+ let secret = (secret_words[0] ^ secret_words[1]).into_u64();
+ secret.wrapping_add(seed) ^ combined.into_u64()
+ };
+ let high = {
+ let secret = (secret_words[2] ^ secret_words[3]).into_u64();
+ secret.wrapping_sub(seed) ^ combined.swap_bytes().rotate_left(13).into_u64()
+ };
+
+ let low = avalanche_xxh64(low);
+ let high = avalanche_xxh64(high);
+
+ X128 { low, high }.into()
+}
+
+#[inline(always)]
+fn impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u128 {
+ assert_input_range!(4..=8, input.len());
+ let input_first = input.first_u32().unwrap();
+ let input_last = input.last_u32().unwrap();
+
+ let modified_seed = seed ^ (seed.lower_half().swap_bytes().into_u64() << 32);
+ let secret_words = secret.for_128().words_for_4_to_8();
+
+ let combined = input_first.into_u64() | (input_last.into_u64() << 32);
+ let lhs = {
+ let a = secret_words[0] ^ secret_words[1];
+ let b = a.wrapping_add(modified_seed);
+ b ^ combined
+ };
+ let rhs = PRIME64_1.wrapping_add(input.len().into_u64() << 2);
+ let mul_result = lhs.into_u128().wrapping_mul(rhs.into_u128());
+
+ let mut high = mul_result.upper_half();
+ let mut low = mul_result.lower_half();
+
+ high = high.wrapping_add(low << 1);
+
+ low ^= high >> 3;
+ low ^= low >> 35;
+ low = low.wrapping_mul(PRIME_MX2);
+ low ^= low >> 28;
+
+ high = avalanche(high);
+
+ X128 { low, high }.into()
+}
+
+#[inline(always)]
+fn impl_9_to_16_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u128 {
+ assert_input_range!(9..=16, input.len());
+ let input_first = input.first_u64().unwrap();
+ let input_last = input.last_u64().unwrap();
+
+ let secret_words = secret.for_128().words_for_9_to_16();
+ let val1 = ((secret_words[0] ^ secret_words[1]).wrapping_sub(seed)) ^ input_first ^ input_last;
+ let val2 = ((secret_words[2] ^ secret_words[3]).wrapping_add(seed)) ^ input_last;
+ let mul_result = val1.into_u128().wrapping_mul(PRIME64_1.into_u128());
+ let low = mul_result
+ .lower_half()
+ .wrapping_add((input.len() - 1).into_u64() << 54);
+
+ // Algorithm describes this in two ways
+ let high = mul_result
+ .upper_half()
+ .wrapping_add(val2.upper_half().into_u64() << 32)
+ .wrapping_add(val2.lower_half().into_u64().wrapping_mul(PRIME32_2));
+
+ let low = low ^ high.swap_bytes();
+
+ // Algorithm describes this multiplication in two ways.
+ let q = X128 { low, high }
+ .into_u128()
+ .wrapping_mul(PRIME64_2.into_u128());
+
+ let low = avalanche(q.lower_half());
+ let high = avalanche(q.upper_half());
+
+ X128 { low, high }.into()
+}
+
+#[inline]
+fn impl_17_to_128_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u128 {
+ assert_input_range!(17..=128, input.len());
+ let input_len = input.len().into_u64();
+ let mut acc = [input_len.wrapping_mul(PRIME64_1), 0];
+
+ impl_17_to_128_bytes_iter(secret, input, |fwd, bwd, secret| {
+ mix_two_chunks(&mut acc, fwd, bwd, secret, seed);
+ });
+
+ finalize_medium(acc, input_len, seed)
+}
+
+#[inline]
+fn impl_129_to_240_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u128 {
+ assert_input_range!(129..=240, input.len());
+ let input_len = input.len().into_u64();
+ let mut acc = [input_len.wrapping_mul(PRIME64_1), 0];
+
+ let head = pairs_of_u64_bytes(input);
+ let mut head = head.iter();
+
+ let ss = secret.for_128().words_for_127_to_240_part1();
+ for (input, secret) in head.by_ref().zip(ss).take(4) {
+ mix_two_chunks(&mut acc, &input[0], &input[1], secret, seed);
+ }
+
+ let mut acc = acc.map(avalanche);
+
+ let ss = secret.for_128().words_for_127_to_240_part2();
+ for (input, secret) in head.zip(ss) {
+ mix_two_chunks(&mut acc, &input[0], &input[1], secret, seed);
+ }
+
+ let (_, tail) = input.bp_as_rchunks::<16>();
+ let (_, tail) = tail.bp_as_rchunks::<2>();
+ let tail = tail.last().unwrap();
+ let ss = secret.for_128().words_for_127_to_240_part3();
+
+ // note that the half-chunk order and the seed is different here
+ mix_two_chunks(&mut acc, &tail[1], &tail[0], ss, seed.wrapping_neg());
+
+ finalize_medium(acc, input_len, seed)
+}
+
+#[inline]
+fn mix_two_chunks(
+ acc: &mut [u64; 2],
+ data1: &[u8; 16],
+ data2: &[u8; 16],
+ secret: &[[u8; 16]; 2],
+ seed: u64,
+) {
+ let data_words1 = to_u64s(data1);
+ let data_words2 = to_u64s(data2);
+
+ acc[0] = acc[0].wrapping_add(mix_step(data1, &secret[0], seed));
+ acc[1] = acc[1].wrapping_add(mix_step(data2, &secret[1], seed));
+ acc[0] ^= data_words2[0].wrapping_add(data_words2[1]);
+ acc[1] ^= data_words1[0].wrapping_add(data_words1[1]);
+}
+
+#[inline]
+fn finalize_medium(acc: [u64; 2], input_len: u64, seed: u64) -> u128 {
+ let low = acc[0].wrapping_add(acc[1]);
+ let high = acc[0]
+ .wrapping_mul(PRIME64_1)
+ .wrapping_add(acc[1].wrapping_mul(PRIME64_4))
+ .wrapping_add((input_len.wrapping_sub(seed)).wrapping_mul(PRIME64_2));
+
+ let low = avalanche(low);
+ let high = avalanche(high).wrapping_neg();
+
+ X128 { low, high }.into()
+}
+
+#[inline]
+fn impl_241_plus_bytes(secret: &Secret, input: &[u8]) -> u128 {
+ assert_input_range!(241.., input.len());
+ dispatch! {
+ fn oneshot_impl<>(secret: &Secret, input: &[u8]) -> u128
+ []
+ }
+}
+
+#[inline]
+fn oneshot_impl(vector: impl Vector, secret: &Secret, input: &[u8]) -> u128 {
+ Algorithm(vector).oneshot(secret, input, Finalize128)
+}
+
+#[cfg(test)]
+mod test {
+ use crate::xxhash3::test::bytes;
+
+ use super::*;
+
+ const _: () = {
+ const fn is_clone<T: Clone>() {}
+ is_clone::<Hasher>();
+ };
+
+ const EMPTY_BYTES: [u8; 0] = [];
+
+ fn hash_byte_by_byte(input: &[u8]) -> u128 {
+ let mut hasher = Hasher::new();
+ for byte in input.chunks(1) {
+ hasher.write(byte)
+ }
+ hasher.finish_128()
+ }
+
+ #[test]
+ fn oneshot_empty() {
+ let hash = Hasher::oneshot(&EMPTY_BYTES);
+ assert_eq!(hash, 0x99aa_06d3_0147_98d8_6001_c324_468d_497f);
+ }
+
+ #[test]
+ fn streaming_empty() {
+ let hash = hash_byte_by_byte(&EMPTY_BYTES);
+ assert_eq!(hash, 0x99aa_06d3_0147_98d8_6001_c324_468d_497f);
+ }
+
+ #[test]
+ fn oneshot_1_to_3_bytes() {
+ test_1_to_3_bytes(Hasher::oneshot)
+ }
+
+ #[test]
+ fn streaming_1_to_3_bytes() {
+ test_1_to_3_bytes(hash_byte_by_byte)
+ }
+
+ #[track_caller]
+ fn test_1_to_3_bytes(mut f: impl FnMut(&[u8]) -> u128) {
+ let inputs = bytes![1, 2, 3];
+
+ let expected = [
+ 0xa6cd_5e93_9200_0f6a_c44b_dff4_074e_ecdb,
+ 0x6a4a_5274_c1b0_d3ad_d664_5fc3_051a_9457,
+ 0xe3b5_5f57_945a_17cf_5f42_99fc_161c_9cbb,
+ ];
+
+ for (input, expected) in inputs.iter().zip(expected) {
+ let hash = f(input);
+ assert_eq!(hash, expected, "input was {} bytes", input.len());
+ }
+ }
+
+ #[test]
+ fn oneshot_4_to_8_bytes() {
+ test_4_to_8_bytes(Hasher::oneshot)
+ }
+
+ #[test]
+ fn streaming_4_to_8_bytes() {
+ test_4_to_8_bytes(hash_byte_by_byte)
+ }
+
+ #[track_caller]
+ fn test_4_to_8_bytes(mut f: impl FnMut(&[u8]) -> u128) {
+ let inputs = bytes![4, 5, 6, 7, 8];
+
+ let expected = [
+ 0xeb70_bf5f_c779_e9e6_a611_1d53_e80a_3db5,
+ 0x9434_5321_06a7_c141_c920_d234_7a85_929b,
+ 0x545f_093d_32b1_68fe_a6b5_2f4d_ea38_96a3,
+ 0x61ce_291b_c3a4_357d_dbb2_0782_1e6d_5efe,
+ 0xe1e4_432a_6221_7fe4_cfd5_0c61_c8bb_98c1,
+ ];
+
+ for (input, expected) in inputs.iter().zip(expected) {
+ let hash = f(input);
+ assert_eq!(hash, expected, "input was {} bytes", input.len());
+ }
+ }
+
+ #[test]
+ fn oneshot_9_to_16_bytes() {
+ test_9_to_16_bytes(Hasher::oneshot)
+ }
+
+ #[test]
+ fn streaming_9_to_16_bytes() {
+ test_9_to_16_bytes(hash_byte_by_byte)
+ }
+
+ #[track_caller]
+ fn test_9_to_16_bytes(mut f: impl FnMut(&[u8]) -> u128) {
+ let inputs = bytes![9, 10, 11, 12, 13, 14, 15, 16];
+
+ let expected = [
+ 0x16c7_69d8_3e4a_ebce_9079_3197_9dca_3746,
+ 0xbd93_0669_a87b_4b37_e67b_f1ad_8dcf_73a8,
+ 0xacad_8071_8f47_d494_7d67_cfc1_730f_22a3,
+ 0x38f9_2247_a7f7_3cc5_7780_eb31_198f_13ca,
+ 0xae92_e123_e947_2408_bd79_5526_1902_66c0,
+ 0x5f91_e6bf_7418_cfaa_55d6_5715_e2a5_7c31,
+ 0x301a_9f75_4e8f_569a_0017_ea4b_e19b_c787,
+ 0x7295_0631_8276_07e2_8428_12cc_870d_cae2,
+ ];
+
+ for (input, expected) in inputs.iter().zip(expected) {
+ let hash = f(input);
+ assert_eq!(hash, expected, "input was {} bytes", input.len());
+ }
+ }
+
+ #[test]
+ fn oneshot_17_to_128_bytes() {
+ test_17_to_128_bytes(Hasher::oneshot)
+ }
+
+ #[test]
+ fn streaming_17_to_128_bytes() {
+ test_17_to_128_bytes(hash_byte_by_byte)
+ }
+
+ #[track_caller]
+ fn test_17_to_128_bytes(mut f: impl FnMut(&[u8]) -> u128) {
+ let lower_boundary = bytes![17, 18, 19];
+ let chunk_boundary = bytes![31, 32, 33];
+ let upper_boundary = bytes![126, 127, 128];
+
+ let inputs = lower_boundary
+ .iter()
+ .chain(chunk_boundary)
+ .chain(upper_boundary);
+
+ let expected = [
+ // lower_boundary
+ 0x685b_c458_b37d_057f_c06e_233d_f772_9217,
+ 0x87ce_996b_b557_6d8d_e3a3_c96b_b0af_2c23,
+ 0x7619_bcef_2e31_1cd8_c47d_dc58_8737_93df,
+ // chunk_boundary
+ 0x4ed3_946d_393b_687b_b54d_e399_3874_ed20,
+ 0x25e7_c9b3_424c_eed2_457d_9566_b6fc_d697,
+ 0x0217_5c3a_abb0_0637_e08d_8495_1339_de86,
+ // upper_boundary
+ 0x0abc_2062_87ce_2afe_5181_0be2_9323_2106,
+ 0xd5ad_d870_c9c9_e00f_060c_2e3d_df0f_2fb9,
+ 0x1479_2fc3_af88_dc6c_0532_1a0b_64d6_7b41,
+ ];
+
+ for (input, expected) in inputs.zip(expected) {
+ let hash = f(input);
+ assert_eq!(hash, expected, "input was {} bytes", input.len());
+ }
+ }
+
+ #[test]
+ fn oneshot_129_to_240_bytes() {
+ test_129_to_240_bytes(Hasher::oneshot)
+ }
+
+ #[test]
+ fn streaming_129_to_240_bytes() {
+ test_129_to_240_bytes(hash_byte_by_byte)
+ }
+
+ #[track_caller]
+ fn test_129_to_240_bytes(mut f: impl FnMut(&[u8]) -> u128) {
+ let lower_boundary = bytes![129, 130, 131];
+ let upper_boundary = bytes![238, 239, 240];
+
+ let inputs = lower_boundary.iter().chain(upper_boundary);
+
+ let expected = [
+ // lower_boundary
+ 0xdd5e_74ac_6b45_f54e_bc30_b633_82b0_9a3b,
+ 0x6cd2_e56a_10f1_e707_3ec5_f135_d0a7_d28f,
+ 0x6da7_92f1_702d_4494_5609_cfc7_9dba_18fd,
+ // upper_boundary
+ 0x73a9_e8f7_bd32_83c8_2a9b_ddd0_e5c4_014c,
+ 0x9843_ab31_a06b_e0df_fe21_3746_28fc_c539,
+ 0x65b5_be86_da55_40e7_c92b_68e1_6f83_bbb6,
+ ];
+
+ for (input, expected) in inputs.zip(expected) {
+ let hash = f(input);
+ assert_eq!(hash, expected, "input was {} bytes", input.len());
+ }
+ }
+
+ #[test]
+ fn oneshot_241_plus_bytes() {
+ test_241_plus_bytes(Hasher::oneshot)
+ }
+
+ #[test]
+ fn streaming_241_plus_bytes() {
+ test_241_plus_bytes(hash_byte_by_byte)
+ }
+
+ #[track_caller]
+ fn test_241_plus_bytes(mut f: impl FnMut(&[u8]) -> u128) {
+ let inputs = bytes![241, 242, 243, 244, 1024, 10240];
+
+ let expected = [
+ 0x1da1_cb61_bcb8_a2a1_02e8_cd95_421c_6d02,
+ 0x1623_84cb_44d1_d806_ddcb_33c4_9405_1832,
+ 0xbd2e_9fcf_378c_35e9_8835_f952_9193_e3dc,
+ 0x3ff4_93d7_a813_7ab6_bc17_c91e_c3cf_8d7f,
+ 0xd0ac_1f7b_93bf_57b9_e5d7_8baf_a45b_2aa5,
+ 0x4f63_75cc_a7ec_e1e1_bcd6_3266_df6e_2244,
+ ];
+
+ for (input, expected) in inputs.iter().zip(expected) {
+ let hash = f(input);
+ eprintln!("{hash:032x}\n{expected:032x}");
+ assert_eq!(hash, expected, "input was {} bytes", input.len());
+ }
+ }
+}
diff --git a/crates/twox-hash/src/xxhash3_64.rs b/crates/twox-hash/src/xxhash3_64.rs
new file mode 100644
index 0000000..2e967e4
--- /dev/null
+++ b/crates/twox-hash/src/xxhash3_64.rs
@@ -0,0 +1,611 @@
+//! The implementation of XXH3_64.
+
+#![deny(
+ clippy::missing_safety_doc,
+ clippy::undocumented_unsafe_blocks,
+ unsafe_op_in_unsafe_fn
+)]
+
+use core::hash;
+
+use crate::{
+ xxhash3::{primes::*, *},
+ IntoU128 as _, IntoU64 as _,
+};
+
+pub use crate::xxhash3::{
+ FixedBuffer, FixedMutBuffer, OneshotWithSecretError, SecretBuffer, SecretTooShortError,
+ SecretWithSeedError, DEFAULT_SECRET_LENGTH, SECRET_MINIMUM_LENGTH,
+};
+
+/// Calculates the 64-bit hash.
+#[derive(Clone)]
+pub struct Hasher {
+ #[cfg(feature = "alloc")]
+ inner: AllocRawHasher,
+ _private: (),
+}
+
+impl Hasher {
+ /// Hash all data at once. If you can use this function, you may
+ /// see noticable speed gains for certain types of input.
+ #[must_use]
+ #[inline]
+ pub fn oneshot(input: &[u8]) -> u64 {
+ impl_oneshot(DEFAULT_SECRET, DEFAULT_SEED, input)
+ }
+
+ /// Hash all data at once using the provided seed and a secret
+ /// derived from the seed. If you can use this function, you may
+ /// see noticable speed gains for certain types of input.
+ #[must_use]
+ #[inline]
+ pub fn oneshot_with_seed(seed: u64, input: &[u8]) -> u64 {
+ let mut secret = DEFAULT_SECRET_RAW;
+
+ // We know that the secret will only be used if we have more
+ // than 240 bytes, so don't waste time computing it otherwise.
+ if input.len() > CUTOFF {
+ derive_secret(seed, &mut secret);
+ }
+
+ let secret = Secret::new(&secret).expect("The default secret length is invalid");
+
+ impl_oneshot(secret, seed, input)
+ }
+
+ /// Hash all data at once using the provided secret and the
+ /// default seed. If you can use this function, you may see
+ /// noticable speed gains for certain types of input.
+ #[inline]
+ pub fn oneshot_with_secret(secret: &[u8], input: &[u8]) -> Result<u64, OneshotWithSecretError> {
+ let secret = Secret::new(secret).map_err(OneshotWithSecretError)?;
+ Ok(impl_oneshot(secret, DEFAULT_SEED, input))
+ }
+
+ /// Hash all data at once using the provided seed and secret. If
+ /// you can use this function, you may see noticable speed gains
+ /// for certain types of input.
+ #[inline]
+ pub fn oneshot_with_seed_and_secret(
+ seed: u64,
+ secret: &[u8],
+ input: &[u8],
+ ) -> Result<u64, OneshotWithSecretError> {
+ let secret = if input.len() > CUTOFF {
+ Secret::new(secret).map_err(OneshotWithSecretError)?
+ } else {
+ DEFAULT_SECRET
+ };
+
+ Ok(impl_oneshot(secret, seed, input))
+ }
+}
+
+#[cfg(feature = "alloc")]
+#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
+mod with_alloc {
+ use ::alloc::boxed::Box;
+
+ use super::*;
+
+ impl Hasher {
+ /// Constructs the hasher using the default seed and secret values.
+ pub fn new() -> Self {
+ Self {
+ inner: RawHasherCore::allocate_default(),
+ _private: (),
+ }
+ }
+
+ /// Constructs the hasher using the provided seed and a secret
+ /// derived from the seed.
+ pub fn with_seed(seed: u64) -> Self {
+ Self {
+ inner: RawHasherCore::allocate_with_seed(seed),
+ _private: (),
+ }
+ }
+
+ /// Constructs the hasher using the provided seed and secret.
+ pub fn with_seed_and_secret(
+ seed: u64,
+ secret: impl Into<Box<[u8]>>,
+ ) -> Result<Self, SecretTooShortError<Box<[u8]>>> {
+ Ok(Self {
+ inner: RawHasherCore::allocate_with_seed_and_secret(seed, secret)?,
+ _private: (),
+ })
+ }
+
+ /// Returns the secret.
+ pub fn into_secret(self) -> Box<[u8]> {
+ self.inner.into_secret()
+ }
+ }
+
+ impl Default for Hasher {
+ fn default() -> Self {
+ Self::new()
+ }
+ }
+
+ impl hash::Hasher for Hasher {
+ #[inline]
+ fn write(&mut self, input: &[u8]) {
+ self.inner.write(input)
+ }
+
+ #[inline]
+ fn finish(&self) -> u64 {
+ self.inner.finish(Finalize64)
+ }
+ }
+}
+
+#[derive(Clone)]
+/// A lower-level interface for computing a hash from streaming data.
+///
+/// The algorithm requires a secret which can be a reasonably large
+/// piece of data. [`Hasher`][] makes one concrete implementation
+/// decision that uses dynamic memory allocation, but specialized
+/// usages may desire more flexibility. This type, combined with
+/// [`SecretBuffer`][], offer that flexibility at the cost of a
+/// generic type.
+pub struct RawHasher<S>(RawHasherCore<S>);
+
+impl<S> RawHasher<S> {
+ /// Construct the hasher with the provided seed, secret, and
+ /// temporary buffer.
+ pub fn new(secret_buffer: SecretBuffer<S>) -> Self {
+ Self(RawHasherCore::new(secret_buffer))
+ }
+
+ /// Returns the secret.
+ pub fn into_secret(self) -> S {
+ self.0.into_secret()
+ }
+}
+
+impl<S> hash::Hasher for RawHasher<S>
+where
+ S: FixedBuffer,
+{
+ #[inline]
+ fn write(&mut self, input: &[u8]) {
+ self.0.write(input);
+ }
+
+ #[inline]
+ fn finish(&self) -> u64 {
+ self.0.finish(Finalize64)
+ }
+}
+
+struct Finalize64;
+
+impl Finalize for Finalize64 {
+ type Output = u64;
+
+ #[inline(always)]
+ fn small(&self, secret: &Secret, seed: u64, input: &[u8]) -> Self::Output {
+ impl_oneshot(secret, seed, input)
+ }
+
+ #[inline(always)]
+ fn large(
+ &self,
+ vector: impl Vector,
+ acc: [u64; 8],
+ last_block: &[u8],
+ last_stripe: &[u8; 64],
+ secret: &Secret,
+ len: usize,
+ ) -> Self::Output {
+ Algorithm(vector).finalize_64(acc, last_block, last_stripe, secret, len)
+ }
+}
+
+#[inline(always)]
+fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u64 {
+ match input.len() {
+ 241.. => impl_241_plus_bytes(secret, input),
+
+ 129..=240 => impl_129_to_240_bytes(secret, seed, input),
+
+ 17..=128 => impl_17_to_128_bytes(secret, seed, input),
+
+ 9..=16 => impl_9_to_16_bytes(secret, seed, input),
+
+ 4..=8 => impl_4_to_8_bytes(secret, seed, input),
+
+ 1..=3 => impl_1_to_3_bytes(secret, seed, input),
+
+ 0 => impl_0_bytes(secret, seed),
+ }
+}
+
+#[inline(always)]
+fn impl_0_bytes(secret: &Secret, seed: u64) -> u64 {
+ let secret_words = secret.for_64().words_for_0();
+ avalanche_xxh64(seed ^ secret_words[0] ^ secret_words[1])
+}
+
+#[inline(always)]
+fn impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 {
+ assert_input_range!(1..=3, input.len());
+ let combined = impl_1_to_3_bytes_combined(input);
+
+ let secret_words = secret.for_64().words_for_1_to_3();
+
+ let value = {
+ let secret = (secret_words[0] ^ secret_words[1]).into_u64();
+ secret.wrapping_add(seed) ^ combined.into_u64()
+ };
+
+ // FUTURE: TEST: "Note that the XXH3-64 result is the lower half of XXH3-128 result."
+ avalanche_xxh64(value)
+}
+
+#[inline(always)]
+fn impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 {
+ assert_input_range!(4..=8, input.len());
+ let input_first = input.first_u32().unwrap();
+ let input_last = input.last_u32().unwrap();
+
+ let modified_seed = seed ^ (seed.lower_half().swap_bytes().into_u64() << 32);
+ let secret_words = secret.for_64().words_for_4_to_8();
+
+ let combined = input_last.into_u64() | (input_first.into_u64() << 32);
+
+ let mut value = {
+ let a = secret_words[0] ^ secret_words[1];
+ let b = a.wrapping_sub(modified_seed);
+ b ^ combined
+ };
+ value ^= value.rotate_left(49) ^ value.rotate_left(24);
+ value = value.wrapping_mul(PRIME_MX2);
+ value ^= (value >> 35).wrapping_add(input.len().into_u64());
+ value = value.wrapping_mul(PRIME_MX2);
+ value ^= value >> 28;
+ value
+}
+
+#[inline(always)]
+fn impl_9_to_16_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 {
+ assert_input_range!(9..=16, input.len());
+ let input_first = input.first_u64().unwrap();
+ let input_last = input.last_u64().unwrap();
+
+ let secret_words = secret.for_64().words_for_9_to_16();
+ let low = ((secret_words[0] ^ secret_words[1]).wrapping_add(seed)) ^ input_first;
+ let high = ((secret_words[2] ^ secret_words[3]).wrapping_sub(seed)) ^ input_last;
+ let mul_result = low.into_u128().wrapping_mul(high.into_u128());
+ let value = input
+ .len()
+ .into_u64()
+ .wrapping_add(low.swap_bytes())
+ .wrapping_add(high)
+ .wrapping_add(mul_result.lower_half() ^ mul_result.upper_half());
+
+ avalanche(value)
+}
+
+#[inline]
+fn impl_17_to_128_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 {
+ assert_input_range!(17..=128, input.len());
+ let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1);
+
+ impl_17_to_128_bytes_iter(secret, input, |fwd, bwd, secret| {
+ acc = acc.wrapping_add(mix_step(fwd, &secret[0], seed));
+ acc = acc.wrapping_add(mix_step(bwd, &secret[1], seed));
+ });
+
+ avalanche(acc)
+}
+
+#[inline]
+fn impl_129_to_240_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 {
+ assert_input_range!(129..=240, input.len());
+ let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1);
+
+ let (head, _) = input.bp_as_chunks();
+ let mut head = head.iter();
+
+ let ss = secret.for_64().words_for_127_to_240_part1();
+ for (chunk, secret) in head.by_ref().zip(ss).take(8) {
+ acc = acc.wrapping_add(mix_step(chunk, secret, seed));
+ }
+
+ acc = avalanche(acc);
+
+ let ss = secret.for_64().words_for_127_to_240_part2();
+ for (chunk, secret) in head.zip(ss) {
+ acc = acc.wrapping_add(mix_step(chunk, secret, seed));
+ }
+
+ let last_chunk = input.last_chunk().unwrap();
+ let ss = secret.for_64().words_for_127_to_240_part3();
+ acc = acc.wrapping_add(mix_step(last_chunk, ss, seed));
+
+ avalanche(acc)
+}
+
+#[inline]
+fn impl_241_plus_bytes(secret: &Secret, input: &[u8]) -> u64 {
+ assert_input_range!(241.., input.len());
+ dispatch! {
+ fn oneshot_impl<>(secret: &Secret, input: &[u8]) -> u64
+ []
+ }
+}
+
+#[inline]
+fn oneshot_impl(vector: impl Vector, secret: &Secret, input: &[u8]) -> u64 {
+ Algorithm(vector).oneshot(secret, input, Finalize64)
+}
+
+#[cfg(test)]
+mod test {
+ use std::hash::Hasher as _;
+
+ use crate::xxhash3::test::bytes;
+
+ use super::*;
+
+ const _: () = {
+ const fn is_clone<T: Clone>() {}
+ is_clone::<Hasher>();
+ };
+
+ const EMPTY_BYTES: [u8; 0] = [];
+
+ fn hash_byte_by_byte(input: &[u8]) -> u64 {
+ let mut hasher = Hasher::new();
+ for byte in input.chunks(1) {
+ hasher.write(byte)
+ }
+ hasher.finish()
+ }
+
+ fn hash_byte_by_byte_with_seed(seed: u64, input: &[u8]) -> u64 {
+ let mut hasher = Hasher::with_seed(seed);
+ for byte in input.chunks(1) {
+ hasher.write(byte)
+ }
+ hasher.finish()
+ }
+
+ #[test]
+ fn oneshot_empty() {
+ let hash = Hasher::oneshot(&EMPTY_BYTES);
+ assert_eq!(hash, 0x2d06_8005_38d3_94c2);
+ }
+
+ #[test]
+ fn streaming_empty() {
+ let hash = hash_byte_by_byte(&EMPTY_BYTES);
+ assert_eq!(hash, 0x2d06_8005_38d3_94c2);
+ }
+
+ #[test]
+ fn oneshot_1_to_3_bytes() {
+ test_1_to_3_bytes(Hasher::oneshot)
+ }
+
+ #[test]
+ fn streaming_1_to_3_bytes() {
+ test_1_to_3_bytes(hash_byte_by_byte)
+ }
+
+ #[track_caller]
+ fn test_1_to_3_bytes(mut f: impl FnMut(&[u8]) -> u64) {
+ let inputs = bytes![1, 2, 3];
+
+ let expected = [
+ 0xc44b_dff4_074e_ecdb,
+ 0xd664_5fc3_051a_9457,
+ 0x5f42_99fc_161c_9cbb,
+ ];
+
+ for (input, expected) in inputs.iter().zip(expected) {
+ let hash = f(input);
+ assert_eq!(hash, expected, "input was {} bytes", input.len());
+ }
+ }
+
+ #[test]
+ fn oneshot_4_to_8_bytes() {
+ test_4_to_8_bytes(Hasher::oneshot)
+ }
+
+ #[test]
+ fn streaming_4_to_8_bytes() {
+ test_4_to_8_bytes(hash_byte_by_byte)
+ }
+
+ #[track_caller]
+ fn test_4_to_8_bytes(mut f: impl FnMut(&[u8]) -> u64) {
+ let inputs = bytes![4, 5, 6, 7, 8];
+
+ let expected = [
+ 0x60da_b036_a582_11f2,
+ 0xb075_753a_84ca_0fbe,
+ 0xa658_4d1d_9a6a_e704,
+ 0x0cd2_084a_6240_6b69,
+ 0x3a1c_2d7c_85af_88f8,
+ ];
+
+ for (input, expected) in inputs.iter().zip(expected) {
+ let hash = f(input);
+ assert_eq!(hash, expected, "input was {} bytes", input.len());
+ }
+ }
+
+ #[test]
+ fn oneshot_9_to_16_bytes() {
+ test_9_to_16_bytes(Hasher::oneshot)
+ }
+
+ #[test]
+ fn streaming_9_to_16_bytes() {
+ test_9_to_16_bytes(hash_byte_by_byte)
+ }
+
+ #[track_caller]
+ fn test_9_to_16_bytes(mut f: impl FnMut(&[u8]) -> u64) {
+ let inputs = bytes![9, 10, 11, 12, 13, 14, 15, 16];
+
+ let expected = [
+ 0xe961_2598_145b_b9dc,
+ 0xab69_a08e_f83d_8f77,
+ 0x1cf3_96aa_4de6_198d,
+ 0x5ace_6a51_1c10_894b,
+ 0xb7a5_d8a8_309a_2cb9,
+ 0x4cf4_5c94_4a9a_2237,
+ 0x55ec_edc2_b87b_b042,
+ 0x8355_e3a6_f617_70db,
+ ];
+
+ for (input, expected) in inputs.iter().zip(expected) {
+ let hash = f(input);
+ assert_eq!(hash, expected, "input was {} bytes", input.len());
+ }
+ }
+
+ #[test]
+ fn oneshot_17_to_128_bytes() {
+ test_17_to_128_bytes(Hasher::oneshot)
+ }
+
+ #[test]
+ fn streaming_17_to_128_bytes() {
+ test_17_to_128_bytes(hash_byte_by_byte)
+ }
+
+ #[track_caller]
+ fn test_17_to_128_bytes(mut f: impl FnMut(&[u8]) -> u64) {
+ let lower_boundary = bytes![17, 18, 19];
+ let chunk_boundary = bytes![31, 32, 33];
+ let upper_boundary = bytes![126, 127, 128];
+
+ let inputs = lower_boundary
+ .iter()
+ .chain(chunk_boundary)
+ .chain(upper_boundary);
+
+ let expected = [
+ // lower_boundary
+ 0x9ef3_41a9_9de3_7328,
+ 0xf691_2490_d4c0_eed5,
+ 0x60e7_2614_3cf5_0312,
+ // chunk_boundary
+ 0x4f36_db8e_4df3_78fd,
+ 0x3523_581f_e96e_4c05,
+ 0xe68c_56ba_8899_1e58,
+ // upper_boundary
+ 0x6c2a_9eb7_459c_dc61,
+ 0x120b_9787_f842_5f2f,
+ 0x85c6_174c_7ff4_c46b,
+ ];
+
+ for (input, expected) in inputs.zip(expected) {
+ let hash = f(input);
+ assert_eq!(hash, expected, "input was {} bytes", input.len());
+ }
+ }
+
+ #[test]
+ fn oneshot_129_to_240_bytes() {
+ test_129_to_240_bytes(Hasher::oneshot)
+ }
+
+ #[test]
+ fn streaming_129_to_240_bytes() {
+ test_129_to_240_bytes(hash_byte_by_byte)
+ }
+
+ #[track_caller]
+ fn test_129_to_240_bytes(mut f: impl FnMut(&[u8]) -> u64) {
+ let lower_boundary = bytes![129, 130, 131];
+ let upper_boundary = bytes![238, 239, 240];
+
+ let inputs = lower_boundary.iter().chain(upper_boundary);
+
+ let expected = [
+ // lower_boundary
+ 0xec76_42b4_31ba_3e5a,
+ 0x4d32_24b1_0090_8a87,
+ 0xe57f_7ea6_741f_e3a0,
+ // upper_boundary
+ 0x3044_9a0b_4899_dee9,
+ 0x972b_14e3_c46f_214b,
+ 0x375a_384d_957f_e865,
+ ];
+
+ for (input, expected) in inputs.zip(expected) {
+ let hash = f(input);
+ assert_eq!(hash, expected, "input was {} bytes", input.len());
+ }
+ }
+
+ #[test]
+ fn oneshot_241_plus_bytes() {
+ test_241_plus_bytes(Hasher::oneshot)
+ }
+
+ #[test]
+ fn streaming_241_plus_bytes() {
+ test_241_plus_bytes(hash_byte_by_byte)
+ }
+
+ #[track_caller]
+ fn test_241_plus_bytes(mut f: impl FnMut(&[u8]) -> u64) {
+ let inputs = bytes![241, 242, 243, 244, 1024, 10240];
+
+ let expected = [
+ 0x02e8_cd95_421c_6d02,
+ 0xddcb_33c4_9405_1832,
+ 0x8835_f952_9193_e3dc,
+ 0xbc17_c91e_c3cf_8d7f,
+ 0xe5d7_8baf_a45b_2aa5,
+ 0xbcd6_3266_df6e_2244,
+ ];
+
+ for (input, expected) in inputs.iter().zip(expected) {
+ let hash = f(input);
+ assert_eq!(hash, expected, "input was {} bytes", input.len());
+ }
+ }
+
+ #[test]
+ fn oneshot_with_seed() {
+ test_with_seed(Hasher::oneshot_with_seed)
+ }
+
+ #[test]
+ fn streaming_with_seed() {
+ test_with_seed(hash_byte_by_byte_with_seed)
+ }
+
+ #[track_caller]
+ fn test_with_seed(mut f: impl FnMut(u64, &[u8]) -> u64) {
+ let inputs = bytes![0, 1, 4, 9, 17, 129, 241, 1024];
+
+ let expected = [
+ 0x4aed_e683_89c0_e311,
+ 0x78fc_079a_75aa_f3c0,
+ 0x1b73_06b8_9f25_4507,
+ 0x7df7_627f_d1f9_39b6,
+ 0x49ca_0fff_0950_1622,
+ 0x2bfd_caec_30ff_3000,
+ 0xf984_56bc_25be_0901,
+ 0x2483_9f0f_cdf4_d078,
+ ];
+
+ for (input, expected) in inputs.iter().zip(expected) {
+ let hash = f(0xdead_cafe, input);
+ assert_eq!(hash, expected, "input was {} bytes", input.len());
+ }
+ }
+}
diff --git a/crates/twox-hash/src/xxhash64.rs b/crates/twox-hash/src/xxhash64.rs
new file mode 100644
index 0000000..d72d57d
--- /dev/null
+++ b/crates/twox-hash/src/xxhash64.rs
@@ -0,0 +1,687 @@
+//! The implementation of XXH64.
+
+use core::{
+ fmt,
+ hash::{self, BuildHasher},
+ mem,
+};
+
+use crate::IntoU64;
+
// Keeping these constants in this form to match the C code.
//
// These are the five 64-bit prime multipliers defined by the XXH64
// algorithm.
const PRIME64_1: u64 = 0x9E3779B185EBCA87;
const PRIME64_2: u64 = 0xC2B2AE3D27D4EB4F;
const PRIME64_3: u64 = 0x165667B19E3779F9;
const PRIME64_4: u64 = 0x85EBCA77C2B2AE63;
const PRIME64_5: u64 = 0x27D4EB2F165667C5;

/// One lane of hashing state.
type Lane = u64;
/// The four lanes updated once per input stripe.
type Lanes = [Lane; 4];
/// One full input stripe, viewed as raw bytes.
type Bytes = [u8; 32];

// Number of input bytes consumed per stripe (32).
const BYTES_IN_LANE: usize = mem::size_of::<Bytes>();
+
/// Backing storage for one 32-byte stripe, kept as `u64` lanes so it
/// can be handed to the accumulators without conversion.
#[derive(Clone, PartialEq)]
struct BufferData(Lanes);

impl BufferData {
    const fn new() -> Self {
        Self([0; 4])
    }

    /// Views the lanes as raw bytes.
    const fn bytes(&self) -> &Bytes {
        // Compile-time guard backing the SAFETY argument below.
        const _: () = assert!(mem::align_of::<u8>() <= mem::align_of::<Lane>());
        // SAFETY[bytes]: The alignment of `u64` is at least that of
        // `u8` and all the values are initialized.
        unsafe { &*self.0.as_ptr().cast() }
    }

    /// Views the lanes as raw bytes, mutably.
    fn bytes_mut(&mut self) -> &mut Bytes {
        // SAFETY: See SAFETY[bytes]
        unsafe { &mut *self.0.as_mut_ptr().cast() }
    }
}
+
+impl fmt::Debug for BufferData {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.debug_list().entries(self.0.iter()).finish()
+ }
+}
+
/// Holds input bytes until a full 32-byte stripe has accumulated.
#[derive(Debug, Clone, PartialEq)]
struct Buffer {
    // Number of buffered bytes; between calls this is always less
    // than the stripe size (a full stripe is handed off immediately).
    offset: usize,
    data: BufferData,
}

impl Buffer {
    const fn new() -> Self {
        Self {
            offset: 0,
            data: BufferData::new(),
        }
    }

    /// Appends `data` to any previously-buffered bytes. If that
    /// completes a stripe, returns its lanes; in all cases returns
    /// the part of `data` that was not consumed.
    // RATIONALE: See RATIONALE[inline]
    #[inline]
    fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&Lanes>, &'d [u8]) {
        // Most of the slice methods we use here have `_unchecked` variants, but
        //
        // 1. this method is called one time per `Hasher::write` call
        // 2. this method early exits if we don't have anything in the buffer
        //
        // Because of this, removing the panics via `unsafe` doesn't
        // have much benefit other than reducing code size by a tiny
        // bit.

        // Nothing buffered: the caller can process `data` directly.
        if self.offset == 0 {
            return (None, data);
        };

        let bytes = self.data.bytes_mut();
        debug_assert!(self.offset <= bytes.len());

        // Copy into the unused portion of the stripe.
        let empty = &mut bytes[self.offset..];
        let n_to_copy = usize::min(empty.len(), data.len());

        let dst = &mut empty[..n_to_copy];

        let (src, rest) = data.split_at(n_to_copy);

        dst.copy_from_slice(src);
        self.offset += n_to_copy;

        debug_assert!(self.offset <= bytes.len());

        // A completed stripe resets the buffer and hands the lanes
        // back to the caller for accumulation.
        if self.offset == bytes.len() {
            self.offset = 0;
            (Some(&self.data.0), rest)
        } else {
            (None, rest)
        }
    }

    /// Stores the tail of the input (strictly less than one stripe).
    /// The buffer must be empty when this is called.
    // RATIONALE: See RATIONALE[inline]
    #[inline]
    fn set(&mut self, data: &[u8]) {
        if data.is_empty() {
            return;
        }

        debug_assert_eq!(self.offset, 0);

        let n_to_copy = data.len();

        let bytes = self.data.bytes_mut();
        // Any whole stripe was already consumed by `write_many`.
        debug_assert!(n_to_copy < bytes.len());

        bytes[..n_to_copy].copy_from_slice(data);
        self.offset = data.len();
    }

    /// The bytes buffered so far.
    // RATIONALE: See RATIONALE[inline]
    #[inline]
    fn remaining(&self) -> &[u8] {
        &self.data.bytes()[..self.offset]
    }
}
+
/// The four running lanes of XXH64 state (`v1`..`v4` in the reference
/// implementation).
#[derive(Clone, PartialEq)]
struct Accumulators(Lanes);

impl Accumulators {
    /// Seeds the four lanes from the user seed and the XXH64 primes.
    const fn new(seed: u64) -> Self {
        Self([
            seed.wrapping_add(PRIME64_1).wrapping_add(PRIME64_2),
            seed.wrapping_add(PRIME64_2),
            seed,
            seed.wrapping_sub(PRIME64_1),
        ])
    }

    /// Folds one 32-byte stripe (as four lanes) into the state.
    // RATIONALE: See RATIONALE[inline]
    #[inline]
    fn write(&mut self, lanes: Lanes) {
        let [acc1, acc2, acc3, acc4] = &mut self.0;
        let [lane1, lane2, lane3, lane4] = lanes;

        // Lanes were loaded with native byte order; `to_le` is a
        // no-op on little-endian targets and byte-swaps on big-endian
        // ones, so every lane is treated as little-endian input.
        *acc1 = round(*acc1, lane1.to_le());
        *acc2 = round(*acc2, lane2.to_le());
        *acc3 = round(*acc3, lane3.to_le());
        *acc4 = round(*acc4, lane4.to_le());
    }

    /// Consumes as many whole 32-byte stripes from `data` as
    /// possible, returning the leftover tail (shorter than a stripe).
    // RATIONALE: See RATIONALE[inline]
    #[inline]
    fn write_many<'d>(&mut self, mut data: &'d [u8]) -> &'d [u8] {
        while let Some((chunk, rest)) = data.split_first_chunk::<BYTES_IN_LANE>() {
            // SAFETY: We have the right number of bytes and are
            // handling the unaligned case.
            let lanes = unsafe { chunk.as_ptr().cast::<Lanes>().read_unaligned() };
            self.write(lanes);
            data = rest;
        }
        data
    }

    /// Converges the four lanes into a single 64-bit value
    /// (step 3 of the algorithm).
    // RATIONALE: See RATIONALE[inline]
    #[inline]
    const fn finish(&self) -> u64 {
        let [acc1, acc2, acc3, acc4] = self.0;

        let mut acc = {
            let acc1 = acc1.rotate_left(1);
            let acc2 = acc2.rotate_left(7);
            let acc3 = acc3.rotate_left(12);
            let acc4 = acc4.rotate_left(18);

            acc1.wrapping_add(acc2)
                .wrapping_add(acc3)
                .wrapping_add(acc4)
        };

        acc = Self::merge_accumulator(acc, acc1);
        acc = Self::merge_accumulator(acc, acc2);
        acc = Self::merge_accumulator(acc, acc3);
        acc = Self::merge_accumulator(acc, acc4);

        acc
    }

    /// Mixes one lane into the converged accumulator.
    // RATIONALE: See RATIONALE[inline]
    #[inline]
    const fn merge_accumulator(mut acc: u64, acc_n: u64) -> u64 {
        acc ^= round(0, acc_n);
        acc = acc.wrapping_mul(PRIME64_1);
        acc.wrapping_add(PRIME64_4)
    }
}
+
+impl fmt::Debug for Accumulators {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ let [acc1, acc2, acc3, acc4] = self.0;
+ f.debug_struct("Accumulators")
+ .field("acc1", &acc1)
+ .field("acc2", &acc2)
+ .field("acc3", &acc3)
+ .field("acc4", &acc4)
+ .finish()
+ }
+}
+
/// Calculates the 64-bit hash.
#[derive(Debug, Clone, PartialEq)]
pub struct Hasher {
    seed: u64,                  // seed provided at construction
    accumulators: Accumulators, // running per-lane state (step 2)
    buffer: Buffer,             // bytes not yet forming a full stripe
    length: u64,                // total bytes written so far
}
+
+impl Default for Hasher {
+ fn default() -> Self {
+ Self::with_seed(0)
+ }
+}
+
+impl Hasher {
+ /// Hash all data at once. If you can use this function, you may
+ /// see noticable speed gains for certain types of input.
+ #[must_use]
+ // RATIONALE[inline]:
+ //
+ // These `inline`s help unlock a speedup in one benchmark [1] from
+ // ~900µs to ~200µs.
+ //
+ // Further inspection of the disassembly showed that various
+ // helper functions were not being inlined. Avoiding these few
+ // function calls wins us the tiniest performance increase, just
+ // enough so that we are neck-and-neck with (or slightly faster
+ // than!) the C code.
+ //
+ // This results in the entire hash computation being inlined at
+ // the call site.
+ //
+ // [1]: https://github.com/apache/datafusion-comet/pull/575
+ #[inline]
+ pub fn oneshot(seed: u64, data: &[u8]) -> u64 {
+ let len = data.len();
+
+ // Since we know that there's no more data coming, we don't
+ // need to construct the intermediate buffers or copy data to
+ // or from the buffers.
+
+ let mut accumulators = Accumulators::new(seed);
+
+ let data = accumulators.write_many(data);
+
+ Self::finish_with(seed, len.into_u64(), &accumulators, data)
+ }
+
+ /// Constructs the hasher with an initial seed.
+ #[must_use]
+ pub const fn with_seed(seed: u64) -> Self {
+ // Step 1. Initialize internal accumulators
+ Self {
+ seed,
+ accumulators: Accumulators::new(seed),
+ buffer: Buffer::new(),
+ length: 0,
+ }
+ }
+
+ /// The seed this hasher was created with.
+ pub const fn seed(&self) -> u64 {
+ self.seed
+ }
+
+ /// The total number of bytes hashed.
+ pub const fn total_len(&self) -> u64 {
+ self.length
+ }
+
+ #[must_use]
+ // RATIONALE: See RATIONALE[inline]
+ #[inline]
+ fn finish_with(seed: u64, len: u64, accumulators: &Accumulators, mut remaining: &[u8]) -> u64 {
+ // Step 3. Accumulator convergence
+ let mut acc = if len < BYTES_IN_LANE.into_u64() {
+ seed.wrapping_add(PRIME64_5)
+ } else {
+ accumulators.finish()
+ };
+
+ // Step 4. Add input length
+ acc += len;
+
+ // Step 5. Consume remaining input
+ while let Some((chunk, rest)) = remaining.split_first_chunk() {
+ let lane = u64::from_ne_bytes(*chunk).to_le();
+
+ acc ^= round(0, lane);
+ acc = acc.rotate_left(27).wrapping_mul(PRIME64_1);
+ acc = acc.wrapping_add(PRIME64_4);
+ remaining = rest;
+ }
+
+ while let Some((chunk, rest)) = remaining.split_first_chunk() {
+ let lane = u32::from_ne_bytes(*chunk).to_le().into_u64();
+
+ acc ^= lane.wrapping_mul(PRIME64_1);
+ acc = acc.rotate_left(23).wrapping_mul(PRIME64_2);
+ acc = acc.wrapping_add(PRIME64_3);
+
+ remaining = rest;
+ }
+
+ for &byte in remaining {
+ let lane = byte.into_u64();
+
+ acc ^= lane.wrapping_mul(PRIME64_5);
+ acc = acc.rotate_left(11).wrapping_mul(PRIME64_1);
+ }
+
+ // Step 6. Final mix (avalanche)
+ acc ^= acc >> 33;
+ acc = acc.wrapping_mul(PRIME64_2);
+ acc ^= acc >> 29;
+ acc = acc.wrapping_mul(PRIME64_3);
+ acc ^= acc >> 32;
+
+ acc
+ }
+}
+
+impl hash::Hasher for Hasher {
+ // RATIONALE: See RATIONALE[inline]
+ #[inline]
+ fn write(&mut self, data: &[u8]) {
+ let len = data.len();
+
+ // Step 2. Process stripes
+ let (buffered_lanes, data) = self.buffer.extend(data);
+
+ if let Some(&lanes) = buffered_lanes {
+ self.accumulators.write(lanes);
+ }
+
+ let data = self.accumulators.write_many(data);
+
+ self.buffer.set(data);
+
+ self.length += len.into_u64();
+ }
+
+ // RATIONALE: See RATIONALE[inline]
+ #[inline]
+ fn finish(&self) -> u64 {
+ Self::finish_with(
+ self.seed,
+ self.length,
+ &self.accumulators,
+ self.buffer.remaining(),
+ )
+ }
+}
+
+// RATIONALE: See RATIONALE[inline]
+#[inline]
+const fn round(mut acc: u64, lane: u64) -> u64 {
+ acc = acc.wrapping_add(lane.wrapping_mul(PRIME64_2));
+ acc = acc.rotate_left(31);
+ acc.wrapping_mul(PRIME64_1)
+}
+
/// Constructs [`Hasher`][] for multiple hasher instances.
#[derive(Clone)]
pub struct State(u64);

impl State {
    /// Constructs the hasher with an initial seed.
    pub fn with_seed(seed: u64) -> Self {
        State(seed)
    }
}
+
+impl BuildHasher for State {
+ type Hasher = Hasher;
+
+ fn build_hasher(&self) -> Self::Hasher {
+ Hasher::with_seed(self.0)
+ }
+}
+
#[cfg(test)]
mod test {
    use core::{
        array,
        hash::{BuildHasherDefault, Hasher as _},
    };
    use std::collections::HashMap;

    use super::*;

    // Compile-time check that the public types stay `Clone`.
    const _TRAITS: () = {
        const fn is_clone<T: Clone>() {}
        is_clone::<Hasher>();
        is_clone::<State>();
    };

    const EMPTY_BYTES: [u8; 0] = [];

    // The expected hash constants below come from the C reference
    // implementation (see the `*_matches_c_implementation` names).

    #[test]
    fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() {
        let bytes = [0x9c; 32];

        let mut byte_by_byte = Hasher::with_seed(0);
        for byte in bytes.chunks(1) {
            byte_by_byte.write(byte);
        }
        let byte_by_byte = byte_by_byte.finish();

        let mut one_chunk = Hasher::with_seed(0);
        one_chunk.write(&bytes);
        let one_chunk = one_chunk.finish();

        assert_eq!(byte_by_byte, one_chunk);
    }

    #[test]
    fn hash_of_nothing_matches_c_implementation() {
        let mut hasher = Hasher::with_seed(0);
        hasher.write(&EMPTY_BYTES);
        assert_eq!(hasher.finish(), 0xef46_db37_51d8_e999);
    }

    #[test]
    fn hash_of_single_byte_matches_c_implementation() {
        let mut hasher = Hasher::with_seed(0);
        hasher.write(&[42]);
        assert_eq!(hasher.finish(), 0x0a9e_dece_beb0_3ae4);
    }

    #[test]
    fn hash_of_multiple_bytes_matches_c_implementation() {
        let mut hasher = Hasher::with_seed(0);
        hasher.write(b"Hello, world!\0");
        assert_eq!(hasher.finish(), 0x7b06_c531_ea43_e89f);
    }

    #[test]
    fn hash_of_multiple_chunks_matches_c_implementation() {
        // 100 bytes crosses the 32-byte stripe boundary several times.
        let bytes: [u8; 100] = array::from_fn(|i| i as u8);
        let mut hasher = Hasher::with_seed(0);
        hasher.write(&bytes);
        assert_eq!(hasher.finish(), 0x6ac1_e580_3216_6597);
    }

    #[test]
    fn hash_with_different_seed_matches_c_implementation() {
        let mut hasher = Hasher::with_seed(0xae05_4331_1b70_2d91);
        hasher.write(&EMPTY_BYTES);
        assert_eq!(hasher.finish(), 0x4b6a_04fc_df7a_4672);
    }

    #[test]
    fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() {
        let bytes: [u8; 100] = array::from_fn(|i| i as u8);
        let mut hasher = Hasher::with_seed(0xae05_4331_1b70_2d91);
        hasher.write(&bytes);
        assert_eq!(hasher.finish(), 0x567e_355e_0682_e1f1);
    }

    #[test]
    fn hashes_with_different_offsets_are_the_same() {
        // Sliding a 64-byte window over identical bytes exercises
        // every possible input alignment.
        let bytes = [0x7c; 4096];
        let expected = Hasher::oneshot(0, &[0x7c; 64]);

        let the_same = bytes
            .windows(64)
            .map(|w| {
                let mut hasher = Hasher::with_seed(0);
                hasher.write(w);
                hasher.finish()
            })
            .all(|h| h == expected);
        assert!(the_same);
    }

    #[test]
    fn can_be_used_in_a_hashmap_with_a_default_seed() {
        let mut hash: HashMap<_, _, BuildHasherDefault<Hasher>> = Default::default();
        hash.insert(42, "the answer");
        assert_eq!(hash.get(&42), Some(&"the answer"));
    }
}
+
// Gated behind the `random` feature because it pulls in `rand`.
#[cfg(feature = "random")]
#[cfg_attr(docsrs, doc(cfg(feature = "random")))]
mod random_impl {
    use super::*;

    /// Constructs a randomized seed and reuses it for multiple hasher
    /// instances.
    #[derive(Clone)]
    pub struct RandomState(State);

    impl Default for RandomState {
        fn default() -> Self {
            Self::new()
        }
    }

    impl RandomState {
        // Draws the seed once from `rand::random`; every hasher built
        // from this state shares it.
        fn new() -> Self {
            Self(State::with_seed(rand::random()))
        }
    }

    impl BuildHasher for RandomState {
        type Hasher = Hasher;

        fn build_hasher(&self) -> Self::Hasher {
            self.0.build_hasher()
        }
    }

    #[cfg(test)]
    mod test {
        use std::collections::HashMap;

        use super::*;

        // Compile-time check that `RandomState` stays `Clone`.
        const _TRAITS: () = {
            const fn is_clone<T: Clone>() {}
            is_clone::<RandomState>();
        };

        #[test]
        fn can_be_used_in_a_hashmap_with_a_random_seed() {
            let mut hash: HashMap<_, _, RandomState> = Default::default();
            hash.insert(42, "the answer");
            assert_eq!(hash.get(&42), Some(&"the answer"));
        }
    }
}
+
+#[cfg(feature = "random")]
+#[cfg_attr(docsrs, doc(cfg(feature = "random")))]
+pub use random_impl::*;
+
// Gated behind the `serialize` feature because it pulls in `serde`.
// The wire format (field names and layout) is pinned by
// `test_serialization_stability` below.
#[cfg(feature = "serialize")]
#[cfg_attr(docsrs, doc(cfg(feature = "serialize")))]
mod serialize_impl {
    use serde::{Deserialize, Serialize};

    use super::*;

    impl<'de> Deserialize<'de> for Hasher {
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: serde::Deserializer<'de>,
        {
            // Deserialize through the `Shim` layout, then rebuild the
            // private internal state from its fields.
            let shim = Deserialize::deserialize(deserializer)?;

            let Shim {
                total_len,
                seed,
                core,
                buffer,
                buffer_usage,
            } = shim;
            let Core { v1, v2, v3, v4 } = core;

            let mut buffer_data = BufferData::new();
            buffer_data.bytes_mut().copy_from_slice(&buffer);

            // NOTE(review): `buffer_usage` is not validated; a
            // malformed input with a value > 32 would later panic in
            // `Buffer::remaining` — consider rejecting it here.
            Ok(Hasher {
                seed,
                accumulators: Accumulators([v1, v2, v3, v4]),
                buffer: Buffer {
                    offset: buffer_usage,
                    data: buffer_data,
                },
                length: total_len,
            })
        }
    }

    impl Serialize for Hasher {
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: serde::Serializer,
        {
            // Flatten the private internal state into the `Shim`
            // layout and serialize that.
            let Hasher {
                seed,
                ref accumulators,
                ref buffer,
                length,
            } = *self;
            let [v1, v2, v3, v4] = accumulators.0;
            let Buffer { offset, ref data } = *buffer;
            let buffer = *data.bytes();

            let shim = Shim {
                total_len: length,
                seed,
                core: Core { v1, v2, v3, v4 },
                buffer,
                buffer_usage: offset,
            };

            shim.serialize(serializer)
        }
    }

    // Mirror of `Hasher`'s state in the serialized form; the field
    // names here are part of the stable format.
    #[derive(Serialize, Deserialize)]
    struct Shim {
        total_len: u64,
        seed: u64,
        core: Core,
        buffer: [u8; 32],
        buffer_usage: usize,
    }

    // The four accumulator lanes in the serialized form.
    #[derive(Serialize, Deserialize)]
    struct Core {
        v1: u64,
        v2: u64,
        v3: u64,
        v4: u64,
    }

    #[cfg(test)]
    mod test {
        use std::hash::Hasher as _;

        use super::*;

        type Result<T = (), E = serde_json::Error> = core::result::Result<T, E>;

        #[test]
        fn test_serialization_cycle() -> Result {
            let mut hasher = Hasher::with_seed(0);
            hasher.write(b"Hello, world!\0");
            let _ = hasher.finish();

            let serialized = serde_json::to_string(&hasher)?;
            let unserialized: Hasher = serde_json::from_str(&serialized)?;
            assert_eq!(hasher, unserialized);
            Ok(())
        }

        // Guards the exact JSON layout so the format stays readable
        // by older serialized data.
        #[test]
        fn test_serialization_stability() -> Result {
            let mut hasher = Hasher::with_seed(0);
            hasher.write(b"Hello, world!\0");
            let _ = hasher.finish();

            let expected_serialized = r#"{
                "total_len": 14,
                "seed": 0,
                "core": {
                    "v1": 6983438078262162902,
                    "v2": 14029467366897019727,
                    "v3": 0,
                    "v4": 7046029288634856825
                },
                "buffer": [
                    72, 101, 108, 108, 111, 44, 32, 119,
                    111, 114, 108, 100, 33, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0
                ],
                "buffer_usage": 14
            }"#;

            let unserialized: Hasher = serde_json::from_str(expected_serialized)?;
            assert_eq!(hasher, unserialized);

            let expected_value: serde_json::Value = serde_json::from_str(expected_serialized)?;
            let actual_value = serde_json::to_value(&hasher)?;
            assert_eq!(expected_value, actual_value);

            Ok(())
        }
    }
}
diff --git a/pseudo_crate/Cargo.lock b/pseudo_crate/Cargo.lock
index 6864911..aae3bd7 100644
--- a/pseudo_crate/Cargo.lock
+++ b/pseudo_crate/Cargo.lock
@@ -469,7 +469,7 @@
"tracing-subscriber",
"try-lock",
"tungstenite",
- "twox-hash",
+ "twox-hash 2.1.0",
"ucd-trie",
"ucs2",
"uefi",
@@ -3751,7 +3751,7 @@
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5"
dependencies = [
- "twox-hash",
+ "twox-hash 1.6.3",
]
[[package]]
@@ -6363,11 +6363,19 @@
checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675"
dependencies = [
"cfg-if",
- "rand",
"static_assertions",
]
[[package]]
+name = "twox-hash"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e7b17f197b3050ba473acf9181f7b1d3b66d1cf7356c6cc57886662276e65908"
+dependencies = [
+ "rand",
+]
+
+[[package]]
name = "typenum"
version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/pseudo_crate/Cargo.toml b/pseudo_crate/Cargo.toml
index 0faa577..497d7a2 100644
--- a/pseudo_crate/Cargo.toml
+++ b/pseudo_crate/Cargo.toml
@@ -378,7 +378,7 @@
tracing-subscriber = "=0.3.19"
try-lock = "=0.2.5"
tungstenite = "=0.24.0"
-twox-hash = "=1.6.3"
+twox-hash = "=2.1.0"
ucd-trie = "=0.1.7"
ucs2 = "=0.3.3"
uefi = "=0.33.0"