Optimize `InitControlBytesAfterSoo` to have less writes and make them with compile time known size.
X86:
```
name old CYCLES/op new CYCLES/op delta
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:1 14.8 ± 0% 14.8 ± 0% -0.05% (p=0.001 n=49+50)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:2 49.2 ± 0% 44.7 ± 0% -9.19% (p=0.000 n=54+50)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:4 59.3 ± 0% 57.2 ± 0% -3.61% (p=0.000 n=54+53)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:8 56.1 ± 1% 55.1 ± 1% -1.86% (p=0.000 n=54+54)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:16 61.8 ± 1% 61.2 ± 1% -0.98% (p=0.000 n=56+55)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:32 60.5 ± 1% 60.2 ± 1% -0.41% (p=0.000 n=42+55)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:64 57.7 ± 2% 57.5 ± 2% -0.32% (p=0.031 n=55+56)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:128 55.2 ± 1% 55.2 ± 1% ~ (p=0.493 n=52+51)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:256 52.9 ± 1% 53.0 ± 1% ~ (p=0.877 n=45+49)
```
ARM (altra,arch=aarch64)
```
name old CYCLES/op new CYCLES/op delta
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:1 16.9 ± 3% 16.8 ± 3% ~ (p=0.107 n=57+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:2 55.9 ± 1% 51.6 ±11% -7.62% (p=0.000 n=46+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:4 61.6 ± 0% 59.9 ± 7% -2.77% (p=0.000 n=33+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:8 61.9 ± 0% 61.0 ± 4% -1.47% (p=0.000 n=33+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:16 58.0 ± 2% 57.7 ± 3% ~ (p=0.073 n=40+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:32 54.4 ± 3% 54.2 ± 3% ~ (p=0.138 n=57+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:64 51.3 ± 2% 51.1 ± 2% ~ (p=0.110 n=57+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:128 48.8 ± 2% 48.7 ± 2% ~ (p=0.163 n=57+57)
```
PiperOrigin-RevId: 720984174
Change-Id: I3297a89e678421e4e784af6b5f66b862fd1aae05
diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc
index 8911aa3..c815afd 100644
--- a/absl/container/internal/raw_hash_set.cc
+++ b/absl/container/internal/raw_hash_set.cc
@@ -481,13 +481,44 @@
void HashSetResizeHelper::InitControlBytesAfterSoo(ctrl_t* new_ctrl, ctrl_t h2,
size_t new_capacity) {
assert(is_single_group(new_capacity));
- std::memset(new_ctrl, static_cast<int8_t>(ctrl_t::kEmpty),
- NumControlBytes(new_capacity));
- assert(HashSetResizeHelper::SooSlotIndex() == 1);
+ static_assert(HashSetResizeHelper::SooSlotIndex() == 1, "");
// This allows us to avoid branching on had_soo_slot_.
assert(had_soo_slot_ || h2 == ctrl_t::kEmpty);
- new_ctrl[1] = new_ctrl[new_capacity + 2] = h2;
- new_ctrl[new_capacity] = ctrl_t::kSentinel;
+
+ if (Group::kWidth == 16) {
+ // Initialize the second 8 bytes in the original and mirrored control bytes.
+ // The ranges can overlap.
+ absl::little_endian::Store64(new_ctrl + 8, kMsbs8Bytes);
+ absl::little_endian::Store64(new_ctrl + new_capacity + 8, kMsbs8Bytes);
+ }
+ static constexpr uint64_t kAllEmptyExceptSoo =
+ kMsbs8Bytes ^ (static_cast<uint64_t>(static_cast<uint8_t>(ctrl_t::kEmpty))
+ << (8 * HashSetResizeHelper::SooSlotIndex()));
+ // Initialize the first 8 bytes in the original control bytes.
+ // The first 8 bytes are all empty except the SOO slot.
+ // The range may overlap with the mirrored control bytes. These bytes will be
+ // overwritten later.
+ uint64_t first_ctrl_bytes =
+ kAllEmptyExceptSoo ^ (static_cast<uint64_t>(static_cast<uint8_t>(h2))
+ << (8 * HashSetResizeHelper::SooSlotIndex()));
+ absl::little_endian::Store64(new_ctrl, first_ctrl_bytes);
+ // Initialize Sentinel byte and the first 7 bytes in the mirrored control
+ // bytes.
+ // We are adding kSentinel as the first byte of the mirrored control bytes.
+ uint64_t mirrored_ctrl_bytes =
+ (first_ctrl_bytes << 8) ^
+ static_cast<uint64_t>(static_cast<uint8_t>(ctrl_t::kSentinel));
+ absl::little_endian::Store64(new_ctrl + new_capacity, mirrored_ctrl_bytes);
+
+ // Example for capacity 3:
+ // new_ctrl after 2 stores = ????????EEEEEEEEEEE
+ // new_ctrl after 3rd store = E0EEEEEEEEEEEEEEEEE
+ // new_ctrl after 4th store = E0ESE0EEEEEEEEEEEEE
+
+ // Example for capacity 15:
+ // new_ctrl after 2 stores = ????????EEEEEEEE???????EEEEEEEE
+ // new_ctrl after 3rd store = E0EEEEEEEEEEEEEE???????EEEEEEEE
+ // new_ctrl after 4th store = E0EEEEEEEEEEEEESE0EEEEEEEEEEEEE
}
void HashSetResizeHelper::GrowIntoSingleGroupShuffleTransferableSlots(
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index 79ccb59..2462ed2 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -2033,7 +2033,7 @@
// index 1 so that when resizing from capacity 1 to 3, we can still have
// random iteration order between the first two inserted elements.
// I.e. it allows inserting the second element at either index 0 or 2.
- static size_t SooSlotIndex() { return 1; }
+ static constexpr size_t SooSlotIndex() { return 1; }
// Allocates a backing array for the hashtable.
// Reads `capacity` and updates all other fields based on the result of