Optimize `InitControlBytesAfterSoo` to have less writes and make them with compile time known size.

X86:
```
name                                                                           old CYCLES/op        new CYCLES/op        delta
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:1           14.8 ± 0%            14.8 ± 0%  -0.05%  (p=0.001 n=49+50)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:2           49.2 ± 0%            44.7 ± 0%  -9.19%  (p=0.000 n=54+50)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:4           59.3 ± 0%            57.2 ± 0%  -3.61%  (p=0.000 n=54+53)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:8           56.1 ± 1%            55.1 ± 1%  -1.86%  (p=0.000 n=54+54)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:16          61.8 ± 1%            61.2 ± 1%  -0.98%  (p=0.000 n=56+55)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:32          60.5 ± 1%            60.2 ± 1%  -0.41%  (p=0.000 n=42+55)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:64          57.7 ± 2%            57.5 ± 2%  -0.32%  (p=0.031 n=55+56)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:128         55.2 ± 1%            55.2 ± 1%    ~     (p=0.493 n=52+51)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:256         52.9 ± 1%            53.0 ± 1%    ~     (p=0.877 n=45+49)
```

ARM (altra,arch=aarch64)
```
name                                                                           old CYCLES/op        new CYCLES/op        delta
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:1           16.9 ± 3%            16.8 ± 3%    ~     (p=0.107 n=57+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:2           55.9 ± 1%            51.6 ±11%  -7.62%  (p=0.000 n=46+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:4           61.6 ± 0%            59.9 ± 7%  -2.77%  (p=0.000 n=33+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:8           61.9 ± 0%            61.0 ± 4%  -1.47%  (p=0.000 n=33+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:16          58.0 ± 2%            57.7 ± 3%    ~     (p=0.073 n=40+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:32          54.4 ± 3%            54.2 ± 3%    ~     (p=0.138 n=57+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:64          51.3 ± 2%            51.1 ± 2%    ~     (p=0.110 n=57+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:128         48.8 ± 2%            48.7 ± 2%    ~     (p=0.163 n=57+57)
```

PiperOrigin-RevId: 720984174
Change-Id: I3297a89e678421e4e784af6b5f66b862fd1aae05
diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc
index 8911aa3..c815afd 100644
--- a/absl/container/internal/raw_hash_set.cc
+++ b/absl/container/internal/raw_hash_set.cc
@@ -481,13 +481,44 @@
 void HashSetResizeHelper::InitControlBytesAfterSoo(ctrl_t* new_ctrl, ctrl_t h2,
                                                    size_t new_capacity) {
   assert(is_single_group(new_capacity));
-  std::memset(new_ctrl, static_cast<int8_t>(ctrl_t::kEmpty),
-              NumControlBytes(new_capacity));
-  assert(HashSetResizeHelper::SooSlotIndex() == 1);
+  static_assert(HashSetResizeHelper::SooSlotIndex() == 1, "");
   // This allows us to avoid branching on had_soo_slot_.
   assert(had_soo_slot_ || h2 == ctrl_t::kEmpty);
-  new_ctrl[1] = new_ctrl[new_capacity + 2] = h2;
-  new_ctrl[new_capacity] = ctrl_t::kSentinel;
+
+  if (Group::kWidth == 16) {
+    // Initialize the second 8 bytes in the original and mirrored control bytes.
+    // The ranges can overlap.
+    absl::little_endian::Store64(new_ctrl + 8, kMsbs8Bytes);
+    absl::little_endian::Store64(new_ctrl + new_capacity + 8, kMsbs8Bytes);
+  }
+  static constexpr uint64_t kAllEmptyExceptSoo =
+      kMsbs8Bytes ^ (static_cast<uint64_t>(static_cast<uint8_t>(ctrl_t::kEmpty))
+                     << (8 * HashSetResizeHelper::SooSlotIndex()));
+  // Initialize the first 8 bytes in the original control bytes.
+  // The first 8 bytes are all empty except the SOO slot.
+  // The range may overlap with the mirrored control bytes. These bytes will be
+  // overwritten later.
+  uint64_t first_ctrl_bytes =
+      kAllEmptyExceptSoo ^ (static_cast<uint64_t>(static_cast<uint8_t>(h2))
+                            << (8 * HashSetResizeHelper::SooSlotIndex()));
+  absl::little_endian::Store64(new_ctrl, first_ctrl_bytes);
+  // Initialize Sentinel byte and the first 7 bytes in the mirrored control
+  // bytes.
+  // We are adding kSentinel as the first byte of the mirrored control bytes.
+  uint64_t mirrored_ctrl_bytes =
+      (first_ctrl_bytes << 8) ^
+      static_cast<uint64_t>(static_cast<uint8_t>(ctrl_t::kSentinel));
+  absl::little_endian::Store64(new_ctrl + new_capacity, mirrored_ctrl_bytes);
+
+  // Example for capacity 3:
+  // new_ctrl after 2 stores  =       ????????EEEEEEEEEEE
+  // new_ctrl after 3rd store  =      E0EEEEEEEEEEEEEEEEE
+  // new_ctrl after 4th store  =      E0ESE0EEEEEEEEEEEEE
+
+  // Example for capacity 15:
+  // new_ctrl after 2 stores  =       ????????EEEEEEEE???????EEEEEEEE
+  // new_ctrl after 3rd store  =      E0EEEEEEEEEEEEEE???????EEEEEEEE
+  // new_ctrl after 4th store  =      E0EEEEEEEEEEEEESE0EEEEEEEEEEEEE
 }
 
 void HashSetResizeHelper::GrowIntoSingleGroupShuffleTransferableSlots(
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index 79ccb59..2462ed2 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -2033,7 +2033,7 @@
   // index 1 so that when resizing from capacity 1 to 3, we can still have
   // random iteration order between the first two inserted elements.
   // I.e. it allows inserting the second element at either index 0 or 2.
-  static size_t SooSlotIndex() { return 1; }
+  static constexpr size_t SooSlotIndex() { return 1; }
 
   // Allocates a backing array for the hashtable.
   // Reads `capacity` and updates all other fields based on the result of