Fix incorrect CRC returned by AcceleratedCrcMemcpyEngine when kRegions == 1

This bug does not affect any users currently since AcceleratedCrcMemcpyEngine
is never configured with a single region currently.

Before this CL, if the number of regions for the AcceleratedCrcMemcpyEngine was
set to one, the CRC for the sole region would be incorrectly concatenated onto
itself and corrupted.

PiperOrigin-RevId: 561663848
Change-Id: Ibfc596306ab07db906d2e3ecf6eea3f6cb9f1b2b
diff --git a/absl/crc/internal/crc_memcpy_test.cc b/absl/crc/internal/crc_memcpy_test.cc
index bbdcd20..a4c6a01 100644
--- a/absl/crc/internal/crc_memcpy_test.cc
+++ b/absl/crc/internal/crc_memcpy_test.cc
@@ -107,6 +107,9 @@
         *(base_data + i) =
             static_cast<char>(absl::Uniform<unsigned char>(gen_));
       }
+      SCOPED_TRACE(absl::StrCat("engine=<", GetParam().vector_lanes, ",",
+                                GetParam().integer_lanes, ">, ", "size=", size,
+                                ", source_alignment=", source_alignment));
       absl::crc32c_t initial_crc =
           absl::crc32c_t{absl::Uniform<uint32_t>(gen_)};
       absl::crc32c_t experiment_crc =
@@ -138,6 +141,9 @@
         *(base_data + i) =
             static_cast<char>(absl::Uniform<unsigned char>(gen_));
       }
+      SCOPED_TRACE(absl::StrCat("engine=<", GetParam().vector_lanes, ",",
+                                GetParam().integer_lanes, ">, ", "size=", size,
+                                ", destination_alignment=", dest_alignment));
       absl::crc32c_t initial_crc =
           absl::crc32c_t{absl::Uniform<uint32_t>(gen_)};
       absl::crc32c_t experiment_crc =
@@ -161,6 +167,7 @@
                          ::testing::Values(
                              // Tests for configurations that may occur in prod.
                              TestParams{X86, 3, 0}, TestParams{X86, 1, 2},
+                             TestParams{X86, 1, 0},
                              // Fallback test.
                              TestParams{FALLBACK, 0, 0},
                              // Non Temporal
diff --git a/absl/crc/internal/crc_memcpy_x86_64.cc b/absl/crc/internal/crc_memcpy_x86_64.cc
index c984cf9..c4ccd47 100644
--- a/absl/crc/internal/crc_memcpy_x86_64.cc
+++ b/absl/crc/internal/crc_memcpy_x86_64.cc
@@ -159,6 +159,7 @@
     void* __restrict dst, const void* __restrict src, std::size_t length,
     crc32c_t initial_crc) const {
   constexpr std::size_t kRegions = vec_regions + int_regions;
+  static_assert(kRegions > 0, "Must specify at least one region.");
   constexpr uint32_t kCrcDataXor = uint32_t{0xffffffff};
   constexpr std::size_t kBlockSize = sizeof(__m128i);
   constexpr std::size_t kCopyRoundSize = kRegions * kBlockSize;
@@ -314,6 +315,21 @@
   src_bytes += region_size * (kRegions - 1);
   dst_bytes += region_size * (kRegions - 1);
 
+  // Copy and CRC the tail through the XMM registers.
+  std::size_t tail_blocks = tail_size / kBlockSize;
+  LargeTailCopy<0, 1>(&crcs[kRegions - 1], &dst_bytes, &src_bytes, 0,
+                      tail_blocks);
+
+  // Final tail copy for under 16 bytes.
+  crcs[kRegions - 1] =
+      ShortCrcCopy(dst_bytes, src_bytes, tail_size - tail_blocks * kBlockSize,
+                   crcs[kRegions - 1]);
+
+  if (kRegions == 1) {
+    // If there is only one region, finalize and return its CRC.
+    return crc32c_t{static_cast<uint32_t>(crcs[0]) ^ kCrcDataXor};
+  }
+
   // Finalize the first CRCs: XOR the internal CRCs by the XOR mask to undo the
   // XOR done before doing block copy + CRCs.
   for (size_t i = 0; i + 1 < kRegions; i++) {
@@ -326,16 +342,6 @@
     full_crc = ConcatCrc32c(full_crc, crcs[i], region_size);
   }
 
-  // Copy and CRC the tail through the XMM registers.
-  std::size_t tail_blocks = tail_size / kBlockSize;
-  LargeTailCopy<0, 1>(&crcs[kRegions - 1], &dst_bytes, &src_bytes, 0,
-                      tail_blocks);
-
-  // Final tail copy for under 16 bytes.
-  crcs[kRegions - 1] =
-      ShortCrcCopy(dst_bytes, src_bytes, tail_size - tail_blocks * kBlockSize,
-                   crcs[kRegions - 1]);
-
   // Finalize and concatenate the final CRC, then return.
   crcs[kRegions - 1] =
       crc32c_t{static_cast<uint32_t>(crcs[kRegions - 1]) ^ kCrcDataXor};
@@ -427,6 +433,8 @@
     return std::make_unique<AcceleratedCrcMemcpyEngine<3, 0>>();
   } else if (vector == 1 && integer == 2) {
     return std::make_unique<AcceleratedCrcMemcpyEngine<1, 2>>();
+  } else if (vector == 1 && integer == 0) {
+    return std::make_unique<AcceleratedCrcMemcpyEngine<1, 0>>();
   }
   return nullptr;
 }