[zlib] Only create needed vectors for crc_fold_512

The original x86-specific folding CRC code, contributed back in 2015,
uses a macro pair (CRC_LOAD/CRC_SAVE) to load and store the data vectors;
the pair is reused throughout the code.

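Modern compilers (e.g. VS2022) will complain about unreachable code past the
return in crc_fold_512to32, which only needs the first 4 sets of vectors
(xmm_crc0 .. xmm_crc3). This patch fixes the issue by loading only those four
vectors explicitly and removing the CRC_SAVE that followed the return.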

For reference, see warning C4702:
https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-4-c4702?view=msvc-170

Bug: 1426252
Change-Id: I9636a7705f7f7c869b5bcfc4f7d35795614c7d23
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/4409500
Reviewed-by: Hans Wennborg <hans@chromium.org>
Commit-Queue: Adenilson Cavalcanti <cavalcantii@chromium.org>
Reviewed-by: Chris Blume <cblume@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1128743}
NOKEYCHECK=True
GitOrigin-RevId: 955a57f0b03d7a2b6266d535f9e6ba03ba26a340
diff --git a/crc_folding.c b/crc_folding.c
index ee31d49..1b4f4e1 100644
--- a/crc_folding.c
+++ b/crc_folding.c
@@ -435,7 +435,10 @@
     unsigned crc;
     __m128i x_tmp0, x_tmp1, x_tmp2, crc_fold;
 
-    CRC_LOAD(s)
+    __m128i xmm_crc0 = _mm_loadu_si128((__m128i *)s->crc0 + 0);
+    __m128i xmm_crc1 = _mm_loadu_si128((__m128i *)s->crc0 + 1);
+    __m128i xmm_crc2 = _mm_loadu_si128((__m128i *)s->crc0 + 2);
+    __m128i xmm_crc3 = _mm_loadu_si128((__m128i *)s->crc0 + 3);
 
     /*
      * k1
@@ -491,7 +494,6 @@
 
     crc = _mm_extract_epi32(xmm_crc3, 2);
     return ~crc;
-    CRC_SAVE(s)
 }
 
 #endif  /* CRC32_SIMD_SSE42_PCLMUL */