| /* crc32.c -- compute the CRC-32 of a data stream |
| * Copyright (C) 1995-2006, 2010, 2011, 2012, 2016 Mark Adler |
| * For conditions of distribution and use, see copyright notice in zlib.h |
| * |
| * Thanks to Rodney Brown <rbrown64@csc.com.au> for his contribution of faster |
| * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing |
| * tables for updating the shift register in one step with three exclusive-ors |
| * instead of four steps with four exclusive-ors. This results in about a |
| * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3. |
| */ |
| |
| /* @(#) $Id$ */ |
| |
| /* |
| Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore |
| protection on the static variables used to control the first-use generation |
| of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should |
| first call get_crc_table() to initialize the tables before allowing more than |
| one thread to use crc32(). |
| |
| DYNAMIC_CRC_TABLE and MAKECRCH can be #defined to write out crc32.h. |
| */ |
| |
| #ifdef MAKECRCH |
| # include <stdio.h> |
| # ifndef DYNAMIC_CRC_TABLE |
| # define DYNAMIC_CRC_TABLE |
| # endif /* !DYNAMIC_CRC_TABLE */ |
| #endif /* MAKECRCH */ |
| |
| #include "deflate.h" |
| #include "cpu_features.h" |
| #include "zutil.h" /* for STDC and FAR definitions */ |
| |
| #if defined(CRC32_SIMD_SSE42_PCLMUL) || defined(CRC32_ARMV8_CRC32) |
| #include "crc32_simd.h" |
| #endif |
| |
| /* Definitions for doing the crc four data bytes at a time. */ |
| #if !defined(NOBYFOUR) && defined(Z_U4) |
| # define BYFOUR |
| #endif |
| #ifdef BYFOUR |
| local unsigned long crc32_little OF((unsigned long, |
| const unsigned char FAR *, z_size_t)); |
| local unsigned long crc32_big OF((unsigned long, |
| const unsigned char FAR *, z_size_t)); |
| # define TBLS 8 |
| #else |
| # define TBLS 1 |
| #endif /* BYFOUR */ |
| |
| /* Local functions for crc concatenation */ |
| local unsigned long gf2_matrix_times OF((unsigned long *mat, |
| unsigned long vec)); |
| local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat)); |
| local uLong crc32_combine_ OF((uLong crc1, uLong crc2, z_off64_t len2)); |
| |
| |
| #ifdef DYNAMIC_CRC_TABLE |
| |
| local volatile int crc_table_empty = 1; |
| local z_crc_t FAR crc_table[TBLS][256]; |
| local void make_crc_table OF((void)); |
| #ifdef MAKECRCH |
| local void write_table OF((FILE *, const z_crc_t FAR *)); |
| #endif /* MAKECRCH */ |
| /* |
| Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: |
| x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. |
| |
| Polynomials over GF(2) are represented in binary, one bit per coefficient, |
| with the lowest powers in the most significant bit. Then adding polynomials |
| is just exclusive-or, and multiplying a polynomial by x is a right shift by |
| one. If we call the above polynomial p, and represent a byte as the |
| polynomial q, also with the lowest power in the most significant bit (so the |
| byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p, |
| where a mod b means the remainder after dividing a by b. |
| |
| This calculation is done using the shift-register method of multiplying and |
| taking the remainder. The register is initialized to zero, and for each |
| incoming bit, x^32 is added mod p to the register if the bit is a one (where |
| x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by |
| x (which is shifting right by one and adding x^32 mod p if the bit shifted |
| out is a one). We start with the highest power (least significant bit) of |
| q and repeat for all eight bits of q. |
| |
| The first table is simply the CRC of all possible eight bit values. This is |
| all the information needed to generate CRCs on data a byte at a time for all |
| combinations of CRC register values and incoming bytes. The remaining tables |
| allow for word-at-a-time CRC calculation for both big-endian and little- |
| endian machines, where a word is four bytes. |
| */ |
| local void make_crc_table() |
| { |
| z_crc_t c; |
| int n, k; |
| z_crc_t poly; /* polynomial exclusive-or pattern */ |
| /* terms of polynomial defining this crc (except x^32): */ |
| static volatile int first = 1; /* flag to limit concurrent making */ |
| static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26}; |
| |
| /* See if another task is already doing this (not thread-safe, but better |
| than nothing -- significantly reduces duration of vulnerability in |
| case the advice about DYNAMIC_CRC_TABLE is ignored) */ |
| if (first) { |
| first = 0; |
| |
| /* make exclusive-or pattern from polynomial (0xedb88320UL) */ |
| poly = 0; |
| for (n = 0; n < (int)(sizeof(p)/sizeof(unsigned char)); n++) |
| poly |= (z_crc_t)1 << (31 - p[n]); |
| |
| /* generate a crc for every 8-bit value */ |
| for (n = 0; n < 256; n++) { |
| c = (z_crc_t)n; |
| for (k = 0; k < 8; k++) |
| c = c & 1 ? poly ^ (c >> 1) : c >> 1; |
| crc_table[0][n] = c; |
| } |
| |
| #ifdef BYFOUR |
| /* generate crc for each value followed by one, two, and three zeros, |
| and then the byte reversal of those as well as the first table */ |
| for (n = 0; n < 256; n++) { |
| c = crc_table[0][n]; |
| crc_table[4][n] = ZSWAP32(c); |
| for (k = 1; k < 4; k++) { |
| c = crc_table[0][c & 0xff] ^ (c >> 8); |
| crc_table[k][n] = c; |
| crc_table[k + 4][n] = ZSWAP32(c); |
| } |
| } |
| #endif /* BYFOUR */ |
| |
| crc_table_empty = 0; |
| } |
| else { /* not first */ |
| /* wait for the other guy to finish (not efficient, but rare) */ |
| while (crc_table_empty) |
| ; |
| } |
| |
| #ifdef MAKECRCH |
| /* write out CRC tables to crc32.h */ |
| { |
| FILE *out; |
| |
| out = fopen("crc32.h", "w"); |
| if (out == NULL) return; |
| fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n"); |
| fprintf(out, " * Generated automatically by crc32.c\n */\n\n"); |
| fprintf(out, "local const z_crc_t FAR "); |
| fprintf(out, "crc_table[TBLS][256] =\n{\n {\n"); |
| write_table(out, crc_table[0]); |
| # ifdef BYFOUR |
| fprintf(out, "#ifdef BYFOUR\n"); |
| for (k = 1; k < 8; k++) { |
| fprintf(out, " },\n {\n"); |
| write_table(out, crc_table[k]); |
| } |
| fprintf(out, "#endif\n"); |
| # endif /* BYFOUR */ |
| fprintf(out, " }\n};\n"); |
| fclose(out); |
| } |
| #endif /* MAKECRCH */ |
| } |
| |
| #ifdef MAKECRCH |
| local void write_table(out, table) |
| FILE *out; |
| const z_crc_t FAR *table; |
| { |
| int n; |
| |
| for (n = 0; n < 256; n++) |
| fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : " ", |
| (unsigned long)(table[n]), |
| n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", ")); |
| } |
| #endif /* MAKECRCH */ |
| |
| #else /* !DYNAMIC_CRC_TABLE */ |
| /* ======================================================================== |
| * Tables of CRC-32s of all single-byte values, made by make_crc_table(). |
| */ |
| #include "crc32.h" |
| #endif /* DYNAMIC_CRC_TABLE */ |
| |
| /* ========================================================================= |
| * This function can be used by asm versions of crc32() |
| */ |
| const z_crc_t FAR * ZEXPORT get_crc_table() |
| { |
| #ifdef DYNAMIC_CRC_TABLE |
| if (crc_table_empty) |
| make_crc_table(); |
| #endif /* DYNAMIC_CRC_TABLE */ |
| return (const z_crc_t FAR *)crc_table; |
| } |
| |
| /* ========================================================================= */ |
| #define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8) |
| #define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 |
| |
| /* ========================================================================= */ |
| unsigned long ZEXPORT crc32_z(crc, buf, len) |
| unsigned long crc; |
| const unsigned char FAR *buf; |
| z_size_t len; |
| { |
| /* |
| * zlib convention is to call crc32(0, NULL, 0); before making |
| * calls to crc32(). So this is a good, early (and infrequent) |
| * place to cache CPU features if needed for those later, more |
| * interesting crc32() calls. |
| */ |
| #if defined(CRC32_SIMD_SSE42_PCLMUL) |
| /* |
| * Use x86 sse4.2+pclmul SIMD to compute the crc32. Since this |
| * routine can be freely used, check CPU features here. |
| */ |
| if (buf == Z_NULL) { |
| if (!len) /* Assume user is calling crc32(0, NULL, 0); */ |
| cpu_check_features(); |
| return 0UL; |
| } |
| |
| if (x86_cpu_enable_simd && len >= Z_CRC32_SSE42_MINIMUM_LENGTH) { |
| /* crc32 16-byte chunks */ |
| z_size_t chunk_size = len & ~Z_CRC32_SSE42_CHUNKSIZE_MASK; |
| crc = ~crc32_sse42_simd_(buf, chunk_size, ~(uint32_t)crc); |
| /* check remaining data */ |
| len -= chunk_size; |
| if (!len) |
| return crc; |
| /* Fall into the default crc32 for the remaining data. */ |
| buf += chunk_size; |
| } |
| #else |
| if (buf == Z_NULL) { |
| return 0UL; |
| } |
| #endif /* CRC32_SIMD_SSE42_PCLMUL */ |
| |
| #ifdef DYNAMIC_CRC_TABLE |
| if (crc_table_empty) |
| make_crc_table(); |
| #endif /* DYNAMIC_CRC_TABLE */ |
| |
| #ifdef BYFOUR |
| if (sizeof(void *) == sizeof(ptrdiff_t)) { |
| z_crc_t endian; |
| |
| endian = 1; |
| if (*((unsigned char *)(&endian))) |
| return crc32_little(crc, buf, len); |
| else |
| return crc32_big(crc, buf, len); |
| } |
| #endif /* BYFOUR */ |
| crc = crc ^ 0xffffffffUL; |
| while (len >= 8) { |
| DO8; |
| len -= 8; |
| } |
| if (len) do { |
| DO1; |
| } while (--len); |
| return crc ^ 0xffffffffUL; |
| } |
| |
| /* ========================================================================= */ |
| unsigned long ZEXPORT crc32(crc, buf, len) |
| unsigned long crc; |
| const unsigned char FAR *buf; |
| uInt len; |
| { |
| #if defined(CRC32_ARMV8_CRC32) |
| /* We got to verify ARM CPU features, so exploit the common usage pattern |
| * of calling this function with Z_NULL for an initial valid crc value. |
| * This allows to cache the result of the feature check and avoid extraneous |
| * function calls. |
| * TODO: try to move this to crc32_z if we don't loose performance on ARM. |
| */ |
| if (buf == Z_NULL) { |
| if (!len) /* Assume user is calling crc32(0, NULL, 0); */ |
| cpu_check_features(); |
| return 0UL; |
| } |
| |
| if (arm_cpu_enable_crc32) |
| return armv8_crc32_little(crc, buf, len); |
| #endif |
| return crc32_z(crc, buf, len); |
| } |
| |
| #ifdef BYFOUR |
| |
| /* |
| This BYFOUR code accesses the passed unsigned char * buffer with a 32-bit |
| integer pointer type. This violates the strict aliasing rule, where a |
| compiler can assume, for optimization purposes, that two pointers to |
| fundamentally different types won't ever point to the same memory. This can |
| manifest as a problem only if one of the pointers is written to. This code |
| only reads from those pointers. So long as this code remains isolated in |
| this compilation unit, there won't be a problem. For this reason, this code |
| should not be copied and pasted into a compilation unit in which other code |
| writes to the buffer that is passed to these routines. |
| */ |
| |
| /* ========================================================================= */ |
| #define DOLIT4 c ^= *buf4++; \ |
| c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \ |
| crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24] |
| #define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4 |
| |
| /* ========================================================================= */ |
| local unsigned long crc32_little(crc, buf, len) |
| unsigned long crc; |
| const unsigned char FAR *buf; |
| z_size_t len; |
| { |
| register z_crc_t c; |
| register const z_crc_t FAR *buf4; |
| |
| c = (z_crc_t)crc; |
| c = ~c; |
| while (len && ((ptrdiff_t)buf & 3)) { |
| c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); |
| len--; |
| } |
| |
| buf4 = (const z_crc_t FAR *)(const void FAR *)buf; |
| while (len >= 32) { |
| DOLIT32; |
| len -= 32; |
| } |
| while (len >= 4) { |
| DOLIT4; |
| len -= 4; |
| } |
| buf = (const unsigned char FAR *)buf4; |
| |
| if (len) do { |
| c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); |
| } while (--len); |
| c = ~c; |
| return (unsigned long)c; |
| } |
| |
| /* ========================================================================= */ |
| #define DOBIG4 c ^= *buf4++; \ |
| c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ |
| crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] |
| #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 |
| |
| /* ========================================================================= */ |
| local unsigned long crc32_big(crc, buf, len) |
| unsigned long crc; |
| const unsigned char FAR *buf; |
| z_size_t len; |
| { |
| register z_crc_t c; |
| register const z_crc_t FAR *buf4; |
| |
| c = ZSWAP32((z_crc_t)crc); |
| c = ~c; |
| while (len && ((ptrdiff_t)buf & 3)) { |
| c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); |
| len--; |
| } |
| |
| buf4 = (const z_crc_t FAR *)(const void FAR *)buf; |
| while (len >= 32) { |
| DOBIG32; |
| len -= 32; |
| } |
| while (len >= 4) { |
| DOBIG4; |
| len -= 4; |
| } |
| buf = (const unsigned char FAR *)buf4; |
| |
| if (len) do { |
| c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); |
| } while (--len); |
| c = ~c; |
| return (unsigned long)(ZSWAP32(c)); |
| } |
| |
| #endif /* BYFOUR */ |
| |
| #define GF2_DIM 32 /* dimension of GF(2) vectors (length of CRC) */ |
| |
| /* ========================================================================= */ |
| local unsigned long gf2_matrix_times(mat, vec) |
| unsigned long *mat; |
| unsigned long vec; |
| { |
| unsigned long sum; |
| |
| sum = 0; |
| while (vec) { |
| if (vec & 1) |
| sum ^= *mat; |
| vec >>= 1; |
| mat++; |
| } |
| return sum; |
| } |
| |
| /* ========================================================================= */ |
| local void gf2_matrix_square(square, mat) |
| unsigned long *square; |
| unsigned long *mat; |
| { |
| int n; |
| |
| for (n = 0; n < GF2_DIM; n++) |
| square[n] = gf2_matrix_times(mat, mat[n]); |
| } |
| |
| /* ========================================================================= */ |
| local uLong crc32_combine_(crc1, crc2, len2) |
| uLong crc1; |
| uLong crc2; |
| z_off64_t len2; |
| { |
| int n; |
| unsigned long row; |
| unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */ |
| unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */ |
| |
| /* degenerate case (also disallow negative lengths) */ |
| if (len2 <= 0) |
| return crc1; |
| |
| /* put operator for one zero bit in odd */ |
| odd[0] = 0xedb88320UL; /* CRC-32 polynomial */ |
| row = 1; |
| for (n = 1; n < GF2_DIM; n++) { |
| odd[n] = row; |
| row <<= 1; |
| } |
| |
| /* put operator for two zero bits in even */ |
| gf2_matrix_square(even, odd); |
| |
| /* put operator for four zero bits in odd */ |
| gf2_matrix_square(odd, even); |
| |
| /* apply len2 zeros to crc1 (first square will put the operator for one |
| zero byte, eight zero bits, in even) */ |
| do { |
| /* apply zeros operator for this bit of len2 */ |
| gf2_matrix_square(even, odd); |
| if (len2 & 1) |
| crc1 = gf2_matrix_times(even, crc1); |
| len2 >>= 1; |
| |
| /* if no more bits set, then done */ |
| if (len2 == 0) |
| break; |
| |
| /* another iteration of the loop with odd and even swapped */ |
| gf2_matrix_square(odd, even); |
| if (len2 & 1) |
| crc1 = gf2_matrix_times(odd, crc1); |
| len2 >>= 1; |
| |
| /* if no more bits set, then done */ |
| } while (len2 != 0); |
| |
| /* return combined crc */ |
| crc1 ^= crc2; |
| return crc1; |
| } |
| |
| /* ========================================================================= */ |
| uLong ZEXPORT crc32_combine(crc1, crc2, len2) |
| uLong crc1; |
| uLong crc2; |
| z_off_t len2; |
| { |
| return crc32_combine_(crc1, crc2, len2); |
| } |
| |
| uLong ZEXPORT crc32_combine64(crc1, crc2, len2) |
| uLong crc1; |
| uLong crc2; |
| z_off64_t len2; |
| { |
| return crc32_combine_(crc1, crc2, len2); |
| } |
| |
| ZLIB_INTERNAL void crc_reset(deflate_state *const s) |
| { |
| #ifdef ADLER32_SIMD_SSSE3 |
| if (x86_cpu_enable_simd) { |
| crc_fold_init(s); |
| return; |
| } |
| #endif |
| s->strm->adler = crc32(0L, Z_NULL, 0); |
| } |
| |
| ZLIB_INTERNAL void crc_finalize(deflate_state *const s) |
| { |
| #ifdef ADLER32_SIMD_SSSE3 |
| if (x86_cpu_enable_simd) |
| s->strm->adler = crc_fold_512to32(s); |
| #endif |
| } |
| |
| ZLIB_INTERNAL void copy_with_crc(z_streamp strm, Bytef *dst, long size) |
| { |
| #ifdef ADLER32_SIMD_SSSE3 |
| if (x86_cpu_enable_simd) { |
| crc_fold_copy(strm->state, dst, strm->next_in, size); |
| return; |
| } |
| #endif |
| zmemcpy(dst, strm->next_in, size); |
| strm->adler = crc32(strm->adler, dst, size); |
| } |