| // SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 |
| /* |
| * erofs-utils/lib/kite_deflate.c |
| * |
| * Copyright (C) 2023, Alibaba Cloud |
| * Copyright (C) 2023, Gao Xiang <xiang@kernel.org> |
| */ |
| #include "erofs/defs.h" |
| #include "erofs/print.h" |
| #include <errno.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <ctype.h> |
| #include <stdio.h> |
| |
| unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2, |
| unsigned long sz); |
| |
| #ifdef TEST |
| #define kite_dbg(x, ...) fprintf(stderr, x "\n", ##__VA_ARGS__) |
| #else |
| #define kite_dbg(x, ...) |
| #endif |
| |
| #define kHistorySize32 (1U << 15) |
| |
| #define kNumLenSymbols32 256 |
| #define kNumLenSymbolsMax kNumLenSymbols32 |
| |
| #define kSymbolEndOfBlock 256 |
| #define kSymbolMatch (kSymbolEndOfBlock + 1) |
| #define kNumLenSlots 29 |
| #define kMainTableSize (kSymbolMatch + kNumLenSlots) |
| |
| #define kFixedLenTableSize (kSymbolMatch + 31) |
| #define FixedDistTableSize 32 |
| |
| #define kMainTableSize (kSymbolMatch + kNumLenSlots) |
| #define kDistTableSize32 30 |
| |
| #define kNumLitLenCodesMin 257 |
| #define kNumDistCodesMin 1 |
| |
| #define kNumLensCodesMin 4 |
| #define kLensTableSize 19 |
| |
| #define kMatchMinLen 3 |
| #define kMatchMaxLen32 kNumLenSymbols32 + kMatchMinLen - 1 |
| |
| static u32 kstaticHuff_mainCodes[kFixedLenTableSize]; |
| static const u8 kstaticHuff_litLenLevels[kFixedLenTableSize] = { |
| [0 ... 143] = 8, [144 ... 255] = 9, |
| [256 ... 279] = 7, [280 ... 287] = 8, |
| }; |
| static u32 kstaticHuff_distCodes[kFixedLenTableSize]; |
| |
| const u8 kLenStart32[kNumLenSlots] = |
| {0,1,2,3,4,5,6,7,8,10,12,14,16,20,24,28,32,40,48,56,64,80,96,112,128,160,192,224, 255}; |
| |
| const u8 kLenExtraBits32[kNumLenSlots] = |
| {0,0,0,0,0,0,0,0,1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, |
| 5, 5, 5, 0}; |
| |
| /* First normalized distance for each code (0 = distance of 1) */ |
| const u32 kDistStart[kDistTableSize32] = |
| {0,1,2,3,4,6,8,12,16,24,32,48,64,96,128,192,256,384,512,768, |
| 1024,1536,2048,3072,4096,6144,8192,12288,16384,24576}; |
| |
| /* extra bits for each distance code */ |
| const u8 kDistExtraBits[kDistTableSize32] = |
| {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; |
| |
| const u8 kCodeLengthAlphabetOrder[kLensTableSize] = |
| {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; |
| |
| const u8 kLevelExtraBits[3] = {2, 3, 7}; |
| |
| const unsigned int kTableDirectLevels = 16; |
| const unsigned int kBitLensRepNumber_3_6 = kTableDirectLevels; |
| const unsigned int kBitLens0Number_3_10 = kBitLensRepNumber_3_6 + 1; |
| const unsigned int kBitLens0Number_11_138 = kBitLens0Number_3_10 + 1; |
| |
| #define kStored 0 |
| #define kFixedHuffman 1 |
| #define kDynamicHuffman 2 |
| |
| struct kite_deflate_symbol { |
| u16 len, dist; |
| }; |
| |
| struct kite_deflate_table { |
| u32 mainCodes[kMainTableSize]; |
| u8 litLenLevels[kMainTableSize]; |
| u32 distCodes[kDistTableSize32]; |
| u8 distLevels[kDistTableSize32]; |
| u32 levelCodes[kLensTableSize]; |
| u8 levelLens[kLensTableSize]; |
| |
| u8 numdistlens, numblcodes; |
| u16 numlitlens; |
| }; |
| |
| struct kite_deflate { |
| struct kite_deflate_table *tab; |
| const u8 *in; |
| u8 *out; |
| |
| u32 inlen, outlen; |
| u32 pos_in, pos_out; |
| u32 inflightbits; |
| u8 bitpos; |
| u8 numHuffBits; |
| u32 symbols; |
| |
| u32 costbits, startpos; |
| u8 encode_mode; |
| bool freq_changed, lastblock; |
| |
| /* Previous match for lazy matching */ |
| bool prev_valid; |
| u16 prev_longest; |
| |
| u32 mainFreqs[kMainTableSize]; |
| u32 distFreqs[kDistTableSize32]; |
| struct kite_deflate_table tables[2]; |
| |
| /* don't reset the following fields */ |
| struct kite_matchfinder *mf; |
| struct kite_deflate_symbol *sym; |
| u32 max_symbols; |
| bool lazy_search; |
| }; |
| |
| #define ZLIB_DISTANCE_TOO_FAR 4096 |
| |
| static u8 g_LenSlots[kNumLenSymbolsMax]; |
| |
| #define kNumLogBits 9 // do not change it |
| static u8 g_FastPos[1 << kNumLogBits]; |
| |
| static void writebits(struct kite_deflate *s, unsigned int v, u8 bits) |
| { |
| unsigned int rem = sizeof(s->inflightbits) * 8 - s->bitpos; |
| |
| s->inflightbits |= (v << s->bitpos) & (!rem - 1); |
| if (bits > rem) { |
| u8 *out = s->out + s->pos_out; |
| |
| out[0] = s->inflightbits & 0xff; |
| out[1] = (s->inflightbits >> 8) & 0xff; |
| out[2] = (s->inflightbits >> 16) & 0xff; |
| out[3] = (s->inflightbits >> 24) & 0xff; |
| s->pos_out += 4; |
| DBG_BUGON(s->pos_out > s->outlen); |
| s->inflightbits = v >> rem; |
| s->bitpos = bits - rem; |
| return; |
| } |
| s->bitpos += bits; |
| } |
| |
| static void flushbits(struct kite_deflate *s) |
| { |
| u8 *out = s->out + s->pos_out; |
| |
| if (!s->bitpos) |
| return; |
| out[0] = s->inflightbits & 0xff; |
| if (s->bitpos >= 8) { |
| out[1] = (s->inflightbits >> 8) & 0xff; |
| if (s->bitpos >= 16) { |
| out[2] = (s->inflightbits >> 16) & 0xff; |
| if (s->bitpos >= 24) |
| out[3] = (s->inflightbits >> 24) & 0xff; |
| } |
| } |
| s->pos_out += round_up(s->bitpos, 8) >> 3; |
| DBG_BUGON(s->pos_out > s->outlen); |
| s->bitpos = 0; |
| s->inflightbits = 0; |
| } |
| |
| #define kMaxLen 16 |
| |
| static void deflate_genhuffcodes(const u8 *lens, u32 *p, unsigned int nr_codes, |
| const u32 *bl_count) |
| { |
| u32 nextCodes[kMaxLen + 1]; /* next code value for each bit length */ |
| unsigned int code = 0; /* running code value */ |
| unsigned int bits, k; |
| |
| for (bits = 1; bits <= kMaxLen; ++bits) { |
| code = (code + bl_count[bits - 1]) << 1; |
| nextCodes[bits] = code; |
| } |
| |
| DBG_BUGON(code + bl_count[kMaxLen] != 1 << kMaxLen); |
| |
| for (k = 0; k < nr_codes; ++k) |
| p[k] = nextCodes[lens[k]]++; |
| } |
| |
| static u32 deflate_reversebits_one(u32 code, u8 bits) |
| { |
| unsigned int x = code; |
| |
| x = ((x & 0x5555) << 1) | ((x & 0xAAAA) >> 1); |
| x = ((x & 0x3333) << 2) | ((x & 0xCCCC) >> 2); |
| x = ((x & 0x0F0F) << 4) | ((x & 0xF0F0) >> 4); |
| |
| return (((x & 0x00FF) << 8) | ((x & 0xFF00) >> 8)) >> (16 - bits); |
| } |
| |
| static void Huffman_ReverseBits(u32 *codes, const u8 *lens, unsigned int n) |
| { |
| while (n) { |
| u32 code = *codes; |
| |
| *codes++ = deflate_reversebits_one(code, *lens++); |
| --n; |
| } |
| } |
| |
| static void kite_deflate_init_once(void) |
| { |
| static const u32 static_bl_count[kMaxLen + 1] = { |
| [7] = 279 - 256 + 1, |
| [8] = (143 + 1) + (287 - 280 + 1), |
| [9] = 255 - 144 + 1, |
| }; |
| unsigned int i, c, j, k; |
| |
| if (kstaticHuff_distCodes[31]) |
| return; |
| deflate_genhuffcodes(kstaticHuff_litLenLevels, kstaticHuff_mainCodes, |
| kFixedLenTableSize, static_bl_count); |
| Huffman_ReverseBits(kstaticHuff_mainCodes, kstaticHuff_litLenLevels, |
| kFixedLenTableSize); |
| |
| for (i = 0; i < ARRAY_SIZE(kstaticHuff_distCodes); ++i) |
| kstaticHuff_distCodes[i] = deflate_reversebits_one(i, 5); |
| |
| for (i = 0; i < kNumLenSlots; i++) { |
| c = kLenStart32[i]; |
| j = 1 << kLenExtraBits32[i]; |
| |
| for (k = 0; k < j; k++, c++) |
| g_LenSlots[c] = (u8)i; |
| } |
| |
| c = 0; |
| for (i = 0; i < /*kFastSlots*/ kNumLogBits * 2; i++) { |
| k = 1 << kDistExtraBits[i]; |
| for (j = 0; j < k; j++) |
| g_FastPos[c++] = i; |
| } |
| } |
| |
| static void kite_deflate_scanlens(unsigned int numlens, u8 *lens, u32 *freqs) |
| { |
| int n; /* iterates over all tree elements */ |
| int prevlen = -1; /* last emitted length */ |
| int curlen; /* length of current code */ |
| int nextlen = lens[0]; /* length of next code */ |
| int count = 0; /* repeat count of the current code */ |
| int max_count = 7; /* max repeat count */ |
| int min_count = 4; /* min repeat count */ |
| |
| if (!nextlen) |
| max_count = 138, min_count = 3; |
| |
| for (n = 0; n < numlens; n++) { |
| curlen = nextlen; |
| nextlen = n + 1 < numlens ? lens[n + 1] : -1; |
| ++count; |
| |
| if (count < max_count && curlen == nextlen) |
| continue; |
| if (count < min_count) { |
| freqs[curlen] += count; |
| } else if (curlen != 0) { |
| if (curlen != prevlen) |
| freqs[curlen]++; |
| freqs[kBitLensRepNumber_3_6]++; |
| } else if (count <= 10) { |
| freqs[kBitLens0Number_3_10]++; |
| } else { |
| freqs[kBitLens0Number_11_138]++; |
| } |
| |
| count = 0; |
| prevlen = curlen; |
| if (!nextlen) |
| max_count = 138, min_count = 3; |
| else if (curlen == nextlen) |
| max_count = 6, min_count = 3; |
| else |
| max_count = 7, min_count = 4; |
| } |
| } |
| |
| static void kite_deflate_sendtree(struct kite_deflate *s, const u8 *lens, |
| unsigned int numlens) |
| { |
| int n; /* iterates over all tree elements */ |
| int prevlen = -1; /* last emitted length */ |
| int curlen; /* length of current code */ |
| int nextlen = lens[0]; /* length of next code */ |
| int count = 0; /* repeat count of the current code */ |
| int max_count = 7; /* max repeat count */ |
| int min_count = 4; /* min repeat count */ |
| const u8 *bl_lens = s->tab->levelLens; |
| const u32 *bl_codes = s->tab->levelCodes; |
| |
| if (!nextlen) |
| max_count = 138, min_count = 3; |
| |
| for (n = 0; n < numlens; n++) { |
| curlen = nextlen; |
| nextlen = n + 1 < numlens ? lens[n + 1] : -1; |
| ++count; |
| |
| if (count < max_count && curlen == nextlen) |
| continue; |
| if (count < min_count) { |
| do { |
| writebits(s, bl_codes[curlen], bl_lens[curlen]); |
| } while (--count); |
| } else if (curlen) { |
| if (curlen != prevlen) { |
| writebits(s, bl_codes[curlen], bl_lens[curlen]); |
| count--; |
| } |
| writebits(s, bl_codes[kBitLensRepNumber_3_6], |
| bl_lens[kBitLensRepNumber_3_6]); |
| writebits(s, count - 3, 2); |
| } else if (count <= 10) { |
| writebits(s, bl_codes[kBitLens0Number_3_10], |
| bl_lens[kBitLens0Number_3_10]); |
| writebits(s, count - 3, 3); |
| } else { |
| writebits(s, bl_codes[kBitLens0Number_11_138], |
| bl_lens[kBitLens0Number_11_138]); |
| writebits(s, count - 11, 7); |
| } |
| |
| count = 0; |
| prevlen = curlen; |
| if (!nextlen) |
| max_count = 138, min_count = 3; |
| else if (curlen == nextlen) |
| max_count = 6, min_count = 3; |
| else |
| max_count = 7, min_count = 4; |
| } |
| } |
| |
| static void kite_deflate_setfixedtrees(struct kite_deflate *s) |
| { |
| writebits(s, (kFixedHuffman << 1) + s->lastblock, 3); |
| } |
| |
| static void kite_deflate_sendtrees(struct kite_deflate *s) |
| { |
| struct kite_deflate_table *t = s->tab; |
| unsigned int i; |
| |
| writebits(s, (kDynamicHuffman << 1) + s->lastblock, 3); |
| writebits(s, t->numlitlens - kNumLitLenCodesMin, 5); |
| writebits(s, t->numdistlens - kNumDistCodesMin, 5); |
| writebits(s, t->numblcodes - kNumLensCodesMin, 4); |
| |
| for (i = 0; i < t->numblcodes; i++) |
| writebits(s, t->levelLens[kCodeLengthAlphabetOrder[i]], 3); |
| |
| Huffman_ReverseBits(t->levelCodes, t->levelLens, kLensTableSize); |
| kite_deflate_sendtree(s, t->litLenLevels, t->numlitlens); |
| kite_deflate_sendtree(s, t->distLevels, t->numdistlens); |
| } |
| |
| static inline unsigned int deflateDistSlot(unsigned int pos) |
| { |
| const unsigned int zz = (kNumLogBits - 1) & |
| ((((1U << kNumLogBits) - 1) - pos) >> (31 - 3)); |
| |
| return g_FastPos[pos >> zz] + (zz * 2); |
| } |
| |
| static void kite_deflate_writeblock(struct kite_deflate *s, bool fixed) |
| { |
| int i; |
| u32 *mainCodes, *distCodes; |
| const u8 *litLenLevels, *distLevels; |
| |
| if (!fixed) { |
| struct kite_deflate_table *t = s->tab; |
| |
| mainCodes = t->mainCodes; distCodes = t->distCodes; |
| litLenLevels = t->litLenLevels; distLevels = t->distLevels; |
| |
| Huffman_ReverseBits(mainCodes, litLenLevels, kMainTableSize); |
| Huffman_ReverseBits(distCodes, distLevels, kDistTableSize32); |
| } else { |
| mainCodes = kstaticHuff_mainCodes; |
| distCodes = kstaticHuff_distCodes; |
| |
| litLenLevels = kstaticHuff_litLenLevels; |
| } |
| |
| for (i = 0; i < s->symbols; ++i) { |
| struct kite_deflate_symbol *sym = &s->sym[i]; |
| |
| if (sym->len < kMatchMinLen) { /* literal */ |
| writebits(s, mainCodes[sym->dist], |
| litLenLevels[sym->dist]); |
| } else { |
| unsigned int lenSlot, distSlot; |
| unsigned int lc = sym->len - kMatchMinLen; |
| |
| lenSlot = g_LenSlots[lc]; |
| writebits(s, mainCodes[kSymbolMatch + lenSlot], |
| litLenLevels[kSymbolMatch + lenSlot]); |
| writebits(s, lc - kLenStart32[lenSlot], |
| kLenExtraBits32[lenSlot]); |
| |
| distSlot = deflateDistSlot(sym->dist - 1); |
| writebits(s, distCodes[distSlot], |
| fixed ? 5 : distLevels[distSlot]); |
| writebits(s, sym->dist - 1 - kDistStart[distSlot], |
| kDistExtraBits[distSlot]); |
| } |
| } |
| writebits(s, mainCodes[kSymbolEndOfBlock], |
| litLenLevels[kSymbolEndOfBlock]); |
| } |
| |
| static u32 Huffman_GetPrice(const u32 *freqs, const u8 *lens, u32 num) |
| { |
| u32 price = 0; |
| |
| while (num) { |
| price += (*lens++) * (*freqs++); |
| --num; |
| } |
| return price; |
| } |
| |
| static u32 Huffman_GetPriceEx(const u32 *freqs, const u8 *lens, u32 num, |
| const u8 *extraBits, u32 extraBase) |
| { |
| return Huffman_GetPrice(freqs, lens, num) + |
| Huffman_GetPrice(freqs + extraBase, extraBits, num - extraBase); |
| } |
| |
| /* Adapted from C/HuffEnc.c (7zip) for now */ |
| #define HeapSortDown(p, k, size, temp) \ |
| { for (;;) { \ |
| size_t s = (k << 1); \ |
| if (s > size) break; \ |
| if (s < size && p[s + 1] > p[s]) s++; \ |
| if (temp >= p[s]) break; \ |
| p[k] = p[s]; k = s; \ |
| } p[k] = temp; } |
| |
| static void HeapSort(u32 *p, size_t size) |
| { |
| if (size <= 1) |
| return; |
| p--; |
| { |
| size_t i = size / 2; |
| do |
| { |
| u32 temp = p[i]; |
| size_t k = i; |
| HeapSortDown(p, k, size, temp) |
| } |
| while (--i != 0); |
| } |
| /* |
| do |
| { |
| size_t k = 1; |
| UInt32 temp = p[size]; |
| p[size--] = p[1]; |
| HeapSortDown(p, k, size, temp) |
| } |
| while (size > 1); |
| */ |
| while (size > 3) |
| { |
| u32 temp = p[size]; |
| size_t k = (p[3] > p[2]) ? 3 : 2; |
| p[size--] = p[1]; |
| p[1] = p[k]; |
| HeapSortDown(p, k, size, temp) |
| } |
| { |
| u32 temp = p[size]; |
| p[size] = p[1]; |
| if (size > 2 && p[2] < temp) |
| { |
| p[1] = p[2]; |
| p[2] = temp; |
| } |
| else |
| p[1] = temp; |
| } |
| } |
| |
| #define NUM_BITS 10 |
| #define MASK (((unsigned)1 << NUM_BITS) - 1) |
| |
| static void Huffman_Generate(const u32 *freqs, u32 *p, u8 *lens, |
| unsigned int numSymbols, unsigned int maxLen) |
| { |
| u32 num, i; |
| |
| num = 0; |
| /* if (maxLen > 10) maxLen = 10; */ |
| |
| for (i = 0; i < numSymbols; i++) { |
| u32 freq = freqs[i]; |
| |
| if (!freq) |
| lens[i] = 0; |
| else |
| p[num++] = i | (freq << NUM_BITS); |
| } |
| HeapSort(p, num); |
| |
| if (num < 2) { |
| unsigned int minCode = 0, maxCode = 1; |
| |
| if (num == 1) { |
| maxCode = (unsigned int)p[0] & MASK; |
| if (!maxCode) |
| maxCode++; |
| } |
| p[minCode] = 0; |
| p[maxCode] = 1; |
| lens[minCode] = lens[maxCode] = 1; |
| return; |
| } |
| |
| { |
| u32 b, e, i; |
| |
| i = b = e = 0; |
| do { |
| u32 n, m, freq; |
| |
| n = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++; |
| freq = (p[n] & ~MASK); |
| p[n] = (p[n] & MASK) | (e << NUM_BITS); |
| m = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++; |
| freq += (p[m] & ~MASK); |
| p[m] = (p[m] & MASK) | (e << NUM_BITS); |
| p[e] = (p[e] & MASK) | freq; |
| e++; |
| } while (num - e > 1); |
| |
| { |
| u32 lenCounters[kMaxLen + 1]; |
| |
| for (i = 0; i <= kMaxLen; i++) |
| lenCounters[i] = 0; |
| |
| p[--e] &= MASK; |
| lenCounters[1] = 2; |
| while (e > 0) { |
| u32 len = (p[p[--e] >> NUM_BITS] >> NUM_BITS) + 1; |
| |
| p[e] = (p[e] & MASK) | (len << NUM_BITS); |
| if (len >= maxLen) |
| for (len = maxLen - 1; lenCounters[len] == 0; len--); |
| lenCounters[len]--; |
| lenCounters[(size_t)len + 1] += 2; |
| } |
| |
| { |
| u32 len; |
| |
| i = 0; |
| for (len = maxLen; len != 0; len--) { |
| u32 k; |
| for (k = lenCounters[len]; k != 0; k--) |
| lens[p[i++] & MASK] = (u8)len; |
| } |
| } |
| deflate_genhuffcodes(lens, p, numSymbols, lenCounters); |
| } |
| } |
| } |
| |
| static void kite_deflate_fixdynblock(struct kite_deflate *s) |
| { |
| struct kite_deflate_table *t = s->tab; |
| unsigned int numlitlens, numdistlens, numblcodes; |
| u32 levelFreqs[kLensTableSize] = {0}; |
| u32 opt_mainlen; |
| |
| if (!s->freq_changed) |
| return; |
| |
| /* in order to match zlib */ |
| s->numHuffBits = kMaxLen; |
| // s->numHuffBits = (s->symbols > 18000 ? 12 : |
| // (s->symbols > 7000 ? 11 : (s->symbols > 2000 ? 10 : 9))); |
| |
| Huffman_Generate(s->mainFreqs, t->mainCodes, t->litLenLevels, |
| kMainTableSize, s->numHuffBits); |
| Huffman_Generate(s->distFreqs, t->distCodes, t->distLevels, |
| kDistTableSize32, s->numHuffBits); |
| |
| /* code lengths for the literal/length alphabet */ |
| numlitlens = kMainTableSize; |
| while (numlitlens > kNumLitLenCodesMin && |
| !t->litLenLevels[numlitlens - 1]) |
| --numlitlens; |
| |
| /* code lengths for the distance alphabet */ |
| numdistlens = kDistTableSize32; |
| while (numdistlens > kNumDistCodesMin && |
| !t->distLevels[numdistlens - 1]) |
| --numdistlens; |
| |
| kite_deflate_scanlens(numlitlens, t->litLenLevels, levelFreqs); |
| kite_deflate_scanlens(numdistlens, t->distLevels, levelFreqs); |
| Huffman_Generate(levelFreqs, t->levelCodes, t->levelLens, |
| kLensTableSize, 7); |
| numblcodes = kLensTableSize; |
| while (numblcodes > kNumLensCodesMin && |
| !t->levelLens[kCodeLengthAlphabetOrder[numblcodes - 1]]) |
| --numblcodes; |
| |
| t->numlitlens = numlitlens; |
| t->numdistlens = numdistlens; |
| t->numblcodes = numblcodes; |
| |
| opt_mainlen = Huffman_GetPriceEx(s->mainFreqs, t->litLenLevels, |
| kMainTableSize, kLenExtraBits32, kSymbolMatch) + |
| Huffman_GetPriceEx(s->distFreqs, t->distLevels, |
| kDistTableSize32, kDistExtraBits, 0); |
| s->costbits = 3 + 5 + 5 + 4 + 3 * numblcodes + |
| Huffman_GetPriceEx(levelFreqs, t->levelLens, |
| kLensTableSize, kLevelExtraBits, kTableDirectLevels) + |
| opt_mainlen; |
| s->freq_changed = false; |
| } |
| |
| |
| /* |
| * an array used used by the LZ-based encoder to hold the length-distance pairs |
| * found by LZ matchfinder. |
| */ |
| struct kite_match { |
| unsigned int len; |
| unsigned int dist; |
| }; |
| |
| struct kite_matchfinder { |
| /* pointer to buffer with data to be compressed */ |
| const u8 *buffer; |
| |
| /* indicate the first byte that doesn't contain valid input data */ |
| const u8 *end; |
| |
| /* LZ matchfinder hash chain representation */ |
| u32 *hash, *chain; |
| |
| u32 base; |
| |
| /* indicate the next byte to run through the match finder */ |
| u32 offset; |
| |
| u32 cyclic_pos; |
| |
| /* maximum length of a match that the matchfinder will try to find. */ |
| u16 nice_len; |
| |
| /* the total sliding window size */ |
| u16 wsiz; |
| |
| /* how many rounds a matchfinder searches on a hash chain for */ |
| u16 depth; |
| |
| /* do not perform lazy search no less than this match length */ |
| u16 max_lazy; |
| |
| /* reduce lazy search no less than this match length */ |
| u8 good_len; |
| |
| /* current match for lazy matching */ |
| struct kite_match *matches; |
| struct kite_match matches_matrix[2][4]; |
| }; |
| |
| /* |
| * This mysterious table is just the CRC of each possible byte. It can be |
| * computed using the standard bit-at-a-time methods. The polynomial can |
| * be seen in entry 128, 0x8408. This corresponds to x^0 + x^5 + x^12. |
| * Add the implicit x^16, and you have the standard CRC-CCITT. |
| */ |
| u16 const crc_ccitt_table[256] __attribute__((__aligned__(128))) = { |
| 0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf, |
| 0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7, |
| 0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e, |
| 0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876, |
| 0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd, |
| 0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5, |
| 0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c, |
| 0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974, |
| 0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb, |
| 0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3, |
| 0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a, |
| 0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72, |
| 0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9, |
| 0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1, |
| 0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738, |
| 0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70, |
| 0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7, |
| 0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff, |
| 0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036, |
| 0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e, |
| 0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5, |
| 0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd, |
| 0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134, |
| 0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c, |
| 0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3, |
| 0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb, |
| 0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232, |
| 0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a, |
| 0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1, |
| 0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9, |
| 0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330, |
| 0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78 |
| }; |
| |
| int kite_mf_getmatches_hc3(struct kite_matchfinder *mf, u16 depth, u16 bestlen) |
| { |
| const u8 *cur = mf->buffer + mf->offset; |
| const u8 *qbase = mf->buffer - mf->base; |
| u32 curMatch; |
| unsigned int v, hv, i, k, p, wsiz; |
| |
| if (mf->end - cur < bestlen + 1) |
| return 0; |
| |
| v = get_unaligned((u16 *)cur); |
| hv = v ^ crc_ccitt_table[cur[2]]; |
| curMatch = mf->hash[hv]; |
| p = mf->base + mf->offset; |
| mf->hash[hv] = p; |
| mf->chain[mf->cyclic_pos] = curMatch; |
| wsiz = mf->wsiz; |
| k = 1; |
| |
| if (depth) { |
| unsigned int wpos = wsiz + mf->cyclic_pos; |
| |
| hv = min_t(unsigned int, mf->nice_len, mf->end - cur); |
| DBG_BUGON(hv > kMatchMaxLen32); |
| do { |
| unsigned int diff = p - curMatch; |
| const u8 *q; |
| |
| if (diff >= wsiz) |
| break; |
| |
| q = qbase + curMatch; |
| curMatch = mf->chain[(wpos - diff) & (wsiz - 1)]; |
| if (v == get_unaligned((u16 *)q) && (bestlen < 3 || ( |
| get_unaligned((u16 *)(cur + bestlen - 1)) == |
| get_unaligned((u16 *)(q + bestlen - 1)) && |
| !memcmp(cur + 3, q + 3, bestlen - 3)))) { |
| DBG_BUGON(cur[2] != q[2]); |
| i = erofs_memcmp2(cur + bestlen + 1, |
| q + bestlen + 1, hv - bestlen - 1); |
| bestlen += 1 + i; |
| |
| k -= (k >= ARRAY_SIZE(mf->matches_matrix[0])); |
| mf->matches[k++] = (struct kite_match) { |
| .len = bestlen, |
| .dist = diff, |
| }; |
| if (bestlen >= hv) |
| break; |
| } |
| } while (--depth); |
| } |
| mf->offset++; |
| mf->cyclic_pos = (mf->cyclic_pos + 1) & (wsiz - 1); |
| return k - 1; |
| } |
| |
| /* let's align with zlib */ |
| static const struct kite_matchfinder_cfg { |
| u16 good_length; /* reduce lazy search above this match length */ |
| u16 max_lazy; /* do not perform lazy search above this match length */ |
| u16 nice_length; /* quit search above this match length */ |
| u16 depth; |
| bool lazy_search; |
| } kite_mfcfg[10] = { |
| /* good lazy nice depth */ |
| /* 0 */ {0, 0, 0, 0, false}, /* store only [unsupported] */ |
| /* 1 */ {4, 4, 8, 4, false}, /* maximum speed, no lazy matches */ |
| /* 2 */ {4, 5, 16, 8, false}, |
| /* 3 */ {4, 6, 32, 32, false}, |
| |
| /* 4 */ {4, 4, 16, 16, true}, /* lazy matches */ |
| /* 5 */ {8, 16, 32, 32, true}, |
| /* 6 */ {8, 16, 128, 128, true}, |
| /* 7 */ {8, 32, 128, 256, true}, |
| /* 8 */ {32, 128, 258, 1024, true}, |
| /* 9 */ {32, 258, 258, 4096, true}, /* maximum compression */ |
| }; |
| |
| static int kite_mf_init(struct kite_matchfinder *mf, int wsiz, int level) |
| { |
| const struct kite_matchfinder_cfg *cfg; |
| |
| if (!level || level >= ARRAY_SIZE(kite_mfcfg)) |
| return -EINVAL; |
| cfg = &kite_mfcfg[level]; |
| |
| if (wsiz > kHistorySize32 || (1 << ilog2(wsiz)) != wsiz) |
| return -EINVAL; |
| |
| mf->hash = calloc(0x10000, sizeof(mf->hash[0])); |
| if (!mf->hash) |
| return -ENOMEM; |
| |
| mf->chain = malloc(sizeof(mf->chain[0]) * wsiz); |
| if (!mf->chain) { |
| free(mf->hash); |
| mf->hash = NULL; |
| return -ENOMEM; |
| } |
| mf->wsiz = wsiz; |
| |
| mf->good_len = cfg->good_length; |
| mf->nice_len = cfg->nice_length; |
| mf->depth = cfg->depth; |
| mf->max_lazy = cfg->max_lazy; |
| return cfg->lazy_search; |
| } |
| |
| static void kite_mf_reset(struct kite_matchfinder *mf, |
| const void *buffer, const void *end) |
| { |
| mf->buffer = buffer; |
| mf->end = end; |
| |
| /* |
| * Set the initial value as max_distance + 1. This would avoid hash |
| * zero initialization. |
| */ |
| mf->base += mf->offset + kHistorySize32 + 1; |
| |
| mf->offset = 0; |
| mf->cyclic_pos = 0; |
| |
| mf->matches = mf->matches_matrix[0]; |
| mf->matches_matrix[0][0].len = |
| mf->matches_matrix[1][0].len = kMatchMinLen - 1; |
| } |
| |
| static bool deflate_count_code(struct kite_deflate *s, bool literal, |
| unsigned int lenSlot, unsigned int distSlot) |
| { |
| struct kite_deflate_table *t = s->tab; |
| unsigned int lenbase = (literal ? 0 : kSymbolMatch); |
| u64 rem = (s->outlen - s->pos_out) * 8 - s->bitpos; |
| bool recalc = false; |
| unsigned int bits; |
| |
| s->freq_changed = true; |
| ++s->mainFreqs[lenbase + lenSlot]; |
| if (!literal) |
| ++s->distFreqs[distSlot]; |
| |
| if (s->encode_mode == 1) { |
| if (literal) { |
| bits = kstaticHuff_litLenLevels[lenSlot]; |
| goto out; |
| } |
| bits = kstaticHuff_litLenLevels[kSymbolMatch + lenSlot] + |
| kLenExtraBits32[lenSlot] + 5 + kDistExtraBits[distSlot]; |
| goto out; |
| } |
| |
| /* XXX: more ideas to be done later */ |
| recalc |= (!literal && !t->distLevels[distSlot]); |
| recalc |= !t->litLenLevels[lenbase + lenSlot]; |
| if (recalc) { |
| kite_dbg("recalc %c lS %u dS %u", literal ? 'l' : 'm', |
| lenSlot, distSlot); |
| s->tab = s->tables + (s->tab == s->tables); |
| kite_deflate_fixdynblock(s); |
| bits = 0; |
| goto out; |
| } |
| |
| if (literal) { |
| bits = t->litLenLevels[lenSlot]; |
| goto out; |
| } |
| |
| bits = t->distLevels[distSlot] + kDistExtraBits[distSlot] + |
| t->litLenLevels[kSymbolMatch + lenSlot] + |
| kLenExtraBits32[lenSlot]; |
| out: |
| if (rem < s->costbits + bits) { |
| --s->mainFreqs[lenbase + lenSlot]; |
| if (!literal) |
| --s->distFreqs[distSlot]; |
| if (recalc) |
| s->tab = s->tables + (s->tab == s->tables); |
| return false; |
| } |
| s->costbits += bits; |
| return true; |
| } |
| |
| static bool kite_deflate_tally(struct kite_deflate *s, |
| struct kite_match *match) |
| { |
| struct kite_deflate_symbol *sym = s->sym + s->symbols; |
| u32 fixedcost = ~0; |
| bool hassp; |
| |
| *sym = (struct kite_deflate_symbol) { |
| .len = match->len, |
| .dist = match->dist, |
| }; |
| |
| retry: |
| if (sym->len < kMatchMinLen) { |
| hassp = deflate_count_code(s, true, sym->dist, 0); |
| } else { |
| unsigned int lc = sym->len - kMatchMinLen; |
| unsigned int lenSlot = g_LenSlots[lc]; |
| unsigned int distSlot = deflateDistSlot(sym->dist - 1); |
| |
| hassp = deflate_count_code(s, false, lenSlot, distSlot); |
| } |
| |
| if (!hassp) { |
| if (s->encode_mode == 1) { |
| fixedcost = s->costbits; |
| s->encode_mode = 2; |
| goto retry; |
| } |
| s->lastblock = true; |
| if (fixedcost <= s->costbits) |
| s->encode_mode = 1; |
| return true; |
| } |
| ++s->symbols; |
| return false; |
| } |
| |
| static void kite_deflate_writestore(struct kite_deflate *s) |
| { |
| bool fb = !s->startpos && !s->bitpos; |
| unsigned int totalsiz = s->pos_in - s->prev_valid - s->startpos; |
| |
| do { |
| unsigned int len = min_t(unsigned int, totalsiz, 65535); |
| |
| totalsiz -= len; |
| writebits(s, (fb << 3) | (kStored << 1) | |
| (s->lastblock && !totalsiz), 3 + fb); |
| flushbits(s); |
| writebits(s, len, 16); |
| writebits(s, len ^ 0xffff, 16); |
| flushbits(s); |
| memcpy(s->out + s->pos_out, s->in + s->startpos, len); |
| s->pos_out += len; |
| s->startpos += len; |
| } while (totalsiz); |
| } |
| |
| static void kite_deflate_endblock(struct kite_deflate *s) |
| { |
| if (s->encode_mode == 1) { |
| u32 fixedcost = s->costbits; |
| unsigned int storelen, storeblocks, storecost; |
| |
| kite_deflate_fixdynblock(s); |
| if (fixedcost > s->costbits) |
| s->encode_mode = 2; |
| else |
| s->costbits = fixedcost; |
| |
| storelen = s->pos_in - s->prev_valid - s->startpos; |
| storeblocks = max(DIV_ROUND_UP(storelen, 65535), 1U); |
| storecost = (8 - s->bitpos) + storeblocks - 1 + |
| storeblocks * 32 + storelen * 8; |
| if (s->costbits > storecost) { |
| s->costbits = storecost; |
| s->encode_mode = 0; |
| } |
| } |
| |
| s->lastblock |= (s->costbits + s->bitpos >= |
| (s->outlen - s->pos_out) * 8); |
| } |
| |
| static void kite_deflate_startblock(struct kite_deflate *s) |
| { |
| memset(s->mainFreqs, 0, sizeof(s->mainFreqs)); |
| memset(s->distFreqs, 0, sizeof(s->distFreqs)); |
| memset(s->tables, 0, sizeof(s->tables[0])); |
| s->symbols = 0; |
| s->mainFreqs[kSymbolEndOfBlock]++; |
| s->encode_mode = 1; |
| s->tab = s->tables; |
| s->costbits = 3 + kstaticHuff_litLenLevels[kSymbolEndOfBlock]; |
| } |
| |
| static bool kite_deflate_commitblock(struct kite_deflate *s) |
| { |
| if (s->encode_mode == 1) { |
| kite_deflate_setfixedtrees(s); |
| kite_deflate_writeblock(s, true); |
| } else if (s->encode_mode == 2) { |
| kite_deflate_sendtrees(s); |
| kite_deflate_writeblock(s, false); |
| } else { |
| kite_deflate_writestore(s); |
| } |
| s->startpos = s->pos_in - s->prev_valid; |
| return s->lastblock; |
| } |
| |
| static bool kite_deflate_fast(struct kite_deflate *s) |
| { |
| struct kite_matchfinder *mf = s->mf; |
| |
| kite_deflate_startblock(s); |
| while (1) { |
| int matches = kite_mf_getmatches_hc3(mf, mf->depth, |
| kMatchMinLen - 1); |
| |
| if (matches) { |
| unsigned int len = mf->matches[matches].len; |
| unsigned int dist = mf->matches[matches].dist; |
| |
| if (len == kMatchMinLen && dist > ZLIB_DISTANCE_TOO_FAR) |
| goto nomatch; |
| |
| kite_dbg("%u matches found: longest [%u,%u] of distance %u", |
| matches, s->pos_in, s->pos_in + len - 1, dist); |
| |
| if (kite_deflate_tally(s, mf->matches + matches)) |
| break; |
| s->pos_in += len; |
| /* skip the rest bytes */ |
| while (--len) |
| (void)kite_mf_getmatches_hc3(mf, 0, 0); |
| } else { |
| nomatch: |
| mf->matches[0].dist = s->in[s->pos_in]; |
| if (isprint(s->in[s->pos_in])) |
| kite_dbg("literal %c pos_in %u", s->in[s->pos_in], s->pos_in); |
| else |
| kite_dbg("literal %x pos_in %u", s->in[s->pos_in], s->pos_in); |
| |
| if (kite_deflate_tally(s, mf->matches)) |
| break; |
| ++s->pos_in; |
| } |
| |
| s->lastblock |= (s->pos_in >= s->inlen); |
| if (s->pos_in >= s->inlen || s->symbols >= s->max_symbols) { |
| kite_deflate_endblock(s); |
| break; |
| } |
| } |
| return kite_deflate_commitblock(s); |
| } |
| |
| static bool kite_deflate_slow(struct kite_deflate *s) |
| { |
| struct kite_matchfinder *mf = s->mf; |
| bool flush = false; |
| |
| kite_deflate_startblock(s); |
| while (1) { |
| struct kite_match *prev_matches = mf->matches; |
| unsigned int len = kMatchMinLen - 1; |
| int matches; |
| unsigned int len0; |
| |
| mf->matches = mf->matches_matrix[ |
| mf->matches == mf->matches_matrix[0]]; |
| mf->matches[0].dist = s->in[s->pos_in]; |
| |
| len0 = prev_matches[s->prev_longest].len; |
| if (len0 < mf->max_lazy) { |
| matches = kite_mf_getmatches_hc3(mf, mf->depth >> |
| (len0 >= mf->good_len), len0); |
| if (matches) { |
| len = mf->matches[matches].len; |
| if (len == kMatchMinLen && |
| mf->matches[matches].dist > ZLIB_DISTANCE_TOO_FAR) { |
| matches = 0; |
| len = kMatchMinLen - 1; |
| } |
| } |
| } else { |
| matches = 0; |
| (void)kite_mf_getmatches_hc3(mf, 0, 0); |
| } |
| |
| if (len < len0) { |
| if (kite_deflate_tally(s, |
| prev_matches + s->prev_longest)) |
| break; |
| |
| s->pos_in += --len0; |
| /* skip the rest bytes */ |
| while (--len0) |
| (void)kite_mf_getmatches_hc3(mf, 0, 0); |
| s->prev_valid = false; |
| s->prev_longest = 0; |
| } else { |
| if (!s->prev_valid) |
| s->prev_valid = true; |
| else if (kite_deflate_tally(s, prev_matches)) |
| break; |
| ++s->pos_in; |
| s->prev_longest = matches; |
| } |
| |
| s->lastblock |= (s->pos_in >= s->inlen); |
| if (s->pos_in >= s->inlen) { |
| flush = true; |
| break; |
| } |
| if (s->symbols >= s->max_symbols) { |
| kite_deflate_endblock(s); |
| break; |
| } |
| } |
| |
| if (flush && s->prev_valid) { |
| (void)kite_deflate_tally(s, mf->matches + s->prev_longest); |
| s->prev_valid = false; |
| } |
| return kite_deflate_commitblock(s); |
| } |
| |
| void kite_deflate_end(struct kite_deflate *s) |
| { |
| if (s->mf) { |
| if (s->mf->hash) |
| free(s->mf->hash); |
| if (s->mf->chain) |
| free(s->mf->chain); |
| free(s->mf); |
| } |
| if (s->sym) |
| free(s->sym); |
| free(s); |
| } |
| |
| struct kite_deflate *kite_deflate_init(int level, unsigned int dict_size) |
| { |
| struct kite_deflate *s; |
| int err; |
| |
| kite_deflate_init_once(); |
| s = calloc(1, sizeof(*s)); |
| if (!s) |
| return ERR_PTR(-ENOMEM); |
| |
| s->max_symbols = 16384; |
| s->sym = malloc(sizeof(s->sym[0]) * s->max_symbols); |
| if (!s->sym) { |
| err = -ENOMEM; |
| goto err_out; |
| } |
| |
| s->mf = malloc(sizeof(*s->mf)); |
| if (!s->mf) { |
| err = -ENOMEM; |
| goto err_out; |
| } |
| |
| if (!dict_size) |
| dict_size = kHistorySize32; |
| |
| err = kite_mf_init(s->mf, dict_size, level); |
| if (err < 0) |
| goto err_out; |
| |
| s->lazy_search = err; |
| return s; |
| err_out: |
| if (s->mf) |
| free(s->mf); |
| if (s->sym) |
| free(s->sym); |
| free(s); |
| return ERR_PTR(err); |
| } |
| |
| int kite_deflate_destsize(struct kite_deflate *s, const u8 *in, u8 *out, |
| unsigned int *srcsize, unsigned int target_dstsize) |
| { |
| memset(s, 0, offsetof(struct kite_deflate, mainFreqs)); |
| s->in = in; |
| s->inlen = *srcsize; |
| s->out = out; |
| s->outlen = target_dstsize; |
| kite_mf_reset(s->mf, in, in + s->inlen); |
| |
| if (s->lazy_search) |
| while (!kite_deflate_slow(s)); |
| else |
| while (!kite_deflate_fast(s)); |
| flushbits(s); |
| |
| *srcsize = s->startpos; |
| return s->pos_out; |
| } |
| |
| #if TEST |
| #include <unistd.h> |
| #include <fcntl.h> |
| #include <sys/mman.h> |
| |
| int main(int argc, char *argv[]) |
| { |
| int fd; |
| u64 filelength; |
| u8 out[1048576], *buf; |
| int dstsize = 4096; |
| unsigned int srcsize, outsize; |
| struct kite_deflate *s; |
| |
| fd = open(argv[1], O_RDONLY); |
| if (fd < 0) |
| return -errno; |
| if (argc > 2) |
| dstsize = atoi(argv[2]); |
| filelength = lseek(fd, 0, SEEK_END); |
| |
| s = kite_deflate_init(9, 0); |
| if (IS_ERR(s)) |
| return PTR_ERR(s); |
| |
| filelength = lseek(fd, 0, SEEK_END); |
| buf = mmap(NULL, filelength, PROT_READ, MAP_SHARED, fd, 0); |
| if (buf == MAP_FAILED) |
| return -errno; |
| close(fd); |
| |
| srcsize = filelength; |
| outsize = kite_deflate_destsize(s, buf, out, &srcsize, dstsize); |
| fd = open("out.txt", O_WRONLY | O_CREAT | O_TRUNC, 0644); |
| write(fd, out, outsize); |
| close(fd); |
| kite_deflate_end(s); |
| return 0; |
| } |
| #endif |