| /* |
| * Copyright (c) Meta Platforms, Inc. and affiliates. |
| * All rights reserved. |
| * |
| * This source code is licensed under both the BSD-style license (found in the |
| * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
| * in the COPYING file in the root directory of this source tree). |
| * You may select, at your option, one of the above-listed licenses. |
| */ |
| |
| /** |
| * This fuzz target performs a zstd round-trip test by generating an arbitrary |
| * array of sequences, generating the associated source buffer, calling |
| * ZSTD_compressSequences(), and then decompresses and compares the result with |
| * the original generated source buffer. |
| */ |
| |
| #define ZSTD_STATIC_LINKING_ONLY |
| |
| #include <stddef.h> |
| #include <stdlib.h> |
| #include <stdio.h> |
| #include <string.h> |
| #include <time.h> |
| #include "fuzz_helpers.h" |
| #include "zstd_helpers.h" |
| #include "fuzz_data_producer.h" |
| |
| static ZSTD_CCtx* cctx = NULL; |
| static ZSTD_DCtx* dctx = NULL; |
| static void* literalsBuffer = NULL; |
| static void* generatedSrc = NULL; |
| static ZSTD_Sequence* generatedSequences = NULL; |
| |
| static void* dictBuffer = NULL; |
| static ZSTD_CDict* cdict = NULL; |
| static ZSTD_DDict* ddict = NULL; |
| |
| #define ZSTD_FUZZ_GENERATED_SRC_MAXSIZE (1 << 20) /* Allow up to 1MB generated data */ |
| #define ZSTD_FUZZ_GENERATED_LITERALS_SIZE (1 << 20) /* Fixed size 1MB literals buffer */ |
| #define ZSTD_FUZZ_MATCHLENGTH_MAXSIZE (1 << 18) /* Allow up to 256KB matches */ |
| #define ZSTD_FUZZ_GENERATED_DICT_MAXSIZE (1 << ZSTD_WINDOWLOG_MAX_32) /* Allow up to 1 << ZSTD_WINDOWLOG_MAX_32 dictionary */ |
| #define ZSTD_FUZZ_MAX_NBSEQ (1 << 17) /* Maximum of 128K sequences */ |
| |
| /* Deterministic random number generator */ |
| #define FUZZ_RDG_rotl32(x,r) ((x << r) | (x >> (32 - r))) |
| static uint32_t FUZZ_RDG_rand(uint32_t* src) |
| { |
| static const uint32_t prime1 = 2654435761U; |
| static const uint32_t prime2 = 2246822519U; |
| uint32_t rand32 = *src; |
| rand32 *= prime1; |
| rand32 ^= prime2; |
| rand32 = FUZZ_RDG_rotl32(rand32, 13); |
| *src = rand32; |
| return rand32 >> 5; |
| } |
| |
| /* Make a pseudorandom string - this simple function exists to avoid |
| * taking a dependency on datagen.h to have RDG_genBuffer(). |
| */ |
| static char* generatePseudoRandomString(char* str, size_t size, FUZZ_dataProducer_t* producer) { |
| const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJK1234567890!@#$^&*()_"; |
| uint32_t seed = FUZZ_dataProducer_uint32(producer); |
| if (size) { |
| for (size_t n = 0; n < size; n++) { |
| int key = FUZZ_RDG_rand(&seed) % (int) (sizeof charset - 1); |
| str[n] = charset[key]; |
| } |
| } |
| return str; |
| } |
| |
| /* Returns size of source buffer */ |
| static size_t decodeSequences(void* dst, size_t nbSequences, |
| size_t literalsSize, |
| const void* dict, size_t dictSize, |
| ZSTD_sequenceFormat_e mode) |
| { |
| const uint8_t* litPtr = literalsBuffer; |
| const uint8_t* const litBegin = literalsBuffer; |
| const uint8_t* const litEnd = litBegin + literalsSize; |
| const uint8_t* dictPtr = dict; |
| uint8_t* op = dst; |
| const uint8_t* const oend = (uint8_t*)dst + ZSTD_FUZZ_GENERATED_SRC_MAXSIZE; |
| size_t generatedSrcBufferSize = 0; |
| size_t bytesWritten = 0; |
| |
| for (size_t i = 0; i < nbSequences; ++i) { |
| /* block boundary */ |
| if (generatedSequences[i].offset == 0) |
| FUZZ_ASSERT(generatedSequences[i].matchLength == 0); |
| |
| if (litPtr + generatedSequences[i].litLength > litEnd) { |
| litPtr = litBegin; |
| } |
| memcpy(op, litPtr, generatedSequences[i].litLength); |
| bytesWritten += generatedSequences[i].litLength; |
| op += generatedSequences[i].litLength; |
| litPtr += generatedSequences[i].litLength; |
| |
| /* Copy over the match */ |
| { size_t matchLength = generatedSequences[i].matchLength; |
| size_t j = 0; |
| size_t k = 0; |
| if (dictSize != 0) { |
| if (generatedSequences[i].offset > bytesWritten) { /* Offset goes into the dictionary */ |
| size_t dictOffset = generatedSequences[i].offset - bytesWritten; |
| size_t matchInDict = MIN(matchLength, dictOffset); |
| for (; k < matchInDict; ++k) { |
| op[k] = dictPtr[dictSize - dictOffset + k]; |
| } |
| matchLength -= matchInDict; |
| op += matchInDict; |
| } |
| } |
| for (; j < matchLength; ++j) { |
| op[j] = op[j - generatedSequences[i].offset]; |
| } |
| op += j; |
| FUZZ_ASSERT(generatedSequences[i].matchLength == j + k); |
| bytesWritten += generatedSequences[i].matchLength; |
| } |
| } |
| generatedSrcBufferSize = bytesWritten; |
| FUZZ_ASSERT(litPtr <= litEnd); |
| if (mode == ZSTD_sf_noBlockDelimiters) { |
| const uint32_t lastLLSize = (uint32_t)(litEnd - litPtr); |
| if (lastLLSize <= oend - op) { |
| memcpy(op, litPtr, lastLLSize); |
| generatedSrcBufferSize += lastLLSize; |
| } } |
| return generatedSrcBufferSize; |
| } |
| |
| /* Returns nb sequences generated |
| * Note : random sequences are always valid in ZSTD_sf_noBlockDelimiters mode. |
| * However, it can fail with ZSTD_sf_explicitBlockDelimiters, |
| * due to potential lack of space in |
| */ |
| static size_t generateRandomSequences(FUZZ_dataProducer_t* producer, |
| size_t literalsSizeLimit, size_t dictSize, |
| size_t windowLog, ZSTD_sequenceFormat_e mode) |
| { |
| const uint32_t repCode = 0; /* not used by sequence ingestion api */ |
| size_t windowSize = 1ULL << windowLog; |
| size_t blockSizeMax = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); |
| uint32_t matchLengthMax = ZSTD_FUZZ_MATCHLENGTH_MAXSIZE; |
| uint32_t bytesGenerated = 0; |
| uint32_t nbSeqGenerated = 0; |
| uint32_t isFirstSequence = 1; |
| uint32_t blockSize = 0; |
| |
| if (mode == ZSTD_sf_explicitBlockDelimiters) { |
| /* ensure that no sequence can be larger than one block */ |
| literalsSizeLimit = MIN(literalsSizeLimit, blockSizeMax/2); |
| matchLengthMax = MIN(matchLengthMax, blockSizeMax/2); |
| } |
| |
| while ( nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ - 3 /* extra room for explicit delimiters */ |
| && bytesGenerated < ZSTD_FUZZ_GENERATED_SRC_MAXSIZE |
| && !FUZZ_dataProducer_empty(producer)) { |
| uint32_t matchLength; |
| uint32_t matchBound = matchLengthMax; |
| uint32_t offset; |
| uint32_t offsetBound; |
| const uint32_t minLitLength = (isFirstSequence && (dictSize == 0)); |
| const uint32_t litLength = FUZZ_dataProducer_uint32Range(producer, minLitLength, (uint32_t)literalsSizeLimit); |
| bytesGenerated += litLength; |
| if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) { |
| break; |
| } |
| offsetBound = (bytesGenerated > windowSize) ? windowSize : bytesGenerated + (uint32_t)dictSize; |
| offset = FUZZ_dataProducer_uint32Range(producer, 1, offsetBound); |
| if (dictSize > 0 && bytesGenerated <= windowSize) { |
| /* Prevent match length from being such that it would be associated with an offset too large |
| * from the decoder's perspective. If not possible (match would be too small), |
| * then reduce the offset if necessary. |
| */ |
| const size_t bytesToReachWindowSize = windowSize - bytesGenerated; |
| if (bytesToReachWindowSize < ZSTD_MINMATCH_MIN) { |
| const uint32_t newOffsetBound = offsetBound > windowSize ? windowSize : offsetBound; |
| offset = FUZZ_dataProducer_uint32Range(producer, 1, newOffsetBound); |
| } else { |
| matchBound = MIN(matchLengthMax, (uint32_t)bytesToReachWindowSize); |
| } |
| } |
| matchLength = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, matchBound); |
| bytesGenerated += matchLength; |
| if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) { |
| break; |
| } |
| { ZSTD_Sequence seq = {offset, litLength, matchLength, repCode}; |
| const uint32_t lastLits = FUZZ_dataProducer_uint32Range(producer, 0, litLength); |
| #define SPLITPROB 6000 |
| #define SPLITMARK 5234 |
| const int split = (FUZZ_dataProducer_uint32Range(producer, 0, SPLITPROB) == SPLITMARK); |
| if (mode == ZSTD_sf_explicitBlockDelimiters) { |
| const size_t seqSize = seq.litLength + seq.matchLength; |
| if (blockSize + seqSize > blockSizeMax) { /* reaching limit : must end block now */ |
| const ZSTD_Sequence endBlock = {0, 0, 0, 0}; |
| generatedSequences[nbSeqGenerated++] = endBlock; |
| blockSize = seqSize; |
| } |
| if (split) { |
| const ZSTD_Sequence endBlock = {0, lastLits, 0, 0}; |
| generatedSequences[nbSeqGenerated++] = endBlock; |
| assert(lastLits <= seq.litLength); |
| seq.litLength -= lastLits; |
| blockSize = seqSize - lastLits; |
| } else { |
| blockSize += seqSize; |
| } |
| } |
| generatedSequences[nbSeqGenerated++] = seq; |
| isFirstSequence = 0; |
| } |
| } |
| |
| if (mode == ZSTD_sf_explicitBlockDelimiters) { |
| /* always end sequences with a block delimiter */ |
| const ZSTD_Sequence endBlock = {0, 0, 0, 0}; |
| assert(nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ); |
| generatedSequences[nbSeqGenerated++] = endBlock; |
| } |
| return nbSeqGenerated; |
| } |
| |
| static size_t roundTripTest(void* result, size_t resultCapacity, |
| void* compressed, size_t compressedCapacity, |
| const void* src, size_t srcSize, |
| const ZSTD_Sequence* seqs, size_t seqSize, |
| unsigned hasDict, |
| ZSTD_sequenceFormat_e mode) |
| { |
| size_t cSize; |
| size_t dSize; |
| |
| if (hasDict) { |
| FUZZ_ZASSERT(ZSTD_CCtx_refCDict(cctx, cdict)); |
| FUZZ_ZASSERT(ZSTD_DCtx_refDDict(dctx, ddict)); |
| } |
| |
| cSize = ZSTD_compressSequences(cctx, compressed, compressedCapacity, |
| seqs, seqSize, |
| src, srcSize); |
| if ( (ZSTD_getErrorCode(cSize) == ZSTD_error_dstSize_tooSmall) |
| && (mode == ZSTD_sf_explicitBlockDelimiters) ) { |
| /* Valid scenario : in explicit delimiter mode, |
| * it might be possible for the compressed size to outgrow dstCapacity. |
| * In which case, it's still a valid fuzzer scenario, |
| * but no roundtrip shall be possible */ |
| return 0; |
| } |
| /* round-trip */ |
| FUZZ_ZASSERT(cSize); |
| dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize); |
| FUZZ_ZASSERT(dSize); |
| FUZZ_ASSERT_MSG(dSize == srcSize, "Incorrect regenerated size"); |
| FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, result, srcSize), "Corruption!"); |
| return dSize; |
| } |
| |
| int LLVMFuzzerTestOneInput(const uint8_t* src, size_t size) |
| { |
| void* rBuf; |
| size_t rBufSize; |
| void* cBuf; |
| size_t cBufSize; |
| size_t generatedSrcSize; |
| size_t nbSequences; |
| size_t dictSize = 0; |
| unsigned hasDict; |
| unsigned wLog; |
| int cLevel; |
| ZSTD_sequenceFormat_e mode; |
| |
| FUZZ_dataProducer_t* const producer = FUZZ_dataProducer_create(src, size); |
| FUZZ_ASSERT(producer); |
| |
| if (!cctx) { |
| cctx = ZSTD_createCCtx(); |
| FUZZ_ASSERT(cctx); |
| } |
| if (!dctx) { |
| dctx = ZSTD_createDCtx(); |
| FUZZ_ASSERT(dctx); |
| } |
| |
| /* Generate window log first so we don't generate offsets too large */ |
| wLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); |
| cLevel = FUZZ_dataProducer_int32Range(producer, -3, 22); |
| mode = (ZSTD_sequenceFormat_e)FUZZ_dataProducer_int32Range(producer, 0, 1); |
| |
| ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); |
| ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0); |
| ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel); |
| ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, wLog); |
| ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, ZSTD_MINMATCH_MIN); |
| ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1); |
| ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, mode); |
| ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach); |
| |
| if (!literalsBuffer) { |
| literalsBuffer = FUZZ_malloc(ZSTD_FUZZ_GENERATED_LITERALS_SIZE); |
| FUZZ_ASSERT(literalsBuffer); |
| literalsBuffer = generatePseudoRandomString(literalsBuffer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, producer); |
| } |
| |
| if (!dictBuffer) { /* Generate global dictionary buffer */ |
| ZSTD_compressionParameters cParams; |
| |
| /* Generate a large dictionary buffer */ |
| dictBuffer = calloc(ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, 1); |
| FUZZ_ASSERT(dictBuffer); |
| |
| /* Create global cdict and ddict */ |
| cParams = ZSTD_getCParams(1, ZSTD_FUZZ_GENERATED_SRC_MAXSIZE, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE); |
| cParams.minMatch = ZSTD_MINMATCH_MIN; |
| cParams.hashLog = ZSTD_HASHLOG_MIN; |
| cParams.chainLog = ZSTD_CHAINLOG_MIN; |
| |
| cdict = ZSTD_createCDict_advanced(dictBuffer, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, ZSTD_dlm_byRef, ZSTD_dct_rawContent, cParams, ZSTD_defaultCMem); |
| ddict = ZSTD_createDDict_advanced(dictBuffer, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, ZSTD_dlm_byRef, ZSTD_dct_rawContent, ZSTD_defaultCMem); |
| FUZZ_ASSERT(cdict); |
| FUZZ_ASSERT(ddict); |
| } |
| |
| FUZZ_ASSERT(cdict); |
| FUZZ_ASSERT(ddict); |
| |
| hasDict = FUZZ_dataProducer_uint32Range(producer, 0, 1); |
| if (hasDict) { |
| dictSize = ZSTD_FUZZ_GENERATED_DICT_MAXSIZE; |
| } |
| |
| if (!generatedSequences) { |
| generatedSequences = FUZZ_malloc(sizeof(ZSTD_Sequence)*ZSTD_FUZZ_MAX_NBSEQ); |
| } |
| if (!generatedSrc) { |
| generatedSrc = FUZZ_malloc(ZSTD_FUZZ_GENERATED_SRC_MAXSIZE); |
| } |
| |
| nbSequences = generateRandomSequences(producer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictSize, wLog, mode); |
| generatedSrcSize = decodeSequences(generatedSrc, nbSequences, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictBuffer, dictSize, mode); |
| |
| /* Note : in explicit block delimiters mode, |
| * the fuzzer might generate a lot of small blocks. |
| * In which case, the final compressed size might be > ZSTD_compressBound(). |
| * This is still a valid scenario fuzzer though, which makes it possible to check under-sized dstCapacity. |
| * The test just doesn't roundtrip. */ |
| cBufSize = ZSTD_compressBound(generatedSrcSize); |
| cBuf = FUZZ_malloc(cBufSize); |
| |
| rBufSize = generatedSrcSize; |
| rBuf = FUZZ_malloc(rBufSize); |
| |
| { const size_t result = roundTripTest(rBuf, rBufSize, |
| cBuf, cBufSize, |
| generatedSrc, generatedSrcSize, |
| generatedSequences, nbSequences, |
| hasDict, mode); |
| FUZZ_ASSERT(result <= generatedSrcSize); /* can be 0 when no round-trip */ |
| } |
| |
| free(rBuf); |
| free(cBuf); |
| FUZZ_dataProducer_free(producer); |
| #ifndef STATEFUL_FUZZING |
| ZSTD_freeCCtx(cctx); cctx = NULL; |
| ZSTD_freeDCtx(dctx); dctx = NULL; |
| free(generatedSequences); generatedSequences = NULL; |
| free(generatedSrc); generatedSrc = NULL; |
| free(literalsBuffer); literalsBuffer = NULL; |
| #endif |
| return 0; |
| } |