added in struct to keep functions more orderly
diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c
index dba2771..76949a6 100644
--- a/tests/decodecorpus.c
+++ b/tests/decodecorpus.c
@@ -236,8 +236,7 @@
typedef struct {
int useDict;
U32 dictID;
- size_t dictSize;
- BYTE* fullDict;
+ size_t dictContentSize;
BYTE* dictContent;
} dictInfo;
/*-*******************************************************
@@ -249,7 +248,7 @@
} opts; /* advanced options on generation */
/* Generate and write a random frame header */
-static void writeFrameHeader(U32* seed, frame_t* frame, int genDict, U32 dictID)
+static void writeFrameHeader(U32* seed, frame_t* frame, dictInfo info)
{
BYTE* const op = frame->data;
size_t pos = 0;
@@ -315,7 +314,7 @@
pos += 4;
{
- int dictBits = genDict ? 3 : 0;
+ int dictBits = info.useDict ? 3 : 0;
BYTE const frameHeaderDescriptor =
(BYTE) ((fcsCode << 6) | (singleSegment << 5) | (1 << 2) | dictBits);
op[pos++] = frameHeaderDescriptor;
@@ -324,8 +323,8 @@
if (!singleSegment) {
op[pos++] = windowByte;
}
- if(genDict) {
- MEM_writeLE32(op + pos, (U32) dictID);
+ if(info.useDict) {
+ MEM_writeLE32(op + pos, (U32) info.dictID);
pos += 4;
}
if (contentSizeFlag) {
@@ -618,7 +617,7 @@
/* Randomly generate sequence commands */
static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
- size_t contentSize, size_t literalsSize, int genDict, size_t dictSize, BYTE* dictContent)
+ size_t contentSize, size_t literalsSize, dictInfo info)
{
/* The total length of all the matches */
size_t const remainingMatch = contentSize - literalsSize;
@@ -675,10 +674,10 @@
MIN(frame->header.windowSize,
(size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) +
1;
- if (genDict && (RAND(seed) & 1)) {
+ if (info.useDict && (RAND(seed) & 1)) {
/* need to occasionally generate offsets that go past the start */
/* we still need to be within the windowSize however */
- U32 const lenPastStart = RAND(seed) % dictSize;
+ U32 const lenPastStart = RAND(seed) % info.dictContentSize;
offset = MIN(frame->header.windowSize, offset+lenPastStart);
}
offsetCode = offset + ZSTD_REP_MOVE;
@@ -696,13 +695,13 @@
repIndex = MIN(2, offsetCode + 1);
}
}
- } while (((!genDict) && (offset > (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) || offset == 0);
+ } while (((!info.useDict) && (offset > (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) || offset == 0);
{ size_t j;
for (j = 0; j < matchLen; j++) {
if ((void*)(srcPtr - offset) < (void*)frame->srcStart) {
/* copy from dictionary instead of literals */
- *srcPtr = *(dictContent + dictSize - (offset-(srcPtr-(BYTE*)frame->srcStart)));
+ *srcPtr = *(info.dictContent + info.dictContentSize - (offset-(srcPtr-(BYTE*)frame->srcStart)));
}
else {
*srcPtr = *(srcPtr-offset);
@@ -956,7 +955,7 @@
}
static size_t writeSequencesBlock(U32* seed, frame_t* frame, size_t contentSize,
- size_t literalsSize, int genDict, size_t dictSize, BYTE* dictContent)
+ size_t literalsSize, dictInfo info)
{
seqStore_t seqStore;
size_t numSequences;
@@ -965,14 +964,14 @@
initSeqStore(&seqStore);
/* randomly generate sequences */
- numSequences = generateSequences(seed, frame, &seqStore, contentSize, literalsSize, genDict, dictSize, dictContent);
+ numSequences = generateSequences(seed, frame, &seqStore, contentSize, literalsSize, info);
/* write them out to the frame data */
CHECKERR(writeSequences(seed, frame, &seqStore, numSequences));
return numSequences;
}
-static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize, int genDict, size_t dictSize, BYTE* dictContent)
+static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize, dictInfo info)
{
BYTE* const blockStart = (BYTE*)frame->data;
size_t literalsSize;
@@ -984,7 +983,7 @@
DISPLAYLEVEL(4, " literals size: %u\n", (U32)literalsSize);
- nbSeq = writeSequencesBlock(seed, frame, contentSize, literalsSize, genDict, dictSize, dictContent);
+ nbSeq = writeSequencesBlock(seed, frame, contentSize, literalsSize, info);
DISPLAYLEVEL(4, " number of sequences: %u\n", (U32)nbSeq);
@@ -992,7 +991,7 @@
}
static void writeBlock(U32* seed, frame_t* frame, size_t contentSize,
- int lastBlock, int genDict, size_t dictSize, BYTE* dictContent)
+ int lastBlock, dictInfo info)
{
int const blockTypeDesc = RAND(seed) % 8;
size_t blockSize;
@@ -1032,7 +1031,7 @@
frame->oldStats = frame->stats;
frame->data = op;
- compressedSize = writeCompressedBlock(seed, frame, contentSize, genDict, dictSize, dictContent);
+ compressedSize = writeCompressedBlock(seed, frame, contentSize, info);
if (compressedSize > contentSize) {
blockType = 0;
memcpy(op, frame->src, contentSize);
@@ -1058,7 +1057,7 @@
frame->data = op;
}
-static void writeBlocks(U32* seed, frame_t* frame, int genDict, size_t dictSize, BYTE* dictContent)
+static void writeBlocks(U32* seed, frame_t* frame, dictInfo info)
{
size_t contentLeft = frame->header.contentSize;
size_t const maxBlockSize = MIN(MAX_BLOCK_SIZE, frame->header.windowSize);
@@ -1081,7 +1080,7 @@
}
}
- writeBlock(seed, frame, blockContentSize, lastBlock, genDict, dictSize, dictContent);
+ writeBlock(seed, frame, blockContentSize, lastBlock, info);
contentLeft -= blockContentSize;
if (lastBlock) break;
@@ -1146,19 +1145,92 @@
}
/* Return the final seed */
-static U32 generateFrame(U32 seed, frame_t* fr, int genDict, size_t dictSize, BYTE* dictContent, U32 dictID)
+static U32 generateFrame(U32 seed, frame_t* fr, dictInfo info)
{
/* generate a complete frame */
DISPLAYLEVEL(1, "frame seed: %u\n", seed);
initFrame(fr);
- writeFrameHeader(&seed, fr, genDict, dictID);
- writeBlocks(&seed, fr, genDict, dictSize, dictContent);
+ writeFrameHeader(&seed, fr, info);
+ writeBlocks(&seed, fr, info);
writeChecksum(fr);
return seed;
}
+/*_*******************************************************
+* Dictionary Helper Functions
+*********************************************************/
+/* returns 0 if successful, otherwise returns 1 upon error */
+static int genRandomDict(U32 dictID, U32 seed, size_t dictSize, BYTE* fullDict){
+ const size_t headerSize = dictSize/4;
+ const size_t dictContentSize = dictSize - dictSize/4;
+ BYTE* const dictContent = fullDict + headerSize;
+
+ /* use 3/4 of dictionary for content, save rest for header/entropy tables */
+ if (dictContentSize < ZDICT_CONTENTSIZE_MIN || dictSize < ZDICT_DICTSIZE_MIN) {
+ DISPLAY("Error: dictionary size is too small\n");
+ return 1;
+ }
+
+ /* fill in dictionary content */
+ RAND_buffer(&seed, (void*)dictContent, dictContentSize);
+
+ /* allocate space for samples */
+ {
+ size_t dictWriteSize = 0;
+ unsigned const numSamples = 4;
+ BYTE* const samples = malloc(5000*sizeof(BYTE));
+ size_t* const sampleSizes = malloc(numSamples*sizeof(size_t));
+ if (samples == NULL || sampleSizes == NULL) {
+ DISPLAY("Error: could not generate samples for the dictionary.\n");
+ return 1;
+ }
+
+ /* generate samples */
+ unsigned i = 1;
+ size_t currSize = 1;
+ BYTE* curr = samples;
+ while (i <= 4) {
+ *(sampleSizes + i - 1) = currSize;
+ for (size_t j = 0; j < currSize; j++) {
+ *(curr++) = (BYTE)i;
+ }
+ i++;
+ currSize *= 16;
+ }
+
+ /* set dictionary params */
+ ZDICT_params_t zdictParams;
+ memset(&zdictParams, 0, sizeof(zdictParams));
+ zdictParams.dictID = dictID;
+ zdictParams.notificationLevel = 1;
+
+ /* finalize dictionary with random samples */
+ dictWriteSize = ZDICT_finalizeDictionary(fullDict, dictSize,
+ dictContent, dictContentSize,
+ samples, sampleSizes, numSamples,
+ zdictParams);
+ free(samples);
+ free(sampleSizes);
+ if (dictWriteSize != dictSize && ZDICT_isError(dictWriteSize)) {
+ DISPLAY("Could not finalize dictionary: %s\n", ZDICT_getErrorName(dictWriteSize));
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static dictInfo initDictInfo(int useDict, size_t dictContentSize, BYTE* dictContent, U32 dictID){
+ /* allocate space statically */
+ dictInfo dictOp;
+ memset((void*)(&dictOp), 0, sizeof(dictOp));
+ dictOp.useDict = useDict;
+ dictOp.dictContentSize = dictContentSize;
+ dictOp.dictContent = dictContent;
+ dictOp.dictID = dictID;
+ return dictOp;
+}
/*-*******************************************************
* Test Mode
@@ -1240,7 +1312,10 @@
else
DISPLAYUPDATE("\r%u ", fnum);
- seed = generateFrame(seed, &fr, 0, 0, NULL, 0);
+ {
+ dictInfo const info = initDictInfo(0, 0, NULL, 0);
+ generateFrame(seed, &fr, info);
+ }
{ size_t const r = testDecodeSimple(&fr);
if (ZSTD_isError(r)) {
@@ -1264,80 +1339,6 @@
return 0;
}
-/*_*******************************************************
-* Dictionary Helper Functions
-*********************************************************/
-/* returns 0 if successful, otherwise returns 1 upon error */
-static int genRandomDict(U32 dictID, U32 seed, size_t dictSize, BYTE* fullDict){
- const size_t headerSize = dictSize/4;
- const size_t dictContentSize = dictSize - dictSize/4;
- BYTE* const dictContent = fullDict + headerSize;
-
- /* use 3/4 of dictionary for content, save rest for header/entropy tables */
- if (dictContentSize < ZDICT_CONTENTSIZE_MIN || dictSize < ZDICT_DICTSIZE_MIN) {
- DISPLAY("Error: dictionary size is too small\n");
- return 1;
- }
-
- /* fill in dictionary content */
- RAND_buffer(&seed, (void*)dictContent, dictContentSize);
-
- /* allocate space for samples */
- {
- size_t dictWriteSize = 0;
- unsigned const numSamples = 4;
- BYTE* const samples = malloc(5000*sizeof(BYTE));
- size_t* const sampleSizes = malloc(numSamples*sizeof(size_t));
- if (samples == NULL || sampleSizes == NULL) {
- DISPLAY("Error: could not generate samples for the dictionary.\n");
- return 1;
- }
-
- /* generate samples */
- unsigned i = 1;
- size_t currSize = 1;
- BYTE* curr = samples;
- while (i <= 4) {
- *(sampleSizes + i - 1) = currSize;
- for (size_t j = 0; j < currSize; j++) {
- *(curr++) = (BYTE)i;
- }
- i++;
- currSize *= 16;
- }
-
- /* set dictionary params */
- ZDICT_params_t zdictParams;
- memset(&zdictParams, 0, sizeof(zdictParams));
- zdictParams.dictID = dictID;
- zdictParams.notificationLevel = 1;
-
- /* finalize dictionary with random samples */
- dictWriteSize = ZDICT_finalizeDictionary(fullDict, dictSize,
- dictContent, dictContentSize,
- samples, sampleSizes, numSamples,
- zdictParams);
- free(samples);
- free(sampleSizes);
- if (dictWriteSize != dictSize && ZDICT_isError(dictWriteSize)) {
- DISPLAY("Could not finalize dictionary: %s\n", ZDICT_getErrorName(dictWriteSize));
- return 1;
- }
- }
- return 0;
-}
-
-static dictInfo initDictInfo(int useDict, size_t dictSize, BYTE* fullDict, U32 seed){
- /* allocate space statically */
- dictInfo dictOp;
- memset((void*)(&dictOp), 0, sizeof(dictOp));
- dictOp.useDict = useDict;
- dictOp.dictSize = dictSize;
- dictOp.fullDict = fullDict;
- dictOp.dictContent = fullDict + dictSize/4;
- if (useDict) dictOp.dictID = RAND(&seed);
- return dictOp;
-}
/*-*******************************************************
* File I/O
*********************************************************/
@@ -1349,7 +1350,10 @@
DISPLAY("seed: %u\n", seed);
- generateFrame(seed, &fr, 0, 0, NULL, 0);
+ {
+ dictInfo const info = initDictInfo(0, 0, NULL, 0);
+ generateFrame(seed, &fr, info);
+ }
outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path);
if (origPath) {
@@ -1371,7 +1375,10 @@
DISPLAYUPDATE("\r%u/%u ", fnum, numFiles);
- seed = generateFrame(seed, &fr, 0, 0, NULL, 0);
+ {
+ dictInfo const info = initDictInfo(0, 0, NULL, 0);
+ generateFrame(seed, &fr, info);
+ }
if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) {
DISPLAY("Error: path too long\n");
@@ -1437,7 +1444,8 @@
{
size_t dictContentSize = dictSize-dictSize/4;
BYTE* const dictContent = fullDict+dictSize/4;
- seed = generateFrame(seed, &fr, 1, dictContentSize, dictContent, dictID);
+ dictInfo const info = initDictInfo(1, dictContentSize, dictContent, dictID);
+ seed = generateFrame(seed, &fr, info);
}
if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) {
DISPLAY("Error: path too long\n");
@@ -1545,7 +1553,7 @@
int testMode = 0;
const char* path = NULL;
const char* origPath = NULL;
- int genDict = 0;
+ int useDict = 0;
unsigned dictSize = (10 << 10); /* 10 kB default */
int argNb;
@@ -1610,7 +1618,7 @@
} else if (strcmp(argument, "train-dict") == 0) {
argument += 11;
dictSize = readInt(&argument);
- genDict = 1;
+ useDict = 1;
} else {
advancedUsage(argv[0]);
return 1;
@@ -1642,9 +1650,9 @@
return 1;
}
- if (numFiles == 0 && genDict == 0) {
+ if (numFiles == 0 && useDict == 0) {
return generateFile(seed, path, origPath);
- } else if (genDict == 0){
+ } else if (useDict == 0){
return generateCorpus(seed, numFiles, path, origPath);
} else {
/* should generate files with a dictionary */