blob: 636504d2027813b507a2eb4c69fbdcfbc7540a55 [file] [log] [blame]
// Copyright 2016 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.archivepatcher.generator;
import com.google.archivepatcher.shared.DefaultDeflateCompatibilityWindow;
import com.google.archivepatcher.shared.JreDeflateParameters;
import com.google.archivepatcher.shared.MultiViewInputStreamFactory;
import com.google.archivepatcher.shared.RandomAccessFileInputStream;
import com.google.archivepatcher.shared.RandomAccessFileInputStreamFactory;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.Deflater;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;
import java.util.zip.ZipException;
/**
* Divines information about the compression used for a resource that has been compressed with a
* deflate-compatible algorithm. This implementation produces results that are valid within the
* {@link DefaultDeflateCompatibilityWindow}.
*/
public class DefaultDeflateCompressionDiviner {
/**
* The levels to try for each strategy, in the order to attempt them.
*/
private final Map<Integer, List<Integer>> levelsByStrategy = getLevelsByStrategy();
/**
* A simple struct that contains a {@link MinimalZipEntry} describing a specific entry from a zip
* archive along with an optional accompanying {@link JreDeflateParameters} describing the
* original compression settings that were used to generate the compressed data in that entry.
*/
public static class DivinationResult {
/**
* The {@link MinimalZipEntry} for the result; never null.
*/
public final MinimalZipEntry minimalZipEntry;
/**
* The {@link JreDeflateParameters} for the result, possibly null. This value is only set if
* {@link MinimalZipEntry#isDeflateCompressed()} is true <em>and</em> the compression settings
* were successfully divined.
*/
public final JreDeflateParameters divinedParameters;
/**
* Creates a new result with the specified fields.
* @param minimalZipEntry the zip entry
* @param divinedParameters the parameters
*/
public DivinationResult(
MinimalZipEntry minimalZipEntry, JreDeflateParameters divinedParameters) {
if (minimalZipEntry == null) {
throw new IllegalArgumentException("minimalZipEntry cannot be null");
}
this.minimalZipEntry = minimalZipEntry;
this.divinedParameters = divinedParameters;
}
}
/**
* Load the specified archive and attempt to divine deflate parameters for all entries within.
* @param archiveFile the archive file to work on
* @return a list of results for each entry in the archive, in file order (not central directory
* order). There is exactly one result per entry, regardless of whether or not that entry is
* compressed. Callers can filter results by checking
* {@link MinimalZipEntry#getCompressionMethod()} to see if the result is or is not compressed,
* and by checking whether a non-null {@link JreDeflateParameters} was obtained.
* @throws IOException if unable to read or parse the file
* @see DivinationResult
*/
public List<DivinationResult> divineDeflateParameters(File archiveFile) throws IOException {
List<DivinationResult> results = new ArrayList<DivinationResult>();
for (MinimalZipEntry minimalZipEntry : MinimalZipArchive.listEntries(archiveFile)) {
JreDeflateParameters divinedParameters = null;
if (minimalZipEntry.isDeflateCompressed()) {
// TODO(andrewhayden): Reuse streams to avoid churning file descriptors
RandomAccessFileInputStreamFactory rafisFactory =
new RandomAccessFileInputStreamFactory(
archiveFile,
minimalZipEntry.getFileOffsetOfCompressedData(),
minimalZipEntry.getCompressedSize());
divinedParameters = divineDeflateParameters(rafisFactory);
}
results.add(new DivinationResult(minimalZipEntry, divinedParameters));
}
return results;
}
/**
* Returns an unmodifiable map whose keys are deflate strategies and whose values are the levels
* that make sense to try with the corresponding strategy, in the recommended testing order.
*
* <ul>
* <li>For strategy 0, levels 1 through 9 (inclusive) are included.
* <li>For strategy 1, levels 4 through 9 (inclusive) are included. Levels 1, 2 and 3 are
* excluded because they behave the same under strategy 0.
* <li>For strategy 2, only level 1 is included because the level is ignored under strategy 2.
* </ul>
*
* @return such a mapping
*/
protected Map<Integer, List<Integer>> getLevelsByStrategy() {
final Map<Integer, List<Integer>> levelsByStrategy = new HashMap<Integer, List<Integer>>();
// The best order for the levels is simply the order of popularity in the world, which is
// expected to be default (6), maximum compression (9), and fastest (1).
// The rest of the levels are rarely encountered and their order is mostly irrelevant.
levelsByStrategy.put(0, Collections.unmodifiableList(Arrays.asList(6, 9, 1, 4, 2, 3, 5, 7, 8)));
levelsByStrategy.put(1, Collections.unmodifiableList(Arrays.asList(6, 9, 4, 5, 7, 8)));
levelsByStrategy.put(2, Collections.singletonList(1));
return Collections.unmodifiableMap(levelsByStrategy);
}
/**
* Determines the original {@link JreDeflateParameters} that were used to compress a given piece
* of deflated data.
* @param compressedDataInputStreamFactory a {@link MultiViewInputStreamFactory} that can provide
* multiple independent {@link InputStream} instances for the compressed data; the streams
* produced must support {@link InputStream#mark(int)} and it is recommended that
* {@link RandomAccessFileInputStream} instances be provided for efficiency if a backing file is
* available. The stream will be reset for each recompression attempt that is required.
* @return the parameters that can be used to replicate the compressed data in the
* {@link DefaultDeflateCompatibilityWindow}, if any; otherwise <code>null</code>. Note that
* <code>null</code> is also returned in the case of <em>corrupt</em> zip data since, by
* definition, it cannot be replicated via any combination of normal deflate parameters.
* @throws IOException if there is a problem reading the data, i.e. if the file contents are
* changed while reading
*/
public JreDeflateParameters divineDeflateParameters(
MultiViewInputStreamFactory<?> compressedDataInputStreamFactory) throws IOException {
InputStream compressedDataIn = compressedDataInputStreamFactory.newStream();
if (!compressedDataIn.markSupported()) {
try {
compressedDataIn.close();
} catch (Exception ignored) {
// Nothing to do.
}
throw new IllegalArgumentException("input stream must support mark(int)");
}
// Record the input stream position to return to it each time a prediction is needed.
compressedDataIn.mark(0); // The argument to mark is ignored and irrelevant
// Make a copy of the stream for matching bytes of compressed input
InputStream matchingCompressedDataIn = compressedDataInputStreamFactory.newStream();
matchingCompressedDataIn.mark(0); // The argument to mark is ignored and irrelevant
byte[] copyBuffer = new byte[32768];
try {
// Iterate over all relevant combinations of nowrap, strategy and level.
for (boolean nowrap : new boolean[] {true, false}) {
Inflater inflater = new Inflater(nowrap);
Deflater deflater = new Deflater(0, nowrap);
for (int strategy : new int[] {0, 1, 2}) {
deflater.setStrategy(strategy);
// Strategy 2 does not have the concept of levels, so vacuously call it 1.
List<Integer> levelsToSearch = levelsByStrategy.get(strategy);
for (int levelIndex = 0; levelIndex < levelsToSearch.size(); levelIndex++) {
int level = levelsToSearch.get(levelIndex);
deflater.setLevel(level);
inflater.reset();
deflater.reset();
compressedDataIn.reset();
matchingCompressedDataIn.reset();
try {
if (matches(
compressedDataIn, inflater, deflater, matchingCompressedDataIn, copyBuffer)) {
return JreDeflateParameters.of(level, strategy, nowrap);
}
} catch (ZipException e) {
// Parse error in input. The only possibilities are corruption or the wrong nowrap.
// Skip all remaining levels and strategies.
levelIndex = levelsToSearch.size();
strategy = 2;
}
} // end of iteration on level
} // end of iteration on strategy
} // end of iteration on nowrap
} finally {
try {
compressedDataIn.close();
} catch (Exception ignored) {
// Don't care.
}
try {
matchingCompressedDataIn.close();
} catch (Exception ignored) {
// Don't care.
}
}
return null;
}
/**
* Check if the specified deflater will produce the same compressed data as the byte stream in
* compressedDataIn and returns true if so.
* @param compressedDataIn the stream of compressed data to read and reproduce
* @param inflater the inflater for uncompressing the stream
* @param deflater the deflater for recompressing the output of the inflater
* @param matchingStreamInput an independent but identical view of the bytes in compressedDataIn
* @param copyBuffer buffer to use for copying bytes between the inflater and the deflater
* @return true if the specified deflater reproduces the bytes in compressedDataIn, otherwise
* false
* @throws IOException if anything goes wrong; in particular, {@link ZipException} is thrown if
* there is a problem parsing compressedDataIn
*/
private boolean matches(
InputStream compressedDataIn,
Inflater inflater,
Deflater deflater,
InputStream matchingStreamInput,
byte[] copyBuffer)
throws IOException {
MatchingOutputStream matcher = new MatchingOutputStream(matchingStreamInput, 32768);
// This stream will deliberately be left open because closing it would close the
// underlying compressedDataIn stream, which is not desired.
InflaterInputStream inflaterIn = new InflaterInputStream(compressedDataIn, inflater, 32768);
DeflaterOutputStream out = new DeflaterOutputStream(matcher, deflater, 32768);
int numRead = 0;
try {
while ((numRead = inflaterIn.read(copyBuffer)) >= 0) {
out.write(copyBuffer, 0, numRead);
}
// When done, all bytes have been successfully recompressed. For sanity, check that
// the matcher has consumed the same number of bytes and arrived at EOF as well.
out.finish();
out.flush();
matcher.expectEof();
// At this point the data in the compressed output stream was a perfect match for the
// data in the compressed input stream; the answer has been found.
return true;
} catch (MismatchException e) {
// Fast-fail case when the compressed output stream doesn't match the compressed input
// stream. These are not the parameters you're looking for!
} finally {
try {
out.close();
} catch (Exception ignored) {
// Don't care.
}
}
return false;
}
}