blob: 8a1371af93d41d9937e1941a176eb2975171b373 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.commons.compress.compressors.lz77support;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import org.apache.commons.compress.compressors.CompressorInputStream;
import org.apache.commons.compress.utils.ByteUtils;
import org.apache.commons.compress.utils.CountingInputStream;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.compress.utils.InputStreamStatistics;
/**
* Encapsulates code common to LZ77 decompressors.
*
* <p>Assumes the stream consists of blocks of literal data and
* back-references (called copies) in any order. Of course the first
* block must be a literal block for the scheme to work - unless the
* {@link #prefill prefill} method has been used to provide initial
* data that is never returned by {@link #read read} but only used for
* back-references.</p>
*
* <p>Subclasses must override the three-arg {@link #read read} method
* as the no-arg version delegates to it and the default
* implementation delegates to the no-arg version, leading to infinite
* mutual recursion and a {@code StackOverflowError} otherwise.</p>
*
* <p>The contract for subclasses' {@code read} implementation is:</p>
* <ul>
*
* <li>keep track of the current state of the stream. Is it inside a
* literal block or a back-reference or in-between blocks?</li>
*
* <li>Use {@link #readOneByte} to access the underlying stream
* directly.</li>
*
* <li>If a new literal block starts, use {@link #startLiteral} to
* tell this class about it and read the literal data using {@link
* #readLiteral} until it returns {@code 0}. {@link
* #hasMoreDataInBlock} will return {@code false} before the next
* call to {@link #readLiteral} would return {@code 0}.</li>
*
* <li>If a new back-reference starts, use {@link #startBackReference} to
* tell this class about it and read the literal data using {@link
* #readBackReference} until it returns {@code 0}. {@link
* #hasMoreDataInBlock} will return {@code false} before the next
* call to {@link #readBackReference} would return {@code 0}.</li>
*
* <li>If the end of the stream has been reached, return {@code -1}
* as this class' methods will never do so themselves.</li>
*
* </ul>
*
* <p>{@link #readOneByte} and {@link #readLiteral} update the counter
* for bytes read.</p>
*
* @since 1.14
*/
public abstract class AbstractLZ77CompressorInputStream extends CompressorInputStream
implements InputStreamStatistics {
/** Size of the window - must be bigger than the biggest offset expected. */
private final int windowSize;
/**
* Buffer to write decompressed bytes to for back-references, will
* be three times windowSize big.
*
* <p>Three times so we can slide the whole buffer a windowSize to
* the left once we've read twice windowSize and still have enough
* data inside of it to satisfy back-references.</p>
*/
private final byte[] buf;
/** One behind the index of the last byte in the buffer that was written, i.e. the next position to write to */
private int writeIndex;
/** Index of the next byte to be read. */
private int readIndex;
/** The underlying stream to read compressed data from */
private final CountingInputStream in;
/** Number of bytes still to be read from the current literal or back-reference. */
private long bytesRemaining;
/** Offset of the current back-reference. */
private int backReferenceOffset;
/** uncompressed size */
private int size = 0;
// used in no-arg read method
private final byte[] oneByte = new byte[1];
/**
* Supplier that delegates to {@link #readOneByte}.
*/
protected final ByteUtils.ByteSupplier supplier = new ByteUtils.ByteSupplier() {
@Override
public int getAsByte() throws IOException {
return readOneByte();
}
};
/**
* Creates a new LZ77 input stream.
*
* @param is
* An InputStream to read compressed data from
* @param windowSize
* Size of the window kept for back-references, must be bigger than the biggest offset expected.
*
* @throws IOException if reading fails
*/
public AbstractLZ77CompressorInputStream(final InputStream is, int windowSize) throws IOException {
this.in = new CountingInputStream(is);
this.windowSize = windowSize;
buf = new byte[3 * windowSize];
writeIndex = readIndex = 0;
bytesRemaining = 0;
}
/** {@inheritDoc} */
@Override
public int read() throws IOException {
return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF;
}
/** {@inheritDoc} */
@Override
public void close() throws IOException {
in.close();
}
/** {@inheritDoc} */
@Override
public int available() {
return writeIndex - readIndex;
}
/**
* Get the uncompressed size of the stream
*
* @return the uncompressed size
*/
public int getSize() {
return size;
}
/**
* Adds some initial data to fill the window with.
*
* <p>This is used if the stream has been cut into blocks and
* back-references of one block may refer to data of the previous
* block(s). One such example is the LZ4 frame format using block
* dependency.</p>
*
* @param data the data to fill the window with.
* @throws IllegalStateException if the stream has already started to read data
*/
public void prefill(byte[] data) {
if (writeIndex != 0) {
throw new IllegalStateException("the stream has already been read from, can't prefill anymore");
}
// we don't need more data than the big offset could refer to, so cap it
int len = Math.min(windowSize, data.length);
// we need the last data as we are dealing with *back*-references
System.arraycopy(data, data.length - len, buf, 0, len);
writeIndex += len;
readIndex += len;
}
/**
* @since 1.17
*/
@Override
public long getCompressedCount() {
return in.getBytesRead();
}
/**
* Used by subclasses to signal the next block contains the given
* amount of literal data.
* @param length the length of the block
*/
protected final void startLiteral(long length) {
bytesRemaining = length;
}
/**
* Is there still data remaining inside the current block?
* @return true if there is still data remaining inside the current block.
*/
protected final boolean hasMoreDataInBlock() {
return bytesRemaining > 0;
}
/**
* Reads data from the current literal block.
* @param b buffer to write data to
* @param off offset to start writing to
* @param len maximum amount of data to read
* @return number of bytes read, may be 0. Will never return -1 as
* EOF-detection is the responsibility of the subclass
* @throws IOException if the underlying stream throws or signals
* an EOF before the amount of data promised for the block have
* been read
*/
protected final int readLiteral(final byte[] b, final int off, final int len) throws IOException {
final int avail = available();
if (len > avail) {
tryToReadLiteral(len - avail);
}
return readFromBuffer(b, off, len);
}
private void tryToReadLiteral(int bytesToRead) throws IOException {
// min of "what is still inside the literal", "what does the user want" and "how muc can fit into the buffer"
final int reallyTryToRead = Math.min((int) Math.min(bytesToRead, bytesRemaining),
buf.length - writeIndex);
final int bytesRead = reallyTryToRead > 0
? IOUtils.readFully(in, buf, writeIndex, reallyTryToRead)
: 0 /* happens for bytesRemaining == 0 */;
count(bytesRead);
if (reallyTryToRead != bytesRead) {
throw new IOException("Premature end of stream reading literal");
}
writeIndex += reallyTryToRead;
bytesRemaining -= reallyTryToRead;
}
private int readFromBuffer(final byte[] b, final int off, final int len) {
final int readable = Math.min(len, available());
if (readable > 0) {
System.arraycopy(buf, readIndex, b, off, readable);
readIndex += readable;
if (readIndex > 2 * windowSize) {
slideBuffer();
}
}
size += readable;
return readable;
}
private void slideBuffer() {
System.arraycopy(buf, windowSize, buf, 0, windowSize * 2);
writeIndex -= windowSize;
readIndex -= windowSize;
}
/**
* Used by subclasses to signal the next block contains a back-reference with the given coordinates.
* @param offset the offset of the back-reference
* @param length the length of the back-reference
*/
protected final void startBackReference(int offset, long length) {
backReferenceOffset = offset;
bytesRemaining = length;
}
/**
* Reads data from the current back-reference.
* @param b buffer to write data to
* @param off offset to start writing to
* @param len maximum amount of data to read
* @return number of bytes read, may be 0. Will never return -1 as
* EOF-detection is the responsibility of the subclass
*/
protected final int readBackReference(final byte[] b, final int off, final int len) {
final int avail = available();
if (len > avail) {
tryToCopy(len - avail);
}
return readFromBuffer(b, off, len);
}
private void tryToCopy(int bytesToCopy) {
// this will fit into the buffer without sliding and not
// require more than is available inside the back-reference
int copy = Math.min((int) Math.min(bytesToCopy, bytesRemaining),
buf.length - writeIndex);
if (copy == 0) {
// NOP
} else if (backReferenceOffset == 1) { // pretty common special case
final byte last = buf[writeIndex - 1];
Arrays.fill(buf, writeIndex, writeIndex + copy, last);
writeIndex += copy;
} else if (copy < backReferenceOffset) {
System.arraycopy(buf, writeIndex - backReferenceOffset, buf, writeIndex, copy);
writeIndex += copy;
} else {
// back-reference overlaps with the bytes created from it
// like go back two bytes and then copy six (by copying
// the last two bytes three time).
final int fullRots = copy / backReferenceOffset;
for (int i = 0; i < fullRots; i++) {
System.arraycopy(buf, writeIndex - backReferenceOffset, buf, writeIndex, backReferenceOffset);
writeIndex += backReferenceOffset;
}
final int pad = copy - (backReferenceOffset * fullRots);
if (pad > 0) {
System.arraycopy(buf, writeIndex - backReferenceOffset, buf, writeIndex, pad);
writeIndex += pad;
}
}
bytesRemaining -= copy;
}
/**
* Reads a single byte from the real input stream and ensures the data is accounted for.
*
* @return the byte read as value between 0 and 255 or -1 if EOF has been reached.
* @throws IOException if the underlying stream throws
*/
protected final int readOneByte() throws IOException {
final int b = in.read();
if (b != -1) {
count(1);
return b & 0xFF;
}
return -1;
}
}