| /* |
| * SeekableXZInputStream |
| * |
| * Author: Lasse Collin <lasse.collin@tukaani.org> |
| * |
| * This file has been put into the public domain. |
| * You can do whatever you want with this file. |
| */ |
| |
| package org.tukaani.xz; |
| |
| import java.util.Arrays; |
| import java.util.ArrayList; |
| import java.io.DataInputStream; |
| import java.io.IOException; |
| import java.io.EOFException; |
| import org.tukaani.xz.common.DecoderUtil; |
| import org.tukaani.xz.common.StreamFlags; |
| import org.tukaani.xz.check.Check; |
| import org.tukaani.xz.index.IndexDecoder; |
| import org.tukaani.xz.index.BlockInfo; |
| |
| /** |
| * Decompresses a .xz file in random access mode. |
| * This supports decompressing concatenated .xz files. |
| * <p> |
| * Each .xz file consists of one or more Streams. Each Stream consists of zero |
| * or more Blocks. Each Stream contains an Index of that Stream's Blocks. |
| * The Indexes from all Streams are loaded in RAM by a constructor of this |
| * class. A typical .xz file has only one Stream, and parsing its Index will |
| * need only three or four seeks. |
| * <p> |
| * To make random access possible, the data in a .xz file must be split |
| * into multiple Blocks of reasonable size. Decompression can only start at |
| * a Block boundary. When seeking to an uncompressed position that is not at |
| * a Block boundary, decompression starts at the beginning of the Block and |
| * throws away data until the target position is reached. Thus, smaller Blocks |
| * mean faster seeks to arbitrary uncompressed positions. On the other hand, |
| * smaller Blocks mean worse compression. So one has to make a compromise |
| * between random access speed and compression ratio. |
| * <p> |
| * Implementation note: This class uses linear search to locate the correct |
| * Stream from the data structures in RAM. It was the simplest to implement |
| * and should be fine as long as there aren't too many Streams. The correct |
| * Block inside a Stream is located using binary search and thus is fast |
| * even with a huge number of Blocks. |
| * |
| * <h4>Memory usage</h4> |
| * <p> |
| * The amount of memory needed for the Indexes is taken into account when |
| * checking the memory usage limit. Each Stream is calculated to need at |
| * least 1 KiB of memory and each Block 16 bytes of memory, rounded up |
| * to the next kibibyte. So unless the file has a huge number of Streams or |
| * Blocks, these don't take significant amount of memory. |
| * |
| * <h4>Creating random-accessible .xz files</h4> |
| * <p> |
| * When using {@link XZOutputStream}, a new Block can be started by calling |
| * its {@link XZOutputStream#endBlock() endBlock} method. If you know |
| * that the decompressor will only need to seek to certain uncompressed |
| * positions, it can be a good idea to start a new Block at (some of) these |
| * positions (and only at these positions to get better compression ratio). |
| * <p> |
| * liblzma in XZ Utils supports starting a new Block with |
| * <code>LZMA_FULL_FLUSH</code>. XZ Utils 5.1.1alpha added threaded |
| * compression which creates multi-Block .xz files. XZ Utils 5.1.1alpha |
| * also added the option <code>--block-size=SIZE</code> to the xz command |
| * line tool. XZ Utils 5.1.2alpha added a partial implementation of |
| * <code>--block-list=SIZES</code> which allows specifying sizes of |
| * individual Blocks. |
| * |
| * @see SeekableFileInputStream |
| * @see XZInputStream |
| * @see XZOutputStream |
| */ |
| public class SeekableXZInputStream extends SeekableInputStream { |
| /** |
| * Cache for big arrays. This is the <code>arrayCache</code> argument |
| * that was given to the constructor. |
| */ |
| private final ArrayCache arrayCache; |
| |
| /** |
| * The input stream containing XZ compressed data. |
| * This is set to <code>null</code> when this stream is closed. |
| */ |
| private SeekableInputStream in; |
| |
| /** |
| * Memory usage limit after the memory usage of the IndexDecoders has |
| * been subtracted. A negative value means that no limit is imposed. |
| */ |
| private final int memoryLimit; |
| |
| /** |
| * Memory usage of the IndexDecoders. |
| * When a memory usage limit was given, |
| * <code>memoryLimit + indexMemoryUsage</code> equals the original |
| * memory usage limit that was passed to the constructor. |
| */ |
| private int indexMemoryUsage = 0; |
| |
| /** |
| * List of IndexDecoders, one for each Stream in the file. |
| * The list is in reverse order: The first element is |
| * the last Stream in the file. |
| */ |
| private final ArrayList<IndexDecoder> streams |
| = new ArrayList<IndexDecoder>(); |
| |
| /** |
| * Bitmask of all Check IDs seen. Bit <code>n</code> is set if a Stream |
| * Footer with Check ID <code>n</code> was found by the constructor. |
| */ |
| private int checkTypes = 0; |
| |
| /** |
| * Uncompressed size of the file (all Streams). |
| */ |
| private long uncompressedSize = 0; |
| |
| /** |
| * Uncompressed size of the largest XZ Block in the file. |
| */ |
| private long largestBlockSize = 0; |
| |
| /** |
| * Number of XZ Blocks in the file (all Streams). |
| */ |
| private int blockCount = 0; |
| |
| /** |
| * Size and position information about the current Block. |
| * If there are no Blocks, all values will be <code>-1</code>. |
| */ |
| private final BlockInfo curBlockInfo; |
| |
| /** |
| * Temporary (and cached) information about the Block whose information |
| * is queried via <code>getBlockPos</code> and related functions. |
| */ |
| private final BlockInfo queriedBlockInfo; |
| |
| /** |
| * Integrity Check in the current XZ Stream. The constructor leaves |
| * this to point to the Check of the first Stream. |
| */ |
| private Check check; |
| |
| /** |
| * Flag indicating if the integrity checks will be verified. |
| */ |
| private final boolean verifyCheck; |
| |
| /** |
| * Decoder of the current XZ Block, if any. This is <code>null</code> |
| * when no Block is being decoded. |
| */ |
| private BlockInputStream blockDecoder = null; |
| |
| /** |
| * Current uncompressed position. |
| */ |
| private long curPos = 0; |
| |
| /** |
| * Target position for seeking. This is meaningful only while |
| * <code>seekNeeded</code> is true or a seek is being performed. |
| */ |
| private long seekPos; |
| |
| /** |
| * True when <code>seek(long)</code> has been called but the actual |
| * seeking hasn't been done yet. |
| */ |
| private boolean seekNeeded = false; |
| |
| /** |
| * True when end of the file was reached. This can be cleared by |
| * calling <code>seek(long)</code>. |
| */ |
| private boolean endReached = false; |
| |
| /** |
| * Pending exception from an earlier error. Once set, it is thrown |
| * again by later <code>read</code> and <code>available</code> calls. |
| */ |
| private IOException exception = null; |
| |
| /** |
| * Temporary buffer for read(). This avoids reallocating memory |
| * on every read() call. |
| */ |
| private final byte[] tempBuf = new byte[1]; |
| |
| /** |
| * Creates a new seekable XZ decompressor that has no memory usage limit, |
| * verifies the integrity checks, and uses the default {@link ArrayCache}. |
| * |
| * @param in seekable input stream containing one or more |
| * XZ Streams; the whole input stream is used |
| * |
| * @throws XZFormatException |
| * input is not in the XZ format |
| * |
| * @throws CorruptedInputException |
| * XZ data is corrupt or truncated |
| * |
| * @throws UnsupportedOptionsException |
| * XZ headers seem valid but they specify |
| * options not supported by this implementation |
| * |
| * @throws EOFException |
| * less than 6 bytes of input was available |
| * from <code>in</code>, or (unlikely) the size |
| * of the underlying stream got smaller while |
| * this was reading from it |
| * |
| * @throws IOException may be thrown by <code>in</code> |
| */ |
| public SeekableXZInputStream(SeekableInputStream in) |
| throws IOException { |
| // No memory usage limit (-1), integrity checks verified, |
| // default array cache. |
| this(in, -1, true, ArrayCache.getDefaultCache()); |
| } |
| |
| /** |
| * Creates a new seekable XZ decompressor that has no memory usage limit, |
| * verifies the integrity checks, and allocates its large arrays from |
| * the given <code>arrayCache</code>. |
| * <p> |
| * Apart from the <code>arrayCache</code> argument this behaves like |
| * <code>SeekableXZInputStream(SeekableInputStream)</code>. |
| * |
| * @param in seekable input stream containing one or more |
| * XZ Streams; the whole input stream is used |
| * |
| * @param arrayCache cache to be used for allocating large arrays |
| * |
| * @throws XZFormatException |
| * input is not in the XZ format |
| * |
| * @throws CorruptedInputException |
| * XZ data is corrupt or truncated |
| * |
| * @throws UnsupportedOptionsException |
| * XZ headers seem valid but they specify |
| * options not supported by this implementation |
| * |
| * @throws EOFException |
| * less than 6 bytes of input was available |
| * from <code>in</code>, or (unlikely) the size |
| * of the underlying stream got smaller while |
| * this was reading from it |
| * |
| * @throws IOException may be thrown by <code>in</code> |
| * |
| * @since 1.7 |
| */ |
| public SeekableXZInputStream(SeekableInputStream in, ArrayCache arrayCache) |
| throws IOException { |
| // No memory usage limit (-1) and integrity checks verified. |
| this(in, -1, true, arrayCache); |
| } |
| |
| /** |
| * Creates a new seekable XZ decompressor with an optional memory usage |
| * limit. The integrity checks are verified and the default |
| * {@link ArrayCache} is used. |
| * |
| * @param in seekable input stream containing one or more |
| * XZ Streams; the whole input stream is used |
| * |
| * @param memoryLimit memory usage limit in kibibytes (KiB) |
| * or <code>-1</code> to impose no |
| * memory usage limit |
| * |
| * @throws XZFormatException |
| * input is not in the XZ format |
| * |
| * @throws CorruptedInputException |
| * XZ data is corrupt or truncated |
| * |
| * @throws UnsupportedOptionsException |
| * XZ headers seem valid but they specify |
| * options not supported by this implementation |
| * |
| * @throws MemoryLimitException |
| * decoded XZ Indexes would need more memory |
| * than allowed by the memory usage limit |
| * |
| * @throws EOFException |
| * less than 6 bytes of input was available |
| * from <code>in</code>, or (unlikely) the size |
| * of the underlying stream got smaller while |
| * this was reading from it |
| * |
| * @throws IOException may be thrown by <code>in</code> |
| */ |
| public SeekableXZInputStream(SeekableInputStream in, int memoryLimit) |
| throws IOException { |
| // Integrity checks verified, default array cache. |
| this(in, memoryLimit, true, ArrayCache.getDefaultCache()); |
| } |
| |
| /** |
| * Creates a new seekable XZ decompressor with an optional |
| * memory usage limit. Integrity checks are always verified by |
| * this constructor. |
| * <p> |
| * This is identical to |
| * <code>SeekableXZInputStream(SeekableInputStream,int)</code> |
| * except that this also takes the <code>arrayCache</code> argument. |
| * |
| * @param in seekable input stream containing one or more |
| * XZ Streams; the whole input stream is used |
| * |
| * @param memoryLimit memory usage limit in kibibytes (KiB) |
| * or <code>-1</code> to impose no |
| * memory usage limit |
| * |
| * @param arrayCache cache to be used for allocating large arrays |
| * |
| * @throws XZFormatException |
| * input is not in the XZ format |
| * |
| * @throws CorruptedInputException |
| * XZ data is corrupt or truncated |
| * |
| * @throws UnsupportedOptionsException |
| * XZ headers seem valid but they specify |
| * options not supported by this implementation |
| * |
| * @throws MemoryLimitException |
| * decoded XZ Indexes would need more memory |
| * than allowed by the memory usage limit |
| * |
| * @throws EOFException |
| * less than 6 bytes of input was available |
| * from <code>in</code>, or (unlikely) the size |
| * of the underlying stream got smaller while |
| * this was reading from it |
| * |
| * @throws IOException may be thrown by <code>in</code> |
| * |
| * @since 1.7 |
| */ |
| public SeekableXZInputStream(SeekableInputStream in, int memoryLimit, |
| ArrayCache arrayCache) |
| throws IOException { |
| this(in, memoryLimit, true, arrayCache); |
| } |
| |
| /** |
| * Creates a new seekable XZ decompressor with an optional |
| * memory usage limit and ability to disable verification |
| * of integrity checks. The default {@link ArrayCache} is used. |
| * <p> |
| * Note that integrity check verification should almost never be disabled. |
| * Possible reasons to disable integrity check verification: |
| * <ul> |
| * <li>Trying to recover data from a corrupt .xz file.</li> |
| * <li>Speeding up decompression. This matters mostly with SHA-256 |
| * or with files that have compressed extremely well. It's recommended |
| * that integrity checking isn't disabled for performance reasons |
| * unless the file integrity is verified externally in some other |
| * way.</li> |
| * </ul> |
| * <p> |
| * <code>verifyCheck</code> only affects the integrity check of |
| * the actual compressed data. The CRC32 fields in the headers |
| * are always verified. |
| * |
| * @param in seekable input stream containing one or more |
| * XZ Streams; the whole input stream is used |
| * |
| * @param memoryLimit memory usage limit in kibibytes (KiB) |
| * or <code>-1</code> to impose no |
| * memory usage limit |
| * |
| * @param verifyCheck if <code>true</code>, the integrity checks |
| * will be verified; this should almost never |
| * be set to <code>false</code> |
| * |
| * @throws XZFormatException |
| * input is not in the XZ format |
| * |
| * @throws CorruptedInputException |
| * XZ data is corrupt or truncated |
| * |
| * @throws UnsupportedOptionsException |
| * XZ headers seem valid but they specify |
| * options not supported by this implementation |
| * |
| * @throws MemoryLimitException |
| * decoded XZ Indexes would need more memory |
| * than allowed by the memory usage limit |
| * |
| * @throws EOFException |
| * less than 6 bytes of input was available |
| * from <code>in</code>, or (unlikely) the size |
| * of the underlying stream got smaller while |
| * this was reading from it |
| * |
| * @throws IOException may be thrown by <code>in</code> |
| * |
| * @since 1.6 |
| */ |
| public SeekableXZInputStream(SeekableInputStream in, int memoryLimit, |
| boolean verifyCheck) |
| throws IOException { |
| this(in, memoryLimit, verifyCheck, ArrayCache.getDefaultCache()); |
| } |
| |
| /** |
| * Creates a new seekable XZ decompressor with an optional |
| * memory usage limit and ability to disable verification |
| * of integrity checks. |
| * <p> |
| * This is identical to |
| * <code>SeekableXZInputStream(SeekableInputStream,int,boolean)</code> |
| * except that this also takes the <code>arrayCache</code> argument. |
| * <p> |
| * The constructor first verifies the header magic bytes at the beginning |
| * of the input and then parses the input backwards starting from its |
| * end: Stream Padding is skipped four bytes at a time, and for each |
| * Stream the Stream Footer, the Index, and the Stream Header are decoded |
| * and checked against each other. The decoded Indexes are kept in memory |
| * and their memory usage is subtracted from the memory usage limit. |
| * |
| * @param in seekable input stream containing one or more |
| * XZ Streams; the whole input stream is used |
| * |
| * @param memoryLimit memory usage limit in kibibytes (KiB) |
| * or <code>-1</code> to impose no |
| * memory usage limit |
| * |
| * @param verifyCheck if <code>true</code>, the integrity checks |
| * will be verified; this should almost never |
| * be set to <code>false</code> |
| * |
| * @param arrayCache cache to be used for allocating large arrays |
| * |
| * @throws XZFormatException |
| * input is not in the XZ format |
| * |
| * @throws CorruptedInputException |
| * XZ data is corrupt or truncated |
| * |
| * @throws UnsupportedOptionsException |
| * XZ headers seem valid but they specify |
| * options not supported by this implementation |
| * |
| * @throws MemoryLimitException |
| * decoded XZ Indexes would need more memory |
| * than allowed by the memory usage limit |
| * |
| * @throws EOFException |
| * less than 6 bytes of input was available |
| * from <code>in</code>, or (unlikely) the size |
| * of the underlying stream got smaller while |
| * this was reading from it |
| * |
| * @throws IOException may be thrown by <code>in</code> |
| * |
| * @since 1.7 |
| */ |
| public SeekableXZInputStream(SeekableInputStream in, int memoryLimit, |
| boolean verifyCheck, ArrayCache arrayCache) |
| throws IOException { |
| this.arrayCache = arrayCache; |
| this.verifyCheck = verifyCheck; |
| this.in = in; |
| DataInputStream inData = new DataInputStream(in); |
| |
| // Check the magic bytes in the beginning of the file. |
| { |
| in.seek(0); |
| byte[] buf = new byte[XZ.HEADER_MAGIC.length]; |
| inData.readFully(buf); |
| if (!Arrays.equals(buf, XZ.HEADER_MAGIC)) |
| throw new XZFormatException(); |
| } |
| |
| // Get the file size and verify that it is a multiple of 4 bytes. |
| long pos = in.length(); |
| if ((pos & 3) != 0) |
| throw new CorruptedInputException( |
| "XZ file size is not a multiple of 4 bytes"); |
| |
| // Parse the headers starting from the end of the file. |
| byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE]; |
| long streamPadding = 0; |
| |
| while (pos > 0) { |
| if (pos < DecoderUtil.STREAM_HEADER_SIZE) |
| throw new CorruptedInputException(); |
| |
| // Read the potential Stream Footer. |
| in.seek(pos - DecoderUtil.STREAM_HEADER_SIZE); |
| inData.readFully(buf); |
| |
| // Skip Stream Padding four bytes at a time. |
| // Skipping more at once would be faster, |
| // but usually there isn't much Stream Padding. |
| if (buf[8] == 0x00 && buf[9] == 0x00 && buf[10] == 0x00 |
| && buf[11] == 0x00) { |
| streamPadding += 4; |
| pos -= 4; |
| continue; |
| } |
| |
| // It's not Stream Padding. Update pos. |
| pos -= DecoderUtil.STREAM_HEADER_SIZE; |
| |
| // Decode the Stream Footer and check if Backward Size |
| // looks reasonable. |
| StreamFlags streamFooter = DecoderUtil.decodeStreamFooter(buf); |
| if (streamFooter.backwardSize >= pos) |
| throw new CorruptedInputException( |
| "Backward Size in XZ Stream Footer is too big"); |
| |
| // Check that the Check ID is supported. Store it in case this |
| // is the first Stream in the file. |
| check = Check.getInstance(streamFooter.checkType); |
| |
| // Remember which Check IDs have been seen. |
| checkTypes |= 1 << streamFooter.checkType; |
| |
| // Seek to the beginning of the Index. |
| in.seek(pos - streamFooter.backwardSize); |
| |
| // Decode the Index field. |
| IndexDecoder index; |
| try { |
| index = new IndexDecoder(in, streamFooter, streamPadding, |
| memoryLimit); |
| } catch (MemoryLimitException e) { |
| // IndexDecoder doesn't know how much memory we had |
| // already needed so we need to recreate the exception. |
| assert memoryLimit >= 0; |
| throw new MemoryLimitException( |
| e.getMemoryNeeded() + indexMemoryUsage, |
| memoryLimit + indexMemoryUsage); |
| } |
| |
| // Update the memory usage and limit counters. |
| indexMemoryUsage += index.getMemoryUsage(); |
| if (memoryLimit >= 0) { |
| memoryLimit -= index.getMemoryUsage(); |
| assert memoryLimit >= 0; |
| } |
| |
| // Remember the uncompressed size of the largest Block. |
| if (largestBlockSize < index.getLargestBlockSize()) |
| largestBlockSize = index.getLargestBlockSize(); |
| |
| // Calculate the offset to the beginning of this XZ Stream and |
| // check that it looks sane. |
| long off = index.getStreamSize() - DecoderUtil.STREAM_HEADER_SIZE; |
| if (pos < off) |
| throw new CorruptedInputException("XZ Index indicates " |
| + "too big compressed size for the XZ Stream"); |
| |
| // Seek to the beginning of this Stream. |
| pos -= off; |
| in.seek(pos); |
| |
| // Decode the Stream Header. |
| inData.readFully(buf); |
| StreamFlags streamHeader = DecoderUtil.decodeStreamHeader(buf); |
| |
| // Verify that the Stream Header matches the Stream Footer. |
| if (!DecoderUtil.areStreamFlagsEqual(streamHeader, streamFooter)) |
| throw new CorruptedInputException( |
| "XZ Stream Footer does not match Stream Header"); |
| |
| // Update the total uncompressed size of the file and check that |
| // it doesn't overflow. |
| uncompressedSize += index.getUncompressedSize(); |
| if (uncompressedSize < 0) |
| throw new UnsupportedOptionsException("XZ file is too big"); |
| |
| // Update the Block count and check that it fits into an int. |
| blockCount += index.getRecordCount(); |
| if (blockCount < 0) |
| throw new UnsupportedOptionsException( |
| "XZ file has over " + Integer.MAX_VALUE + " Blocks"); |
| |
| // Add this Stream to the list of Streams. |
| streams.add(index); |
| |
| // Reset to be ready to parse the next Stream. |
| streamPadding = 0; |
| } |
| |
| assert pos == 0; |
| |
| // Save it now that indexMemoryUsage has been subtracted from it. |
| this.memoryLimit = memoryLimit; |
| |
| // Store the relative offsets of the Streams. This way we don't |
| // need to recalculate them in this class when seeking; the |
| // IndexDecoder instances will handle them. |
| IndexDecoder prev = streams.get(streams.size() - 1); |
| for (int i = streams.size() - 2; i >= 0; --i) { |
| IndexDecoder cur = streams.get(i); |
| cur.setOffsets(prev); |
| prev = cur; |
| } |
| |
| // Initialize curBlockInfo to point to the first Stream. |
| // The blockNumber will be left to -1 so that .hasNext() |
| // and .setNext() work to get the first Block when starting |
| // to decompress from the beginning of the file. |
| IndexDecoder first = streams.get(streams.size() - 1); |
| curBlockInfo = new BlockInfo(first); |
| |
| // queriedBlockInfo needs to be allocated too. The Stream used for |
| // initialization doesn't matter though. |
| queriedBlockInfo = new BlockInfo(first); |
| } |
| |
| /** |
| * Returns a bitmask of the integrity check types (Check IDs) that are |
| * used in the .xz file. More than one bit can be set only if the file |
| * consists of multiple concatenated XZ Streams. |
| * <p> |
| * Bit <code>n</code> is set when Check ID <code>n</code> is present. |
| * For example, if CRC64 and SHA-256 were used, the return value is |
| * {@code (1 << XZ.CHECK_CRC64) | (1 << XZ.CHECK_SHA256)}. |
| * |
| * @return bitmask of the Check IDs seen in the file |
| */ |
| public int getCheckTypes() { |
| return this.checkTypes; |
| } |
| |
| /** |
| * Returns the amount of memory in kibibytes (KiB) used by |
| * the data structures needed to locate the XZ Blocks. |
| * This is usually useless information, but since it is calculated |
| * for the memory usage limit anyway, it is nice to make it available too. |
| * |
| * @return memory usage of the decoded Indexes in KiB |
| */ |
| public int getIndexMemoryUsage() { |
| return this.indexMemoryUsage; |
| } |
| |
| /** |
| * Returns the uncompressed size of the largest XZ Block in bytes. |
| * This can be used to check that the file doesn't contain huge |
| * XZ Blocks, which could make seeking to arbitrary offsets very slow. |
| * Huge Blocks don't automatically make seeking slow, though: for |
| * example, seeking to the beginning of any Block is always fast. |
| * |
| * @return uncompressed size of the largest Block in bytes |
| */ |
| public long getLargestBlockSize() { |
| return this.largestBlockSize; |
| } |
| |
| /** |
| * Returns the number of Streams in the .xz file. This is the number of |
| * Indexes that the constructor decoded. |
| * |
| * @return number of XZ Streams |
| * |
| * @since 1.3 |
| */ |
| public int getStreamCount() { |
| final int streamCount = streams.size(); |
| return streamCount; |
| } |
| |
| /** |
| * Returns the number of Blocks in the .xz file, counting the Blocks |
| * of all Streams. |
| * |
| * @return number of XZ Blocks |
| * |
| * @since 1.3 |
| */ |
| public int getBlockCount() { |
| return this.blockCount; |
| } |
| |
| /** |
| * Returns the uncompressed start position of the given Block. |
| * |
| * @param blockNumber number of the Block to query |
| * |
| * @return uncompressed offset where the Block starts |
| * |
| * @throws IndexOutOfBoundsException if |
| * {@code blockNumber < 0} or |
| * {@code blockNumber >= getBlockCount()}. |
| * |
| * @since 1.3 |
| */ |
| public long getBlockPos(int blockNumber) { |
| final BlockInfo info = queriedBlockInfo; |
| locateBlockByNumber(info, blockNumber); |
| return info.uncompressedOffset; |
| } |
| |
| /** |
| * Returns the uncompressed size of the given Block. |
| * |
| * @param blockNumber number of the Block to query |
| * |
| * @return uncompressed size of the Block |
| * |
| * @throws IndexOutOfBoundsException if |
| * {@code blockNumber < 0} or |
| * {@code blockNumber >= getBlockCount()}. |
| * |
| * @since 1.3 |
| */ |
| public long getBlockSize(int blockNumber) { |
| final BlockInfo info = queriedBlockInfo; |
| locateBlockByNumber(info, blockNumber); |
| return info.uncompressedSize; |
| } |
| |
| /** |
| * Returns the position where the given compressed Block starts in |
| * the underlying .xz file. |
| * This information is rarely useful to the users of this class. |
| * |
| * @param blockNumber number of the Block to query |
| * |
| * @return compressed offset of the Block in the underlying file |
| * |
| * @throws IndexOutOfBoundsException if |
| * {@code blockNumber < 0} or |
| * {@code blockNumber >= getBlockCount()}. |
| * |
| * @since 1.3 |
| */ |
| public long getBlockCompPos(int blockNumber) { |
| final BlockInfo info = queriedBlockInfo; |
| locateBlockByNumber(info, blockNumber); |
| return info.compressedOffset; |
| } |
| |
| /** |
| * Returns the compressed size of the given Block, that is, the Block's |
| * unpadded size rounded up to the next multiple of four bytes. |
| * This together with the uncompressed size can be used to calculate |
| * the compression ratio of the specific Block. |
| * |
| * @param blockNumber number of the Block to query |
| * |
| * @return compressed size of the Block including its padding |
| * |
| * @throws IndexOutOfBoundsException if |
| * {@code blockNumber < 0} or |
| * {@code blockNumber >= getBlockCount()}. |
| * |
| * @since 1.3 |
| */ |
| public long getBlockCompSize(int blockNumber) { |
| final BlockInfo info = queriedBlockInfo; |
| locateBlockByNumber(info, blockNumber); |
| |
| // Adding 3 and clearing the two lowest bits rounds up to |
| // a multiple of four. |
| final long paddedSize = (info.unpaddedSize + 3) & ~3; |
| return paddedSize; |
| } |
| |
| /** |
| * Returns the integrity check type (Check ID) of the given Block. |
| * |
| * @param blockNumber number of the Block to query |
| * |
| * @return Check ID of the Block |
| * |
| * @throws IndexOutOfBoundsException if |
| * {@code blockNumber < 0} or |
| * {@code blockNumber >= getBlockCount()}. |
| * |
| * @see #getCheckTypes() |
| * |
| * @since 1.3 |
| */ |
| public int getBlockCheckType(int blockNumber) { |
| final BlockInfo info = queriedBlockInfo; |
| locateBlockByNumber(info, blockNumber); |
| return info.getCheckType(); |
| } |
| |
| /** |
| * Returns the number of the Block that contains the byte at the given |
| * uncompressed position. |
| * |
| * @param pos uncompressed position to look up |
| * |
| * @return number of the Block containing <code>pos</code> |
| * |
| * @throws IndexOutOfBoundsException if |
| * {@code pos < 0} or |
| * {@code pos >= length()}. |
| * |
| * @since 1.3 |
| */ |
| public int getBlockNumber(long pos) { |
| final BlockInfo info = queriedBlockInfo; |
| locateBlockByPos(info, pos); |
| return info.blockNumber; |
| } |
| |
| /** |
| * Decompresses the next byte from this input stream. |
| * <p> |
| * This delegates to {@link #read(byte[], int, int)} using a one-byte |
| * internal buffer. A byte is returned only when exactly one byte was |
| * decompressed; any other result is reported as the end of the stream, |
| * so a stale byte left in the buffer by an earlier call is never |
| * returned. |
| * |
| * @return the next decompressed byte, or <code>-1</code> |
| * to indicate the end of the compressed stream |
| * |
| * @throws CorruptedInputException |
| * @throws UnsupportedOptionsException |
| * @throws MemoryLimitException |
| * |
| * @throws XZIOException if the stream has been closed |
| * |
| * @throws IOException may be thrown by <code>in</code> |
| */ |
| public int read() throws IOException { |
| // Anything other than "one byte read" means that tempBuf[0] was not |
| // refreshed by this call and must not be handed to the caller. |
| if (read(tempBuf, 0, 1) != 1) |
| return -1; |
| |
| return tempBuf[0] & 0xFF; |
| } |
| |
| /** |
| * Decompresses into an array of bytes. |
| * <p> |
| * If <code>len</code> is zero, no bytes are read and <code>0</code> |
| * is returned. Otherwise this will try to decompress <code>len</code> |
| * bytes of uncompressed data. Less than <code>len</code> bytes may |
| * be read only in the following situations: |
| * <ul> |
| * <li>The end of the compressed data was reached successfully.</li> |
| * <li>An error is detected after at least one but less than |
| * <code>len</code> bytes have already been successfully |
| * decompressed. The next call with non-zero <code>len</code> |
| * will immediately throw the pending exception.</li> |
| * <li>An exception is thrown.</li> |
| * </ul> |
| * <p> |
| * With a non-zero <code>len</code> this never returns <code>0</code>: |
| * if the end of the file is found before any byte has been decompressed |
| * by this call, <code>-1</code> is returned. |
| * |
| * @param buf target buffer for uncompressed data |
| * @param off start offset in <code>buf</code> |
| * @param len maximum number of uncompressed bytes to read |
| * |
| * @return number of bytes read, or <code>-1</code> to indicate |
| * the end of the compressed stream |
| * |
| * @throws IndexOutOfBoundsException |
| * if <code>off</code> or <code>len</code> is |
| * negative or <code>off + len</code> exceeds |
| * <code>buf.length</code> |
| * |
| * @throws CorruptedInputException |
| * @throws UnsupportedOptionsException |
| * @throws MemoryLimitException |
| * |
| * @throws XZIOException if the stream has been closed |
| * |
| * @throws IOException may be thrown by <code>in</code> |
| */ |
| public int read(byte[] buf, int off, int len) throws IOException { |
| if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) |
| throw new IndexOutOfBoundsException(); |
| |
| if (len == 0) |
| return 0; |
| |
| if (in == null) |
| throw new XZIOException("Stream closed"); |
| |
| if (exception != null) |
| throw exception; |
| |
| int size = 0; |
| |
| try { |
| if (seekNeeded) |
| seek(); |
| |
| if (endReached) |
| return -1; |
| |
| while (len > 0) { |
| if (blockDecoder == null) { |
| seek(); |
| if (endReached) { |
| // The end of the file was found while looking for |
| // the next Block. If nothing was decompressed by |
| // this call, report the end of the stream instead |
| // of returning 0 for a non-zero len. |
| return size > 0 ? size : -1; |
| } |
| } |
| |
| int ret = blockDecoder.read(buf, off, len); |
| |
| if (ret > 0) { |
| curPos += ret; |
| size += ret; |
| off += ret; |
| len -= ret; |
| } else if (ret == -1) { |
| // The current Block is finished. The next iteration |
| // will locate the next Block, if any. |
| blockDecoder = null; |
| } |
| } |
| } catch (IOException e) { |
| // We know that the file isn't simply truncated because we could |
| // parse the Indexes in the constructor. So convert EOFException |
| // to CorruptedInputException. |
| if (e instanceof EOFException) |
| e = new CorruptedInputException(); |
| |
| // Remember the error. If some data was already decompressed, |
| // return it now; the exception is thrown by the next call. |
| exception = e; |
| if (size == 0) |
| throw e; |
| } |
| |
| return size; |
| } |
| |
| /** |
| * Returns the number of uncompressed bytes that can be read |
| * without blocking. The value is returned with an assumption |
| * that the compressed input data will be valid. If the compressed |
| * data is corrupt, <code>CorruptedInputException</code> may get |
| * thrown before the number of bytes claimed to be available have |
| * been read from this input stream. |
| * |
| * @return the number of uncompressed bytes that can be read |
| * without blocking |
| */ |
| public int available() throws IOException { |
| if (in == null) |
| throw new XZIOException("Stream closed"); |
| |
| if (exception != null) |
| throw exception; |
| |
| if (endReached || seekNeeded || blockDecoder == null) |
| return 0; |
| |
| return blockDecoder.available(); |
| } |
| |
| /** |
| * Closes this stream and the underlying input stream by calling |
| * <code>in.close()</code>. Nothing is done if the stream has already |
| * been closed. |
| * <p> |
| * Calling this is the same as calling <code>close(true)</code>. |
| * |
| * @throws IOException if thrown by <code>in.close()</code> |
| */ |
| public void close() throws IOException { |
| this.close(true); |
| } |
| |
| /** |
| * Closes the stream and optionally calls <code>in.close()</code>. |
| * If the stream was already closed, this does nothing. |
| * If <code>close(false)</code> has been called, a further |
| * call of <code>close(true)</code> does nothing (it doesn't call |
| * <code>in.close()</code>). |
| * <p> |
| * If you don't want to close the underlying <code>InputStream</code>, |
| * there is usually no need to worry about closing this stream either; |
| * it's fine to do nothing and let the garbage collector handle it. |
| * However, if you are using {@link ArrayCache}, <code>close(false)</code> |
| * can be useful to put the allocated arrays back to the cache without |
| * closing the underlying <code>InputStream</code>. |
| * <p> |
| * Note that if you successfully reach the end of the stream |
| * (<code>read</code> returns <code>-1</code>), the arrays are |
| * automatically put back to the cache by that <code>read</code> call. In |
| * this situation <code>close(false)</code> is redundant (but harmless). |
| * <p> |
| * This stream is marked as closed, and <code>in.close()</code> is |
| * attempted when requested, even if closing the current Block decoder |
| * fails. |
| * |
| * @param closeInput if <code>true</code>, <code>in.close()</code> |
| * is called too |
| * |
| * @throws IOException if thrown by <code>in.close()</code> |
| * |
| * @since 1.7 |
| */ |
| public void close(boolean closeInput) throws IOException { |
| if (in == null) |
| return; |
| |
| try { |
| // Release the Block decoder first. The reference is dropped |
| // even if closing it fails so that it is never closed twice. |
| if (blockDecoder != null) { |
| try { |
| blockDecoder.close(); |
| } finally { |
| blockDecoder = null; |
| } |
| } |
| } finally { |
| // Always mark this stream as closed, and close the underlying |
| // stream if requested, no matter what happened above. |
| try { |
| if (closeInput) |
| in.close(); |
| } finally { |
| in = null; |
| } |
| } |
| } |
| |
| /** |
| * Returns the uncompressed size of this input stream. If the file |
| * contains multiple XZ Streams, this is the total uncompressed size |
| * of all of them. |
| * |
| * @return total uncompressed size in bytes |
| */ |
| public long length() { |
| return this.uncompressedSize; |
| } |
| |
| /** |
| * Returns the current uncompressed position in this input stream. |
| * If <code>seek(long)</code> has been called but the seek hasn't been |
| * carried out yet, the pending target position is returned. |
| * |
| * @return current uncompressed position |
| * |
| * @throws XZIOException if the stream has been closed |
| */ |
| public long position() throws IOException { |
| if (in == null) |
| throw new XZIOException("Stream closed"); |
| |
| // A pending seek defines the position until it is performed. |
| if (seekNeeded) |
| return seekPos; |
| |
| return curPos; |
| } |
| |
| /** |
| * Seeks to the specified absolute uncompressed position in the stream. |
| * Only the new position is recorded here, so this method itself is |
| * always very fast. The actual seek is done when <code>read</code> is |
| * called to read at least one byte. |
| * <p> |
| * Seeking past the end of the stream is possible. In that case |
| * <code>read</code> will return <code>-1</code> to indicate |
| * the end of the stream. |
| * |
| * @param pos new uncompressed read position |
| * |
| * @throws XZIOException |
| * if <code>pos</code> is negative, or |
| * if stream has been closed |
| */ |
| public void seek(long pos) throws IOException { |
| if (in == null) |
| throw new XZIOException("Stream closed"); |
| |
| if (pos < 0) |
| throw new XZIOException("Negative seek position: " + pos); |
| |
| // Just record the request; read() performs the seek lazily. |
| this.seekNeeded = true; |
| this.seekPos = pos; |
| } |
| |
| /** |
| * Seeks to the beginning of the given XZ Block. |
| * |
| * @throws XZIOException |
| * if <code>blockNumber < 0</code> or |
| * <code>blockNumber >= getBlockCount()</code>, |
| * or if stream has been closed |
| * |
| * @since 1.3 |
| */ |
| public void seekToBlock(int blockNumber) throws IOException { |
| if (in == null) |
| throw new XZIOException("Stream closed"); |
| |
| if (blockNumber < 0 || blockNumber >= blockCount) |
| throw new XZIOException("Invalid XZ Block number: " + blockNumber); |
| |
| // This is a bit silly implementation. Here we locate the uncompressed |
| // offset of the specified Block, then when doing the actual seek in |
| // seek(), we need to find the Block number based on seekPos. |
| seekPos = getBlockPos(blockNumber); |
| seekNeeded = true; |
| } |
| |
| /** |
| * Does the actual seeking. This is also called when <code>read</code> |
| * needs a new Block to decode. |
| */ |
| private void seek() throws IOException { |
| // If seek(long) wasn't called, we simply need to get the next Block |
| // from the same Stream. If there are no more Blocks in this Stream, |
| // then we behave as if seek(long) had been called. |
| if (!seekNeeded) { |
| if (curBlockInfo.hasNext()) { |
| curBlockInfo.setNext(); |
| initBlockDecoder(); |
| return; |
| } |
| |
| seekPos = curPos; |
| } |
| |
| seekNeeded = false; |
| |
| // Check if we are seeking to or past the end of the file. |
| if (seekPos >= uncompressedSize) { |
| curPos = seekPos; |
| |
| if (blockDecoder != null) { |
| blockDecoder.close(); |
| blockDecoder = null; |
| } |
| |
| endReached = true; |
| return; |
| } |
| |
| endReached = false; |
| |
| // Locate the Block that contains the uncompressed target position. |
| locateBlockByPos(curBlockInfo, seekPos); |
| |
| // Seek in the underlying stream and create a new Block decoder |
| // only if really needed. We can skip it if the current position |
| // is already in the correct Block and the target position hasn't |
| // been decompressed yet. |
| // |
| // NOTE: If curPos points to the beginning of this Block, it's |
| // because it was left there after decompressing an earlier Block. |
| // In that case, decoding of the current Block hasn't been started |
| // yet. (Decoding of a Block won't be started until at least one |
| // byte will also be read from it.) |
| if (!(curPos > curBlockInfo.uncompressedOffset && curPos <= seekPos)) { |
| // Seek to the beginning of the Block. |
| in.seek(curBlockInfo.compressedOffset); |
| |
| // Since it is possible that this Block is from a different |
| // Stream than the previous Block, initialize a new Check. |
| check = Check.getInstance(curBlockInfo.getCheckType()); |
| |
| // Create a new Block decoder. |
| initBlockDecoder(); |
| curPos = curBlockInfo.uncompressedOffset; |
| } |
| |
| // If the target wasn't at a Block boundary, decompress and throw |
| // away data to reach the target position. |
| if (seekPos > curPos) { |
| // NOTE: The "if" below is there just in case. In this situation, |
| // blockDecoder.skip will always skip the requested amount |
| // or throw an exception. |
| long skipAmount = seekPos - curPos; |
| if (blockDecoder.skip(skipAmount) != skipAmount) |
| throw new CorruptedInputException(); |
| |
| curPos = seekPos; |
| } |
| } |
| |
| /** |
| * Locates the Block that contains the given uncompressed position. |
| */ |
| private void locateBlockByPos(BlockInfo info, long pos) { |
| if (pos < 0 || pos >= uncompressedSize) |
| throw new IndexOutOfBoundsException( |
| "Invalid uncompressed position: " + pos); |
| |
| // Locate the Stream that contains the target position. |
| IndexDecoder index; |
| for (int i = 0; ; ++i) { |
| index = streams.get(i); |
| if (index.hasUncompressedOffset(pos)) |
| break; |
| } |
| |
| // Locate the Block from the Stream that contains the target position. |
| index.locateBlock(info, pos); |
| |
| assert (info.compressedOffset & 3) == 0; |
| assert info.uncompressedSize > 0; |
| assert pos >= info.uncompressedOffset; |
| assert pos < info.uncompressedOffset + info.uncompressedSize; |
| } |
| |
| /** |
| * Locates the given Block and stores information about it |
| * to <code>info</code>. |
| */ |
| private void locateBlockByNumber(BlockInfo info, int blockNumber) { |
| // Validate. |
| if (blockNumber < 0 || blockNumber >= blockCount) |
| throw new IndexOutOfBoundsException( |
| "Invalid XZ Block number: " + blockNumber); |
| |
| // Skip the search if info already points to the correct Block. |
| if (info.blockNumber == blockNumber) |
| return; |
| |
| // Search the Stream that contains the given Block and then |
| // search the Block from that Stream. |
| for (int i = 0; ; ++i) { |
| IndexDecoder index = streams.get(i); |
| if (index.hasRecord(blockNumber)) { |
| index.setBlockInfo(info, blockNumber); |
| return; |
| } |
| } |
| } |
| |
| /** |
| * Initializes a new BlockInputStream. This is a helper function for |
| * <code>seek()</code>. |
| */ |
| private void initBlockDecoder() throws IOException { |
| try { |
| // Set it to null first so that GC can collect it if memory |
| // runs tight when initializing a new BlockInputStream. |
| if (blockDecoder != null) { |
| blockDecoder.close(); |
| blockDecoder = null; |
| } |
| |
| blockDecoder = new BlockInputStream( |
| in, check, verifyCheck, memoryLimit, |
| curBlockInfo.unpaddedSize, curBlockInfo.uncompressedSize, |
| arrayCache); |
| } catch (MemoryLimitException e) { |
| // BlockInputStream doesn't know how much memory we had |
| // already needed so we need to recreate the exception. |
| assert memoryLimit >= 0; |
| throw new MemoryLimitException( |
| e.getMemoryNeeded() + indexMemoryUsage, |
| memoryLimit + indexMemoryUsage); |
| } catch (IndexIndicatorException e) { |
| // It cannot be Index so the file must be corrupt. |
| throw new CorruptedInputException(); |
| } |
| } |
| } |