src/org/tukaani/xz/LZMA2InputStream.java - platform/external/xz-java - Git at Google

 /*
  * LZMA2InputStream
  *
  * Authors: Lasse Collin <lasse.collin@tukaani.org>
  *          Igor Pavlov <http://7-zip.org/>
  *
  * This file has been put into the public domain.
  * You can do whatever you want with this file.
  */

 package org.tukaani.xz;

 import java.io.InputStream;
 import java.io.DataInputStream;
 import java.io.IOException;
 import org.tukaani.xz.lz.LZDecoder;
 import org.tukaani.xz.rangecoder.RangeDecoderFromBuffer;
 import org.tukaani.xz.lzma.LZMADecoder;

 /**
  * Decompresses a raw LZMA2 stream (no XZ headers).
  */
 public class LZMA2InputStream extends InputStream {
     /**
      * Smallest valid LZMA2 dictionary size.
      * <p>
      * Very tiny dictionaries would be a performance problem, so
      * the minimum is 4 KiB.
      */
     public static final int DICT_SIZE_MIN = 4096;

     /**
      * Largest dictionary size supported by this implementation.
      * <p>
      * The LZMA2 algorithm allows dictionaries up to one byte less than 4 GiB.
      * This implementation supports only 16 bytes less than 2 GiB for raw
      * LZMA2 streams, and for .xz files the maximum is 1.5 GiB. This
      * limitation is due to Java using signed 32-bit integers for array
      * indexing. The limitation shouldn't matter much in practice since so
      * huge dictionaries are not normally used.
      */
     public static final int DICT_SIZE_MAX = Integer.MAX_VALUE & ~15;

     /**
      * Size given to the range decoder for its input buffer. A chunk header
      * stores the compressed size as a 16-bit value plus one, so a single
      * LZMA chunk never needs more than this many bytes.
      */
     private static final int COMPRESSED_SIZE_MAX = 1 << 16;

     /** Cache handed to the decoders and used when returning their arrays. */
     private final ArrayCache arrayCache;

     /** Source of compressed data; <code>null</code> once closed. */
     private DataInputStream in;

     /** Dictionary/output window; <code>null</code> after its arrays have been returned to the cache. */
     private LZDecoder lz;

     /** Range decoder; <code>null</code> after its arrays have been returned to the cache. */
     private RangeDecoderFromBuffer rc;

     /** Created by <code>decodeProps()</code> when a chunk carries new properties. */
     private LZMADecoder lzma;

     /** Bytes of the current chunk that have not yet been returned to the caller. */
     private int uncompressedSize = 0;

     /** Whether the current chunk is LZMA-compressed (as opposed to stored uncompressed). */
     private boolean isLZMAChunk = false;

     /** True until a chunk resets the dictionary or a non-empty preset dictionary is given. */
     private boolean needDictReset = true;

     /** True until a chunk that carries a properties byte has been seen since the last dictionary reset. */
     private boolean needProps = true;

     /** Set once the end-of-stream control byte (0x00) has been read. */
     private boolean endReached = false;

     /** First IOException thrown while reading; rethrown by later calls. */
     private IOException exception = null;

     /** Scratch buffer used by the single-byte <code>read()</code>. */
     private final byte[] tempBuf = new byte[1];

     /**
      * Gets approximate decompressor memory requirements as kibibytes for
      * the given dictionary size.
      *
      * @param       dictSize    LZMA2 dictionary size as bytes, must be
      *                          in the range [<code>DICT_SIZE_MIN</code>,
      *                          <code>DICT_SIZE_MAX</code>]
      *
      * @return      approximate memory requirements as kibibytes (KiB)
      *
      * @throws      IllegalArgumentException
      *                          if <code>dictSize</code> is out of range
      */
     public static int getMemoryUsage(int dictSize) {
         // The base state is around 30-40 KiB (probabilities etc.),
         // range decoder needs COMPRESSED_SIZE_MAX bytes for buffering,
         // and LZ decoder needs a dictionary buffer.
         return 40 + COMPRESSED_SIZE_MAX / 1024 + getDictSize(dictSize) / 1024;
     }

     /**
      * Validates the dictionary size and rounds it up to a multiple of 16.
      *
      * @param       dictSize    requested dictionary size in bytes
      *
      * @return      <code>dictSize</code> rounded up to a multiple of 16
      *
      * @throws      IllegalArgumentException
      *                          if <code>dictSize</code> is not in the range
      *                          [<code>DICT_SIZE_MIN</code>,
      *                          <code>DICT_SIZE_MAX</code>]
      */
     private static int getDictSize(int dictSize) {
         if (dictSize < DICT_SIZE_MIN || dictSize > DICT_SIZE_MAX)
             throw new IllegalArgumentException(
                     "Unsupported dictionary size " + dictSize);

         // Round dictionary size upward to a multiple of 16. This way LZMA
         // can use LZDecoder.getPos() for calculating LZMA's posMask.
         // Note that this check is needed only for raw LZMA2 streams; it is
         // redundant with .xz. The addition cannot overflow because
         // DICT_SIZE_MAX already has its four lowest bits cleared.
         return (dictSize + 15) & ~15;
     }

     /**
      * Creates a new input stream that decompresses raw LZMA2 data
      * from <code>in</code>.
      * <p>
      * The caller needs to know the dictionary size used when compressing;
      * the dictionary size isn't stored as part of a raw LZMA2 stream.
      * <p>
      * Specifying a too small dictionary size will prevent decompressing
      * the stream. Specifying a too big dictionary is waste of memory but
      * decompression will work.
      * <p>
      * There is no need to specify a dictionary bigger than
      * the uncompressed size of the data even if a bigger dictionary
      * was used when compressing. If you know the uncompressed size
      * of the data, this might allow saving some memory.
      *
      * @param       in          input stream from which LZMA2-compressed
      *                          data is read
      *
      * @param       dictSize    LZMA2 dictionary size as bytes, must be
      *                          in the range [<code>DICT_SIZE_MIN</code>,
      *                          <code>DICT_SIZE_MAX</code>]
      *
      * @throws      NullPointerException
      *                          if <code>in</code> is <code>null</code>
      *
      * @throws      IllegalArgumentException
      *                          if <code>dictSize</code> is out of range
      */
     public LZMA2InputStream(InputStream in, int dictSize) {
         this(in, dictSize, null);
     }

     /**
      * Creates a new LZMA2 decompressor using a preset dictionary.
      * <p>
      * This is like <code>LZMA2InputStream(InputStream, int)</code> except
      * that the dictionary may be initialized using a preset dictionary.
      * If a preset dictionary was used when compressing the data, the
      * same preset dictionary must be provided when decompressing.
      *
      * @param       in          input stream from which LZMA2-compressed
      *                          data is read
      *
      * @param       dictSize    LZMA2 dictionary size as bytes, must be
      *                          in the range [<code>DICT_SIZE_MIN</code>,
      *                          <code>DICT_SIZE_MAX</code>]
      *
      * @param       presetDict  preset dictionary or <code>null</code>
      *                          to use no preset dictionary
      *
      * @throws      NullPointerException
      *                          if <code>in</code> is <code>null</code>
      *
      * @throws      IllegalArgumentException
      *                          if <code>dictSize</code> is out of range
      */
     public LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict) {
         this(in, dictSize, presetDict, ArrayCache.getDefaultCache());
     }

     /**
      * Creates a new LZMA2 decompressor using a preset dictionary
      * and array cache.
      * <p>
      * This is like <code>LZMA2InputStream(InputStream, int, byte[])</code>
      * except that this also takes the <code>arrayCache</code> argument.
      *
      * @param       in          input stream from which LZMA2-compressed
      *                          data is read
      *
      * @param       dictSize    LZMA2 dictionary size as bytes, must be
      *                          in the range [<code>DICT_SIZE_MIN</code>,
      *                          <code>DICT_SIZE_MAX</code>]
      *
      * @param       presetDict  preset dictionary or <code>null</code>
      *                          to use no preset dictionary
      *
      * @param       arrayCache  cache to be used for allocating large arrays
      *
      * @throws      NullPointerException
      *                          if <code>in</code> is <code>null</code>
      *
      * @throws      IllegalArgumentException
      *                          if <code>dictSize</code> is out of range
      *
      * @since 1.7
      */
     LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict,
                      ArrayCache arrayCache) {
         // Check for null explicitly because otherwise a null stream
         // would not be detected in this constructor.
         if (in == null)
             throw new NullPointerException();

         // Validate the dictionary size before any decoder is created so
         // that an IllegalArgumentException cannot leave behind a range
         // decoder that has already obtained its buffer (presumably from
         // the array cache) and will never hand it back.
         int roundedDictSize = getDictSize(dictSize);

         this.arrayCache = arrayCache;
         this.in = new DataInputStream(in);
         this.rc = new RangeDecoderFromBuffer(COMPRESSED_SIZE_MAX, arrayCache);
         this.lz = new LZDecoder(roundedDictSize, presetDict, arrayCache);

         // With a non-empty preset dictionary the first chunk is allowed
         // to continue from it instead of resetting the dictionary.
         if (presetDict != null && presetDict.length > 0)
             needDictReset = false;
     }

     /**
      * Decompresses the next byte from this input stream.
      * <p>
      * Reading lots of data with <code>read()</code> from this input stream
      * may be inefficient. Wrap it in <code>java.io.BufferedInputStream</code>
      * if you need to read lots of data one byte at a time.
      *
      * @return      the next decompressed byte, or <code>-1</code>
      *              to indicate the end of the compressed stream
      *
      * @throws      CorruptedInputException
      *
      * @throws      XZIOException if the stream has been closed
      *
      * @throws      java.io.EOFException
      *                          compressed input is truncated or corrupt
      *
      * @throws      IOException may be thrown by <code>in</code>
      */
     @Override
     public int read() throws IOException {
         return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
     }

     /**
      * Decompresses into an array of bytes.
      * <p>
      * If <code>len</code> is zero, no bytes are read and <code>0</code>
      * is returned. Otherwise this will block until <code>len</code>
      * bytes have been decompressed, the end of the LZMA2 stream is reached,
      * or an exception is thrown.
      * <p>
      * Once an <code>IOException</code> has been thrown, the same exception
      * is thrown again by every later call.
      *
      * @param       buf         target buffer for uncompressed data
      * @param       off         start offset in <code>buf</code>
      * @param       len         maximum number of uncompressed bytes to read
      *
      * @return      number of bytes read, or <code>-1</code> to indicate
      *              the end of the compressed stream
      *
      * @throws      IndexOutOfBoundsException
      *                          if <code>off</code> and <code>len</code> do
      *                          not describe a valid range of <code>buf</code>
      *
      * @throws      CorruptedInputException
      *
      * @throws      XZIOException if the stream has been closed
      *
      * @throws      java.io.EOFException
      *                          compressed input is truncated or corrupt
      *
      * @throws      IOException may be thrown by <code>in</code>
      */
     @Override
     public int read(byte[] buf, int off, int len) throws IOException {
         // "off + len < 0" catches int overflow of the sum.
         if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
             throw new IndexOutOfBoundsException();

         if (len == 0)
             return 0;

         if (in == null)
             throw new XZIOException("Stream closed");

         if (exception != null)
             throw exception;

         if (endReached)
             return -1;

         try {
             int size = 0;

             while (len > 0) {
                 // The previous chunk is exhausted: parse the next header.
                 if (uncompressedSize == 0) {
                     decodeChunkHeader();
                     if (endReached)
                         return size == 0 ? -1 : size;
                 }

                 int copySizeMax = Math.min(uncompressedSize, len);

                 if (!isLZMAChunk) {
                     lz.copyUncompressed(in, copySizeMax);
                 } else {
                     lz.setLimit(copySizeMax);
                     lzma.decode();
                 }

                 int copiedSize = lz.flush(buf, off);
                 off += copiedSize;
                 len -= copiedSize;
                 size += copiedSize;
                 uncompressedSize -= copiedSize;

                 // When the chunk is used up, rc.isFinished() must hold and
                 // lz.hasPending() must not; otherwise the input is rejected.
                 if (uncompressedSize == 0
                         && (!rc.isFinished() || lz.hasPending()))
                     throw new CorruptedInputException();
             }

             return size;

         } catch (IOException e) {
             // Remember the failure so that later calls fail the same way.
             exception = e;
             throw e;
         }
     }

     /**
      * Reads and interprets the header of the next LZMA2 chunk.
      * <p>
      * The control byte is handled as follows:
      * <ul>
      * <li><code>0x00</code>: end of stream; the decoder arrays are returned
      *     to the cache and <code>endReached</code> is set.</li>
      * <li><code>0x01</code> or <code>&gt;= 0xE0</code>: the dictionary is
      *     reset and new properties are required before further LZMA
      *     data.</li>
      * <li><code>0x01</code>, <code>0x02</code>: uncompressed chunk whose size
      *     is a 16-bit value plus one.</li>
      * <li><code>0x03</code>-<code>0x7F</code>: invalid.</li>
      * <li><code>&gt;= 0x80</code>: LZMA chunk. The low five bits of the
      *     control byte are bits 16-20 of the uncompressed size, followed by
      *     16 more size bits (plus one) and a 16-bit compressed size (plus
      *     one). <code>&gt;= 0xC0</code> means a properties byte follows;
      *     <code>&gt;= 0xA0</code> without new properties resets the
      *     existing LZMA decoder.</li>
      * </ul>
      *
      * @throws      CorruptedInputException
      *                          if the header is invalid in the current state
      *
      * @throws      IOException may be thrown by <code>in</code>
      */
     private void decodeChunkHeader() throws IOException {
         int control = in.readUnsignedByte();

         if (control == 0x00) {
             endReached = true;
             putArraysToCache();
             return;
         }

         if (control >= 0xE0 || control == 0x01) {
             needProps = true;
             needDictReset = false;
             lz.reset();
         } else if (needDictReset) {
             // The first chunk must reset the dictionary.
             throw new CorruptedInputException();
         }

         if (control >= 0x80) {
             isLZMAChunk = true;

             uncompressedSize = (control & 0x1F) << 16;
             uncompressedSize += in.readUnsignedShort() + 1;

             int compressedSize = in.readUnsignedShort() + 1;

             if (control >= 0xC0) {
                 needProps = false;
                 decodeProps();

             } else if (needProps) {
                 // No usable LZMA decoder exists yet.
                 throw new CorruptedInputException();

             } else if (control >= 0xA0) {
                 lzma.reset();
             }

             rc.prepareInputBuffer(in, compressedSize);

         } else if (control > 0x02) {
             throw new CorruptedInputException();

         } else {
             isLZMAChunk = false;
             uncompressedSize = in.readUnsignedShort() + 1;
         }
     }

     /**
      * Reads the properties byte and creates a matching LZMA decoder.
      * <p>
      * The byte encodes <code>(pb * 5 + lp) * 9 + lc</code>. Values above
      * <code>(4 * 5 + 4) * 9 + 8</code> and combinations with
      * <code>lc + lp &gt; 4</code> are rejected.
      *
      * @throws      CorruptedInputException
      *                          if the properties byte is invalid
      *
      * @throws      IOException may be thrown by <code>in</code>
      */
     private void decodeProps() throws IOException {
         int props = in.readUnsignedByte();

         if (props > (4 * 5 + 4) * 9 + 8)
             throw new CorruptedInputException();

         int pb = props / (9 * 5);
         props -= pb * 9 * 5;
         int lp = props / 9;
         int lc = props - lp * 9;

         if (lc + lp > 4)
             throw new CorruptedInputException();

         lzma = new LZMADecoder(lz, rc, lc, lp, pb);
     }

     /**
      * Returns the number of uncompressed bytes that can be read
      * without blocking. The value is returned with an assumption
      * that the compressed input data will be valid. If the compressed
      * data is corrupt, <code>CorruptedInputException</code> may get
      * thrown before the number of bytes claimed to be available have
      * been read from this input stream.
      * <p>
      * In LZMA2InputStream, the return value will be non-zero when the
      * decompressor is in the middle of an LZMA2 chunk. The return value
      * will then be the number of uncompressed bytes remaining from that
      * chunk. The return value can also be non-zero in the middle of
      * an uncompressed chunk, but then the return value depends also on
      * the <code>available()</code> method of the underlying InputStream.
      *
      * @return      the number of uncompressed bytes that can be read
      *              without blocking
      *
      * @throws      XZIOException if the stream has been closed
      *
      * @throws      IOException if an earlier read failed, or if thrown
      *                          by <code>in.available()</code>
      */
     @Override
     public int available() throws IOException {
         if (in == null)
             throw new XZIOException("Stream closed");

         if (exception != null)
             throw exception;

         return isLZMAChunk ? uncompressedSize
                            : Math.min(uncompressedSize, in.available());
     }

     /**
      * Hands the arrays of the LZ decoder and the range decoder back to
      * the array cache and drops both decoders. Calling this again is
      * a no-op because <code>lz</code> is <code>null</code> afterwards.
      */
     private void putArraysToCache() {
         if (lz != null) {
             lz.putArraysToCache(arrayCache);
             lz = null;

             rc.putArraysToCache(arrayCache);
             rc = null;
         }
     }

     /**
      * Closes the stream and calls <code>in.close()</code>.
      * If the stream was already closed, this does nothing.
      *
      * @throws  IOException if thrown by <code>in.close()</code>
      */
     @Override
     public void close() throws IOException {
         if (in != null) {
             putArraysToCache();

             try {
                 in.close();
             } finally {
                 // Mark the stream closed even if in.close() failed.
                 in = null;
             }
         }
     }
 }
	/*
	* LZMA2InputStream
	*
	* Authors: Lasse Collin <lasse.collin@tukaani.org>
	* Igor Pavlov <http://7-zip.org/>
	*
	* This file has been put into the public domain.
	* You can do whatever you want with this file.
	*/

	package org.tukaani.xz;

	import java.io.InputStream;
	import java.io.DataInputStream;
	import java.io.IOException;
	import org.tukaani.xz.lz.LZDecoder;
	import org.tukaani.xz.rangecoder.RangeDecoderFromBuffer;
	import org.tukaani.xz.lzma.LZMADecoder;

	/**
	* Decompresses a raw LZMA2 stream (no XZ headers).
	*/
	public class LZMA2InputStream extends InputStream {
	/**
	* Smallest valid LZMA2 dictionary size.
	* <p>
	* Very tiny dictionaries would be a performance problem, so
	* the minimum is 4 KiB.
	*/
	public static final int DICT_SIZE_MIN = 4096;

	/**
	* Largest dictionary size supported by this implementation.
	* <p>
	* The LZMA2 algorithm allows dictionaries up to one byte less than 4 GiB.
	* This implementation supports only 16 bytes less than 2 GiB for raw
	* LZMA2 streams, and for .xz files the maximum is 1.5 GiB. This
	* limitation is due to Java using signed 32-bit integers for array
	* indexing. The limitation shouldn't matter much in practice since so
	* huge dictionaries are not normally used.
	*/
	public static final int DICT_SIZE_MAX = Integer.MAX_VALUE & ~15;

	private static final int COMPRESSED_SIZE_MAX = 1 << 16;

	private final ArrayCache arrayCache;
	private DataInputStream in;

	private LZDecoder lz;
	private RangeDecoderFromBuffer rc;
	private LZMADecoder lzma;

	private int uncompressedSize = 0;
	private boolean isLZMAChunk = false;

	private boolean needDictReset = true;
	private boolean needProps = true;
	private boolean endReached = false;

	private IOException exception = null;

	private final byte[] tempBuf = new byte[1];

	/**
	* Gets approximate decompressor memory requirements as kibibytes for
	* the given dictionary size.
	*
	* @param dictSize LZMA2 dictionary size as bytes, must be
	* in the range [<code>DICT_SIZE_MIN</code>,
	* <code>DICT_SIZE_MAX</code>]
	*
	* @return approximate memory requirements as kibibytes (KiB)
	*/
	public static int getMemoryUsage(int dictSize) {
	// The base state is around 30-40 KiB (probabilities etc.),
	// range decoder needs COMPRESSED_SIZE_MAX bytes for buffering,
	// and LZ decoder needs a dictionary buffer.
	return 40 + COMPRESSED_SIZE_MAX / 1024 + getDictSize(dictSize) / 1024;
	}

	private static int getDictSize(int dictSize) {
	if (dictSize < DICT_SIZE_MIN \|\| dictSize > DICT_SIZE_MAX)
	throw new IllegalArgumentException(
	"Unsupported dictionary size " + dictSize);

	// Round dictionary size upward to a multiple of 16. This way LZMA
	// can use LZDecoder.getPos() for calculating LZMA's posMask.
	// Note that this check is needed only for raw LZMA2 streams; it is
	// redundant with .xz.
	return (dictSize + 15) & ~15;
	}

	/**
	* Creates a new input stream that decompresses raw LZMA2 data
	* from <code>in</code>.
	* <p>
	* The caller needs to know the dictionary size used when compressing;
	* the dictionary size isn't stored as part of a raw LZMA2 stream.
	* <p>
	* Specifying a too small dictionary size will prevent decompressing
	* the stream. Specifying a too big dictionary is waste of memory but
	* decompression will work.
	* <p>
	* There is no need to specify a dictionary bigger than
	* the uncompressed size of the data even if a bigger dictionary
	* was used when compressing. If you know the uncompressed size
	* of the data, this might allow saving some memory.
	*
	* @param in input stream from which LZMA2-compressed
	* data is read
	*
	* @param dictSize LZMA2 dictionary size as bytes, must be
	* in the range [<code>DICT_SIZE_MIN</code>,
	* <code>DICT_SIZE_MAX</code>]
	*/
	public LZMA2InputStream(InputStream in, int dictSize) {
	this(in, dictSize, null);
	}

	/**
	* Creates a new LZMA2 decompressor using a preset dictionary.
	* <p>
	* This is like <code>LZMA2InputStream(InputStream, int)</code> except
	* that the dictionary may be initialized using a preset dictionary.
	* If a preset dictionary was used when compressing the data, the
	* same preset dictionary must be provided when decompressing.
	*
	* @param in input stream from which LZMA2-compressed
	* data is read
	*
	* @param dictSize LZMA2 dictionary size as bytes, must be
	* in the range [<code>DICT_SIZE_MIN</code>,
	* <code>DICT_SIZE_MAX</code>]
	*
	* @param presetDict preset dictionary or <code>null</code>
	* to use no preset dictionary
	*/
	public LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict) {
	this(in, dictSize, presetDict, ArrayCache.getDefaultCache());
	}

	/**
	* Creates a new LZMA2 decompressor using a preset dictionary
	* and array cache.
	* <p>
	* This is like <code>LZMA2InputStream(InputStream, int, byte[])</code>
	* except that this also takes the <code>arrayCache</code> argument.
	*
	* @param in input stream from which LZMA2-compressed
	* data is read
	*
	* @param dictSize LZMA2 dictionary size as bytes, must be
	* in the range [<code>DICT_SIZE_MIN</code>,
	* <code>DICT_SIZE_MAX</code>]
	*
	* @param presetDict preset dictionary or <code>null</code>
	* to use no preset dictionary
	*
	* @param arrayCache cache to be used for allocating large arrays
	*
	* @since 1.7
	*/
	LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict,
	ArrayCache arrayCache) {
	// Check for null because otherwise null isn't detect
	// in this constructor.
	if (in == null)
	throw new NullPointerException();

	this.arrayCache = arrayCache;
	this.in = new DataInputStream(in);
	this.rc = new RangeDecoderFromBuffer(COMPRESSED_SIZE_MAX, arrayCache);
	this.lz = new LZDecoder(getDictSize(dictSize), presetDict, arrayCache);

	if (presetDict != null && presetDict.length > 0)
	needDictReset = false;
	}

	/**
	* Decompresses the next byte from this input stream.
	* <p>
	* Reading lots of data with <code>read()</code> from this input stream
	* may be inefficient. Wrap it in <code>java.io.BufferedInputStream</code>
	* if you need to read lots of data one byte at a time.
	*
	* @return the next decompressed byte, or <code>-1</code>
	* to indicate the end of the compressed stream
	*
	* @throws CorruptedInputException
	*
	* @throws XZIOException if the stream has been closed
	*
	* @throws EOFException
	* compressed input is truncated or corrupt
	*
	* @throws IOException may be thrown by <code>in</code>
	*/
	public int read() throws IOException {
	return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
	}

	/**
	* Decompresses into an array of bytes.
	* <p>
	* If <code>len</code> is zero, no bytes are read and <code>0</code>
	* is returned. Otherwise this will block until <code>len</code>
	* bytes have been decompressed, the end of the LZMA2 stream is reached,
	* or an exception is thrown.
	*
	* @param buf target buffer for uncompressed data
	* @param off start offset in <code>buf</code>
	* @param len maximum number of uncompressed bytes to read
	*
	* @return number of bytes read, or <code>-1</code> to indicate
	* the end of the compressed stream
	*
	* @throws CorruptedInputException
	*
	* @throws XZIOException if the stream has been closed
	*
	* @throws EOFException
	* compressed input is truncated or corrupt
	*
	* @throws IOException may be thrown by <code>in</code>
	*/
	public int read(byte[] buf, int off, int len) throws IOException {
	if (off < 0 \|\| len < 0 \|\| off + len < 0 \|\| off + len > buf.length)
	throw new IndexOutOfBoundsException();

	if (len == 0)
	return 0;

	if (in == null)
	throw new XZIOException("Stream closed");

	if (exception != null)
	throw exception;

	if (endReached)
	return -1;

	try {
	int size = 0;

	while (len > 0) {
	if (uncompressedSize == 0) {
	decodeChunkHeader();
	if (endReached)
	return size == 0 ? -1 : size;
	}

	int copySizeMax = Math.min(uncompressedSize, len);

	if (!isLZMAChunk) {
	lz.copyUncompressed(in, copySizeMax);
	} else {
	lz.setLimit(copySizeMax);
	lzma.decode();
	}

	int copiedSize = lz.flush(buf, off);
	off += copiedSize;
	len -= copiedSize;
	size += copiedSize;
	uncompressedSize -= copiedSize;

	if (uncompressedSize == 0)
	if (!rc.isFinished() \|\| lz.hasPending())
	throw new CorruptedInputException();
	}

	return size;

	} catch (IOException e) {
	exception = e;
	throw e;
	}
	}

	private void decodeChunkHeader() throws IOException {
	int control = in.readUnsignedByte();

	if (control == 0x00) {
	endReached = true;
	putArraysToCache();
	return;
	}

	if (control >= 0xE0 \|\| control == 0x01) {
	needProps = true;
	needDictReset = false;
	lz.reset();
	} else if (needDictReset) {
	throw new CorruptedInputException();
	}

	if (control >= 0x80) {
	isLZMAChunk = true;

	uncompressedSize = (control & 0x1F) << 16;
	uncompressedSize += in.readUnsignedShort() + 1;

	int compressedSize = in.readUnsignedShort() + 1;

	if (control >= 0xC0) {
	needProps = false;
	decodeProps();

	} else if (needProps) {
	throw new CorruptedInputException();

	} else if (control >= 0xA0) {
	lzma.reset();
	}

	rc.prepareInputBuffer(in, compressedSize);

	} else if (control > 0x02) {
	throw new CorruptedInputException();

	} else {
	isLZMAChunk = false;
	uncompressedSize = in.readUnsignedShort() + 1;
	}
	}

	private void decodeProps() throws IOException {
	int props = in.readUnsignedByte();

	if (props > (4 * 5 + 4) * 9 + 8)
	throw new CorruptedInputException();

	int pb = props / (9 * 5);
	props -= pb * 9 * 5;
	int lp = props / 9;
	int lc = props - lp * 9;

	if (lc + lp > 4)
	throw new CorruptedInputException();

	lzma = new LZMADecoder(lz, rc, lc, lp, pb);
	}

	/**
	* Returns the number of uncompressed bytes that can be read
	* without blocking. The value is returned with an assumption
	* that the compressed input data will be valid. If the compressed
	* data is corrupt, <code>CorruptedInputException</code> may get
	* thrown before the number of bytes claimed to be available have
	* been read from this input stream.
	* <p>
	* In LZMA2InputStream, the return value will be non-zero when the
	* decompressor is in the middle of an LZMA2 chunk. The return value
	* will then be the number of uncompressed bytes remaining from that
	* chunk. The return value can also be non-zero in the middle of
	* an uncompressed chunk, but then the return value depends also on
	* the <code>available()</code> method of the underlying InputStream.
	*
	* @return the number of uncompressed bytes that can be read
	* without blocking
	*/
	public int available() throws IOException {
	if (in == null)
	throw new XZIOException("Stream closed");

	if (exception != null)
	throw exception;

	return isLZMAChunk ? uncompressedSize
	: Math.min(uncompressedSize, in.available());
	}

	private void putArraysToCache() {
	if (lz != null) {
	lz.putArraysToCache(arrayCache);
	lz = null;

	rc.putArraysToCache(arrayCache);
	rc = null;
	}
	}

	/**
	* Closes the stream and calls <code>in.close()</code>.
	* If the stream was already closed, this does nothing.
	*
	* @throws IOException if thrown by <code>in.close()</code>
	*/
	public void close() throws IOException {
	if (in != null) {
	putArraysToCache();

	try {
	in.close();
	} finally {
	in = null;
	}
	}
	}
	}