blob: 33c8a0a8aa50add44408809533c6a26683274c11 [file] [log] [blame]
/* Copyright (c) 2001-2010, The HSQL Development Group
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of the HSQL Development Group nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL HSQL DEVELOPMENT GROUP, HSQLDB.ORG,
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.hsqldb.lib.tar;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.GZIPInputStream;
/**
* Note that this class <b>is not</b> a java.io.FileInputStream,
* because our goal is to greatly restrict the public methods of
* FileInputStream, yet we must use public methods of the underlying
* FileInputStream internally. Can't accomplish these goals in Java if we
* subclass.
* <P>
* This class is ignorant about Tar header fields, attributes and such.
* It is concerned with reading and writing blocks of data in conformance with
* Tar formatting, in a way convenient to those who want to get the header and
* data blocks.
* </P> <P>
* Asymmetric to the Tar file writing side, the bufferBlocks setting here is
* used only to adjust the read buffer size (for file data reads), so the user
* can compromise between available memory and performance. Small buffer sizes
* will always work, but will incur more reads; on the other hand, buffer sizes
* larger than the largest component file are just a waste of memory.
* </P> <P>
* We assume the responsibility to manage the setting because the decision
* should be based on available RAM more than anything else (therefore, we can't
* set a good value automatically).
* </P> <P>
* As alluded to above, headers are read in separate reads, regardless of the
* readBufferBlocks setting. readBufferBlocks is used for reading
* <I>file data</I>.
* </P> <P>
* I have purposefully not implemented skip(), because, though I haven't tested
* it, I believe our readBlock() and readBlocks() methods are at least as fast,
* since we use the largest read buffer within the limits the user has set.
* </P>
*/
public class TarFileInputStream implements java.io.Closeable {

    /** Size in bytes of a single tar block. */
    private static final int BLOCK_SIZE = 512;

    /* Would love to use a RandomAccessFile, but RandomAccessFiles do not play
     * nicely with InputStreams or filters, and it just would not work with
     * compressed input. */

    /**
     * Running total of the bytes obtained from the underlying stream.
     * "Read" is pronounced here as the past tense of "to read", i.e. the
     * homonym of "red".
     */
    protected long bytesRead = 0;

    /* This is not a "Reader", but the byte "Stream" that we read() from.
     * Set to null by close(). */
    private InputStream readStream;

    /** Destination of every read; sized to readBufferBlocks * 512 bytes. */
    protected byte[] readBuffer;
    protected int    readBufferBlocks;
    protected int    compressionType;

    /**
     * Convenience wrapper to use default readBufferBlocks and compressionType.
     *
     * @see #TarFileInputStream(File, int, int)
     */
    public TarFileInputStream(File sourceFile) throws IOException {
        this(sourceFile, TarFileOutputStream.Compression.DEFAULT_COMPRESSION);
    }

    /**
     * Convenience wrapper to use default readBufferBlocks.
     *
     * @see #TarFileInputStream(File, int, int)
     */
    public TarFileInputStream(File sourceFile,
                              int compressionType) throws IOException {
        this(sourceFile, compressionType,
             TarFileOutputStream.Compression.DEFAULT_BLOCKS_PER_RECORD);
    }

    public int getReadBufferBlocks() {
        return readBufferBlocks;
    }

    /**
     * This class does no validation or enforcement of file naming conventions.
     * If desired, the caller should enforce extensions like "tar" and
     * "tar.gz" (and that they match the specified compression type).
     * <P>
     * This object will automatically release its I/O resources when you get
     * false back from a readNextHeaderBlock() call.
     * If you abort before then, you must call the close() method like for a
     * normal InputStream.
     * </P>
     *
     * @param sourceFile       the tar file to read
     * @param compressionType  one of the TarFileOutputStream.Compression
     *                         compression constants
     * @param readBufferBlocks size of the read buffer, in 512-byte blocks
     * @throws FileNotFoundException if sourceFile is not a regular file
     * @throws IOException if sourceFile is not readable or can not be opened
     * @throws IllegalArgumentException if compressionType is not recognized
     * @see #close()
     * @see #readNextHeaderBlock()
     */
    public TarFileInputStream(File sourceFile, int compressionType,
                              int readBufferBlocks) throws IOException {

        if (!sourceFile.isFile()) {
            throw new FileNotFoundException(sourceFile.getAbsolutePath());
        }

        if (!sourceFile.canRead()) {
            throw new IOException(
                RB.read_denied.getString(sourceFile.getAbsolutePath()));
        }

        this.readBufferBlocks = readBufferBlocks;
        this.compressionType  = compressionType;
        readBuffer            = new byte[readBufferBlocks * BLOCK_SIZE];

        switch (compressionType) {

            case TarFileOutputStream.Compression.NO_COMPRESSION :
                readStream = new FileInputStream(sourceFile);
                break;

            case TarFileOutputStream.Compression.GZIP_COMPRESSION :
                readStream = openGzipStream(sourceFile, readBuffer.length);
                break;

            default :
                throw new IllegalArgumentException(
                    RB.compression_unknown.getString(compressionType));
        }
    }

    /**
     * Opens sourceFile wrapped in a GZIPInputStream, making sure the
     * underlying file stream is closed again if the GZIP wrapper can not be
     * constructed (e.g. because the file is not in GZIP format).
     */
    private static InputStream openGzipStream(File sourceFile, int bufferSize)
            throws IOException {

        FileInputStream fileStream = new FileInputStream(sourceFile);
        boolean         wrapped    = false;

        try {
            InputStream gzipStream = new GZIPInputStream(fileStream,
                                                         bufferSize);

            wrapped = true;

            return gzipStream;
        } finally {
            if (!wrapped) {
                try {
                    fileStream.close();
                } catch (IOException ignored) {
                    // The failure that is already propagating from the
                    // GZIPInputStream constructor is the one to report.
                }
            }
        }
    }

    /**
     * Reads from readStream into the start of readBuffer until requiredBytes
     * bytes have been stored or end-of-stream is reached, adding every byte
     * obtained to bytesRead.
     * A single InputStream.read() call is allowed to return fewer bytes than
     * requested, hence the loop.
     *
     * @return the number of bytes actually stored in readBuffer
     */
    private int fillReadBuffer(int requiredBytes) throws IOException {

        int total = 0;

        while (total < requiredBytes) {
            int count = readStream.read(readBuffer, total,
                                        requiredBytes - total);

            if (count < 0) {
                // End of stream.  Do not let the -1 marker leak into the
                // byte counters.
                break;
            }

            total     += count;
            bytesRead += count;
        }

        return total;
    }

    /**
     * readBlocks(int) is the method that USERS of this class should use to
     * read file data from the tar file.
     * This method reads from the tar file and writes to the readBuffer array.
     * <P>
     * This class and subclasses should read from the underlying readStream
     * <b>ONLY WITH THIS METHOD</b>.
     * That way we can be confident that bytesRead will always be accurate.
     * </P> <P>
     * This method is different from a typical Java byte array read command
     * in that when reading tar files <OL>
     *   <LI>we always know ahead-of-time how many bytes we should read, and
     *   <LI>we always want to read quantities of bytes in multiples of 512.
     * </OL>
     * </P>
     *
     * @param blocks  How many 512 blocks to read.
     * @throws IOException for an I/O error on the underlying InputStream
     * @throws TarMalformatException if no I/O error occurred, but we failed to
     *                               read the exact number of bytes requested.
     */
    public void readBlocks(int blocks)
            throws IOException, TarMalformatException {

        /* int for blocks should support sizes up to about 1T, according to
         * my off-the-cuff calculations */
        if (compressionType
                != TarFileOutputStream.Compression.NO_COMPRESSION) {
            readCompressedBlocks(blocks);

            return;
        }

        int requiredBytes = blocks * BLOCK_SIZE;
        int obtained      = fillReadBuffer(requiredBytes);

        if (obtained != requiredBytes) {
            throw new TarMalformatException(
                RB.insufficient_read.getString(requiredBytes, obtained));
        }
    }

    /**
     * Work-around for the problem that compressed InputStreams don't fill
     * the read buffer before returning.
     *
     * Has visibility 'protected' so that subclasses may override with
     * different algorithms, or use different algorithms for different
     * compression stream.
     *
     * @param blocks  How many 512 blocks to read.
     * @throws EOFException if the stream ends before the requested number of
     *                      bytes has been read
     * @throws IOException for an I/O error on the underlying InputStream
     */
    protected void readCompressedBlocks(int blocks) throws IOException {

        int requiredBytes = BLOCK_SIZE * blocks;
        int obtained      = fillReadBuffer(requiredBytes);

        if (obtained < requiredBytes) {
            throw new EOFException(
                RB.decompression_ranout.getString(obtained, requiredBytes));
        }
    }

    /**
     * readBlock() and readNextHeaderBlock are the methods that USERS of this
     * class should use to read header blocks from the tar file.
     * <P>
     * readBlock() should be used when you know that the current block should
     * contain what you want.
     * E.g. you know that the very first block of a tar file should contain
     * a Tar Entry header block.
     * </P>
     *
     * @see #readNextHeaderBlock
     */
    public void readBlock() throws IOException, TarMalformatException {
        readBlocks(1);
    }

    /**
     * readBlock() and readNextHeaderBlock are the methods that USERS of this
     * class should use to read header blocks from the tar file.
     * <P>
     * readNextHeaderBlock continues working through the Tar File from the
     * current point until it finds a block with a non-0 first byte.
     * </P>
     *
     * @return  True if a header block was read and placed at the beginning of
     *          the readBuffer array.  False if EOF was encountered without
     *          finding any blocks with first byte != 0.  If false is returned,
     *          we have automatically closed this TarFileInputStream too.
     * @see #readBlock
     */
    public boolean readNextHeaderBlock()
            throws IOException, TarMalformatException {

        // We read a single block at a time because there should only be 2
        // empty blocks between each Tar Entry.
        try {
            while (readStream.available() > 0) {
                readBlock();

                if (readBuffer[0] != 0) {
                    return true;
                }
            }
        } catch (EOFException ignored) {

            /* This is a work-around.
             * InputStream.available() is not reliable for GZip streams:
             * we reach this point when performing a read of a GZip stream
             * while available() == 1, which according to the API Spec should
             * not happen.
             * We treat this condition exactly as if readStream.available()
             * were 0, which it should be.
             */
        }

        close();

        return false;
    }

    /**
     * Implements java.io.Closeable.
     * Closes the underlying stream; does nothing if this object has already
     * been closed.
     *
     * @see java.io.Closeable
     */
    public void close() throws IOException {

        if (readStream == null) {
            return;
        }

        try {
            readStream.close();
        } finally {
            readStream = null;    // Encourage buffer GC
        }
    }
}