// blob: 3cc47c436b6583e60c28973241f17c73b6765135 [file] [log] [blame]
/*
* reserved comment block
* DO NOT REMOVE OR ALTER!
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.sun.org.apache.xerces.internal.impl.io;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.Locale;
import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
import com.sun.org.apache.xerces.internal.util.MessageFormatter;
/**
 * <p>
 * A UTF-16 reader. Can also be used for UCS-2 (i.e. ISO-10646-UCS-2).</p>
 *
 * <p>
 * Every two bytes taken from the underlying {@link InputStream} are combined
 * into one {@code char}, using the byte order chosen at construction time.
 * The resulting values are not inspected: surrogates are handed to the caller
 * exactly as they appear in the input.</p>
 *
 * @xerces.internal
 *
 * @author Michael Glavassevich, IBM
 *
 * @version $Id: UTF16Reader.java 718095 2008-11-16 20:00:14Z mrglavas $
 */
public final class UTF16Reader
        extends Reader {

    //
    // Constants
    //

    /**
     * Default byte buffer size (4096).
     */
    public static final int DEFAULT_BUFFER_SIZE = 4096;

    //
    // Data
    //

    /**
     * Input stream.
     */
    protected final InputStream fInputStream;

    /**
     * Byte buffer. Used as scratch space by {@link #read(char[], int, int)};
     * no bytes are carried over in it from one call to the next.
     */
    protected final byte[] fBuffer;

    /**
     * Endianness: {@code true} for UTF-16BE, {@code false} for UTF-16LE.
     */
    protected final boolean fIsBigEndian;

    // message formatter; used to produce localized exception messages
    private final MessageFormatter fFormatter;

    // Locale to use for messages
    private final Locale fLocale;

    //
    // Constructors
    //

    /**
     * Constructs a UTF-16 reader from the specified input stream using the
     * default buffer size, a new {@link XMLMessageFormatter} and the default
     * locale. Primarily for testing.
     *
     * @param inputStream The input stream.
     * @param isBigEndian The byte order.
     */
    public UTF16Reader(InputStream inputStream, boolean isBigEndian) {
        this(inputStream, DEFAULT_BUFFER_SIZE, isBigEndian,
                new XMLMessageFormatter(), Locale.getDefault());
    } // <init>(InputStream, boolean)

    /**
     * Constructs a UTF-16 reader from the specified input stream using the
     * default buffer size and the given MessageFormatter.
     *
     * @param inputStream The input stream.
     * @param isBigEndian The byte order.
     * @param messageFormatter Given MessageFormatter
     * @param locale Locale to use for messages
     */
    public UTF16Reader(InputStream inputStream, boolean isBigEndian,
            MessageFormatter messageFormatter, Locale locale) {
        this(inputStream, DEFAULT_BUFFER_SIZE, isBigEndian, messageFormatter, locale);
    } // <init>(InputStream, boolean, MessageFormatter, Locale)

    /**
     * Constructs a UTF-16 reader from the specified input stream and buffer
     * size and given MessageFormatter.
     *
     * @param inputStream The input stream.
     * @param size The size, in bytes, of the buffer to allocate.
     * @param isBigEndian The byte order.
     * @param messageFormatter Given MessageFormatter
     * @param locale Locale to use for messages
     */
    public UTF16Reader(InputStream inputStream, int size, boolean isBigEndian,
            MessageFormatter messageFormatter, Locale locale) {
        this(inputStream, new byte[size], isBigEndian, messageFormatter, locale);
    } // <init>(InputStream, int, boolean, MessageFormatter, Locale)

    /**
     * Constructs a UTF-16 reader from the specified input stream, buffer and
     * MessageFormatter. The buffer is used directly (it is not copied).
     *
     * @param inputStream The input stream.
     * @param buffer The byte buffer.
     * @param isBigEndian The byte order.
     * @param messageFormatter Given MessageFormatter
     * @param locale Locale to use for messages
     */
    public UTF16Reader(InputStream inputStream, byte[] buffer, boolean isBigEndian,
            MessageFormatter messageFormatter, Locale locale) {
        fInputStream = inputStream;
        fBuffer = buffer;
        fIsBigEndian = isBigEndian;
        fFormatter = messageFormatter;
        fLocale = locale;
    } // <init>(InputStream, byte[], boolean, MessageFormatter, Locale)

    //
    // Reader methods
    //

    /**
     * Read a single character by pulling two bytes directly from the
     * underlying stream (the byte buffer is not used).
     *
     * @return The character read, as an integer in the range 0 to 65535
     * ({@code 0x00-0xffff}), or -1 if the end of the stream has been reached
     *
     * @exception IOException If an I/O error occurs, or if the stream ends
     * after the first byte of a two-byte unit
     */
    @Override
    public int read() throws IOException {
        final int b0 = fInputStream.read();
        if (b0 == -1) {
            return -1;
        }
        final int b1 = fInputStream.read();
        if (b1 == -1) {
            // Stream ended in the middle of a code unit; this always throws.
            expectedTwoBytes();
        }
        // UTF-16BE
        if (fIsBigEndian) {
            return (b0 << 8) | b1;
        }
        // UTF-16LE
        return (b1 << 8) | b0;
    } // read():int

    /**
     * Read characters into a portion of an array. At most one bulk read is
     * issued against the underlying stream, so fewer than {@code length}
     * characters may be returned even when more input is available.
     *
     * @param ch Destination buffer
     * @param offset Offset at which to start storing characters
     * @param length Maximum number of characters to read
     *
     * @return The number of characters read, or -1 if the end of the stream has
     * been reached
     *
     * @exception IOException If an I/O error occurs, or if the stream ends
     * after the first byte of a two-byte unit
     */
    @Override
    public int read(char[] ch, int offset, int length) throws IOException {
        // Only ever request an even number of bytes. With an odd-sized buffer
        // a completely filled read would otherwise leave no room for the
        // second byte of the last code unit.
        final int usableBytes = fBuffer.length & ~1;
        if (usableBytes == 0 && length > 0) {
            // The buffer cannot hold even one code unit; decode a single
            // character straight from the stream instead.
            final int c = read();
            if (c == -1) {
                return -1;
            }
            ch[offset] = (char) c;
            return 1;
        }
        // Compare in characters rather than bytes so that doubling a very
        // large length cannot overflow into a negative byte count.
        final int byteLength = (length > (usableBytes >> 1)) ? usableBytes : (length << 1);
        int byteCount = fInputStream.read(fBuffer, 0, byteLength);
        if (byteCount == -1) {
            return -1;
        }
        // If an odd number of bytes were read, we still need to read one more.
        // byteCount < byteLength here because byteLength is even, so the
        // extra byte always fits in the buffer.
        if ((byteCount & 1) != 0) {
            final int b = fInputStream.read();
            if (b == -1) {
                expectedTwoBytes();
            }
            fBuffer[byteCount++] = (byte) b;
        }
        final int charCount = byteCount >> 1;
        if (fIsBigEndian) {
            processBE(ch, offset, charCount);
        } else {
            processLE(ch, offset, charCount);
        }
        return charCount;
    } // read(char[],int,int)

    /**
     * Skip characters by skipping twice as many bytes in the underlying
     * stream. If the stream skips an odd number of bytes, one more byte is
     * consumed so that the reader stays aligned on a two-byte boundary.
     *
     * @param n The number of characters to skip
     *
     * @return The number of characters actually skipped
     *
     * @exception IOException If an I/O error occurs, or if the stream ends
     * after the first byte of a two-byte unit
     */
    @Override
    public long skip(long n) throws IOException {
        // Clamp before doubling so the byte count cannot overflow and change
        // sign; values within range are passed through unchanged.
        final long chars = Math.max(Long.MIN_VALUE >> 1, Math.min(n, Long.MAX_VALUE >> 1));
        long bytesSkipped = fInputStream.skip(chars << 1);
        if ((bytesSkipped & 1) != 0) {
            final int b = fInputStream.read();
            if (b == -1) {
                expectedTwoBytes();
            }
            ++bytesSkipped;
        }
        return bytesSkipped >> 1;
    } // skip(long):long

    /**
     * Tell whether this stream is ready to be read.
     *
     * @return This implementation always returns false; that does not
     * guarantee that the next read will block.
     *
     * @exception IOException Declared for compatibility with
     * {@link Reader#ready()}
     */
    @Override
    public boolean ready() throws IOException {
        return false;
    } // ready()

    /**
     * Tell whether this stream supports the mark() operation.
     *
     * @return Always false.
     */
    @Override
    public boolean markSupported() {
        return false;
    } // markSupported()

    /**
     * Mark the present position in the stream. This reader does not support
     * mark(), so this method always throws.
     *
     * @param readAheadLimit Ignored.
     *
     * @exception IOException Always, with a localized "OperationNotSupported"
     * message
     */
    @Override
    public void mark(int readAheadLimit) throws IOException {
        throw new IOException(fFormatter.formatMessage(fLocale, "OperationNotSupported", new Object[]{"mark()", "UTF-16"}));
    } // mark(int)

    /**
     * Reset the stream. This implementation does nothing: the reader keeps no
     * decoding state between calls and the underlying stream is not
     * repositioned.
     *
     * @exception IOException Declared for compatibility with
     * {@link Reader#reset()}
     */
    @Override
    public void reset() throws IOException {
        // Intentionally empty.
    } // reset()

    /**
     * Close the stream by closing the underlying input stream.
     *
     * @exception IOException If an I/O error occurs
     */
    @Override
    public void close() throws IOException {
        fInputStream.close();
    } // close()

    //
    // Private methods
    //

    /**
     * Decodes UTF-16BE: converts the first {@code count} byte pairs of
     * {@code fBuffer} into characters, high byte first.
     *
     * @param ch Destination buffer
     * @param offset Index in {@code ch} of the first character to store
     * @param count Number of characters to decode
     */
    private void processBE(final char[] ch, int offset, final int count) {
        int curPos = 0;
        for (int i = 0; i < count; ++i) {
            // Mask to undo sign extension of the byte values.
            final int b0 = fBuffer[curPos++] & 0xff;
            final int b1 = fBuffer[curPos++] & 0xff;
            ch[offset++] = (char) ((b0 << 8) | b1);
        }
    } // processBE(char[],int,int)

    /**
     * Decodes UTF-16LE: converts the first {@code count} byte pairs of
     * {@code fBuffer} into characters, low byte first.
     *
     * @param ch Destination buffer
     * @param offset Index in {@code ch} of the first character to store
     * @param count Number of characters to decode
     */
    private void processLE(final char[] ch, int offset, final int count) {
        int curPos = 0;
        for (int i = 0; i < count; ++i) {
            // Mask to undo sign extension of the byte values.
            final int b0 = fBuffer[curPos++] & 0xff;
            final int b1 = fBuffer[curPos++] & 0xff;
            ch[offset++] = (char) ((b1 << 8) | b0);
        }
    } // processLE(char[],int,int)

    /**
     * Throws an exception for expected byte. Called when the input ends after
     * the first byte of a two-byte unit; never returns normally.
     *
     * @exception MalformedByteSequenceException Always, built from the
     * "ExpectedByte" message key in the XML message domain
     */
    private void expectedTwoBytes()
            throws MalformedByteSequenceException {
        throw new MalformedByteSequenceException(fFormatter,
                fLocale,
                XMLMessageFormatter.XML_DOMAIN,
                "ExpectedByte",
                new Object[]{"2", "2"});
    } // expectedTwoBytes()
} // class UTF16Reader