| /* |
| * reserved comment block |
| * DO NOT REMOVE OR ALTER! |
| */ |
| /* |
| * Copyright 2000-2002,2004 The Apache Software Foundation. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package com.sun.org.apache.xerces.internal.impl.io; |
| |
| import java.io.InputStream; |
| import java.io.IOException; |
| import java.io.Reader; |
| |
| import com.sun.xml.internal.stream.util.BufferAllocator; |
| import com.sun.xml.internal.stream.util.ThreadLocalBufferAllocator; |
| |
| /** |
| * Reader for UCS-2 and UCS-4 encodings. |
| * (i.e., encodings from ISO-10646-UCS-(2|4)). |
| * |
| * @xerces.internal |
| * |
| * @author Neil Graham, IBM |
| * |
| */ |
| public class UCSReader extends Reader { |
| |
| // |
| // Constants |
| // |
| |
| /** Default byte buffer size (8192, larger than that of ASCIIReader |
| * since it's reasonable to surmise that the average UCS-4-encoded |
| * file should be 4 times as large as the average ASCII-encoded file). |
| */ |
| public static final int DEFAULT_BUFFER_SIZE = 8192; |
| |
| public static final short UCS2LE = 1; |
| public static final short UCS2BE = 2; |
| public static final short UCS4LE = 4; |
| public static final short UCS4BE = 8; |
| |
| // |
| // Data |
| // |
| |
| /** Input stream. */ |
| protected InputStream fInputStream; |
| |
| /** Byte buffer. */ |
| protected byte[] fBuffer; |
| |
| // what kind of data we're dealing with |
| protected short fEncoding; |
| |
| // |
| // Constructors |
| // |
| |
| /** |
| * Constructs an ASCII reader from the specified input stream |
| * using the default buffer size. The Endian-ness and whether this is |
| * UCS-2 or UCS-4 needs also to be known in advance. |
| * |
| * @param inputStream The input stream. |
| * @param encoding One of UCS2LE, UCS2BE, UCS4LE or UCS4BE. |
| */ |
| public UCSReader(InputStream inputStream, short encoding) { |
| this(inputStream, DEFAULT_BUFFER_SIZE, encoding); |
| } // <init>(InputStream, short) |
| |
| /** |
| * Constructs an ASCII reader from the specified input stream |
| * and buffer size. The Endian-ness and whether this is |
| * UCS-2 or UCS-4 needs also to be known in advance. |
| * |
| * @param inputStream The input stream. |
| * @param size The initial buffer size. |
| * @param encoding One of UCS2LE, UCS2BE, UCS4LE or UCS4BE. |
| */ |
| public UCSReader(InputStream inputStream, int size, short encoding) { |
| fInputStream = inputStream; |
| BufferAllocator ba = ThreadLocalBufferAllocator.getBufferAllocator(); |
| fBuffer = ba.getByteBuffer(size); |
| if (fBuffer == null) { |
| fBuffer = new byte[size]; |
| } |
| fEncoding = encoding; |
| } // <init>(InputStream,int,short) |
| |
| // |
| // Reader methods |
| // |
| |
| /** |
| * Read a single character. This method will block until a character is |
| * available, an I/O error occurs, or the end of the stream is reached. |
| * |
| * <p> Subclasses that intend to support efficient single-character input |
| * should override this method. |
| * |
| * @return The character read, as an integer in the range 0 to 127 |
| * (<tt>0x00-0x7f</tt>), or -1 if the end of the stream has |
| * been reached |
| * |
| * @exception IOException If an I/O error occurs |
| */ |
| public int read() throws IOException { |
| int b0 = fInputStream.read() & 0xff; |
| if (b0 == 0xff) |
| return -1; |
| int b1 = fInputStream.read() & 0xff; |
| if (b1 == 0xff) |
| return -1; |
| if(fEncoding >=4) { |
| int b2 = fInputStream.read() & 0xff; |
| if (b2 == 0xff) |
| return -1; |
| int b3 = fInputStream.read() & 0xff; |
| if (b3 == 0xff) |
| return -1; |
| System.err.println("b0 is " + (b0 & 0xff) + " b1 " + (b1 & 0xff) + " b2 " + (b2 & 0xff) + " b3 " + (b3 & 0xff)); |
| if (fEncoding == UCS4BE) |
| return (b0<<24)+(b1<<16)+(b2<<8)+b3; |
| else |
| return (b3<<24)+(b2<<16)+(b1<<8)+b0; |
| } else { // UCS-2 |
| if (fEncoding == UCS2BE) |
| return (b0<<8)+b1; |
| else |
| return (b1<<8)+b0; |
| } |
| } // read():int |
| |
| /** |
| * Read characters into a portion of an array. This method will block |
| * until some input is available, an I/O error occurs, or the end of the |
| * stream is reached. |
| * |
| * @param ch Destination buffer |
| * @param offset Offset at which to start storing characters |
| * @param length Maximum number of characters to read |
| * |
| * @return The number of characters read, or -1 if the end of the |
| * stream has been reached |
| * |
| * @exception IOException If an I/O error occurs |
| */ |
| public int read(char ch[], int offset, int length) throws IOException { |
| int byteLength = length << ((fEncoding >= 4)?2:1); |
| if (byteLength > fBuffer.length) { |
| byteLength = fBuffer.length; |
| } |
| int count = fInputStream.read(fBuffer, 0, byteLength); |
| if(count == -1) return -1; |
| // try and make count be a multiple of the number of bytes we're looking for |
| if(fEncoding >= 4) { // BigEndian |
| // this looks ugly, but it avoids an if at any rate... |
| int numToRead = (4 - (count & 3) & 3); |
| for(int i=0; i<numToRead; i++) { |
| int charRead = fInputStream.read(); |
| if(charRead == -1) { // end of input; something likely went wrong!A Pad buffer with nulls. |
| for (int j = i;j<numToRead; j++) |
| fBuffer[count+j] = 0; |
| break; |
| } else { |
| fBuffer[count+i] = (byte)charRead; |
| } |
| } |
| count += numToRead; |
| } else { |
| int numToRead = count & 1; |
| if(numToRead != 0) { |
| count++; |
| int charRead = fInputStream.read(); |
| if(charRead == -1) { // end of input; something likely went wrong!A Pad buffer with nulls. |
| fBuffer[count] = 0; |
| } else { |
| fBuffer[count] = (byte)charRead; |
| } |
| } |
| } |
| |
| // now count is a multiple of the right number of bytes |
| int numChars = count >> ((fEncoding >= 4)?2:1); |
| int curPos = 0; |
| for (int i = 0; i < numChars; i++) { |
| int b0 = fBuffer[curPos++] & 0xff; |
| int b1 = fBuffer[curPos++] & 0xff; |
| if(fEncoding >=4) { |
| int b2 = fBuffer[curPos++] & 0xff; |
| int b3 = fBuffer[curPos++] & 0xff; |
| if (fEncoding == UCS4BE) |
| ch[offset+i] = (char)((b0<<24)+(b1<<16)+(b2<<8)+b3); |
| else |
| ch[offset+i] = (char)((b3<<24)+(b2<<16)+(b1<<8)+b0); |
| } else { // UCS-2 |
| if (fEncoding == UCS2BE) |
| ch[offset+i] = (char)((b0<<8)+b1); |
| else |
| ch[offset+i] = (char)((b1<<8)+b0); |
| } |
| } |
| return numChars; |
| } // read(char[],int,int) |
| |
| /** |
| * Skip characters. This method will block until some characters are |
| * available, an I/O error occurs, or the end of the stream is reached. |
| * |
| * @param n The number of characters to skip |
| * |
| * @return The number of characters actually skipped |
| * |
| * @exception IOException If an I/O error occurs |
| */ |
| public long skip(long n) throws IOException { |
| // charWidth will represent the number of bits to move |
| // n leftward to get num of bytes to skip, and then move the result rightward |
| // to get num of chars effectively skipped. |
| // The trick with &'ing, as with elsewhere in this dcode, is |
| // intended to avoid an expensive use of / that might not be optimized |
| // away. |
| int charWidth = (fEncoding >=4)?2:1; |
| long bytesSkipped = fInputStream.skip(n<<charWidth); |
| if((bytesSkipped & (charWidth | 1)) == 0) return bytesSkipped >> charWidth; |
| return (bytesSkipped >> charWidth) + 1; |
| } // skip(long):long |
| |
| /** |
| * Tell whether this stream is ready to be read. |
| * |
| * @return True if the next read() is guaranteed not to block for input, |
| * false otherwise. Note that returning false does not guarantee that the |
| * next read will block. |
| * |
| * @exception IOException If an I/O error occurs |
| */ |
| public boolean ready() throws IOException { |
| return false; |
| } // ready() |
| |
| /** |
| * Tell whether this stream supports the mark() operation. |
| */ |
| public boolean markSupported() { |
| return fInputStream.markSupported(); |
| } // markSupported() |
| |
| /** |
| * Mark the present position in the stream. Subsequent calls to reset() |
| * will attempt to reposition the stream to this point. Not all |
| * character-input streams support the mark() operation. |
| * |
| * @param readAheadLimit Limit on the number of characters that may be |
| * read while still preserving the mark. After |
| * reading this many characters, attempting to |
| * reset the stream may fail. |
| * |
| * @exception IOException If the stream does not support mark(), |
| * or if some other I/O error occurs |
| */ |
| public void mark(int readAheadLimit) throws IOException { |
| fInputStream.mark(readAheadLimit); |
| } // mark(int) |
| |
| /** |
| * Reset the stream. If the stream has been marked, then attempt to |
| * reposition it at the mark. If the stream has not been marked, then |
| * attempt to reset it in some way appropriate to the particular stream, |
| * for example by repositioning it to its starting point. Not all |
| * character-input streams support the reset() operation, and some support |
| * reset() without supporting mark(). |
| * |
| * @exception IOException If the stream has not been marked, |
| * or if the mark has been invalidated, |
| * or if the stream does not support reset(), |
| * or if some other I/O error occurs |
| */ |
| public void reset() throws IOException { |
| fInputStream.reset(); |
| } // reset() |
| |
| /** |
| * Close the stream. Once a stream has been closed, further read(), |
| * ready(), mark(), or reset() invocations will throw an IOException. |
| * Closing a previously-closed stream, however, has no effect. |
| * |
| * @exception IOException If an I/O error occurs |
| */ |
| public void close() throws IOException { |
| BufferAllocator ba = ThreadLocalBufferAllocator.getBufferAllocator(); |
| ba.returnByteBuffer(fBuffer); |
| fBuffer = null; |
| fInputStream.close(); |
| } // close() |
| |
| } // class UCSReader |