| /* |
| * Copyright 2006 Sun Microsystems, Inc. All Rights Reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. Sun designates this |
| * particular file as subject to the "Classpath" exception as provided |
| * by Sun in the LICENSE file that accompanied this code. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
| * CA 95054 USA or visit www.sun.com if you need additional information or |
| * have any questions. |
| */ |
| |
| package com.sun.xml.internal.dtdparser; |
| |
| import org.xml.sax.InputSource; |
| import org.xml.sax.SAXException; |
| import org.xml.sax.SAXParseException; |
| |
| import java.io.CharConversionException; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.InputStreamReader; |
| import java.io.Reader; |
| import java.io.UnsupportedEncodingException; |
| import java.net.URL; |
| import java.util.Locale; |
| |
| /** |
| * This is how the parser talks to its input entities, of all kinds. |
| * The entities are in a stack. |
| * <p/> |
| * <P> For internal entities, the character arrays are referenced here, |
| * and read from as needed (they're read-only). External entities have |
| * mutable buffers, that are read into as needed. |
| * <p/> |
| * <P> <em>Note:</em> This maps CRLF (and CR) to LF without regard for |
| * whether it's in an external (parsed) entity or not. The XML 1.0 spec |
| * is inconsistent in explaining EOL handling; this is the sensible way. |
| * |
| * @author David Brownell |
| * @author Janet Koenig |
| */ |
| public class InputEntity { |
| private int start, finish; |
| private char buf []; |
| private int lineNumber = 1; |
| private boolean returnedFirstHalf = false; |
| private boolean maybeInCRLF = false; |
| |
| // name of entity (never main document or unnamed DTD PE) |
| private String name; |
| |
| private InputEntity next; |
| |
| // for system and public IDs in diagnostics |
| private InputSource input; |
| |
| // this is a buffer; some buffers can be replenished. |
| private Reader reader; |
| private boolean isClosed; |
| |
| private DTDEventListener errHandler; |
| private Locale locale; |
| |
| private StringBuffer rememberedText; |
| private int startRemember; |
| |
| // record if this is a PE, so endParsedEntity won't be called |
| private boolean isPE; |
| |
| // InputStreamReader throws an internal per-read exception, so |
| // we minimize reads. We also add a byte to compensate for the |
| // "ungetc" byte we keep, so that our downstream reads are as |
| // nicely sized as we can make them. |
| final private static int BUFSIZ = 8 * 1024 + 1; |
| |
| final private static char newline [] = {'\n'}; |
| |
/**
 * Creates an entity that is not yet attached to any input; one of the
 * <code>init</code> methods must be called before it is read from.
 *
 * @param h listener that receives error reports from this entity
 * @param l locale used when formatting diagnostic messages
 * @return a new, uninitialized entity
 */
public static InputEntity getInputEntity(DTDEventListener h, Locale l) {
    final InputEntity entity = new InputEntity();

    entity.locale = l;
    entity.errHandler = h;
    return entity;
}

// instances are handed out only by getInputEntity()
private InputEntity() {
}
| |
| // |
| // predicate: return true iff this is an internal entity reader, |
| // and so may safely be "popped" as needed. external entities have |
| // syntax to uphold; internal parameter entities have at most validity |
| // constraints to monitor. also, only external entities get decent |
| // location diagnostics. |
| // |
| public boolean isInternal() { |
| return reader == null; |
| } |
| |
| // |
| // predicate: return true iff this is the toplevel document |
| // |
| public boolean isDocument() { |
| return next == null; |
| } |
| |
| // |
| // predicate: return true iff this is a PE expansion (so that |
| // LexicalEventListner.endParsedEntity won't be called) |
| // |
| public boolean isParameterEntity() { |
| return isPE; |
| } |
| |
| // |
| // return name of current entity |
| // |
| public String getName() { |
| return name; |
| } |
| |
| // |
| // use this for an external parsed entity |
| // |
| public void init(InputSource in, String name, InputEntity stack, |
| boolean isPE) |
| throws IOException, SAXException { |
| |
| input = in; |
| this.isPE = isPE; |
| reader = in.getCharacterStream(); |
| |
| if (reader == null) { |
| InputStream bytes = in.getByteStream(); |
| |
| if (bytes == null) |
| reader = XmlReader.createReader(new URL(in.getSystemId()) |
| .openStream()); |
| else if (in.getEncoding() != null) |
| reader = XmlReader.createReader(in.getByteStream(), |
| in.getEncoding()); |
| else |
| reader = XmlReader.createReader(in.getByteStream()); |
| } |
| next = stack; |
| buf = new char[BUFSIZ]; |
| this.name = name; |
| checkRecursion(stack); |
| } |
| |
| // |
| // use this for an internal parsed entity; buffer is readonly |
| // |
| public void init(char b [], String name, InputEntity stack, boolean isPE) |
| throws SAXException { |
| |
| next = stack; |
| buf = b; |
| finish = b.length; |
| this.name = name; |
| this.isPE = isPE; |
| checkRecursion(stack); |
| } |
| |
| private void checkRecursion(InputEntity stack) |
| throws SAXException { |
| |
| if (stack == null) |
| return; |
| for (stack = stack.next; stack != null; stack = stack.next) { |
| if (stack.name != null && stack.name.equals(name)) |
| fatal("P-069", new Object[]{name}); |
| } |
| } |
| |
| public InputEntity pop() throws IOException { |
| |
| // caller has ensured there's nothing left to read |
| close(); |
| return next; |
| } |
| |
| /** |
| * returns true iff there's no more data to consume ... |
| */ |
| public boolean isEOF() throws IOException, SAXException { |
| |
| // called to ensure WF-ness of included entities and to pop |
| // input entities appropriately ... EOF is not always legal. |
| if (start >= finish) { |
| fillbuf(); |
| return start >= finish; |
| } else |
| return false; |
| } |
| |
| /** |
| * Returns the name of the encoding in use, else null; the name |
| * returned is in as standard a form as we can get. |
| */ |
| public String getEncoding() { |
| |
| if (reader == null) |
| return null; |
| if (reader instanceof XmlReader) |
| return ((XmlReader) reader).getEncoding(); |
| |
| // XXX prefer a java2std() call to normalize names... |
| |
| if (reader instanceof InputStreamReader) |
| return ((InputStreamReader) reader).getEncoding(); |
| return null; |
| } |
| |
| |
| /** |
| * returns the next name char, or NUL ... faster than getc(), |
| * and the common "name or nmtoken must be next" case won't |
| * need ungetc(). |
| */ |
| public char getNameChar() throws IOException, SAXException { |
| |
| if (finish <= start) |
| fillbuf(); |
| if (finish > start) { |
| char c = buf[start++]; |
| if (XmlChars.isNameChar(c)) |
| return c; |
| start--; |
| } |
| return 0; |
| } |
| |
| /** |
| * gets the next Java character -- might be part of an XML |
| * text character represented by a surrogate pair, or be |
| * the end of the entity. |
| */ |
| public char getc() throws IOException, SAXException { |
| |
| if (finish <= start) |
| fillbuf(); |
| if (finish > start) { |
| char c = buf[start++]; |
| |
| // [2] Char ::= #x0009 | #x000A | #x000D |
| // | [#x0020-#xD7FF] |
| // | [#xE000-#xFFFD] |
| // plus surrogate _pairs_ representing [#x10000-#x10ffff] |
| if (returnedFirstHalf) { |
| if (c >= 0xdc00 && c <= 0xdfff) { |
| returnedFirstHalf = false; |
| return c; |
| } else |
| fatal("P-070", new Object[]{Integer.toHexString(c)}); |
| } |
| if ((c >= 0x0020 && c <= 0xD7FF) |
| || c == 0x0009 |
| // no surrogates! |
| || (c >= 0xE000 && c <= 0xFFFD)) |
| return c; |
| |
| // |
| // CRLF and CR are both line ends; map both to LF, and |
| // keep line count correct. |
| // |
| else if (c == '\r' && !isInternal()) { |
| maybeInCRLF = true; |
| c = getc(); |
| if (c != '\n') |
| ungetc(); |
| maybeInCRLF = false; |
| |
| lineNumber++; |
| return '\n'; |
| |
| } else if (c == '\n' || c == '\r') { // LF, or 2nd char in CRLF |
| if (!isInternal() && !maybeInCRLF) |
| lineNumber++; |
| return c; |
| } |
| |
| // surrogates... |
| if (c >= 0xd800 && c < 0xdc00) { |
| returnedFirstHalf = true; |
| return c; |
| } |
| |
| fatal("P-071", new Object[]{Integer.toHexString(c)}); |
| } |
| throw new EndOfInputException(); |
| } |
| |
| |
| /** |
| * lookahead one character |
| */ |
| public boolean peekc(char c) throws IOException, SAXException { |
| |
| if (finish <= start) |
| fillbuf(); |
| if (finish > start) { |
| if (buf[start] == c) { |
| start++; |
| return true; |
| } else |
| return false; |
| } |
| return false; |
| } |
| |
| |
| /** |
| * two character pushback is guaranteed |
| */ |
| public void ungetc() { |
| |
| if (start == 0) |
| throw new InternalError("ungetc"); |
| start--; |
| |
| if (buf[start] == '\n' || buf[start] == '\r') { |
| if (!isInternal()) |
| lineNumber--; |
| } else if (returnedFirstHalf) |
| returnedFirstHalf = false; |
| } |
| |
| |
| /** |
| * optional grammatical whitespace (discarded) |
| */ |
| public boolean maybeWhitespace() |
| throws IOException, SAXException { |
| |
| char c; |
| boolean isSpace = false; |
| boolean sawCR = false; |
| |
| // [3] S ::= #20 | #09 | #0D | #0A |
| for (; ;) { |
| if (finish <= start) |
| fillbuf(); |
| if (finish <= start) |
| return isSpace; |
| |
| c = buf[start++]; |
| if (c == 0x20 || c == 0x09 || c == '\n' || c == '\r') { |
| isSpace = true; |
| |
| // |
| // CR, LF are line endings ... CLRF is one, not two! |
| // |
| if ((c == '\n' || c == '\r') && !isInternal()) { |
| if (!(c == '\n' && sawCR)) { |
| lineNumber++; |
| sawCR = false; |
| } |
| if (c == '\r') |
| sawCR = true; |
| } |
| } else { |
| start--; |
| return isSpace; |
| } |
| } |
| } |
| |
| |
| /** |
| * normal content; whitespace in markup may be handled |
| * specially if the parser uses the content model. |
| * <p/> |
| * <P> content terminates with markup delimiter characters, |
| * namely ampersand (&amp;) and left angle bracket (&lt;). |
| * <p/> |
| * <P> the document handler's characters() method is called |
| * on all the content found |
| */ |
| public boolean parsedContent(DTDEventListener docHandler |
| /*ElementValidator validator*/) |
| throws IOException, SAXException { |
| |
| // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) |
| |
| int first; // first char to return |
| int last; // last char to return |
| boolean sawContent; // sent any chars? |
| char c; |
| |
| // deliver right out of the buffer, until delimiter, EOF, |
| // or error, refilling as we go |
| for (first = last = start, sawContent = false; ; last++) { |
| |
| // buffer empty? |
| if (last >= finish) { |
| if (last > first) { |
| // validator.text (); |
| docHandler.characters(buf, first, last - first); |
| sawContent = true; |
| start = last; |
| } |
| if (isEOF()) // calls fillbuf |
| return sawContent; |
| first = start; |
| last = first - 1; // incremented in loop |
| continue; |
| } |
| |
| c = buf[last]; |
| |
| // |
| // pass most chars through ASAP; this inlines the code of |
| // [2] !XmlChars.isChar(c) leaving only characters needing |
| // special treatment ... line ends, surrogates, and: |
| // 0x0026 == '&' |
| // 0x003C == '<' |
| // 0x005D == ']' |
| // Comparisons ordered for speed on 'typical' text |
| // |
| if ((c > 0x005D && c <= 0xD7FF) // a-z and more |
| || (c < 0x0026 && c >= 0x0020) // space & punct |
| || (c > 0x003C && c < 0x005D) // A-Z & punct |
| || (c > 0x0026 && c < 0x003C) // 0-9 & punct |
| || c == 0x0009 |
| || (c >= 0xE000 && c <= 0xFFFD) |
| ) |
| continue; |
| |
| // terminate on markup delimiters |
| if (c == '<' || c == '&') |
| break; |
| |
| // count lines |
| if (c == '\n') { |
| if (!isInternal()) |
| lineNumber++; |
| continue; |
| } |
| |
| // External entities get CR, CRLF --> LF mapping |
| // Internal ones got it already, and we can't repeat |
| // else we break char ref handling!! |
| if (c == '\r') { |
| if (isInternal()) |
| continue; |
| |
| docHandler.characters(buf, first, last - first); |
| docHandler.characters(newline, 0, 1); |
| sawContent = true; |
| lineNumber++; |
| if (finish > (last + 1)) { |
| if (buf[last + 1] == '\n') |
| last++; |
| } else { // CR at end of buffer |
| // XXX case not yet handled: CRLF here will look like two lines |
| } |
| first = start = last + 1; |
| continue; |
| } |
| |
| // ']]>' is a WF error -- must fail if we see it |
| if (c == ']') { |
| switch (finish - last) { |
| // for suspicious end-of-buffer cases, get more data |
| // into the buffer to rule out this sequence. |
| case 2: |
| if (buf[last + 1] != ']') |
| continue; |
| // FALLTHROUGH |
| |
| case 1: |
| if (reader == null || isClosed) |
| continue; |
| if (last == first) |
| throw new InternalError("fillbuf"); |
| last--; |
| if (last > first) { |
| // validator.text (); |
| docHandler.characters(buf, first, last - first); |
| sawContent = true; |
| start = last; |
| } |
| fillbuf(); |
| first = last = start; |
| continue; |
| |
| // otherwise any "]]>" would be buffered, and we can |
| // see right away if that's what we have |
| default: |
| if (buf[last + 1] == ']' && buf[last + 2] == '>') |
| fatal("P-072", null); |
| continue; |
| } |
| } |
| |
| // correctly paired surrogates are OK |
| if (c >= 0xd800 && c <= 0xdfff) { |
| if ((last + 1) >= finish) { |
| if (last > first) { |
| // validator.text (); |
| docHandler.characters(buf, first, last - first); |
| sawContent = true; |
| start = last + 1; |
| } |
| if (isEOF()) { // calls fillbuf |
| fatal("P-081", |
| new Object[]{Integer.toHexString(c)}); |
| } |
| first = start; |
| last = first; |
| continue; |
| } |
| if (checkSurrogatePair(last)) |
| last++; |
| else { |
| last--; |
| // also terminate on surrogate pair oddities |
| break; |
| } |
| continue; |
| } |
| |
| fatal("P-071", new Object[]{Integer.toHexString(c)}); |
| } |
| if (last == first) |
| return sawContent; |
| // validator.text (); |
| docHandler.characters(buf, first, last - first); |
| start = last; |
| return true; |
| } |
| |
| |
| /** |
| * CDATA -- character data, terminated by "]]>" and optionally |
| * including unescaped markup delimiters (ampersand and left angle |
| * bracket). This should otherwise be exactly like character data, |
| * modulo differences in error report details. |
| * <p/> |
| * <P> The document handler's characters() or ignorableWhitespace() |
| * methods are invoked on all the character data found |
| * |
| * @param docHandler gets callbacks for character data |
| * @param ignorableWhitespace if true, whitespace characters will |
| * be reported using docHandler.ignorableWhitespace(); implicitly, |
| * non-whitespace characters will cause validation errors |
| * @param whitespaceInvalidMessage if true, ignorable whitespace |
| * causes a validity error report as well as a callback |
| */ |
| public boolean unparsedContent(DTDEventListener docHandler, |
| /*ElementValidator validator,*/ |
| boolean ignorableWhitespace, |
| String whitespaceInvalidMessage) |
| throws IOException, SAXException { |
| |
| // [18] CDSect ::= CDStart CData CDEnd |
| // [19] CDStart ::= '<![CDATA[' |
| // [20] CData ::= (Char* - (Char* ']]>' Char*)) |
| // [21] CDEnd ::= ']]>' |
| |
| // caller peeked the leading '<' ... |
| if (!peek("![CDATA[", null)) |
| return false; |
| docHandler.startCDATA(); |
| |
| // only a literal ']]>' stops this ... |
| int last; |
| |
| for (; ;) { // until ']]>' seen |
| boolean done = false; |
| char c; |
| |
| // don't report ignorable whitespace as "text" for |
| // validation purposes. |
| boolean white = ignorableWhitespace; |
| |
| for (last = start; last < finish; last++) { |
| c = buf[last]; |
| |
| // |
| // Reject illegal characters. |
| // |
| if (!XmlChars.isChar(c)) { |
| white = false; |
| if (c >= 0xd800 && c <= 0xdfff) { |
| if (checkSurrogatePair(last)) { |
| last++; |
| continue; |
| } else { |
| last--; |
| break; |
| } |
| } |
| fatal("P-071", new Object[] |
| {Integer.toHexString(buf[last])}); |
| } |
| if (c == '\n') { |
| if (!isInternal()) |
| lineNumber++; |
| continue; |
| } |
| if (c == '\r') { |
| // As above, we can't repeat CR/CRLF --> LF mapping |
| if (isInternal()) |
| continue; |
| |
| if (white) { |
| if (whitespaceInvalidMessage != null) |
| errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale, |
| whitespaceInvalidMessage), null)); |
| docHandler.ignorableWhitespace(buf, start, |
| last - start); |
| docHandler.ignorableWhitespace(newline, 0, 1); |
| } else { |
| // validator.text (); |
| docHandler.characters(buf, start, last - start); |
| docHandler.characters(newline, 0, 1); |
| } |
| lineNumber++; |
| if (finish > (last + 1)) { |
| if (buf[last + 1] == '\n') |
| last++; |
| } else { // CR at end of buffer |
| // XXX case not yet handled ... as above |
| } |
| start = last + 1; |
| continue; |
| } |
| if (c != ']') { |
| if (c != ' ' && c != '\t') |
| white = false; |
| continue; |
| } |
| if ((last + 2) < finish) { |
| if (buf[last + 1] == ']' && buf[last + 2] == '>') { |
| done = true; |
| break; |
| } |
| white = false; |
| continue; |
| } else { |
| //last--; |
| break; |
| } |
| } |
| if (white) { |
| if (whitespaceInvalidMessage != null) |
| errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale, |
| whitespaceInvalidMessage), null)); |
| docHandler.ignorableWhitespace(buf, start, last - start); |
| } else { |
| // validator.text (); |
| docHandler.characters(buf, start, last - start); |
| } |
| if (done) { |
| start = last + 3; |
| break; |
| } |
| start = last; |
| if (isEOF()) |
| fatal("P-073", null); |
| } |
| docHandler.endCDATA(); |
| return true; |
| } |
| |
| // return false to backstep at end of buffer) |
| private boolean checkSurrogatePair(int offset) |
| throws SAXException { |
| |
| if ((offset + 1) >= finish) |
| return false; |
| |
| char c1 = buf[offset++]; |
| char c2 = buf[offset]; |
| |
| if ((c1 >= 0xd800 && c1 < 0xdc00) && (c2 >= 0xdc00 && c2 <= 0xdfff)) |
| return true; |
| fatal("P-074", new Object[]{ |
| Integer.toHexString(c1 & 0x0ffff), |
| Integer.toHexString(c2 & 0x0ffff) |
| }); |
| return false; |
| } |
| |
| |
| /** |
| * whitespace in markup (flagged to app, discardable) |
| * <p/> |
| * <P> the document handler's ignorableWhitespace() method |
| * is called on all the whitespace found |
| */ |
| public boolean ignorableWhitespace(DTDEventListener handler) |
| throws IOException, SAXException { |
| |
| char c; |
| boolean isSpace = false; |
| int first; |
| |
| // [3] S ::= #20 | #09 | #0D | #0A |
| for (first = start; ;) { |
| if (finish <= start) { |
| if (isSpace) |
| handler.ignorableWhitespace(buf, first, start - first); |
| fillbuf(); |
| first = start; |
| } |
| if (finish <= start) |
| return isSpace; |
| |
| c = buf[start++]; |
| switch (c) { |
| case '\n': |
| if (!isInternal()) |
| lineNumber++; |
| // XXX handles Macintosh line endings wrong |
| // fallthrough |
| case 0x09: |
| case 0x20: |
| isSpace = true; |
| continue; |
| |
| case '\r': |
| isSpace = true; |
| if (!isInternal()) |
| lineNumber++; |
| handler.ignorableWhitespace(buf, first, |
| (start - 1) - first); |
| handler.ignorableWhitespace(newline, 0, 1); |
| if (start < finish && buf[start] == '\n') |
| ++start; |
| first = start; |
| continue; |
| |
| default: |
| ungetc(); |
| if (isSpace) |
| handler.ignorableWhitespace(buf, first, start - first); |
| return isSpace; |
| } |
| } |
| } |
| |
| /** |
| * returns false iff 'next' string isn't as provided, |
| * else skips that text and returns true. |
| * <p/> |
| * <P> NOTE: two alternative string representations are |
| * both passed in, since one is faster. |
| */ |
| public boolean peek(String next, char chars []) |
| throws IOException, SAXException { |
| |
| int len; |
| int i; |
| |
| if (chars != null) |
| len = chars.length; |
| else |
| len = next.length(); |
| |
| // buffer should hold the whole thing ... give it a |
| // chance for the end-of-buffer case and cope with EOF |
| // by letting fillbuf compact and fill |
| if (finish <= start || (finish - start) < len) |
| fillbuf(); |
| |
| // can't peek past EOF |
| if (finish <= start) |
| return false; |
| |
| // compare the string; consume iff it matches |
| if (chars != null) { |
| for (i = 0; i < len && (start + i) < finish; i++) { |
| if (buf[start + i] != chars[i]) |
| return false; |
| } |
| } else { |
| for (i = 0; i < len && (start + i) < finish; i++) { |
| if (buf[start + i] != next.charAt(i)) |
| return false; |
| } |
| } |
| |
| // if the first fillbuf didn't get enough data, give |
| // fillbuf another chance to read |
| if (i < len) { |
| if (reader == null || isClosed) |
| return false; |
| |
| // |
| // This diagnostic "knows" that the only way big strings would |
| // fail to be peeked is where it's a symbol ... e.g. for an |
| // </EndTag> construct. That knowledge could also be applied |
| // to get rid of the symbol length constraint, since having |
| // the wrong symbol is a fatal error anyway ... |
| // |
| if (len > buf.length) |
| fatal("P-077", new Object[]{new Integer(buf.length)}); |
| |
| fillbuf(); |
| return peek(next, chars); |
| } |
| |
| start += len; |
| return true; |
| } |
| |
| |
| // |
| // Support for reporting the internal DTD subset, so <!DOCTYPE...> |
| // declarations can be recreated. This is collected as a single |
| // string; such subsets are normally small, and many applications |
| // don't even care about this. |
| // |
| public void startRemembering() { |
| |
| if (startRemember != 0) |
| throw new InternalError(); |
| startRemember = start; |
| } |
| |
| public String rememberText() { |
| |
| String retval; |
| |
| // If the internal subset crossed a buffer boundary, we |
| // created a temporary buffer. |
| if (rememberedText != null) { |
| rememberedText.append(buf, startRemember, |
| start - startRemember); |
| retval = rememberedText.toString(); |
| } else |
| retval = new String(buf, startRemember, |
| start - startRemember); |
| |
| startRemember = 0; |
| rememberedText = null; |
| return retval; |
| } |
| |
| private InputEntity getTopEntity() { |
| |
| InputEntity current = this; |
| |
| // don't report locations within internal entities! |
| |
| while (current != null && current.input == null) |
| current = current.next; |
| return current == null ? this : current; |
| } |
| |
| /** |
| * Returns the public ID of this input source, if known |
| */ |
| public String getPublicId() { |
| |
| InputEntity where = getTopEntity(); |
| if (where == this) |
| return input.getPublicId(); |
| return where.getPublicId(); |
| } |
| |
| /** |
| * Returns the system ID of this input source, if known |
| */ |
| public String getSystemId() { |
| |
| InputEntity where = getTopEntity(); |
| if (where == this) |
| return input.getSystemId(); |
| return where.getSystemId(); |
| } |
| |
| /** |
| * Returns the current line number in this input source |
| */ |
| public int getLineNumber() { |
| |
| InputEntity where = getTopEntity(); |
| if (where == this) |
| return lineNumber; |
| return where.getLineNumber(); |
| } |
| |
| /** |
| * returns -1; maintaining column numbers hurts performance |
| */ |
| public int getColumnNumber() { |
| |
| return -1; // not maintained (speed) |
| } |
| |
| |
| // |
| // n.b. for non-EOF end-of-buffer cases, reader should return |
| // at least a handful of bytes so various lookaheads behave. |
| // |
| // two character pushback exists except at first; characters |
| // represented by surrogate pairs can't be pushed back (they'd |
| // only be in character data anyway). |
| // |
| // DTD exception thrown on char conversion problems; line number |
| // will be low, as a rule. |
| // |
| private void fillbuf() throws IOException, SAXException { |
| |
| // don't touched fixed buffers, that'll usually |
| // change entity values (and isn't needed anyway) |
| // likewise, ignore closed streams |
| if (reader == null || isClosed) |
| return; |
| |
| // if remembering DTD text, copy! |
| if (startRemember != 0) { |
| if (rememberedText == null) |
| rememberedText = new StringBuffer(buf.length); |
| rememberedText.append(buf, startRemember, |
| start - startRemember); |
| } |
| |
| boolean extra = (finish > 0) && (start > 0); |
| int len; |
| |
| if (extra) // extra pushback |
| start--; |
| len = finish - start; |
| |
| System.arraycopy(buf, start, buf, 0, len); |
| start = 0; |
| finish = len; |
| |
| try { |
| len = buf.length - len; |
| len = reader.read(buf, finish, len); |
| } catch (UnsupportedEncodingException e) { |
| fatal("P-075", new Object[]{e.getMessage()}); |
| } catch (CharConversionException e) { |
| fatal("P-076", new Object[]{e.getMessage()}); |
| } |
| if (len >= 0) |
| finish += len; |
| else |
| close(); |
| if (extra) // extra pushback |
| start++; |
| |
| if (startRemember != 0) |
| // assert extra == true |
| startRemember = 1; |
| } |
| |
| public void close() { |
| |
| try { |
| if (reader != null && !isClosed) |
| reader.close(); |
| isClosed = true; |
| } catch (IOException e) { |
| /* NOTHING */ |
| } |
| } |
| |
| |
| private void fatal(String messageId, Object params []) |
| throws SAXException { |
| |
| SAXParseException x = new SAXParseException(DTDParser.messages.getMessage(locale, messageId, params), null); |
| |
| // not continuable ... e.g. WF errors |
| close(); |
| errHandler.fatalError(x); |
| throw x; |
| } |
| } |