| /* |
| * reserved comment block |
| * DO NOT REMOVE OR ALTER! |
| */ |
| /* |
| * The Apache Software License, Version 1.1 |
| * |
| * |
| * Copyright (c) 1999-2004 The Apache Software Foundation. |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in |
| * the documentation and/or other materials provided with the |
| * distribution. |
| * |
| * 3. The end-user documentation included with the redistribution, |
| * if any, must include the following acknowledgment: |
| * "This product includes software developed by the |
| * Apache Software Foundation (http://www.apache.org/)." |
| * Alternately, this acknowledgment may appear in the software itself, |
| * if and wherever such third-party acknowledgments normally appear. |
| * |
| * 4. The names "Xerces" and "Apache Software Foundation" must |
| * not be used to endorse or promote products derived from this |
| * software without prior written permission. For written |
| * permission, please contact apache@apache.org. |
| * |
| * 5. Products derived from this software may not be called "Apache", |
| * nor may "Apache" appear in their name, without prior written |
| * permission of the Apache Software Foundation. |
| * |
| * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED |
| * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR |
| * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
| * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
| * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| * SUCH DAMAGE. |
| * ==================================================================== |
| * |
| * This software consists of voluntary contributions made by many |
| * individuals on behalf of the Apache Software Foundation and was |
| * originally based on software copyright (c) 1999, International |
| * Business Machines, Inc., http://www.apache.org. For more |
| * information on the Apache Software Foundation, please see |
| * <http://www.apache.org/>. |
| */ |
| |
| package com.sun.org.apache.xerces.internal.impl; |
| |
| import java.io.IOException; |
| |
| import com.sun.org.apache.xerces.internal.util.SymbolTable; |
| import com.sun.org.apache.xerces.internal.util.XML11Char; |
| import com.sun.org.apache.xerces.internal.util.XMLChar; |
| import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; |
| import com.sun.org.apache.xerces.internal.xni.XMLString; |
| import com.sun.org.apache.xerces.internal.xni.XNIException; |
| |
| /** |
| * This class is responsible for scanning the declarations found |
| * in the internal and external subsets of a DTD in an XML document. |
| * The scanner acts as the sources for the DTD information which is |
| * communicated to the DTD handlers. |
| * <p> |
| * This component requires the following features and properties from the |
| * component manager that uses it: |
| * <ul> |
| * <li>http://xml.org/sax/features/validation</li> |
| * <li>http://apache.org/xml/features/scanner/notify-char-refs</li> |
| * <li>http://apache.org/xml/properties/internal/symbol-table</li> |
| * <li>http://apache.org/xml/properties/internal/error-reporter</li> |
| * <li>http://apache.org/xml/properties/internal/entity-manager</li> |
| * </ul> |
| * |
| * @xerces.internal |
| * |
| * @author Arnaud Le Hors, IBM |
| * @author Andy Clark, IBM |
| * @author Glenn Marcy, IBM |
| * @author Eric Ye, IBM |
| * |
| */ |
| public class XML11DTDScannerImpl |
| extends XMLDTDScannerImpl { |
| |
| /** Array of 3 strings. */ |
| private String[] fStrings = new String[3]; |
| |
| /** String. */ |
| private XMLString fString = new XMLString(); |
| |
| /** String buffer. */ |
| private XMLStringBuffer fStringBuffer = new XMLStringBuffer(); |
| |
| /** String buffer. */ |
| private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer(); |
| private XMLStringBuffer fStringBuffer3 = new XMLStringBuffer(); |
| |
| // |
| // Constructors |
| // |
| |
| /** Default constructor. */ |
| public XML11DTDScannerImpl() {super();} // <init>() |
| |
| /** Constructor for he use of non-XMLComponentManagers. */ |
| public XML11DTDScannerImpl(SymbolTable symbolTable, |
| XMLErrorReporter errorReporter, XMLEntityManager entityManager) { |
| super(symbolTable, errorReporter, entityManager); |
| } |
| |
| // |
| // XMLDTDScanner methods |
| // |
| |
| // |
| // XMLScanner methods |
| // |
| // NOTE: this is a carbon copy of the code in XML11DocumentScannerImpl; |
| // we need to override these methods in both places. Ah for |
| // multiple inheritance... |
| // This needs to be refactored!!! - NG |
| /** |
| * Scans public ID literal. |
| * |
| * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" |
| * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] |
| * |
| * The returned string is normalized according to the following rule, |
| * from http://www.w3.org/TR/REC-xml#dt-pubid: |
| * |
| * Before a match is attempted, all strings of white space in the public |
| * identifier must be normalized to single space characters (#x20), and |
| * leading and trailing white space must be removed. |
| * |
| * @param literal The string to fill in with the public ID literal. |
| * @return True on success. |
| * |
| * <strong>Note:</strong> This method uses fStringBuffer, anything in it at |
| * the time of calling is lost. |
| */ |
| protected boolean scanPubidLiteral(XMLString literal) |
| throws IOException, XNIException |
| { |
| int quote = fEntityScanner.scanChar(); |
| if (quote != '\'' && quote != '"') { |
| reportFatalError("QuoteRequiredInPublicID", null); |
| return false; |
| } |
| |
| fStringBuffer.clear(); |
| // skip leading whitespace |
| boolean skipSpace = true; |
| boolean dataok = true; |
| while (true) { |
| int c = fEntityScanner.scanChar(); |
| // REVISIT: it could really only be \n or 0x20; all else is normalized, no? - neilg |
| if (c == ' ' || c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) { |
| if (!skipSpace) { |
| // take the first whitespace as a space and skip the others |
| fStringBuffer.append(' '); |
| skipSpace = true; |
| } |
| } |
| else if (c == quote) { |
| if (skipSpace) { |
| // if we finished on a space let's trim it |
| fStringBuffer.length--; |
| } |
| literal.setValues(fStringBuffer); |
| break; |
| } |
| else if (XMLChar.isPubid(c)) { |
| fStringBuffer.append((char)c); |
| skipSpace = false; |
| } |
| else if (c == -1) { |
| reportFatalError("PublicIDUnterminated", null); |
| return false; |
| } |
| else { |
| dataok = false; |
| reportFatalError("InvalidCharInPublicID", |
| new Object[]{Integer.toHexString(c)}); |
| } |
| } |
| return dataok; |
| } |
| |
| /** |
| * Normalize whitespace in an XMLString converting all whitespace |
| * characters to space characters. |
| */ |
| protected void normalizeWhitespace(XMLString value) { |
| int end = value.offset + value.length; |
| for (int i = value.offset; i < end; ++i) { |
| int c = value.ch[i]; |
| if (XMLChar.isSpace(c)) { |
| value.ch[i] = ' '; |
| } |
| } |
| } |
| |
| /** |
| * Normalize whitespace in an XMLString converting all whitespace |
| * characters to space characters. |
| */ |
| protected void normalizeWhitespace(XMLString value, int fromIndex) { |
| int end = value.offset + value.length; |
| for (int i = value.offset + fromIndex; i < end; ++i) { |
| int c = value.ch[i]; |
| if (XMLChar.isSpace(c)) { |
| value.ch[i] = ' '; |
| } |
| } |
| } |
| |
| /** |
| * Checks whether this string would be unchanged by normalization. |
| * |
| * @return -1 if the value would be unchanged by normalization, |
| * otherwise the index of the first whitespace character which |
| * would be transformed. |
| */ |
| protected int isUnchangedByNormalization(XMLString value) { |
| int end = value.offset + value.length; |
| for (int i = value.offset; i < end; ++i) { |
| int c = value.ch[i]; |
| if (XMLChar.isSpace(c)) { |
| return i - value.offset; |
| } |
| } |
| return -1; |
| } |
| |
| // returns true if the given character is not |
| // valid with respect to the version of |
| // XML understood by this scanner. |
| protected boolean isInvalid(int value) { |
| return (!XML11Char.isXML11Valid(value)); |
| } // isInvalid(int): boolean |
| |
| // returns true if the given character is not |
| // valid or may not be used outside a character reference |
| // with respect to the version of XML understood by this scanner. |
| protected boolean isInvalidLiteral(int value) { |
| return (!XML11Char.isXML11ValidLiteral(value)); |
| } // isInvalidLiteral(int): boolean |
| |
| // returns true if the given character is |
| // a valid nameChar with respect to the version of |
| // XML understood by this scanner. |
| protected boolean isValidNameChar(int value) { |
| return (XML11Char.isXML11Name(value)); |
| } // isValidNameChar(int): boolean |
| |
| // returns true if the given character is |
| // a valid nameStartChar with respect to the version of |
| // XML understood by this scanner. |
| protected boolean isValidNameStartChar(int value) { |
| return (XML11Char.isXML11NameStart(value)); |
| } // isValidNameStartChar(int): boolean |
| |
| // returns true if the given character is |
| // a valid NCName character with respect to the version of |
| // XML understood by this scanner. |
| protected boolean isValidNCName(int value) { |
| return (XML11Char.isXML11NCName(value)); |
| } // isValidNCName(int): boolean |
| |
| // returns true if the given character is |
| // a valid high surrogate for a nameStartChar |
| // with respect to the version of XML understood |
| // by this scanner. |
| protected boolean isValidNameStartHighSurrogate(int value) { |
| return XML11Char.isXML11NameHighSurrogate(value); |
| } // isValidNameStartHighSurrogate(int): boolean |
| |
| // note that, according to 4.3.4 of the XML 1.1 spec, XML 1.1 |
| // documents may invoke 1.0 entities; thus either version decl (or none!) |
| // is allowed to appear in this context |
| protected boolean versionSupported(String version) { |
| return version.equals("1.1") || version.equals ("1.0"); |
| } // versionSupported(String): boolean |
| |
| // returns the error message key for unsupported |
| // versions of XML with respect to the version of |
| // XML understood by this scanner. |
| protected String getVersionNotSupportedKey () { |
| return "VersionNotSupported11"; |
| } // getVersionNotSupportedKey: String |
| |
| } // class XML11DTDScannerImpl |