jaxp/src/com/sun/org/apache/xerces/internal/impl/XML11DTDScannerImpl.java - platform/libcore - Git at Google

 /*
  * reserved comment block
  * DO NOT REMOVE OR ALTER!
  */
 /*
  * The Apache Software License, Version 1.1
  *
  *
  * Copyright (c) 1999-2004 The Apache Software Foundation.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  *
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *    the documentation and/or other materials provided with the
  *    distribution.
  *
  * 3. The end-user documentation included with the redistribution,
  *    if any, must include the following acknowledgment:
  *       "This product includes software developed by the
  *        Apache Software Foundation (http://www.apache.org/)."
  *    Alternately, this acknowledgment may appear in the software itself,
  *    if and wherever such third-party acknowledgments normally appear.
  *
  * 4. The names "Xerces" and "Apache Software Foundation" must
  *    not be used to endorse or promote products derived from this
  *    software without prior written permission. For written
  *    permission, please contact apache@apache.org.
  *
  * 5. Products derived from this software may not be called "Apache",
  *    nor may "Apache" appear in their name, without prior written
  *    permission of the Apache Software Foundation.
  *
  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * ====================================================================
  *
  * This software consists of voluntary contributions made by many
  * individuals on behalf of the Apache Software Foundation and was
  * originally based on software copyright (c) 1999, International
  * Business Machines, Inc., http://www.apache.org.  For more
  * information on the Apache Software Foundation, please see
  * <http://www.apache.org/>.
  */

 package com.sun.org.apache.xerces.internal.impl;

 import java.io.IOException;

 import com.sun.org.apache.xerces.internal.util.SymbolTable;
 import com.sun.org.apache.xerces.internal.util.XML11Char;
 import com.sun.org.apache.xerces.internal.util.XMLChar;
 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
 import com.sun.org.apache.xerces.internal.xni.XMLString;
 import com.sun.org.apache.xerces.internal.xni.XNIException;

 /**
  * This class is responsible for scanning the declarations found
  * in the internal and external subsets of a DTD in an XML document.
  * The scanner acts as the source for the DTD information which is
  * communicated to the DTD handlers.
  * <p>
  * This component requires the following features and properties from the
  * component manager that uses it:
  * <ul>
  *  <li>http://xml.org/sax/features/validation</li>
  *  <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
  *  <li>http://apache.org/xml/properties/internal/symbol-table</li>
  *  <li>http://apache.org/xml/properties/internal/error-reporter</li>
  *  <li>http://apache.org/xml/properties/internal/entity-manager</li>
  * </ul>
  *
  * @xerces.internal
  *
  * @author Arnaud  Le Hors, IBM
  * @author Andy Clark, IBM
  * @author Glenn Marcy, IBM
  * @author Eric Ye, IBM
  *
  */
 public class XML11DTDScannerImpl
     extends XMLDTDScannerImpl {

     /** Array of 3 strings. */
     private String[] fStrings = new String[3];

     /** String. */
     private XMLString fString = new XMLString();

     /** String buffer; used as scratch space by {@link #scanPubidLiteral(XMLString)}. */
     private XMLStringBuffer fStringBuffer = new XMLStringBuffer();

     /** String buffer. */
     private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();

     /** String buffer. */
     private XMLStringBuffer fStringBuffer3 = new XMLStringBuffer();

     //
     // Constructors
     //

     /** Default constructor; delegates to the superclass default constructor. */
     public XML11DTDScannerImpl() {
         super();
     } // <init>()

     /**
      * Constructor for the use of non-XMLComponentManagers.
      * All three arguments are handed unchanged to the superclass constructor.
      *
      * @param symbolTable   the symbol table passed to the superclass.
      * @param errorReporter the error reporter passed to the superclass.
      * @param entityManager the entity manager passed to the superclass.
      */
     public XML11DTDScannerImpl(SymbolTable symbolTable,
                 XMLErrorReporter errorReporter, XMLEntityManager entityManager) {
         super(symbolTable, errorReporter, entityManager);
     }

     //
     // XMLDTDScanner methods
     //

     //
     // XMLScanner methods
     //
     // NOTE:  this is a carbon copy of the code in XML11DocumentScannerImpl;
     // we need to override these methods in both places.  Ah for
     // multiple inheritance...
     // This needs to be refactored!!!  - NG

     /**
      * Scans public ID literal.
      *
      * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
      * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
      *
      * The returned string is normalized according to the following rule,
      * from http://www.w3.org/TR/REC-xml#dt-pubid:
      *
      * Before a match is attempted, all strings of white space in the public
      * identifier must be normalized to single space characters (#x20), and
      * leading and trailing white space must be removed.
      *
      * In addition to #x20, #xA and #xD, this scanner also treats #x85 and
      * #x2028 as white space while collapsing.
      *
      * <strong>Note:</strong> This method uses fStringBuffer, anything in it at
      * the time of calling is lost.
      *
      * @param literal The string to fill in with the public ID literal. It is
      *                only filled in once the closing quote has been seen.
      * @return True on success. False if the opening quote is missing, if the
      *         input ends before the closing quote, or if a character that is
      *         not a PubidChar was encountered (in that last case scanning
      *         continues to the closing quote and {@code literal} is still set).
      * @throws IOException  propagated from the entity scanner.
      * @throws XNIException propagated from the entity scanner or from fatal
      *                      error reporting.
      */
     protected boolean scanPubidLiteral(XMLString literal)
         throws IOException, XNIException
     {
         int quote = fEntityScanner.scanChar();
         if (quote != '\'' && quote != '"') {
             reportFatalError("QuoteRequiredInPublicID", null);
             return false;
         }

         fStringBuffer.clear();
         // skip leading whitespace
         boolean skipSpace = true;
         boolean dataok = true;
         while (true) {
             int c = fEntityScanner.scanChar();
             // REVISIT:  it could really only be \n or 0x20; all else is normalized, no?  - neilg
             if (c == ' ' || c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) {
                 if (!skipSpace) {
                     // take the first whitespace as a space and skip the others
                     fStringBuffer.append(' ');
                     skipSpace = true;
                 }
             }
             else if (c == quote) {
                 // If we finished on a space let's trim it. skipSpace is also
                 // true when nothing has been appended yet (empty or
                 // all-whitespace literal); in that case there is no trailing
                 // space to drop and the length must not go negative.
                 if (skipSpace && fStringBuffer.length > 0) {
                     fStringBuffer.length--;
                 }
                 literal.setValues(fStringBuffer);
                 break;
             }
             else if (XMLChar.isPubid(c)) {
                 fStringBuffer.append((char)c);
                 skipSpace = false;
             }
             else if (c == -1) {
                 reportFatalError("PublicIDUnterminated", null);
                 return false;
             }
             else {
                 // keep scanning so that every invalid character is reported
                 // and the scanner ends up positioned after the closing quote
                 dataok = false;
                 reportFatalError("InvalidCharInPublicID",
                                  new Object[]{Integer.toHexString(c)});
             }
         }
         return dataok;
     }

     /**
      * Normalize whitespace in an XMLString converting all whitespace
      * characters to space characters. The characters are rewritten in place
      * in {@code value.ch}; the length of the string is not changed.
      *
      * @param value the string to normalize; every character in
      *              {@code [offset, offset + length)} for which
      *              {@code XMLChar.isSpace} is true becomes {@code ' '}.
      */
     protected void normalizeWhitespace(XMLString value) {
         int end = value.offset + value.length;
         for (int i = value.offset; i < end; ++i) {
             int c = value.ch[i];
             if (XMLChar.isSpace(c)) {
                 value.ch[i] = ' ';
             }
         }
     }

     /**
      * Normalize whitespace in an XMLString converting all whitespace
      * characters to space characters, starting part-way into the string.
      * The characters are rewritten in place in {@code value.ch}.
      *
      * @param value     the string to normalize.
      * @param fromIndex index of the first character to examine, relative to
      *                  {@code value.offset} (not an index into
      *                  {@code value.ch}).
      */
     protected void normalizeWhitespace(XMLString value, int fromIndex) {
         int end = value.offset + value.length;
         for (int i = value.offset + fromIndex; i < end; ++i) {
             int c = value.ch[i];
             if (XMLChar.isSpace(c)) {
                 value.ch[i] = ' ';
             }
         }
     }

     /**
      * Checks whether this string would be unchanged by normalization.
      *
      * @param value the string to inspect; it is not modified.
      * @return -1 if the value would be unchanged by normalization,
      * otherwise the index of the first whitespace character which
      * would be transformed. The index is relative to {@code value.offset}.
      */
     protected int isUnchangedByNormalization(XMLString value) {
         int end = value.offset + value.length;
         for (int i = value.offset; i < end; ++i) {
             int c = value.ch[i];
             if (XMLChar.isSpace(c)) {
                 return i - value.offset;
             }
         }
         return -1;
     }

     /**
      * Returns true if the given character is not
      * valid with respect to the version of
      * XML understood by this scanner.
      *
      * @param value the character to test.
      * @return the negation of {@code XML11Char.isXML11Valid(value)}.
      */
     protected boolean isInvalid(int value) {
         return (!XML11Char.isXML11Valid(value));
     } // isInvalid(int):  boolean

     /**
      * Returns true if the given character is not
      * valid or may not be used outside a character reference
      * with respect to the version of XML understood by this scanner.
      *
      * @param value the character to test.
      * @return the negation of {@code XML11Char.isXML11ValidLiteral(value)}.
      */
     protected boolean isInvalidLiteral(int value) {
         return (!XML11Char.isXML11ValidLiteral(value));
     } // isInvalidLiteral(int):  boolean

     /**
      * Returns true if the given character is
      * a valid nameChar with respect to the version of
      * XML understood by this scanner.
      *
      * @param value the character to test.
      * @return the result of {@code XML11Char.isXML11Name(value)}.
      */
     protected boolean isValidNameChar(int value) {
         return (XML11Char.isXML11Name(value));
     } // isValidNameChar(int):  boolean

     /**
      * Returns true if the given character is
      * a valid nameStartChar with respect to the version of
      * XML understood by this scanner.
      *
      * @param value the character to test.
      * @return the result of {@code XML11Char.isXML11NameStart(value)}.
      */
     protected boolean isValidNameStartChar(int value) {
         return (XML11Char.isXML11NameStart(value));
     } // isValidNameStartChar(int):  boolean

     /**
      * Returns true if the given character is
      * a valid NCName character with respect to the version of
      * XML understood by this scanner.
      *
      * @param value the character to test.
      * @return the result of {@code XML11Char.isXML11NCName(value)}.
      */
     protected boolean isValidNCName(int value) {
         return (XML11Char.isXML11NCName(value));
     } // isValidNCName(int):  boolean

     /**
      * Returns true if the given character is
      * a valid high surrogate for a nameStartChar
      * with respect to the version of XML understood
      * by this scanner.
      *
      * @param value the character to test.
      * @return the result of {@code XML11Char.isXML11NameHighSurrogate(value)}.
      */
     protected boolean isValidNameStartHighSurrogate(int value) {
         return XML11Char.isXML11NameHighSurrogate(value);
     } // isValidNameStartHighSurrogate(int):  boolean

     /**
      * Tells whether the given version string is accepted by this scanner.
      *
      * Note that, according to 4.3.4 of the XML 1.1 spec, XML 1.1
      * documents may invoke 1.0 entities; thus either version decl (or none!)
      * is allowed to appear in this context.
      *
      * @param version the version string to test; must not be null.
      * @return true if {@code version} is exactly "1.1" or "1.0".
      */
     protected boolean versionSupported(String version) {
         return version.equals("1.1") || version.equals ("1.0");
     } // versionSupported(String):  boolean

     /**
      * Returns the error message key for unsupported
      * versions of XML with respect to the version of
      * XML understood by this scanner.
      *
      * @return the constant key "VersionNotSupported11".
      */
     protected String getVersionNotSupportedKey () {
         return "VersionNotSupported11";
     } // getVersionNotSupportedKey: String

 } // class XML11DTDScannerImpl
	/*
	* reserved comment block
	* DO NOT REMOVE OR ALTER!
	*/
	/*
	* The Apache Software License, Version 1.1
	*
	*
	* Copyright (c) 1999-2004 The Apache Software Foundation.
	* All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	*
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	*
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in
	* the documentation and/or other materials provided with the
	* distribution.
	*
	* 3. The end-user documentation included with the redistribution,
	* if any, must include the following acknowledgment:
	* "This product includes software developed by the
	* Apache Software Foundation (http://www.apache.org/)."
	* Alternately, this acknowledgment may appear in the software itself,
	* if and wherever such third-party acknowledgments normally appear.
	*
	* 4. The names "Xerces" and "Apache Software Foundation" must
	* not be used to endorse or promote products derived from this
	* software without prior written permission. For written
	* permission, please contact apache@apache.org.
	*
	* 5. Products derived from this software may not be called "Apache",
	* nor may "Apache" appear in their name, without prior written
	* permission of the Apache Software Foundation.
	*
	* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
	* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
	* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
	* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
	* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
	* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
	* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
	* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	* ====================================================================
	*
	* This software consists of voluntary contributions made by many
	* individuals on behalf of the Apache Software Foundation and was
	* originally based on software copyright (c) 1999, International
	* Business Machines, Inc., http://www.apache.org. For more
	* information on the Apache Software Foundation, please see
	* <http://www.apache.org/>.
	*/

	package com.sun.org.apache.xerces.internal.impl;

	import java.io.IOException;

	import com.sun.org.apache.xerces.internal.util.SymbolTable;
	import com.sun.org.apache.xerces.internal.util.XML11Char;
	import com.sun.org.apache.xerces.internal.util.XMLChar;
	import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
	import com.sun.org.apache.xerces.internal.xni.XMLString;
	import com.sun.org.apache.xerces.internal.xni.XNIException;

	/**
	* This class is responsible for scanning the declarations found
	* in the internal and external subsets of a DTD in an XML document.
	* The scanner acts as the source for the DTD information which is
	* communicated to the DTD handlers.
	* <p>
	* This component requires the following features and properties from the
	* component manager that uses it:
	* <ul>
	* <li>http://xml.org/sax/features/validation</li>
	* <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
	* <li>http://apache.org/xml/properties/internal/symbol-table</li>
	* <li>http://apache.org/xml/properties/internal/error-reporter</li>
	* <li>http://apache.org/xml/properties/internal/entity-manager</li>
	* </ul>
	*
	* @xerces.internal
	*
	* @author Arnaud Le Hors, IBM
	* @author Andy Clark, IBM
	* @author Glenn Marcy, IBM
	* @author Eric Ye, IBM
	*
	*/
	public class XML11DTDScannerImpl
	extends XMLDTDScannerImpl {

	/** Array of 3 strings. */
	private String[] fStrings = new String[3];

	/** String. */
	private XMLString fString = new XMLString();

	/** String buffer. */
	private XMLStringBuffer fStringBuffer = new XMLStringBuffer();

	/** String buffer. */
	private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
	private XMLStringBuffer fStringBuffer3 = new XMLStringBuffer();

	//
	// Constructors
	//

	/** Default constructor. */
	public XML11DTDScannerImpl() {super();} // <init>()

	/** Constructor for he use of non-XMLComponentManagers. */
	public XML11DTDScannerImpl(SymbolTable symbolTable,
	XMLErrorReporter errorReporter, XMLEntityManager entityManager) {
	super(symbolTable, errorReporter, entityManager);
	}

	//
	// XMLDTDScanner methods
	//

	//
	// XMLScanner methods
	//
	// NOTE: this is a carbon copy of the code in XML11DocumentScannerImpl;
	// we need to override these methods in both places. Ah for
	// multiple inheritance...
	// This needs to be refactored!!! - NG
	/**
	* Scans public ID literal.
	*
	* [12] PubidLiteral ::= '"' PubidChar* '"' \| "'" (PubidChar - "'")* "'"
	* [13] PubidChar::= #x20 \| #xD \| #xA \| [a-zA-Z0-9] \| [-'()+,./:=?;!*#@$_%]
	*
	* The returned string is normalized according to the following rule,
	* from http://www.w3.org/TR/REC-xml#dt-pubid:
	*
	* Before a match is attempted, all strings of white space in the public
	* identifier must be normalized to single space characters (#x20), and
	* leading and trailing white space must be removed.
	*
	* @param literal The string to fill in with the public ID literal.
	* @return True on success.
	*
	* <strong>Note:</strong> This method uses fStringBuffer, anything in it at
	* the time of calling is lost.
	*/
	protected boolean scanPubidLiteral(XMLString literal)
	throws IOException, XNIException
	{
	int quote = fEntityScanner.scanChar();
	if (quote != '\'' && quote != '"') {
	reportFatalError("QuoteRequiredInPublicID", null);
	return false;
	}

	fStringBuffer.clear();
	// skip leading whitespace
	boolean skipSpace = true;
	boolean dataok = true;
	while (true) {
	int c = fEntityScanner.scanChar();
	// REVISIT: it could really only be \n or 0x20; all else is normalized, no? - neilg
	if (c == ' ' \|\| c == '\n' \|\| c == '\r' \|\| c == 0x85 \|\| c == 0x2028) {
	if (!skipSpace) {
	// take the first whitespace as a space and skip the others
	fStringBuffer.append(' ');
	skipSpace = true;
	}
	}
	else if (c == quote) {
	if (skipSpace) {
	// if we finished on a space let's trim it
	fStringBuffer.length--;
	}
	literal.setValues(fStringBuffer);
	break;
	}
	else if (XMLChar.isPubid(c)) {
	fStringBuffer.append((char)c);
	skipSpace = false;
	}
	else if (c == -1) {
	reportFatalError("PublicIDUnterminated", null);
	return false;
	}
	else {
	dataok = false;
	reportFatalError("InvalidCharInPublicID",
	new Object[]{Integer.toHexString(c)});
	}
	}
	return dataok;
	}

	/**
	* Normalize whitespace in an XMLString converting all whitespace
	* characters to space characters.
	*/
	protected void normalizeWhitespace(XMLString value) {
	int end = value.offset + value.length;
	for (int i = value.offset; i < end; ++i) {
	int c = value.ch[i];
	if (XMLChar.isSpace(c)) {
	value.ch[i] = ' ';
	}
	}
	}

	/**
	* Normalize whitespace in an XMLString converting all whitespace
	* characters to space characters.
	*/
	protected void normalizeWhitespace(XMLString value, int fromIndex) {
	int end = value.offset + value.length;
	for (int i = value.offset + fromIndex; i < end; ++i) {
	int c = value.ch[i];
	if (XMLChar.isSpace(c)) {
	value.ch[i] = ' ';
	}
	}
	}

	/**
	* Checks whether this string would be unchanged by normalization.
	*
	* @return -1 if the value would be unchanged by normalization,
	* otherwise the index of the first whitespace character which
	* would be transformed.
	*/
	protected int isUnchangedByNormalization(XMLString value) {
	int end = value.offset + value.length;
	for (int i = value.offset; i < end; ++i) {
	int c = value.ch[i];
	if (XMLChar.isSpace(c)) {
	return i - value.offset;
	}
	}
	return -1;
	}

	// returns true if the given character is not
	// valid with respect to the version of
	// XML understood by this scanner.
	protected boolean isInvalid(int value) {
	return (!XML11Char.isXML11Valid(value));
	} // isInvalid(int): boolean

	// returns true if the given character is not
	// valid or may not be used outside a character reference
	// with respect to the version of XML understood by this scanner.
	protected boolean isInvalidLiteral(int value) {
	return (!XML11Char.isXML11ValidLiteral(value));
	} // isInvalidLiteral(int): boolean

	// returns true if the given character is
	// a valid nameChar with respect to the version of
	// XML understood by this scanner.
	protected boolean isValidNameChar(int value) {
	return (XML11Char.isXML11Name(value));
	} // isValidNameChar(int): boolean

	// returns true if the given character is
	// a valid nameStartChar with respect to the version of
	// XML understood by this scanner.
	protected boolean isValidNameStartChar(int value) {
	return (XML11Char.isXML11NameStart(value));
	} // isValidNameStartChar(int): boolean

	// returns true if the given character is
	// a valid NCName character with respect to the version of
	// XML understood by this scanner.
	protected boolean isValidNCName(int value) {
	return (XML11Char.isXML11NCName(value));
	} // isValidNCName(int): boolean

	// returns true if the given character is
	// a valid high surrogate for a nameStartChar
	// with respect to the version of XML understood
	// by this scanner.
	protected boolean isValidNameStartHighSurrogate(int value) {
	return XML11Char.isXML11NameHighSurrogate(value);
	} // isValidNameStartHighSurrogate(int): boolean

	// note that, according to 4.3.4 of the XML 1.1 spec, XML 1.1
	// documents may invoke 1.0 entities; thus either version decl (or none!)
	// is allowed to appear in this context
	protected boolean versionSupported(String version) {
	return version.equals("1.1") \|\| version.equals ("1.0");
	} // versionSupported(String): boolean

	// returns the error message key for unsupported
	// versions of XML with respect to the version of
	// XML understood by this scanner.
	protected String getVersionNotSupportedKey () {
	return "VersionNotSupported11";
	} // getVersionNotSupportedKey: String

	} // class XML11DTDScannerImpl