| /* |
| * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. |
| */ |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package com.sun.org.apache.xerces.internal.impl; |
| |
| import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; |
| import com.sun.org.apache.xerces.internal.util.AugmentationsImpl; |
| import com.sun.org.apache.xerces.internal.util.XMLAttributesIteratorImpl; |
| import com.sun.org.apache.xerces.internal.util.XMLChar; |
| import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; |
| import com.sun.org.apache.xerces.internal.util.XMLSymbols; |
| import com.sun.org.apache.xerces.internal.xni.QName; |
| import com.sun.org.apache.xerces.internal.xni.XMLAttributes; |
| import com.sun.org.apache.xerces.internal.xni.XMLDocumentHandler; |
| import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; |
| import com.sun.org.apache.xerces.internal.xni.XMLString; |
| import com.sun.org.apache.xerces.internal.xni.XNIException; |
| import com.sun.org.apache.xerces.internal.xni.parser.XMLComponent; |
| import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; |
| import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; |
| import com.sun.org.apache.xerces.internal.xni.parser.XMLDocumentScanner; |
| import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource; |
| import com.sun.org.apache.xerces.internal.xni.Augmentations; |
| import com.sun.org.apache.xerces.internal.utils.SecuritySupport; |
| import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; |
| import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.Limit; |
| import com.sun.org.apache.xerces.internal.utils.XMLSecurityPropertyManager; |
| import com.sun.xml.internal.stream.XMLBufferListener; |
| import com.sun.xml.internal.stream.XMLEntityStorage; |
| import com.sun.xml.internal.stream.dtd.DTDGrammarUtil; |
| import java.io.EOFException; |
| import java.io.IOException; |
| import javax.xml.XMLConstants; |
| import javax.xml.stream.XMLInputFactory; |
| import javax.xml.stream.XMLStreamConstants; |
| import javax.xml.stream.events.XMLEvent; |
| import jdk.xml.internal.JdkXmlUtils; |
| |
| /** |
| * |
| * This class is responsible for scanning the structure and content |
| * of document fragments. |
| * |
| * This class has been modified as per the new design which is more suited to |
| * efficiently build pull parser. Lot of improvements have been done and |
| * the code has been added to support stax functionality/features. |
| * |
| * @author Neeraj Bajaj SUN Microsystems |
| * @author K.Venugopal SUN Microsystems |
| * @author Glenn Marcy, IBM |
| * @author Andy Clark, IBM |
| * @author Arnaud Le Hors, IBM |
| * @author Eric Ye, IBM |
| * @author Sunitha Reddy, SUN Microsystems |
| * |
| */ |
| public class XMLDocumentFragmentScannerImpl |
| extends XMLScanner |
| implements XMLDocumentScanner, XMLComponent, XMLEntityHandler, XMLBufferListener { |
| |
| // |
| // Constants |
| // |
| |
| protected int fElementAttributeLimit, fXMLNameLimit; |
| |
| /** External subset resolver. **/ |
| protected ExternalSubsetResolver fExternalSubsetResolver; |
| |
| // scanner states |
| |
| //XXX this should be divided into more states. |
| /** Scanner state: start of markup. */ |
| protected static final int SCANNER_STATE_START_OF_MARKUP = 21; |
| |
| /** Scanner state: content. */ |
| protected static final int SCANNER_STATE_CONTENT = 22; |
| |
| /** Scanner state: processing instruction. */ |
| protected static final int SCANNER_STATE_PI = 23; |
| |
| /** Scanner state: DOCTYPE. */ |
| protected static final int SCANNER_STATE_DOCTYPE = 24; |
| |
| /** Scanner state: XML Declaration */ |
| protected static final int SCANNER_STATE_XML_DECL = 25; |
| |
| /** Scanner state: root element. */ |
| protected static final int SCANNER_STATE_ROOT_ELEMENT = 26; |
| |
| /** Scanner state: comment. */ |
| protected static final int SCANNER_STATE_COMMENT = 27; |
| |
| /** Scanner state: reference. */ |
| protected static final int SCANNER_STATE_REFERENCE = 28; |
| |
| // <book type="hard"> reading attribute name 'type' |
| protected static final int SCANNER_STATE_ATTRIBUTE = 29; |
| |
| // <book type="hard"> //reading attribute value. |
| protected static final int SCANNER_STATE_ATTRIBUTE_VALUE = 30; |
| |
| /** Scanner state: trailing misc. USED BY DOCUMENT_SCANNER_IMPL*/ |
| //protected static final int SCANNER_STATE_TRAILING_MISC = 32; |
| |
| /** Scanner state: end of input. */ |
| protected static final int SCANNER_STATE_END_OF_INPUT = 33; |
| |
| /** Scanner state: terminated. */ |
| protected static final int SCANNER_STATE_TERMINATED = 34; |
| |
| /** Scanner state: CDATA section. */ |
| protected static final int SCANNER_STATE_CDATA = 35; |
| |
| /** Scanner state: Text declaration. */ |
| protected static final int SCANNER_STATE_TEXT_DECL = 36; |
| |
| /** Scanner state: Text declaration. */ |
| protected static final int SCANNER_STATE_CHARACTER_DATA = 37; |
| |
| //<book type="hard">foo</book> |
| protected static final int SCANNER_STATE_START_ELEMENT_TAG = 38; |
| |
| //<book type="hard">foo</book> reading </book> |
| protected static final int SCANNER_STATE_END_ELEMENT_TAG = 39; |
| |
| protected static final int SCANNER_STATE_CHAR_REFERENCE = 40; |
| protected static final int SCANNER_STATE_BUILT_IN_REFS = 41; |
| |
| // feature identifiers |
| |
| |
| /** Feature identifier: notify built-in refereces. */ |
| protected static final String NOTIFY_BUILTIN_REFS = |
| Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_BUILTIN_REFS_FEATURE; |
| |
| /** Property identifier: entity resolver. */ |
| protected static final String ENTITY_RESOLVER = |
| Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY; |
| |
| /** Feature identifier: standard uri conformant */ |
| protected static final String STANDARD_URI_CONFORMANT = |
| Constants.XERCES_FEATURE_PREFIX +Constants.STANDARD_URI_CONFORMANT_FEATURE; |
| |
| /** Property identifier: Security property manager. */ |
| private static final String XML_SECURITY_PROPERTY_MANAGER = |
| Constants.XML_SECURITY_PROPERTY_MANAGER; |
| |
| /** access external dtd: file protocol |
| * For DOM/SAX, the secure feature is set to true by default |
| */ |
| final static String EXTERNAL_ACCESS_DEFAULT = Constants.EXTERNAL_ACCESS_DEFAULT; |
| |
| // recognized features and properties |
| |
| /** Recognized features. */ |
| private static final String[] RECOGNIZED_FEATURES = { |
| NAMESPACES, |
| VALIDATION, |
| NOTIFY_BUILTIN_REFS, |
| NOTIFY_CHAR_REFS, |
| Constants.STAX_REPORT_CDATA_EVENT, |
| XMLConstants.USE_CATALOG |
| }; |
| |
| /** Feature defaults. */ |
| private static final Boolean[] FEATURE_DEFAULTS = { |
| Boolean.TRUE, |
| null, |
| Boolean.FALSE, |
| Boolean.FALSE, |
| Boolean.TRUE, |
| JdkXmlUtils.USE_CATALOG_DEFAULT |
| }; |
| |
| /** Recognized properties. */ |
| private static final String[] RECOGNIZED_PROPERTIES = { |
| SYMBOL_TABLE, |
| ERROR_REPORTER, |
| ENTITY_MANAGER, |
| XML_SECURITY_PROPERTY_MANAGER, |
| JdkXmlUtils.CATALOG_DEFER, |
| JdkXmlUtils.CATALOG_FILES, |
| JdkXmlUtils.CATALOG_PREFER, |
| JdkXmlUtils.CATALOG_RESOLVE |
| }; |
| |
| /** Property defaults. */ |
| private static final Object[] PROPERTY_DEFAULTS = { |
| null, |
| null, |
| null, |
| null, |
| null, |
| null, |
| null, |
| null |
| }; |
| |
| |
| private static final char [] cdata = {'[','C','D','A','T','A','['}; |
| static final char [] xmlDecl = {'<','?','x','m','l'}; |
| // private static final char [] endTag = {'<','/'}; |
| // debugging |
| |
| /** Debug scanner state. */ |
| private static final boolean DEBUG_SCANNER_STATE = false; |
| |
| /** Debug driver. */ |
| private static final boolean DEBUG_DISPATCHER = false; |
| |
| /** Debug content driver scanning. */ |
| protected static final boolean DEBUG_START_END_ELEMENT = false; |
| |
| |
| /** Debug driver next */ |
| protected static final boolean DEBUG_NEXT = false ; |
| |
| /** Debug driver next */ |
| protected static final boolean DEBUG = false; |
| protected static final boolean DEBUG_COALESCE = false; |
| // |
| // Data |
| // |
| |
| // protected data |
| |
| /** Document handler. */ |
| protected XMLDocumentHandler fDocumentHandler; |
| protected int fScannerLastState ; |
| |
| /** Entity Storage */ |
| protected XMLEntityStorage fEntityStore; |
| |
| /** Entity stack. */ |
| protected int[] fEntityStack = new int[4]; |
| |
| /** Markup depth. */ |
| protected int fMarkupDepth; |
| |
| //is the element empty |
| protected boolean fEmptyElement ; |
| |
| //track if we are reading attributes, this is usefule while |
| //there is a callback |
| protected boolean fReadingAttributes = false; |
| |
| /** Scanner state. */ |
| protected int fScannerState; |
| |
| /** SubScanner state: inside scanContent method. */ |
| protected boolean fInScanContent = false; |
| protected boolean fLastSectionWasCData = false; |
| protected boolean fLastSectionWasEntityReference = false; |
| protected boolean fLastSectionWasCharacterData = false; |
| |
| /** has external dtd */ |
| protected boolean fHasExternalDTD; |
| |
| /** Standalone. */ |
| protected boolean fStandaloneSet; |
| protected boolean fStandalone; |
| protected String fVersion; |
| |
| // element information |
| |
| /** Current element. */ |
| protected QName fCurrentElement; |
| |
| /** Element stack. */ |
| protected ElementStack fElementStack = new ElementStack(); |
| protected ElementStack2 fElementStack2 = new ElementStack2(); |
| |
| // other info |
| |
| /** Document system identifier. |
| * REVISIT: So what's this used for? - NG |
| * protected String fDocumentSystemId; |
| ******/ |
| |
| protected String fPITarget ; |
| |
| //xxx do we need to create an extra XMLString object... look for using fTempString for collecting all the data values |
| protected XMLString fPIData = new XMLString(); |
| |
| // features |
| |
| |
| /** Notify built-in references. */ |
| protected boolean fNotifyBuiltInRefs = false; |
| |
| //STAX related properties |
| //defaultValues. |
| protected boolean fSupportDTD = true; |
| protected boolean fReplaceEntityReferences = true; |
| protected boolean fSupportExternalEntities = false; |
| protected boolean fReportCdataEvent = false ; |
| protected boolean fIsCoalesce = false ; |
| protected String fDeclaredEncoding = null; |
| /** Xerces Feature: Disallow doctype declaration. */ |
| protected boolean fDisallowDoctype = false; |
| |
| /** |
| * comma-delimited list of protocols that are allowed for the purpose |
| * of accessing external dtd or entity references |
| */ |
| protected String fAccessExternalDTD = EXTERNAL_ACCESS_DEFAULT; |
| |
| /** |
| * standard uri conformant (strict uri). |
| * http://apache.org/xml/features/standard-uri-conformant |
| */ |
| protected boolean fStrictURI; |
| |
| // drivers |
| |
| /** Active driver. */ |
| protected Driver fDriver; |
| |
| /** Content driver. */ |
| protected Driver fContentDriver = createContentDriver(); |
| |
| // temporary variables |
| |
| /** Element QName. */ |
| protected QName fElementQName = new QName(); |
| |
| /** Attribute QName. */ |
| protected QName fAttributeQName = new QName(); |
| |
| /** |
| * CHANGED: Using XMLAttributesIteratorImpl instead of XMLAttributesImpl. This class |
| * implements Iterator interface so we can directly give Attributes in the form of |
| * iterator. |
| */ |
| protected XMLAttributesIteratorImpl fAttributes = new XMLAttributesIteratorImpl(); |
| |
| |
| /** String. */ |
| protected XMLString fTempString = new XMLString(); |
| |
| /** String. */ |
| protected XMLString fTempString2 = new XMLString(); |
| |
| /** Array of 3 strings. */ |
| private String[] fStrings = new String[3]; |
| |
| /** Making the buffer accesible to derived class -- String buffer. */ |
| protected XMLStringBuffer fStringBuffer = new XMLStringBuffer(); |
| |
| /** Making the buffer accesible to derived class -- String buffer. */ |
| protected XMLStringBuffer fStringBuffer2 = new XMLStringBuffer(); |
| |
| /** stores character data. */ |
| /** Making the buffer accesible to derived class -- stores PI data */ |
| protected XMLStringBuffer fContentBuffer = new XMLStringBuffer(); |
| |
| /** Single character array. */ |
| private final char[] fSingleChar = new char[1]; |
| private String fCurrentEntityName = null; |
| |
| // New members |
| protected boolean fScanToEnd = false; |
| |
| protected DTDGrammarUtil dtdGrammarUtil= null; |
| |
| protected boolean fAddDefaultAttr = false; |
| |
| protected boolean foundBuiltInRefs = false; |
| |
| /** Built-in reference character event */ |
| protected boolean builtInRefCharacterHandled = false; |
| |
| //skip element algorithm |
| static final short MAX_DEPTH_LIMIT = 5 ; |
| static final short ELEMENT_ARRAY_LENGTH = 200 ; |
| static final short MAX_POINTER_AT_A_DEPTH = 4 ; |
| static final boolean DEBUG_SKIP_ALGORITHM = false; |
| //create a elemnet array of length equal to ELEMENT_ARRAY_LENGTH |
| String [] fElementArray = new String[ELEMENT_ARRAY_LENGTH] ; |
| //pointer location where last element was skipped |
| short fLastPointerLocation = 0 ; |
| short fElementPointer = 0 ; |
| //2D array to store pointer info |
| short [] [] fPointerInfo = new short[MAX_DEPTH_LIMIT] [MAX_POINTER_AT_A_DEPTH] ; |
| protected String fElementRawname ; |
| protected boolean fShouldSkip = false; |
| protected boolean fAdd = false ; |
| protected boolean fSkip = false; |
| |
| /** Reusable Augmentations. */ |
| private Augmentations fTempAugmentations = null; |
| // |
| // Constructors |
| // |
| |
| /** Default constructor. */ |
| public XMLDocumentFragmentScannerImpl() { |
| } // <init>() |
| |
| // |
| // XMLDocumentScanner methods |
| // |
| |
| /** |
| * Sets the input source. |
| * |
| * @param inputSource The input source. |
| * |
| * @throws IOException Thrown on i/o error. |
| */ |
| public void setInputSource(XMLInputSource inputSource) throws IOException { |
| fEntityManager.setEntityHandler(this); |
| fEntityManager.startEntity(false, "$fragment$", inputSource, false, true); |
| // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId()); |
| } // setInputSource(XMLInputSource) |
| |
| /** |
| * Scans a document. |
| * |
| * @param complete True if the scanner should scan the document |
| * completely, pushing all events to the registered |
| * document handler. A value of false indicates that |
| * that the scanner should only scan the next portion |
| * of the document and return. A scanner instance is |
| * permitted to completely scan a document if it does |
| * not support this "pull" scanning model. |
| * |
| * @return True if there is more to scan, false otherwise. |
| */ |
| public boolean scanDocument(boolean complete) |
| throws IOException, XNIException { |
| |
| // keep dispatching "events" |
| fEntityManager.setEntityHandler(this); |
| //System.out.println(" get Document Handler in NSDocumentHandler " + fDocumentHandler ); |
| |
| int event = next(); |
| do { |
| switch (event) { |
| case XMLStreamConstants.START_DOCUMENT : |
| //fDocumentHandler.startDocument(fEntityManager.getEntityScanner(),fEntityManager.getEntityScanner().getVersion(),fNamespaceContext,null);// not able to get |
| break; |
| case XMLStreamConstants.START_ELEMENT : |
| //System.out.println(" in scann element"); |
| //fDocumentHandler.startElement(getElementQName(),fAttributes,null); |
| break; |
| case XMLStreamConstants.CHARACTERS : |
| fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); |
| fDocumentHandler.characters(getCharacterData(),null); |
| break; |
| case XMLStreamConstants.SPACE: |
| //check if getCharacterData() is the right function to retrieve ignorableWhitespace information. |
| //System.out.println("in the space"); |
| //fDocumentHandler.ignorableWhitespace(getCharacterData(), null); |
| break; |
| case XMLStreamConstants.ENTITY_REFERENCE : |
| fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); |
| //entity reference callback are given in startEntity |
| break; |
| case XMLStreamConstants.PROCESSING_INSTRUCTION : |
| fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); |
| fDocumentHandler.processingInstruction(getPITarget(),getPIData(),null); |
| break; |
| case XMLStreamConstants.COMMENT : |
| fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); |
| fDocumentHandler.comment(getCharacterData(),null); |
| break; |
| case XMLStreamConstants.DTD : |
| //all DTD related callbacks are handled in DTDScanner. |
| //1. Stax doesn't define DTD states as it does for XML Document. |
| //therefore we don't need to take care of anything here. So Just break; |
| break; |
| case XMLStreamConstants.CDATA: |
| fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); |
| fDocumentHandler.startCDATA(null); |
| //xxx: check if CDATA values comes from getCharacterData() function |
| fDocumentHandler.characters(getCharacterData(),null); |
| fDocumentHandler.endCDATA(null); |
| //System.out.println(" in CDATA of the XMLNSDocumentScannerImpl"); |
| break; |
| case XMLStreamConstants.NOTATION_DECLARATION : |
| break; |
| case XMLStreamConstants.ENTITY_DECLARATION : |
| break; |
| case XMLStreamConstants.NAMESPACE : |
| break; |
| case XMLStreamConstants.ATTRIBUTE : |
| break; |
| case XMLStreamConstants.END_ELEMENT : |
| //do not give callback here. |
| //this callback is given in scanEndElement function. |
| //fDocumentHandler.endElement(getElementQName(),null); |
| break; |
| default : |
| throw new InternalError("processing event: " + event); |
| |
| } |
| //System.out.println("here in before calling next"); |
| event = next(); |
| //System.out.println("here in after calling next"); |
| } while (event!=XMLStreamConstants.END_DOCUMENT && complete); |
| |
| if(event == XMLStreamConstants.END_DOCUMENT) { |
| fDocumentHandler.endDocument(null); |
| return false; |
| } |
| |
| return true; |
| |
| } // scanDocument(boolean):boolean |
| |
| |
| |
| public com.sun.org.apache.xerces.internal.xni.QName getElementQName(){ |
| if(fScannerLastState == XMLEvent.END_ELEMENT){ |
| fElementQName.setValues(fElementStack.getLastPoppedElement()); |
| } |
| return fElementQName ; |
| } |
| |
| /** return the next state on the input |
| * @return int |
| */ |
| |
| public int next() throws IOException, XNIException { |
| return fDriver.next(); |
| } |
| |
| // |
| // XMLComponent methods |
| // |
| |
| /** |
| * Resets the component. The component can query the component manager |
| * about any features and properties that affect the operation of the |
| * component. |
| * |
| * @param componentManager The component manager. |
| * |
| * @throws SAXException Thrown by component on initialization error. |
| * For example, if a feature or property is |
| * required for the operation of the component, the |
| * component manager may throw a |
| * SAXNotRecognizedException or a |
| * SAXNotSupportedException. |
| */ |
| |
| public void reset(XMLComponentManager componentManager) |
| throws XMLConfigurationException { |
| |
| super.reset(componentManager); |
| |
| // other settings |
| // fDocumentSystemId = null; |
| |
| // sax features |
| //fAttributes.setNamespaces(fNamespaces); |
| |
| // xerces features |
| fReportCdataEvent = componentManager.getFeature(Constants.STAX_REPORT_CDATA_EVENT, true); |
| fSecurityManager = (XMLSecurityManager)componentManager.getProperty(Constants.SECURITY_MANAGER, null); |
| fNotifyBuiltInRefs = componentManager.getFeature(NOTIFY_BUILTIN_REFS, false); |
| |
| Object resolver = componentManager.getProperty(ENTITY_RESOLVER, null); |
| fExternalSubsetResolver = (resolver instanceof ExternalSubsetResolver) ? |
| (ExternalSubsetResolver) resolver : null; |
| |
| //attribute |
| fReadingAttributes = false; |
| //xxx: external entities are supported in Xerces |
| // it would be good to define feature for this case |
| fSupportExternalEntities = true; |
| fReplaceEntityReferences = true; |
| fIsCoalesce = false; |
| |
| // setup Driver |
| setScannerState(SCANNER_STATE_CONTENT); |
| setDriver(fContentDriver); |
| |
| // JAXP 1.5 features and properties |
| XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) |
| componentManager.getProperty(XML_SECURITY_PROPERTY_MANAGER, null); |
| fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); |
| |
| fStrictURI = componentManager.getFeature(STANDARD_URI_CONFORMANT, false); |
| |
| resetCommon(); |
| //fEntityManager.test(); |
| } // reset(XMLComponentManager) |
| |
| |
| public void reset(PropertyManager propertyManager){ |
| |
| super.reset(propertyManager); |
| |
| // other settings |
| // fDocumentSystemId = null; |
| fNamespaces = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_NAMESPACE_AWARE)).booleanValue(); |
| fNotifyBuiltInRefs = false ; |
| |
| //fElementStack2.clear(); |
| //fReplaceEntityReferences = true; |
| //fSupportExternalEntities = true; |
| Boolean bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES); |
| fReplaceEntityReferences = bo.booleanValue(); |
| bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES); |
| fSupportExternalEntities = bo.booleanValue(); |
| Boolean cdata = (Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.STAX_REPORT_CDATA_EVENT) ; |
| if(cdata != null) |
| fReportCdataEvent = cdata.booleanValue() ; |
| Boolean coalesce = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_COALESCING) ; |
| if(coalesce != null) |
| fIsCoalesce = coalesce.booleanValue(); |
| fReportCdataEvent = fIsCoalesce ? false : (fReportCdataEvent && true) ; |
| //if fIsCoalesce is set to true, set the value of fReplaceEntityReferences to true, |
| //if fIsCoalesce is set to false, take the value of fReplaceEntityReferences as set by application |
| fReplaceEntityReferences = fIsCoalesce ? true : fReplaceEntityReferences; |
| // setup Driver |
| //we dont need to do this -- nb. |
| //setScannerState(SCANNER_STATE_CONTENT); |
| //setDriver(fContentDriver); |
| //fEntityManager.test(); |
| |
| // JAXP 1.5 features and properties |
| XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) |
| propertyManager.getProperty(XML_SECURITY_PROPERTY_MANAGER); |
| fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); |
| |
| fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(Constants.SECURITY_MANAGER); |
| resetCommon(); |
| } // reset(XMLComponentManager) |
| |
| void resetCommon() { |
| // initialize vars |
| fMarkupDepth = 0; |
| fCurrentElement = null; |
| fElementStack.clear(); |
| fHasExternalDTD = false; |
| fStandaloneSet = false; |
| fStandalone = false; |
| fInScanContent = false; |
| //skipping algorithm |
| fShouldSkip = false; |
| fAdd = false; |
| fSkip = false; |
| |
| fEntityStore = fEntityManager.getEntityStore(); |
| dtdGrammarUtil = null; |
| |
| if (fSecurityManager != null) { |
| fElementAttributeLimit = fSecurityManager.getLimit(XMLSecurityManager.Limit.ELEMENT_ATTRIBUTE_LIMIT); |
| fXMLNameLimit = fSecurityManager.getLimit(XMLSecurityManager.Limit.MAX_NAME_LIMIT); |
| } else { |
| fElementAttributeLimit = 0; |
| fXMLNameLimit = XMLSecurityManager.Limit.MAX_NAME_LIMIT.defaultValue(); |
| } |
| fLimitAnalyzer = fEntityManager.fLimitAnalyzer; |
| } |
| |
| /** |
| * Returns a list of feature identifiers that are recognized by |
| * this component. This method may return null if no features |
| * are recognized by this component. |
| */ |
| public String[] getRecognizedFeatures() { |
| return (String[])(RECOGNIZED_FEATURES.clone()); |
| } // getRecognizedFeatures():String[] |
| |
| /** |
| * Sets the state of a feature. This method is called by the component |
| * manager any time after reset when a feature changes state. |
| * <p> |
| * <strong>Note:</strong> Components should silently ignore features |
| * that do not affect the operation of the component. |
| * |
| * @param featureId The feature identifier. |
| * @param state The state of the feature. |
| * |
| * @throws SAXNotRecognizedException The component should not throw |
| * this exception. |
| * @throws SAXNotSupportedException The component should not throw |
| * this exception. |
| */ |
| public void setFeature(String featureId, boolean state) |
| throws XMLConfigurationException { |
| |
| super.setFeature(featureId, state); |
| |
| // Xerces properties |
| if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { |
| String feature = featureId.substring(Constants.XERCES_FEATURE_PREFIX.length()); |
| if (feature.equals(Constants.NOTIFY_BUILTIN_REFS_FEATURE)) { |
| fNotifyBuiltInRefs = state; |
| } |
| } |
| |
| } // setFeature(String,boolean) |
| |
| /** |
| * Returns a list of property identifiers that are recognized by |
| * this component. This method may return null if no properties |
| * are recognized by this component. |
| */ |
| public String[] getRecognizedProperties() { |
| return (String[])(RECOGNIZED_PROPERTIES.clone()); |
| } // getRecognizedProperties():String[] |
| |
| /** |
| * Sets the value of a property. This method is called by the component |
| * manager any time after reset when a property changes value. |
| * <p> |
| * <strong>Note:</strong> Components should silently ignore properties |
| * that do not affect the operation of the component. |
| * |
| * @param propertyId The property identifier. |
| * @param value The value of the property. |
| * |
| * @throws SAXNotRecognizedException The component should not throw |
| * this exception. |
| * @throws SAXNotSupportedException The component should not throw |
| * this exception. |
| */ |
| public void setProperty(String propertyId, Object value) |
| throws XMLConfigurationException { |
| |
| super.setProperty(propertyId, value); |
| |
| // Xerces properties |
| if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { |
| final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); |
| if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() && |
| propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) { |
| fEntityManager = (XMLEntityManager)value; |
| return; |
| } |
| if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() && |
| propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) { |
| fExternalSubsetResolver = (value instanceof ExternalSubsetResolver) ? |
| (ExternalSubsetResolver) value : null; |
| return; |
| } |
| } |
| |
| |
| // Xerces properties |
| if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { |
| String property = propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length()); |
| if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) { |
| fEntityManager = (XMLEntityManager)value; |
| } |
| return; |
| } |
| |
| //JAXP 1.5 properties |
| if (propertyId.equals(XML_SECURITY_PROPERTY_MANAGER)) |
| { |
| XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)value; |
| fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); |
| } |
| |
| } // setProperty(String,Object) |
| |
| /** |
| * Returns the default state for a feature, or null if this |
| * component does not want to report a default value for this |
| * feature. |
| * |
| * @param featureId The feature identifier. |
| * |
| * @since Xerces 2.2.0 |
| */ |
| public Boolean getFeatureDefault(String featureId) { |
| for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { |
| if (RECOGNIZED_FEATURES[i].equals(featureId)) { |
| return FEATURE_DEFAULTS[i]; |
| } |
| } |
| return null; |
| } // getFeatureDefault(String):Boolean |
| |
| /** |
| * Returns the default state for a property, or null if this |
| * component does not want to report a default value for this |
| * property. |
| * |
| * @param propertyId The property identifier. |
| * |
| * @since Xerces 2.2.0 |
| */ |
| public Object getPropertyDefault(String propertyId) { |
| for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { |
| if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { |
| return PROPERTY_DEFAULTS[i]; |
| } |
| } |
| return null; |
| } // getPropertyDefault(String):Object |
| |
| // |
| // XMLDocumentSource methods |
| // |
| |
| /** |
| * setDocumentHandler |
| * |
| * @param documentHandler |
| */ |
| public void setDocumentHandler(XMLDocumentHandler documentHandler) { |
| fDocumentHandler = documentHandler; |
| //System.out.println(" In Set DOCUMENT HANDLER" + fDocumentHandler + " scanner =" + this); |
| } // setDocumentHandler(XMLDocumentHandler) |
| |
| |
| /** Returns the document handler */ |
| public XMLDocumentHandler getDocumentHandler(){ |
| return fDocumentHandler; |
| } |
| |
| // |
| // XMLEntityHandler methods |
| // |
| |
| /** |
| * This method notifies of the start of an entity. The DTD has the |
| * pseudo-name of "[dtd]" parameter entity names start with '%'; and |
| * general entities are just specified by their name. |
| * |
| * @param name The name of the entity. |
| * @param identifier The resource identifier. |
| * @param encoding The auto-detected IANA encoding name of the entity |
| * stream. This value will be null in those situations |
| * where the entity encoding is not auto-detected (e.g. |
| * internal entities or a document entity that is |
| * parsed from a java.io.Reader). |
| * @param augs Additional information that may include infoset augmentations |
| * |
| * @throws XNIException Thrown by handler to signal an error. |
| */ |
| public void startEntity(String name, |
| XMLResourceIdentifier identifier, |
| String encoding, Augmentations augs) throws XNIException { |
| |
| // keep track of this entity before fEntityDepth is increased |
| if (fEntityDepth == fEntityStack.length) { |
| int[] entityarray = new int[fEntityStack.length * 2]; |
| System.arraycopy(fEntityStack, 0, entityarray, 0, fEntityStack.length); |
| fEntityStack = entityarray; |
| } |
| fEntityStack[fEntityDepth] = fMarkupDepth; |
| |
| super.startEntity(name, identifier, encoding, augs); |
| |
| // WFC: entity declared in external subset in standalone doc |
| if(fStandalone && fEntityStore.isEntityDeclInExternalSubset(name)) { |
| reportFatalError("MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE", |
| new Object[]{name}); |
| } |
| |
| /** we are not calling the handlers yet.. */ |
| // call handler |
| if (fDocumentHandler != null && !fScanningAttribute) { |
| if (!name.equals("[xml]")) { |
| fDocumentHandler.startGeneralEntity(name, identifier, encoding, augs); |
| } |
| } |
| |
| } // startEntity(String,XMLResourceIdentifier,String) |
| |
| /** |
| * This method notifies the end of an entity. The DTD has the pseudo-name |
| * of "[dtd]" parameter entity names start with '%'; and general entities |
| * are just specified by their name. |
| * |
| * @param name The name of the entity. |
| * @param augs Additional information that may include infoset augmentations |
| * |
| * @throws XNIException Thrown by handler to signal an error. |
| */ |
| public void endEntity(String name, Augmentations augs) throws IOException, XNIException { |
| |
| /** |
| * // flush possible pending output buffer - see scanContent |
| * if (fInScanContent && fStringBuffer.length != 0 |
| * && fDocumentHandler != null) { |
| * fDocumentHandler.characters(fStringBuffer, null); |
| * fStringBuffer.length = 0; // make sure we know it's been flushed |
| * } |
| */ |
| super.endEntity(name, augs); |
| |
| // make sure markup is properly balanced |
| if (fMarkupDepth != fEntityStack[fEntityDepth]) { |
| reportFatalError("MarkupEntityMismatch", null); |
| } |
| |
| /**/ |
| // call handler |
| if (fDocumentHandler != null && !fScanningAttribute) { |
| if (!name.equals("[xml]")) { |
| fDocumentHandler.endGeneralEntity(name, augs); |
| } |
| } |
| |
| |
| } // endEntity(String) |
| |
| // |
| // Protected methods |
| // |
| |
| // Driver factory methods |
| |
| /** Creates a content Driver. */ |
| protected Driver createContentDriver() { |
| return new FragmentContentDriver(); |
| } // createContentDriver():Driver |
| |
| // scanning methods |
| |
| /** |
| * Scans an XML or text declaration. |
| * <p> |
| * <pre> |
| * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' |
| * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") |
| * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) |
| * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* |
| * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") |
| * | ('"' ('yes' | 'no') '"')) |
| * |
| * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' |
| * </pre> |
| * |
| * @param scanningTextDecl True if a text declaration is to |
| * be scanned instead of an XML |
| * declaration. |
| */ |
| protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl) |
| throws IOException, XNIException { |
| |
| // scan decl |
| super.scanXMLDeclOrTextDecl(scanningTextDecl, fStrings); |
| fMarkupDepth--; |
| |
| // pseudo-attribute values |
| String version = fStrings[0]; |
| String encoding = fStrings[1]; |
| String standalone = fStrings[2]; |
| fDeclaredEncoding = encoding; |
| // set standalone |
| fStandaloneSet = standalone != null; |
| fStandalone = fStandaloneSet && standalone.equals("yes"); |
| ///xxx see where its used.. this is not used anywhere. it may be useful for entity to store this information |
| //but this information is only related with Document Entity. |
| fEntityManager.setStandalone(fStandalone); |
| |
| |
| // call handler |
| if (fDocumentHandler != null) { |
| if (scanningTextDecl) { |
| fDocumentHandler.textDecl(version, encoding, null); |
| } else { |
| fDocumentHandler.xmlDecl(version, encoding, standalone, null); |
| } |
| } |
| |
| if(version != null){ |
| fEntityScanner.setVersion(version); |
| fEntityScanner.setXMLVersion(version); |
| } |
| // set encoding on reader, only if encoding was not specified by the application explicitly |
| if (encoding != null && !fEntityScanner.getCurrentEntity().isEncodingExternallySpecified()) { |
| fEntityScanner.setEncoding(encoding); |
| } |
| |
| } // scanXMLDeclOrTextDecl(boolean) |
| |
| public String getPITarget(){ |
| return fPITarget ; |
| } |
| |
| public XMLStringBuffer getPIData(){ |
| return fContentBuffer ; |
| } |
| |
| //XXX: why not this function behave as per the state of the parser? |
| public XMLString getCharacterData(){ |
| if(fUsebuffer){ |
| return fContentBuffer ; |
| }else{ |
| return fTempString; |
| } |
| |
| } |
| |
| |
| /** |
| * Scans a processing data. This is needed to handle the situation |
| * where a document starts with a processing instruction whose |
| * target name <em>starts with</em> "xml". (e.g. xmlfoo) |
| * |
| * @param target The PI target |
| * @param data The XMLStringBuffer to fill in with the data |
| */ |
| protected void scanPIData(String target, XMLStringBuffer data) |
| throws IOException, XNIException { |
| |
| super.scanPIData(target, data); |
| |
| //set the PI target and values |
| fPITarget = target ; |
| |
| fMarkupDepth--; |
| |
| } // scanPIData(String) |
| |
| /** |
| * Scans a comment. |
| * <p> |
| * <pre> |
| * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' |
| * </pre> |
| * <p> |
| * <strong>Note:</strong> Called after scanning past '<!--' |
| */ |
| protected void scanComment() throws IOException, XNIException { |
| fContentBuffer.clear(); |
| scanComment(fContentBuffer); |
| //getTextCharacters can also be called for reading comments |
| fUsebuffer = true; |
| fMarkupDepth--; |
| |
| } // scanComment() |
| |
| //xxx value returned by this function may not remain valid if another event is scanned. |
| public String getComment(){ |
| return fContentBuffer.toString(); |
| } |
| |
| void addElement(String rawname){ |
| if(fElementPointer < ELEMENT_ARRAY_LENGTH){ |
| //storing element raw name in a linear list of array |
| fElementArray[fElementPointer] = rawname ; |
| //storing elemnetPointer for particular element depth |
| |
| if(DEBUG_SKIP_ALGORITHM){ |
| StringBuffer sb = new StringBuffer() ; |
| sb.append(" Storing element information ") ; |
| sb.append(" fElementPointer = " + fElementPointer) ; |
| sb.append(" fElementRawname = " + fElementQName.rawname) ; |
| sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); |
| System.out.println(sb.toString()) ; |
| } |
| |
| //store pointer information only when element depth is less MAX_DEPTH_LIMIT |
| if(fElementStack.fDepth < MAX_DEPTH_LIMIT){ |
| short column = storePointerForADepth(fElementPointer); |
| if(column > 0){ |
| short pointer = getElementPointer((short)fElementStack.fDepth, (short)(column - 1) ); |
| //identity comparison shouldn't take much time and we can rely on this |
| //since its guaranteed to have same object id for same string. |
| if(rawname == fElementArray[pointer]){ |
| fShouldSkip = true ; |
| fLastPointerLocation = pointer ; |
| //reset the things and return. |
| resetPointer((short)fElementStack.fDepth , column) ; |
| fElementArray[fElementPointer] = null ; |
| return ; |
| }else{ |
| fShouldSkip = false ; |
| } |
| } |
| } |
| fElementPointer++ ; |
| } |
| } |
| |
| |
| void resetPointer(short depth, short column){ |
| fPointerInfo[depth] [column] = (short)0; |
| } |
| |
| //returns column information at which pointer was stored. |
| short storePointerForADepth(short elementPointer){ |
| short depth = (short) fElementStack.fDepth ; |
| |
| //Stores element pointer locations at particular depth , only 4 pointer locations |
| //are stored at particular depth for now. |
| for(short i = 0 ; i < MAX_POINTER_AT_A_DEPTH ; i++){ |
| |
| if(canStore(depth, i)){ |
| fPointerInfo[depth][i] = elementPointer ; |
| if(DEBUG_SKIP_ALGORITHM){ |
| StringBuffer sb = new StringBuffer() ; |
| sb.append(" Pointer information ") ; |
| sb.append(" fElementPointer = " + fElementPointer) ; |
| sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); |
| sb.append(" column = " + i ) ; |
| System.out.println(sb.toString()) ; |
| } |
| return i; |
| } |
| //else |
| //pointer was not stored because we reached the limit |
| } |
| return -1 ; |
| } |
| |
| boolean canStore(short depth, short column){ |
| //colum = 0 , means first element at particular depth |
| //column = 1, means second element at particular depth |
| // calle should make sure that it doesn't call for value outside allowed co-ordinates |
| return fPointerInfo[depth][column] == 0 ? true : false ; |
| } |
| |
| |
| short getElementPointer(short depth, short column){ |
| //colum = 0 , means first element at particular depth |
| //column = 1, means second element at particular depth |
| // calle should make sure that it doesn't call for value outside allowed co-ordinates |
| return fPointerInfo[depth][column] ; |
| } |
| |
| //this function assumes that string passed is not null and skips |
| //the following string from the buffer this makes sure |
| boolean skipFromTheBuffer(String rawname) throws IOException{ |
| if(fEntityScanner.skipString(rawname)){ |
| char c = (char)fEntityScanner.peekChar() ; |
| //If the start element was completely skipped we should encounter either ' '(space), |
| //or '/' (in case of empty element) or '>' |
| if( c == ' ' || c == '/' || c == '>'){ |
| fElementRawname = rawname ; |
| return true ; |
| } else{ |
| return false; |
| } |
| } else |
| return false ; |
| } |
| |
| boolean skipQElement(String rawname) throws IOException{ |
| |
| final int c = fEntityScanner.getChar(rawname.length()); |
| //if this character is still valid element name -- this means string can't match |
| if(XMLChar.isName(c)){ |
| return false; |
| }else{ |
| return fEntityScanner.skipString(rawname); |
| } |
| } |
| |
| protected boolean skipElement() throws IOException { |
| |
| if(!fShouldSkip) return false ; |
| |
| if(fLastPointerLocation != 0){ |
| //Look at the next element stored in the array list.. we might just get a match. |
| String rawname = fElementArray[fLastPointerLocation + 1] ; |
| if(rawname != null && skipFromTheBuffer(rawname)){ |
| fLastPointerLocation++ ; |
| if(DEBUG_SKIP_ALGORITHM){ |
| System.out.println("Element " + fElementRawname + " was SKIPPED at pointer location = " + fLastPointerLocation); |
| } |
| return true ; |
| } else{ |
| //reset it back to zero... we haven't got the correct subset yet. |
| fLastPointerLocation = 0 ; |
| |
| } |
| } |
| //xxx: we can put some logic here as from what column it should start looking |
| //for now we always start at 0 |
| //fallback to tolerant algorithm, it would look for differnt element stored at different |
| //depth and get us the pointer location. |
| return fShouldSkip && skipElement((short)0); |
| |
| } |
| |
| //start of the column at which it should try searching |
| boolean skipElement(short column) throws IOException { |
| short depth = (short)fElementStack.fDepth ; |
| |
| if(depth > MAX_DEPTH_LIMIT){ |
| return fShouldSkip = false ; |
| } |
| for(short i = column ; i < MAX_POINTER_AT_A_DEPTH ; i++){ |
| short pointer = getElementPointer(depth , i ) ; |
| |
| if(pointer == 0){ |
| return fShouldSkip = false ; |
| } |
| |
| if(fElementArray[pointer] != null && skipFromTheBuffer(fElementArray[pointer])){ |
| if(DEBUG_SKIP_ALGORITHM){ |
| System.out.println(); |
| System.out.println("Element " + fElementRawname + " was SKIPPED at depth = " + fElementStack.fDepth + " column = " + column ); |
| System.out.println(); |
| } |
| fLastPointerLocation = pointer ; |
| return fShouldSkip = true ; |
| } |
| } |
| return fShouldSkip = false ; |
| } |
| |
| /** |
| * Scans a start element. This method will handle the binding of |
| * namespace information and notifying the handler of the start |
| * of the element. |
| * <p> |
| * <pre> |
| * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' |
| * [40] STag ::= '<' Name (S Attribute)* S? '>' |
| * </pre> |
| * <p> |
| * <strong>Note:</strong> This method assumes that the leading |
| * '<' character has been consumed. |
| * <p> |
| * <strong>Note:</strong> This method uses the fElementQName and |
| * fAttributes variables. The contents of these variables will be |
| * destroyed. The caller should copy important information out of |
| * these variables before calling this method. |
| * NB: Content in fAttributes is valid only till the state of the parser is XMLEvent.START_ELEMENT |
| * |
| * @return True if element is empty. (i.e. It matches |
| * production [44]. |
| */ |
| // fElementQName will have the details of element just read.. |
| // fAttributes will have the details of all the attributes. |
| protected boolean scanStartElement() |
| throws IOException, XNIException { |
| |
| if (DEBUG_START_END_ELEMENT) System.out.println( this.getClass().toString() + ">>> scanStartElement()"); |
| //when skipping is true and no more elements should be added |
| if(fSkip && !fAdd){ |
| //get the stored element -- if everything goes right this should match the |
| //token in the buffer |
| |
| QName name = fElementStack.getNext(); |
| |
| if(DEBUG_SKIP_ALGORITHM){ |
| System.out.println("Trying to skip String = " + name.rawname); |
| } |
| |
| //Be conservative -- if skipping fails -- stop. |
| fSkip = fEntityScanner.skipString(name.rawname); |
| |
| if(fSkip){ |
| if(DEBUG_SKIP_ALGORITHM){ |
| System.out.println("Element SUCESSFULLY skipped = " + name.rawname); |
| } |
| fElementStack.push(); |
| fElementQName = name; |
| }else{ |
| //if skipping fails reposition the stack or fallback to normal way of processing |
| fElementStack.reposition(); |
| if(DEBUG_SKIP_ALGORITHM){ |
| System.out.println("Element was NOT skipped, REPOSITIONING stack" ); |
| } |
| } |
| } |
| |
| //we are still at the stage of adding elements |
| //the elements were not matched or |
| //fSkip is not set to true |
| if(!fSkip || fAdd){ |
| //get the next element from the stack |
| fElementQName = fElementStack.nextElement(); |
| // name |
| if (fNamespaces) { |
| fEntityScanner.scanQName(fElementQName, NameType.ELEMENTSTART); |
| } else { |
| String name = fEntityScanner.scanName(NameType.ELEMENTSTART); |
| fElementQName.setValues(null, name, name, null); |
| } |
| |
| if(DEBUG)System.out.println("Element scanned in start element is " + fElementQName.toString()); |
| if(DEBUG_SKIP_ALGORITHM){ |
| if(fAdd){ |
| System.out.println("Elements are being ADDED -- elemet added is = " + fElementQName.rawname + " at count = " + fElementStack.fCount); |
| } |
| } |
| |
| } |
| |
| //when the elements are being added , we need to check if we are set for skipping the elements |
| if(fAdd){ |
| //this sets the value of fAdd variable |
| fElementStack.matchElement(fElementQName); |
| } |
| |
| |
| //xxx: We dont need another pointer, fCurrentElement, we can use fElementQName |
| fCurrentElement = fElementQName; |
| |
| String rawname = fElementQName.rawname; |
| |
| fEmptyElement = false; |
| |
| fAttributes.removeAllAttributes(); |
| |
| checkDepth(rawname); |
| if(!seekCloseOfStartTag()){ |
| fReadingAttributes = true; |
| fAttributeCacheUsedCount =0; |
| fStringBufferIndex =0; |
| fAddDefaultAttr = true; |
| do { |
| scanAttribute(fAttributes); |
| if (fSecurityManager != null && !fSecurityManager.isNoLimit(fElementAttributeLimit) && |
| fAttributes.getLength() > fElementAttributeLimit){ |
| fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, |
| "ElementAttributeLimit", |
| new Object[]{rawname, fElementAttributeLimit }, |
| XMLErrorReporter.SEVERITY_FATAL_ERROR ); |
| } |
| |
| } while (!seekCloseOfStartTag()); |
| fReadingAttributes=false; |
| } |
| |
| if (fEmptyElement) { |
| //decrease the markup depth.. |
| fMarkupDepth--; |
| |
| // check that this element was opened in the same entity |
| if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { |
| reportFatalError("ElementEntityMismatch", |
| new Object[]{fCurrentElement.rawname}); |
| } |
| // call handler |
| if (fDocumentHandler != null) { |
| fDocumentHandler.emptyElement(fElementQName, fAttributes, null); |
| } |
| |
| //We should not be popping out the context here in endELement becaause the namespace context is still |
| //valid when parser is at the endElement state. |
| //if (fNamespaces) { |
| // fNamespaceContext.popContext(); |
| //} |
| |
| //pop the element off the stack.. |
| fElementStack.popElement(); |
| |
| } else { |
| |
| if(dtdGrammarUtil != null) |
| dtdGrammarUtil.startElement(fElementQName, fAttributes); |
| if(fDocumentHandler != null){ |
| //complete element and attributes are traversed in this function so we can send a callback |
| //here. |
| //<strong>we shouldn't be sending callback in scanDocument()</strong> |
| fDocumentHandler.startElement(fElementQName, fAttributes, null); |
| } |
| } |
| |
| |
| if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() + "<<< scanStartElement(): "+fEmptyElement); |
| return fEmptyElement; |
| |
| } // scanStartElement():boolean |
| |
| /** |
| * Looks for the close of start tag, i.e. if it finds '>' or '/>' |
| * Characters are consumed. |
| */ |
| protected boolean seekCloseOfStartTag() throws IOException, XNIException { |
| // spaces |
| boolean sawSpace = fEntityScanner.skipSpaces(); |
| |
| // end tag? |
| final int c = fEntityScanner.peekChar(); |
| if (c == '>') { |
| fEntityScanner.scanChar(null); |
| return true; |
| } else if (c == '/') { |
| fEntityScanner.scanChar(null); |
| if (!fEntityScanner.skipChar('>', NameType.ELEMENTEND)) { |
| reportFatalError("ElementUnterminated", |
| new Object[]{fElementQName.rawname}); |
| } |
| fEmptyElement = true; |
| return true; |
| } else if (!isValidNameStartChar(c) || !sawSpace) { |
| // Second chance. Check if this character is a high |
| // surrogate of a valid name start character. |
| if (!isValidNameStartHighSurrogate(c) || !sawSpace) { |
| reportFatalError("ElementUnterminated", |
| new Object[]{fElementQName.rawname}); |
| } |
| } |
| |
| return false; |
| } |
| |
| public boolean hasAttributes(){ |
| return fAttributes.getLength() > 0 ? true : false ; |
| } |
| |
| |
| /** |
| * Scans an attribute. |
| * <p> |
| * <pre> |
| * [41] Attribute ::= Name Eq AttValue |
| * </pre> |
| * <p> |
| * <strong>Note:</strong> This method assumes that the next |
| * character on the stream is the first character of the attribute |
| * name. |
| * <p> |
| * <strong>Note:</strong> This method uses the fAttributeQName and |
| * fQName variables. The contents of these variables will be |
| * destroyed. |
| * |
| * @param attributes The attributes list for the scanned attribute. |
| */ |
| |
| /** |
| * protected void scanAttribute(AttributeIteratorImpl attributes) |
| * throws IOException, XNIException { |
| * if (DEBUG_START_END_ELEMENT) System.out.println(">>> scanAttribute()"); |
| * |
| * |
| * // name |
| * if (fNamespaces) { |
| * fEntityScanner.scanQName(fAttributeQName); |
| * } |
| * else { |
| * String name = fEntityScanner.scanName(); |
| * fAttributeQName.setValues(null, name, name, null); |
| * } |
| * |
| * // equals |
| * fEntityScanner.skipSpaces(); |
| * if (!fEntityScanner.skipChar('=')) { |
| * reportFatalError("EqRequiredInAttribute", |
| * new Object[]{fAttributeQName.rawname}); |
| * } |
| * fEntityScanner.skipSpaces(); |
| * |
| * |
| * // content |
| * int oldLen = attributes.getLength(); |
| */ |
| /**xxx there is one check of duplicate attribute that has been removed. |
| * attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); |
| * |
| * // WFC: Unique Att Spec |
| * if (oldLen == attributes.getLength()) { |
| * reportFatalError("AttributeNotUnique", |
| * new Object[]{fCurrentElement.rawname, |
| * fAttributeQName.rawname}); |
| * } |
| */ |
| |
| /* |
| //REVISIT: one more case needs to be included: external PE and standalone is no |
| boolean isVC = fHasExternalDTD && !fStandalone; |
| scanAttributeValue(fTempString, fTempString2, |
| fAttributeQName.rawname, attributes, |
| oldLen, isVC); |
| |
| //attributes.setValue(oldLen, fTempString.toString()); |
| //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); |
| //attributes.setSpecified(oldLen, true); |
| |
| AttributeImpl attribute = new AttributeImpl(fAttributeQName.prefix,fAttributeQName.localpart,fAttributeQName.uri,fTempString.toString(),fTempString2.toString(),XMLSymbols.fCDATASymbol,true); |
| fAttributes.addAttribute(attribute); |
| if (DEBUG_START_END_ELEMENT) System.out.println("<<< scanAttribute()"); |
| } // scanAttribute(XMLAttributes) |
| |
| */ |
| |
| /** return the attribute iterator implementation */ |
| public XMLAttributesIteratorImpl getAttributeIterator(){ |
| if(dtdGrammarUtil != null && fAddDefaultAttr){ |
| dtdGrammarUtil.addDTDDefaultAttrs(fElementQName,fAttributes); |
| fAddDefaultAttr = false; |
| } |
| return fAttributes; |
| } |
| |
| /** return if standalone is set */ |
| public boolean standaloneSet(){ |
| return fStandaloneSet; |
| } |
| /** return if the doucment is standalone */ |
| public boolean isStandAlone(){ |
| return fStandalone ; |
| } |
| /** |
| * Scans an attribute name value pair. |
| * <p> |
| * <pre> |
| * [41] Attribute ::= Name Eq AttValue |
| * </pre> |
| * <p> |
| * <strong>Note:</strong> This method assumes that the next |
| * character on the stream is the first character of the attribute |
| * name. |
| * <p> |
| * <strong>Note:</strong> This method uses the fAttributeQName and |
| * fQName variables. The contents of these variables will be |
| * destroyed. |
| * |
| * @param attributes The attributes list for the scanned attribute. |
| */ |
| |
| protected void scanAttribute(XMLAttributes attributes) |
| throws IOException, XNIException { |
| if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanAttribute()"); |
| |
| // name |
| if (fNamespaces) { |
| fEntityScanner.scanQName(fAttributeQName, NameType.ATTRIBUTENAME); |
| } else { |
| String name = fEntityScanner.scanName(NameType.ATTRIBUTENAME); |
| fAttributeQName.setValues(null, name, name, null); |
| } |
| |
| // equals |
| fEntityScanner.skipSpaces(); |
| if (!fEntityScanner.skipChar('=', NameType.ATTRIBUTE)) { |
| reportFatalError("EqRequiredInAttribute", |
| new Object[] {fCurrentElement.rawname, fAttributeQName.rawname}); |
| } |
| fEntityScanner.skipSpaces(); |
| |
| int attIndex = 0 ; |
| //REVISIT: one more case needs to be included: external PE and standalone is no |
| boolean isVC = fHasExternalDTD && !fStandalone; |
| //fTempString would store attribute value |
| ///fTempString2 would store attribute non-normalized value |
| |
| //this function doesn't use 'attIndex'. We are adding the attribute later |
| //after we have figured out that current attribute is not namespace declaration |
| //since scanAttributeValue doesn't use attIndex parameter therefore we |
| //can safely add the attribute later.. |
| XMLString tmpStr = getString(); |
| |
| scanAttributeValue(tmpStr, fTempString2, fAttributeQName.rawname, attributes, |
| attIndex, isVC, fCurrentElement.rawname, false); |
| |
| // content |
| int oldLen = attributes.getLength(); |
| //if the attribute name already exists.. new value is replaced with old value |
| attIndex = attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); |
| |
| // WFC: Unique Att Spec |
| //attributes count will be same if the current attribute name already exists for this element name. |
| //this means there are two duplicate attributes. |
| if (oldLen == attributes.getLength()) { |
| reportFatalError("AttributeNotUnique", |
| new Object[]{fCurrentElement.rawname, |
| fAttributeQName.rawname}); |
| } |
| |
| //tmpString contains attribute value |
| //we are passing null as the attribute value |
| attributes.setValue(attIndex, null, tmpStr); |
| |
| ///xxx: nonNormalizedValue is not being set as it is not required by SAX & DOM |
| //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); |
| attributes.setSpecified(attIndex, true); |
| |
| if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +"<<< scanAttribute()"); |
| |
| } // scanAttribute(XMLAttributes) |
| |
| /** |
| * Scans element content. |
| * |
| * @return Returns the next character on the stream. |
| */ |
| //CHANGED: |
| //EARLIER: scanContent() |
| //NOW: scanContent(XMLStringBuffer) |
| //It makes things easy if this functions takes XMLStringBuffer as parameter.. |
| //this function appends the data to the buffer. |
| protected int scanContent(XMLStringBuffer content) throws IOException, XNIException { |
| //set the fTempString length to 0 before passing it on to scanContent |
| //scanContent sets the correct co-ordinates as per the content read |
| fTempString.length = 0; |
| int c = fEntityScanner.scanContent(fTempString); |
| content.append(fTempString); |
| fTempString.length = 0; |
| if (c == '\r') { |
| // happens when there is the character reference |
| //xxx: We know the next chracter.. we should just skip it and add ']' directlry |
| fEntityScanner.scanChar(null); |
| content.append((char)c); |
| c = -1; |
| } else if (c == ']') { |
| //fStringBuffer.clear(); |
| //xxx: We know the next chracter.. we should just skip it and add ']' directlry |
| content.append((char)fEntityScanner.scanChar(null)); |
| // remember where we are in case we get an endEntity before we |
| // could flush the buffer out - this happens when we're parsing an |
| // entity which ends with a ] |
| fInScanContent = true; |
| // |
| // We work on a single character basis to handle cases such as: |
| // ']]]>' which we might otherwise miss. |
| // |
| if (fEntityScanner.skipChar(']', null)) { |
| content.append(']'); |
| while (fEntityScanner.skipChar(']', null)) { |
| content.append(']'); |
| } |
| if (fEntityScanner.skipChar('>', null)) { |
| reportFatalError("CDEndInContent", null); |
| } |
| } |
| fInScanContent = false; |
| c = -1; |
| } |
| if (fDocumentHandler != null && content.length > 0) { |
| //fDocumentHandler.characters(content, null); |
| } |
| return c; |
| |
| } // scanContent():int |
| |
| |
| /** |
| * Scans a CDATA section. |
| * <p> |
| * <strong>Note:</strong> This method uses the fTempString and |
| * fStringBuffer variables. |
| * |
| * @param complete True if the CDATA section is to be scanned |
| * completely. |
| * |
| * @return True if CDATA is completely scanned. |
| */ |
| //CHANGED: |
| protected boolean scanCDATASection(XMLStringBuffer contentBuffer, boolean complete) |
| throws IOException, XNIException { |
| |
| // call handler |
| if (fDocumentHandler != null) { |
| //fDocumentHandler.startCDATA(null); |
| } |
| |
| while (true) { |
| //scanData will fill the contentBuffer |
| if (!fEntityScanner.scanData("]]>", contentBuffer)) { |
| break ; |
| /** We dont need all this code if we pass ']]>' as delimeter.. |
| * int brackets = 2; |
| * while (fEntityScanner.skipChar(']')) { |
| * brackets++; |
| * } |
| * |
| * //When we find more than 2 square brackets |
| * if (fDocumentHandler != null && brackets > 2) { |
| * //we dont need to clear the buffer.. |
| * //contentBuffer.clear(); |
| * for (int i = 2; i < brackets; i++) { |
| * contentBuffer.append(']'); |
| * } |
| * fDocumentHandler.characters(contentBuffer, null); |
| * } |
| * |
| * if (fEntityScanner.skipChar('>')) { |
| * break; |
| * } |
| * if (fDocumentHandler != null) { |
| * //we dont need to clear the buffer now.. |
| * //contentBuffer.clear(); |
| * contentBuffer.append("]]"); |
| * fDocumentHandler.characters(contentBuffer, null); |
| * } |
| **/ |
| } else { |
| int c = fEntityScanner.peekChar(); |
| if (c != -1 && isInvalidLiteral(c)) { |
| if (XMLChar.isHighSurrogate(c)) { |
| //contentBuffer.clear(); |
| //scan surrogates if any.... |
| scanSurrogates(contentBuffer); |
| } else { |
| reportFatalError("InvalidCharInCDSect", |
| new Object[]{Integer.toString(c,16)}); |
| fEntityScanner.scanChar(null); |
| } |
| } |
| //by this time we have also read surrogate contents if any... |
| if (fDocumentHandler != null) { |
| //fDocumentHandler.characters(contentBuffer, null); |
| } |
| } |
| } |
| fMarkupDepth--; |
| |
| if (fDocumentHandler != null && contentBuffer.length > 0) { |
| //fDocumentHandler.characters(contentBuffer, null); |
| } |
| |
| // call handler |
| if (fDocumentHandler != null) { |
| //fDocumentHandler.endCDATA(null); |
| } |
| |
| return true; |
| |
| } // scanCDATASection(XMLStringBuffer, boolean):boolean |
| |
| /** |
| * Scans an end element. |
| * <p> |
| * <pre> |
| * [42] ETag ::= '</' Name S? '>' |
| * </pre> |
| * <p> |
| * <strong>Note:</strong> This method uses the fElementQName variable. |
| * The contents of this variable will be destroyed. The caller should |
| * copy the needed information out of this variable before calling |
| * this method. |
| * |
| * @return The element depth. |
| */ |
| protected int scanEndElement() throws IOException, XNIException { |
| if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanEndElement()"); |
| |
| // pop context |
| QName endElementName = fElementStack.popElement(); |
| |
| String rawname = endElementName.rawname; |
| if(DEBUG)System.out.println("endElementName = " + endElementName.toString()); |
| // Take advantage of the fact that next string _should_ be "fElementQName.rawName", |
| //In scanners most of the time is consumed on checks done for XML characters, we can |
| // optimize on it and avoid the checks done for endElement, |
| //we will also avoid symbol table lookup - neeraj.bajaj@sun.com |
| |
| // this should work both for namespace processing true or false... |
| |
| //REVISIT: if the string is not the same as expected.. we need to do better error handling.. |
| //We can skip this for now... In any case if the string doesn't match -- document is not well formed. |
| |
| if (!fEntityScanner.skipString(endElementName.rawname)) { |
| reportFatalError("ETagRequired", new Object[]{rawname}); |
| } |
| |
| // end |
| fEntityScanner.skipSpaces(); |
| if (!fEntityScanner.skipChar('>', NameType.ELEMENTEND)) { |
| reportFatalError("ETagUnterminated", |
| new Object[]{rawname}); |
| } |
| fMarkupDepth--; |
| |
| //we have increased the depth for two markup "<" characters |
| fMarkupDepth--; |
| |
| // check that this element was opened in the same entity |
| if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { |
| reportFatalError("ElementEntityMismatch", |
| new Object[]{rawname}); |
| } |
| |
| //We should not be popping out the context here in endELement becaause the namespace context is still |
| //valid when parser is at the endElement state. |
| |
| //if (fNamespaces) { |
| // fNamespaceContext.popContext(); |
| //} |
| |
| // call handler |
| if (fDocumentHandler != null ) { |
| //end element is scanned in this function so we can send a callback |
| //here. |
| //<strong>we shouldn't be sending callback in scanDocument()</strong> |
| |
| fDocumentHandler.endElement(endElementName, null); |
| } |
| if(dtdGrammarUtil != null) |
| dtdGrammarUtil.endElement(endElementName); |
| |
| return fMarkupDepth; |
| |
| } // scanEndElement():int |
| |
| /** |
| * Scans a character reference. |
| * <p> |
| * <pre> |
| * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' |
| * </pre> |
| */ |
| protected void scanCharReference() |
| throws IOException, XNIException { |
| |
| fStringBuffer2.clear(); |
| int ch = scanCharReferenceValue(fStringBuffer2, null); |
| fMarkupDepth--; |
| if (ch != -1) { |
| // call handler |
| |
| if (fDocumentHandler != null) { |
| if (fNotifyCharRefs) { |
| fDocumentHandler.startGeneralEntity(fCharRefLiteral, null, null, null); |
| } |
| Augmentations augs = null; |
| if (fValidation && ch <= 0x20) { |
| if (fTempAugmentations != null) { |
| fTempAugmentations.removeAllItems(); |
| } |
| else { |
| fTempAugmentations = new AugmentationsImpl(); |
| } |
| augs = fTempAugmentations; |
| augs.putItem(Constants.CHAR_REF_PROBABLE_WS, Boolean.TRUE); |
| } |
| //xxx: How do we deal with this - how to return charReferenceValues |
| //now this is being commented because this is taken care in scanDocument() |
| //fDocumentHandler.characters(fStringBuffer2, null); |
| if (fNotifyCharRefs) { |
| fDocumentHandler.endGeneralEntity(fCharRefLiteral, null); |
| } |
| } |
| } |
| |
| } // scanCharReference() |
| |
| |
| /** |
| * Scans an entity reference. |
| * |
| * @return returns true if the new entity is started. If it was built-in entity |
| * 'false' is returned. |
| * @throws IOException Thrown if i/o error occurs. |
| * @throws XNIException Thrown if handler throws exception upon |
| * notification. |
| */ |
| protected void scanEntityReference(XMLStringBuffer content) throws IOException, XNIException { |
| String name = fEntityScanner.scanName(NameType.REFERENCE); |
| if (name == null) { |
| reportFatalError("NameRequiredInReference", null); |
| return; |
| } |
| if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) { |
| reportFatalError("SemicolonRequiredInReference", new Object []{name}); |
| } |
| if (fEntityStore.isUnparsedEntity(name)) { |
| reportFatalError("ReferenceToUnparsedEntity", new Object[]{name}); |
| } |
| fMarkupDepth--; |
| fCurrentEntityName = name; |
| |
| // handle built-in entities |
| if (name == fAmpSymbol) { |
| handleCharacter('&', fAmpSymbol, content); |
| fScannerState = SCANNER_STATE_BUILT_IN_REFS; |
| return ; |
| } else if (name == fLtSymbol) { |
| handleCharacter('<', fLtSymbol, content); |
| fScannerState = SCANNER_STATE_BUILT_IN_REFS; |
| return ; |
| } else if (name == fGtSymbol) { |
| handleCharacter('>', fGtSymbol, content); |
| fScannerState = SCANNER_STATE_BUILT_IN_REFS; |
| return ; |
| } else if (name == fQuotSymbol) { |
| handleCharacter('"', fQuotSymbol, content); |
| fScannerState = SCANNER_STATE_BUILT_IN_REFS; |
| return ; |
| } else if (name == fAposSymbol) { |
| handleCharacter('\'', fAposSymbol, content); |
| fScannerState = SCANNER_STATE_BUILT_IN_REFS; |
| return ; |
| } |
| |
| //1. if the entity is external and support to external entities is not required |
| // 2. or entities should not be replaced |
| //3. or if it is built in entity reference. |
| boolean isEE = fEntityStore.isExternalEntity(name); |
| if((isEE && !fSupportExternalEntities) || (!isEE && !fReplaceEntityReferences) || foundBuiltInRefs){ |
| fScannerState = SCANNER_STATE_REFERENCE; |
| return ; |
| } |
| // start general entity |
| if (!fEntityStore.isDeclaredEntity(name)) { |
| //SUPPORT_DTD=false && ReplaceEntityReferences should throw exception |
| if (!fSupportDTD && fReplaceEntityReferences) { |
| reportFatalError("EntityNotDeclared", new Object[]{name}); |
| return; |
| } |
| //REVISIT: one more case needs to be included: external PE and standalone is no |
| if ( fHasExternalDTD && !fStandalone) { |
| if (fValidation) |
| fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN,"EntityNotDeclared", |
| new Object[]{name}, XMLErrorReporter.SEVERITY_ERROR); |
| } else |
| reportFatalError("EntityNotDeclared", new Object[]{name}); |
| } |
| //we are starting the entity even if the entity was not declared |
| //if that was the case it its taken care in XMLEntityManager.startEntity() |
| //we immediately call the endEntity. Application gets to know if there was |
| //any entity that was not declared. |
| fEntityManager.startEntity(true, name, false); |
| //set the scaner state to content.. parser will automatically revive itself at any point of time. |
| //setScannerState(SCANNER_STATE_CONTENT); |
| //return true ; |
| } // scanEntityReference() |
| |
| // utility methods |
| |
| /** |
| * Check if the depth exceeds the maxElementDepth limit |
| * @param elementName name of the current element |
| */ |
| void checkDepth(String elementName) { |
| fLimitAnalyzer.addValue(Limit.MAX_ELEMENT_DEPTH_LIMIT, elementName, fElementStack.fDepth); |
| if (fSecurityManager.isOverLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT,fLimitAnalyzer)) { |
| fSecurityManager.debugPrint(fLimitAnalyzer); |
| reportFatalError("MaxElementDepthLimit", new Object[]{elementName, |
| fLimitAnalyzer.getTotalValue(Limit.MAX_ELEMENT_DEPTH_LIMIT), |
| fSecurityManager.getLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT), |
| "maxElementDepth"}); |
| } |
| } |
| |
| /** |
| * Calls document handler with a single character resulting from |
| * built-in entity resolution. |
| * |
| * @param c |
| * @param entity built-in name |
| * @param XMLStringBuffer append the character to buffer |
| * |
| * we really dont need to call this function -- this function is only required when |
| * we integrate with rest of Xerces2. SO maintaining the current behavior and still |
| * calling this function to hanlde built-in entity reference. |
| * |
| */ |
| private void handleCharacter(char c, String entity, XMLStringBuffer content) throws XNIException { |
| foundBuiltInRefs = true; |
| checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1); |
| content.append(c); |
| if (fDocumentHandler != null) { |
| fSingleChar[0] = c; |
| if (fNotifyBuiltInRefs) { |
| fDocumentHandler.startGeneralEntity(entity, null, null, null); |
| } |
| fTempString.setValues(fSingleChar, 0, 1); |
| if(!fIsCoalesce){ |
| fDocumentHandler.characters(fTempString, null); |
| builtInRefCharacterHandled = true; |
| } |
| |
| if (fNotifyBuiltInRefs) { |
| fDocumentHandler.endGeneralEntity(entity, null); |
| } |
| } |
| } // handleCharacter(char) |
| |
| // helper methods |
| |
| /** |
| * Sets the scanner state. |
| * |
| * @param state The new scanner state. |
| */ |
| protected final void setScannerState(int state) { |
| |
| fScannerState = state; |
| if (DEBUG_SCANNER_STATE) { |
| System.out.print("### setScannerState: "); |
| //System.out.print(fScannerState); |
| System.out.print(getScannerStateName(state)); |
| System.out.println(); |
| } |
| |
| } // setScannerState(int) |
| |
| |
| /** |
| * Sets the Driver. |
| * |
| * @param Driver The new Driver. |
| */ |
| protected final void setDriver(Driver driver) { |
| fDriver = driver; |
| if (DEBUG_DISPATCHER) { |
| System.out.print("%%% setDriver: "); |
| System.out.print(getDriverName(driver)); |
| System.out.println(); |
| } |
| } |
| |
| // |
| // Private methods |
| // |
| |
| /** Returns the scanner state name. */ |
| protected String getScannerStateName(int state) { |
| |
| switch (state) { |
| case SCANNER_STATE_DOCTYPE: return "SCANNER_STATE_DOCTYPE"; |
| case SCANNER_STATE_ROOT_ELEMENT: return "SCANNER_STATE_ROOT_ELEMENT"; |
| case SCANNER_STATE_START_OF_MARKUP: return "SCANNER_STATE_START_OF_MARKUP"; |
| case SCANNER_STATE_COMMENT: return "SCANNER_STATE_COMMENT"; |
| case SCANNER_STATE_PI: return "SCANNER_STATE_PI"; |
| case SCANNER_STATE_CONTENT: return "SCANNER_STATE_CONTENT"; |
| case SCANNER_STATE_REFERENCE: return "SCANNER_STATE_REFERENCE"; |
| case SCANNER_STATE_END_OF_INPUT: return "SCANNER_STATE_END_OF_INPUT"; |
| case SCANNER_STATE_TERMINATED: return "SCANNER_STATE_TERMINATED"; |
| case SCANNER_STATE_CDATA: return "SCANNER_STATE_CDATA"; |
| case SCANNER_STATE_TEXT_DECL: return "SCANNER_STATE_TEXT_DECL"; |
| case SCANNER_STATE_ATTRIBUTE: return "SCANNER_STATE_ATTRIBUTE"; |
| case SCANNER_STATE_ATTRIBUTE_VALUE: return "SCANNER_STATE_ATTRIBUTE_VALUE"; |
| case SCANNER_STATE_START_ELEMENT_TAG: return "SCANNER_STATE_START_ELEMENT_TAG"; |
| case SCANNER_STATE_END_ELEMENT_TAG: return "SCANNER_STATE_END_ELEMENT_TAG"; |
| case SCANNER_STATE_CHARACTER_DATA: return "SCANNER_STATE_CHARACTER_DATA" ; |
| } |
| |
| return "??? ("+state+')'; |
| |
| } // getScannerStateName(int):String |
| public String getEntityName(){ |
| //return the cached name |
| return fCurrentEntityName; |
| } |
| |
| /** Returns the driver name. */ |
| public String getDriverName(Driver driver) { |
| |
| if (DEBUG_DISPATCHER) { |
| if (driver != null) { |
| String name = driver.getClass().getName(); |
| int index = name.lastIndexOf('.'); |
| if (index != -1) { |
| name = name.substring(index + 1); |
| index = name.lastIndexOf('$'); |
| if (index != -1) { |
| name = name.substring(index + 1); |
| } |
| } |
| return name; |
| } |
| } |
| return "null"; |
| |
| } // getDriverName():String |
| |
| /** |
| * Check the protocol used in the systemId against allowed protocols |
| * |
| * @param systemId the Id of the URI |
| * @param allowedProtocols a list of allowed protocols separated by comma |
| * @return the name of the protocol if rejected, null otherwise |
| */ |
| String checkAccess(String systemId, String allowedProtocols) throws IOException { |
| String baseSystemId = fEntityScanner.getBaseSystemId(); |
| String expandedSystemId = XMLEntityManager.expandSystemId(systemId, baseSystemId, fStrictURI); |
| return SecuritySupport.checkAccess(expandedSystemId, allowedProtocols, Constants.ACCESS_EXTERNAL_ALL); |
| } |
| |
| // |
| // Classes |
| // |
| |
| /** |
| * @author Neeraj Bajaj, Sun Microsystems. |
| */ |
| protected static final class Element { |
| |
| // |
| // Data |
| // |
| |
| /** Symbol. */ |
| public QName qname; |
| |
| //raw name stored as characters |
| public char[] fRawname; |
| |
| /** The next Element entry. */ |
| public Element next; |
| |
| // |
| // Constructors |
| // |
| |
| /** |
| * Constructs a new Element from the given QName and next Element |
| * reference. |
| */ |
| public Element(QName qname, Element next) { |
| this.qname.setValues(qname); |
| this.fRawname = qname.rawname.toCharArray(); |
| this.next = next; |
| } |
| |
| } // class Element |
| |
| /** |
| * Element stack. |
| * |
| * @author Neeraj Bajaj, Sun Microsystems. |
| */ |
| protected class ElementStack2 { |
| |
| // |
| // Data |
| // |
| |
| /** The stack data. */ |
| protected QName [] fQName = new QName[20]; |
| |
| //Element depth |
| protected int fDepth; |
| //total number of elements |
| protected int fCount; |
| //current position |
| protected int fPosition; |
| //Mark refers to the position |
| protected int fMark; |
| |
| protected int fLastDepth ; |
| |
| // |
| // Constructors |
| // |
| |
| /** Default constructor. */ |
| public ElementStack2() { |
| for (int i = 0; i < fQName.length; i++) { |
| fQName[i] = new QName(); |
| } |
| fMark = fPosition = 1; |
| } // <init>() |
| |
| public void resize(){ |
| /** |
| * int length = fElements.length; |
| * Element [] temp = new Element[length * 2]; |
| * System.arraycopy(fElements, 0, temp, 0, length); |
| * fElements = temp; |
| */ |
| //resize QNames |
| int oldLength = fQName.length; |
| QName [] tmp = new QName[oldLength * 2]; |
| System.arraycopy(fQName, 0, tmp, 0, oldLength); |
| fQName = tmp; |
| |
| for (int i = oldLength; i < fQName.length; i++) { |
| fQName[i] = new QName(); |
| } |
| |
| } |
| |
| |
| // |
| // Public methods |
| // |
| |
| /** Check if the element scanned during the start element |
| *matches the stored element. |
| * |
| *@return true if the match suceeds. |
| */ |
| public boolean matchElement(QName element) { |
| //last depth is the depth when last elemnt was pushed |
| //if last depth is greater than current depth |
| if(DEBUG_SKIP_ALGORITHM){ |
| System.out.println("fLastDepth = " + fLastDepth); |
| System.out.println("fDepth = " + fDepth); |
| } |
| boolean match = false; |
| if(fLastDepth > fDepth && fDepth <= 2){ |
| if(DEBUG_SKIP_ALGORITHM){ |
| System.out.println("Checking if the elements match " + element.rawname + " , " + fQName[fDepth].rawname); |
| } |
| if(element.rawname == fQName[fDepth].rawname){ |
| fAdd = false; |
| //mark this position |
| //decrease the depth by 1 as arrays are 0 based |
| fMark = fDepth - 1; |
| //we found the match and from next element skipping will start, add 1 |
| fPosition = fMark + 1 ; |
| match = true; |
| //Once we get match decrease the count -- this was increased by nextElement() |
| --fCount; |
| if(DEBUG_SKIP_ALGORITHM){ |
| System.out.println("fAdd FALSE -- NOW ELEMENT SHOULD NOT BE ADDED"); |
| System.out.println("fMark = " + fMark); |
| System.out.println("fPosition = " + fPosition); |
| System.out.println("fDepth = " + fDepth); |
| System.out.println("fCount = " + fCount); |
| } |
| }else{ |
| fAdd = true; |
| if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd); |
| } |
| } |
| //store the last depth |
| fLastDepth = fDepth++; |
| return match; |
| } // pushElement(QName):QName |
| |
| /** |
| * This function doesn't increase depth. The function in this function is |
| *broken down into two functions for efficiency. <@see>matchElement</see>. |
| * This function just returns the pointer to the object and its values are set. |
| * |
| *@return QName reference to the next element in the list |
| */ |
| public QName nextElement() { |
| |
| //if number of elements becomes equal to the length of array -- stop the skipping |
| if (fCount == fQName.length) { |
| fShouldSkip = false; |
| fAdd = false; |
| if(DEBUG_SKIP_ALGORITHM)System.out.println("SKIPPING STOPPED, fShouldSkip = " + fShouldSkip); |
| //xxx: this is not correct, we are returning the last element |
| //this wont make any difference since flag has been set to 'false' |
| return fQName[--fCount]; |
| } |
| if(DEBUG_SKIP_ALGORITHM){ |
| System.out.println("fCount = " + fCount); |
| } |
| return fQName[fCount++]; |
| |
| } |
| |
| /** Note that this function is considerably different than nextElement() |
| * This function just returns the previously stored elements |
| */ |
| public QName getNext(){ |
| //when position reaches number of elements in the list.. |
| //set the position back to mark, making it a circular linked list. |
| if(fPosition == fCount){ |
| fPosition = fMark; |
| } |
| return fQName[fPosition++]; |
| } |
| |
| /** returns the current depth |
| */ |
| public int popElement(){ |
| return fDepth--; |
| } |
| |
| |
| /** Clears the stack without throwing away existing QName objects. */ |
| public void clear() { |
| fLastDepth = 0; |
| fDepth = 0; |
| fCount = 0 ; |
| fPosition = fMark = 1; |
| } // clear() |
| |
| } // class ElementStack |
| |
| /** |
| * Element stack. This stack operates without synchronization, error |
| * checking, and it re-uses objects instead of throwing popped items |
| * away. |
| * |
| * @author Andy Clark, IBM |
| */ |
| protected class ElementStack { |
| |
| // |
| // Data |
| // |
| |
| /** The stack data. */ |
| protected QName[] fElements; |
| protected int [] fInt = new int[20]; |
| |
| |
| //Element depth |
| protected int fDepth; |
| //total number of elements |
| protected int fCount; |
| //current position |
| protected int fPosition; |
| //Mark refers to the position |
| protected int fMark; |
| |
| protected int fLastDepth ; |
| |
| // |
| // Constructors |
| // |
| |
| /** Default constructor. */ |
| public ElementStack() { |
| fElements = new QName[20]; |
| for (int i = 0; i < fElements.length; i++) { |
| fElements[i] = new QName(); |
| } |
| } // <init>() |
| |
| // |
| // Public methods |
| // |
| |
| /** |
| * Pushes an element on the stack. |
| * <p> |
| * <strong>Note:</strong> The QName values are copied into the |
| * stack. In other words, the caller does <em>not</em> orphan |
| * the element to the stack. Also, the QName object returned |
| * is <em>not</em> orphaned to the caller. It should be |
| * considered read-only. |
| * |
| * @param element The element to push onto the stack. |
| * |
| * @return Returns the actual QName object that stores the |
| */ |
| //XXX: THIS FUNCTION IS NOT USED |
| public QName pushElement(QName element) { |
| if (fDepth == fElements.length) { |
| QName[] array = new QName[fElements.length * 2]; |
| System.arraycopy(fElements, 0, array, 0, fDepth); |
| fElements = array; |
| for (int i = fDepth; i < fElements.length; i++) { |
| fElements[i] = new QName(); |
| } |
| } |
| fElements[fDepth].setValues(element); |
| return fElements[fDepth++]; |
| } // pushElement(QName):QName |
| |
| |
| /** Note that this function is considerably different than nextElement() |
| * This function just returns the previously stored elements |
| */ |
| public QName getNext(){ |
| //when position reaches number of elements in the list.. |
| //set the position back to mark, making it a circular linked list. |
| if(fPosition == fCount){ |
| fPosition = fMark; |
| } |
| //store the position of last opened tag at particular depth |
| //fInt[++fDepth] = fPosition; |
| if(DEBUG_SKIP_ALGORITHM){ |
| System.out.println("Element at fPosition = " + fPosition + " is " + fElements[fPosition].rawname); |
| } |
| //return fElements[fPosition++]; |
| return fElements[fPosition]; |
| } |
| |
| /** This function should be called only when element was skipped sucessfully. |
| * 1. Increase the depth - because element was sucessfully skipped. |
| *2. Store the position of the element token in array "last opened tag" at depth. |
| *3. increase the position counter so as to point to the next element in the array |
| */ |
| public void push(){ |
| |
| fInt[++fDepth] = fPosition++; |
| } |
| |
| /** Check if the element scanned during the start element |
| *matches the stored element. |
| * |
| *@return true if the match suceeds. |
| */ |
| public boolean matchElement(QName element) { |
| //last depth is the depth when last elemnt was pushed |
| //if last depth is greater than current depth |
| //if(DEBUG_SKIP_ALGORITHM){ |
| // System.out.println("Check if the element " + element.rawname + " matches"); |
| // System.out.println("fLastDepth = " + fLastDepth); |
| // System.out.println("fDepth = " + fDepth); |
| //} |
| boolean match = false; |
| if(fLastDepth > fDepth && fDepth <= 3){ |
| if(DEBUG_SKIP_ALGORITHM){ |
| System.out.println("----------ENTERED THE LOOP WHERE WE CHECK FOR MATCHING OF ELMENT-----"); |
| System.out.println("Depth = " + fDepth + " Checking if INCOMING element " + element.rawname + " match STORED ELEMENT " + fElements[fDepth - 1].rawname); |
| } |
| if(element.rawname == fElements[fDepth - 1].rawname){ |
| fAdd = false; |
| //mark this position |
| //decrease the depth by 1 as arrays are 0 based |
| fMark = fDepth - 1; |
| //we found the match |
| fPosition = fMark; |
| match = true; |
| //Once we get match decrease the count -- this was increased by nextElement() |
| --fCount; |
| if(DEBUG_SKIP_ALGORITHM){ |
| System.out.println("NOW ELEMENT SHOULD NOT BE ADDED, fAdd is set to false"); |
| System.out.println("fMark = " + fMark); |
| System.out.println("fPosition = " + fPosition); |
| System.out.println("fDepth = " + fDepth); |
| System.out.println("fCount = " + fCount); |
| System.out.println("---------MATCH SUCEEDED-----------------"); |
| System.out.println(""); |
| } |
| }else{ |
| fAdd = true; |
| if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd); |
| } |
| } |
| //store the position for the current depth |
| //when we are adding the elements, when skipping |
| //starts even then this should be tracked ie. when |
| //calling getNext() |
| if(match){ |
| //from next element skipping will start, add 1 |
| fInt[fDepth] = fPosition++; |
| } else{ |
| if(DEBUG_SKIP_ALGORITHM){ |
| System.out.println("At depth = " + fDepth + "array position is = " + (fCount - 1)); |
| } |
| //sicne fInt[fDepth] contains pointer to the element array which are 0 based. |
| fInt[fDepth] = fCount - 1; |
| } |
| |
| //if number of elements becomes equal to the length of array -- stop the skipping |
| //xxx: should we do "fCount == fInt.length" |
| if (fCount == fElements.length) { |
| fSkip = false; |
| fAdd = false; |
| //reposition the stack -- it seems to be too complex document and there is no symmerty in structure |
| reposition(); |
| if(DEBUG_SKIP_ALGORITHM){ |
| System.out.println("ALL THE ELMENTS IN ARRAY HAVE BEEN FILLED"); |
| System.out.println("REPOSITIONING THE STACK"); |
| System.out.println("-----------SKIPPING STOPPED----------"); |
| System.out.println(""); |
| } |
| return false; |
| } |
| if(DEBUG_SKIP_ALGORITHM){ |
| if(match){ |
| System.out.println("Storing fPosition = " + fInt[fDepth] + " at fDepth = " + fDepth); |
| }else{ |
| System.out.println("Storing fCount = " + fInt[fDepth] + " at fDepth = " + fDepth); |
| } |
| } |
| //store the last depth |
| fLastDepth = fDepth; |
| return match; |
| } // matchElement(QName):QName |
| |
| |
| /** |
| * Returns the next element on the stack. |
| * |
| * @return Returns the actual QName object. Callee should |
| * use this object to store the details of next element encountered. |
| */ |
| public QName nextElement() { |
| if(fSkip){ |
| fDepth++; |
| //boundary checks are done in matchElement() |
| return fElements[fCount++]; |
| } else if (fDepth == fElements.length) { |
| QName[] array = new QName[fElements.length * 2]; |
| System.arraycopy(fElements, 0, array, 0, fDepth); |
| fElements = array; |
| for (int i = fDepth; i < fElements.length; i++) { |
| fElements[i] = new QName(); |
| } |
| } |
| |
| return fElements[fDepth++]; |
| |
| } // pushElement(QName):QName |
| |
| |
| /** |
| * Pops an element off of the stack by setting the values of |
| * the specified QName. |
| * <p> |
| * <strong>Note:</strong> The object returned is <em>not</em> |
| * orphaned to the caller. Therefore, the caller should consider |
| * the object to be read-only. |
| */ |
| public QName popElement() { |
| //return the same object that was pushed -- this would avoid |
| //setting the values for every end element. |
| //STRONG: this object is read only -- this object reference shouldn't be stored. |
| if(fSkip || fAdd ){ |
| if(DEBUG_SKIP_ALGORITHM){ |
| System.out.println("POPPING Element, at position " + fInt[fDepth] + " element at that count is = " + fElements[fInt[fDepth]].rawname); |
| System.out.println(""); |
| } |
| return fElements[fInt[fDepth--]]; |
| } else{ |
| if(DEBUG_SKIP_ALGORITHM){ |
| System.out.println("Retrieveing element at depth = " + fDepth + " is " + fElements[fDepth].rawname ); |
| } |
| return fElements[--fDepth] ; |
| } |
| //element.setValues(fElements[--fDepth]); |
| } // popElement(QName) |
| |
| /** Reposition the stack. fInt [] contains all the opened tags at particular depth. |
| * Transfer all the opened tags starting from depth '2' to the current depth and reposition them |
| *as per the depth. |
| */ |
| public void reposition(){ |
| for( int i = 2 ; i <= fDepth ; i++){ |
| fElements[i-1] = fElements[fInt[i]]; |
| } |
| if(DEBUG_SKIP_ALGORITHM){ |
| for( int i = 0 ; i < fDepth ; i++){ |
| System.out.println("fElements[" + i + "]" + " = " + fElements[i].rawname); |
| } |
| } |
| } |
| |
| /** Clears the stack without throwing away existing QName objects. */ |
| public void clear() { |
| fDepth = 0; |
| fLastDepth = 0; |
| fCount = 0 ; |
| fPosition = fMark = 1; |
| |
| } // clear() |
| |
| /** |
| * This function is as a result of optimization done for endElement -- |
| * we dont need to set the value for every end element encouterd. |
| * For Well formedness checks we can have the same QName object that was pushed. |
| * the values will be set only if application need to know about the endElement |
| * -- neeraj.bajaj@sun.com |
| */ |
| |
| public QName getLastPoppedElement(){ |
| return fElements[fDepth]; |
| } |
| } // class ElementStack |
| |
| /** |
| * Drives the parser to the next state/event on the input. Parser is guaranteed |
| * to stop at the next state/event. |
| * |
| * Internally XML document is divided into several states. Each state represents |
| * a sections of XML document. When this functions returns normally, it has read |
| * the section of XML document and returns the state corresponding to section of |
| * document which has been read. For optimizations, a particular driver |
| * can read ahead of the section of document (state returned) just read and |
| * can maintain a different internal state. |
| * |
| * |
| * @author Neeraj Bajaj, Sun Microsystems |
| */ |
| protected interface Driver { |
| |
| |
| /** |
| * Drives the parser to the next state/event on the input. Parser is guaranteed |
| * to stop at the next state/event. |
| * |
| * Internally XML document is divided into several states. Each state represents |
| * a sections of XML document. When this functions returns normally, it has read |
| * the section of XML document and returns the state corresponding to section of |
| * document which has been read. For optimizations, a particular driver |
| * can read ahead of the section of document (state returned) just read and |
| * can maintain a different internal state. |
| * |
| * @return state representing the section of document just read. |
| * |
| * @throws IOException Thrown on i/o error. |
| * @throws XNIException Thrown on parse error. |
| */ |
| |
| public int next() throws IOException, XNIException; |
| |
| } // interface Driver |
| |
| /** |
| * Driver to handle content scanning. This driver is capable of reading |
| * the fragment of XML document. When it has finished reading fragment |
| * of XML documents, it can pass the job of reading to another driver. |
| * |
| * This class has been modified as per the new design which is more suited to |
| * efficiently build pull parser. Lot of performance improvements have been done and |
| * the code has been added to support stax functionality/features. |
| * |
| * @author Neeraj Bajaj, Sun Microsystems |
| * |
| * |
| * @author Andy Clark, IBM |
| * @author Eric Ye, IBM |
| */ |
| protected class FragmentContentDriver |
| implements Driver { |
| |
| // |
| // Driver methods |
| // |
| |
| /** |
| * decides the appropriate state of the parser |
| */ |
| private void startOfMarkup() throws IOException { |
| fMarkupDepth++; |
| final int ch = fEntityScanner.peekChar(); |
| if (isValidNameStartChar(ch) || isValidNameStartHighSurrogate(ch)) { |
| setScannerState(SCANNER_STATE_START_ELEMENT_TAG); |
| } else { |
| switch(ch){ |
| case '?' :{ |
| setScannerState(SCANNER_STATE_PI); |
| fEntityScanner.skipChar(ch, null); |
| break; |
| } |
| case '!' :{ |
| fEntityScanner.skipChar(ch, null); |
| if (fEntityScanner.skipChar('-', null)) { |
| if (!fEntityScanner.skipChar('-', NameType.COMMENT)) { |
| reportFatalError("InvalidCommentStart", |
| null); |
| } |
| setScannerState(SCANNER_STATE_COMMENT); |
| } else if (fEntityScanner.skipString(cdata)) { |
| setScannerState(SCANNER_STATE_CDATA ); |
| } else if (!scanForDoctypeHook()) { |
| reportFatalError("MarkupNotRecognizedInContent", |
| null); |
| } |
| break; |
| } |
| case '/' :{ |
| setScannerState(SCANNER_STATE_END_ELEMENT_TAG); |
| fEntityScanner.skipChar(ch, NameType.ELEMENTEND); |
| break; |
| } |
| default :{ |
| reportFatalError("MarkupNotRecognizedInContent", null); |
| } |
| } |
| } |
| |
| }//startOfMarkup |
| |
| private void startOfContent() throws IOException { |
| if (fEntityScanner.skipChar('<', null)) { |
| setScannerState(SCANNER_STATE_START_OF_MARKUP); |
| } else if (fEntityScanner.skipChar('&', NameType.REFERENCE)) { |
| setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE |
| } else { |
| //element content is there.. |
| setScannerState(SCANNER_STATE_CHARACTER_DATA); |
| } |
| }//startOfContent |
| |
| |
| /** |
| * |
| * SCANNER_STATE_CONTENT and SCANNER_STATE_START_OF_MARKUP are two super states of the parser. |
| * At any point of time when in doubt over the current state of the parser, the state should be |
| * set to SCANNER_STATE_CONTENT. Parser will automatically revive itself and will set state of |
| * the parser to one of its sub state. |
| * sub states are defined in the parser on the basis of different XML component like |
| * SCANNER_STATE_ENTITY_REFERENCE , SCANNER_STATE_START_ELEMENT, SCANNER_STATE_CDATA etc.. |
| * These sub states help the parser to have fine control over the parsing. These are the |
| * different milepost, parser stops at each sub state (milepost). Based on this state it is |
| * decided if paresr needs to stop at next milepost ?? |
| * |
| */ |
| public void decideSubState() throws IOException { |
| while( fScannerState == SCANNER_STATE_CONTENT || fScannerState == SCANNER_STATE_START_OF_MARKUP){ |
| |
| switch (fScannerState) { |
| |
| case SCANNER_STATE_CONTENT: { |
| startOfContent() ; |
| break; |
| } |
| |
| case SCANNER_STATE_START_OF_MARKUP: { |
| startOfMarkup() ; |
| break; |
| } |
| } |
| } |
| }//decideSubState |
| |
| /** |
| * Drives the parser to the next state/event on the input. Parser is guaranteed |
| * to stop at the next state/event. Internally XML document |
| * is divided into several states. Each state represents a sections of XML |
| * document. When this functions returns normally, it has read the section |
| * of XML document and returns the state corresponding to section of |
| * document which has been read. For optimizations, a particular driver |
| * can read ahead of the section of document (state returned) just read and |
| * can maintain a different internal state. |
| * |
| * State returned corresponds to Stax states. |
| * |
| * @return state representing the section of document just read. |
| * |
| * @throws IOException Thrown on i/o error. |
| * @throws XNIException Thrown on parse error. |
| */ |
| |
| public int next() throws IOException, XNIException { |
| while (true) { |
| try { |
| if(DEBUG_NEXT){ |
| System.out.println("NOW IN FragmentContentDriver"); |
| System.out.println("Entering the FragmentContentDriver with = " + getScannerStateName(fScannerState)); |
| } |
| |
| //decide the actual sub state of the scanner.For more information refer to the javadoc of |
| //decideSubState. |
| |
| switch (fScannerState) { |
| case SCANNER_STATE_CONTENT: { |
| final int ch = fEntityScanner.peekChar(); |
| if (ch == '<') { |
| fEntityScanner.scanChar(null); |
| setScannerState(SCANNER_STATE_START_OF_MARKUP); |
| } else if (ch == '&') { |
| fEntityScanner.scanChar(NameType.REFERENCE); |
| setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE |
| break; |
| } else { |
| //element content is there.. |
| setScannerState(SCANNER_STATE_CHARACTER_DATA); |
| break; |
| } |
| } |
| |
| case SCANNER_STATE_START_OF_MARKUP: { |
| startOfMarkup(); |
| break; |
| }//case: SCANNER_STATE_START_OF_MARKUP |
| |
| }//end of switch |
| //decideSubState() ; |
| |
| //do some special handling if isCoalesce is set to true. |
| if(fIsCoalesce){ |
| fUsebuffer = true ; |
| //if the last section was character data |
| if(fLastSectionWasCharacterData){ |
| |
| //if we dont encounter any CDATA or ENTITY REFERENCE and current state is also not SCANNER_STATE_CHARACTER_DATA |
| //return the last scanned charactrer data. |
| if((fScannerState != SCANNER_STATE_CDATA) && (fScannerState != SCANNER_STATE_REFERENCE) |
| && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ |
| fLastSectionWasCharacterData = false; |
| return XMLEvent.CHARACTERS; |
| } |
| }//if last section was CDATA or ENTITY REFERENCE |
| //xxx: there might be another entity reference or CDATA after this |
| //<foo>blah blah &<<![CDATA[[aa]]>blah blah</foo> |
| else if((fLastSectionWasCData || fLastSectionWasEntityReference)){ |
| //and current state is not SCANNER_STATE_CHARACTER_DATA |
| //or SCANNER_STATE_CDATA or SCANNER_STATE_REFERENCE |
| //this means there is nothing more to be coalesced. |
| //return the CHARACTERS event. |
| if((fScannerState != SCANNER_STATE_CDATA) && (fScannerState != SCANNER_STATE_REFERENCE) |
| && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ |
| |
| fLastSectionWasCData = false; |
| fLastSectionWasEntityReference = false; |
| return XMLEvent.CHARACTERS; |
| } |
| } |
| } |
| |
| |
| if(DEBUG_NEXT){ |
| System.out.println("Actual scanner state set by decideSubState is = " + getScannerStateName(fScannerState)); |
| } |
| |
| switch(fScannerState){ |
| |
| case XMLEvent.START_DOCUMENT : |
| return XMLEvent.START_DOCUMENT; |
| |
| case SCANNER_STATE_START_ELEMENT_TAG :{ |
| |
| //xxx this function returns true when element is empty.. can be linked to end element event. |
| //returns true if the element is empty |
| fEmptyElement = scanStartElement() ; |
| //if the element is empty the next event is "end element" |
| if(fEmptyElement){ |
| setScannerState(SCANNER_STATE_END_ELEMENT_TAG); |
| }else{ |
| //set the next possible state |
| setScannerState(SCANNER_STATE_CONTENT); |
| } |
| return XMLEvent.START_ELEMENT ; |
| } |
| |
| case SCANNER_STATE_CHARACTER_DATA: { |
| if(DEBUG_COALESCE){ |
| System.out.println("fLastSectionWasCData = " + fLastSectionWasCData); |
| System.out.println("fIsCoalesce = " + fIsCoalesce); |
| } |
| //if last section was either entity reference or cdata or character data we should be using buffer |
| fUsebuffer = fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData ; |
| |
| //When coalesce is set to true and last state was REFERENCE or CDATA or CHARACTER_DATA, buffer should not be cleared. |
| if( fIsCoalesce && (fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData) ){ |
| fLastSectionWasEntityReference = false; |
| fLastSectionWasCData = false; |
| fLastSectionWasCharacterData = true ; |
| fUsebuffer = true; |
| }else{ |
| //clear the buffer |
| fContentBuffer.clear(); |
| } |
| |
| //set the fTempString length to 0 before passing it on to scanContent |
| //scanContent sets the correct co-ordinates as per the content read |
| fTempString.length = 0; |
| int c = fEntityScanner.scanContent(fTempString); |
| if(DEBUG){ |
| System.out.println("fTempString = " + fTempString); |
| } |
| if(fEntityScanner.skipChar('<', null)){ |
| //check if we have reached end of element |
| if(fEntityScanner.skipChar('/', NameType.ELEMENTEND)){ |
| //increase the mark up depth |
| fMarkupDepth++; |
| fLastSectionWasCharacterData = false; |
| setScannerState(SCANNER_STATE_END_ELEMENT_TAG); |
| //check if its start of new element |
| }else if(XMLChar.isNameStart(fEntityScanner.peekChar())){ |
| fMarkupDepth++; |
| fLastSectionWasCharacterData = false; |
| setScannerState(SCANNER_STATE_START_ELEMENT_TAG); |
| }else{ |
| setScannerState(SCANNER_STATE_START_OF_MARKUP); |
| //there can be cdata ahead if coalesce is true we should call again |
| if(fIsCoalesce){ |
| fUsebuffer = true; |
| fLastSectionWasCharacterData = true; |
| fContentBuffer.append(fTempString); |
| fTempString.length = 0; |
| continue; |
| } |
| } |
| //in case last section was either entity reference or cdata or character data -- we should be using buffer |
| if(fUsebuffer){ |
| fContentBuffer.append(fTempString); |
| fTempString.length = 0; |
| } |
| if(DEBUG){ |
| System.out.println("NOT USING THE BUFFER, STRING = " + fTempString.toString()); |
| } |
| if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ |
| if(DEBUG)System.out.println("Return SPACE EVENT"); |
| return XMLEvent.SPACE; |
| }else |
| return XMLEvent.CHARACTERS; |
| |
| } else{ |
| fUsebuffer = true ; |
| if(DEBUG){ |
| System.out.println("fContentBuffer = " + fContentBuffer); |
| System.out.println("fTempString = " + fTempString); |
| } |
| fContentBuffer.append(fTempString); |
| fTempString.length = 0; |
| } |
| if (c == '\r') { |
| if(DEBUG){ |
| System.out.println("'\r' character found"); |
| } |
| // happens when there is the character reference |
| //xxx: We know the next chracter.. we should just skip it and add ']' directlry |
| fEntityScanner.scanChar(null); |
| fUsebuffer = true; |
| fContentBuffer.append((char)c); |
| c = -1 ; |
| } else if (c == ']') { |
| //fStringBuffer.clear(); |
| //xxx: We know the next chracter.. we should just skip it and add ']' directlry |
| fUsebuffer = true; |
| fContentBuffer.append((char)fEntityScanner.scanChar(null)); |
| // remember where we are in case we get an endEntity before we |
| // could flush the buffer out - this happens when we're parsing an |
| // entity which ends with a ] |
| fInScanContent = true; |
| |
| // We work on a single character basis to handle cases such as: |
| // ']]]>' which we might otherwise miss. |
| // |
| if (fEntityScanner.skipChar(']', null)) { |
| fContentBuffer.append(']'); |
| while (fEntityScanner.skipChar(']', null)) { |
| fContentBuffer.append(']'); |
| } |
| if (fEntityScanner.skipChar('>', null)) { |
| reportFatalError("CDEndInContent", null); |
| } |
| } |
| c = -1 ; |
| fInScanContent = false; |
| } |
| |
| do{ |
| //xxx: we should be using only one buffer.. |
| // we need not to grow the buffer only when isCoalesce() is not true; |
| |
| if (c == '<') { |
| fEntityScanner.scanChar(null); |
| setScannerState(SCANNER_STATE_START_OF_MARKUP); |
| break; |
| }//xxx what should be the behavior if entity reference is present in the content ? |
| else if (c == '&') { |
| fEntityScanner.scanChar(NameType.REFERENCE); |
| setScannerState(SCANNER_STATE_REFERENCE); |
| break; |
| }///xxx since this part is also characters, it should be merged... |
| else if (c != -1 && isInvalidLiteral(c)) { |
| if (XMLChar.isHighSurrogate(c)) { |
| // special case: surrogates |
| scanSurrogates(fContentBuffer) ; |
| setScannerState(SCANNER_STATE_CONTENT); |
| } else { |
| reportFatalError("InvalidCharInContent", |
| new Object[] { |
| Integer.toString(c, 16)}); |
| fEntityScanner.scanChar(null); |
| } |
| break; |
| } |
| //xxx: scanContent also gives character callback. |
| c = scanContent(fContentBuffer) ; |
| //we should not be iterating again if fIsCoalesce is not set to true |
| |
| if(!fIsCoalesce){ |
| setScannerState(SCANNER_STATE_CONTENT); |
| break; |
| } |
| |
| }while(true); |
| |
| //if (fDocumentHandler != null) { |
| // fDocumentHandler.characters(fContentBuffer, null); |
| //} |
| if(DEBUG)System.out.println("USING THE BUFFER, STRING START=" + fContentBuffer.toString() +"=END"); |
| //if fIsCoalesce is true there might be more data so call fDriver.next() |
| if(fIsCoalesce){ |
| fLastSectionWasCharacterData = true ; |
| continue; |
| }else{ |
| if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ |
| if(DEBUG)System.out.println("Return SPACE EVENT"); |
| return XMLEvent.SPACE; |
| } else |
| return XMLEvent.CHARACTERS ; |
| } |
| } |
| |
| case SCANNER_STATE_END_ELEMENT_TAG :{ |
| if(fEmptyElement){ |
| //set it back to false. |
| fEmptyElement = false; |
| setScannerState(SCANNER_STATE_CONTENT); |
| //check the case when there is comment after single element document |
| //<foo/> and some comment after this |
| return (fMarkupDepth == 0 && elementDepthIsZeroHook() ) ? XMLEvent.END_ELEMENT : XMLEvent.END_ELEMENT ; |
| |
| } else if(scanEndElement() == 0) { |
| //It is last element of the document |
| if (elementDepthIsZeroHook()) { |
| //if element depth is zero , it indicates the end of the document |
| //the state shouldn't be set, because it is set by elementDepthIsZeroHook() function |
| //xxx understand this point once again.. |
| return XMLEvent.END_ELEMENT ; |
| } |
| |
| } |
| setScannerState(SCANNER_STATE_CONTENT); |
| return XMLEvent.END_ELEMENT ; |
| } |
| |
| case SCANNER_STATE_COMMENT: { //SCANNER_STATE_COMMENT: |
| scanComment(); |
| setScannerState(SCANNER_STATE_CONTENT); |
| return XMLEvent.COMMENT; |
| //break; |
| } |
| case SCANNER_STATE_PI:{ //SCANNER_STATE_PI: { |
| //clear the buffer first |
| fContentBuffer.clear() ; |
| //xxx: which buffer should be passed. Ideally we shouldn't have |
| //more than two buffers -- |
| //xxx: where should we add the switch for buffering. |
| scanPI(fContentBuffer); |
| setScannerState(SCANNER_STATE_CONTENT); |
| return XMLEvent.PROCESSING_INSTRUCTION; |
| //break; |
| } |
| case SCANNER_STATE_CDATA :{ //SCANNER_STATE_CDATA: { |
| //xxx: What if CDATA is the first event |
| //<foo><![CDATA[hello<><>]]>append</foo> |
| |
| //we should not clear the buffer only when the last state was either SCANNER_STATE_REFERENCE or |
| //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE |
| if(fIsCoalesce && ( fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData)){ |
| fLastSectionWasCData = true ; |
| fLastSectionWasEntityReference = false; |
| fLastSectionWasCharacterData = false; |
| }//if we dont need to coalesce clear the buffer |
| else{ |
| fContentBuffer.clear(); |
| } |
| fUsebuffer = true; |
| //CDATA section is completely read in all the case. |
| scanCDATASection(fContentBuffer , true); |
| setScannerState(SCANNER_STATE_CONTENT); |
| //1. if fIsCoalesce is set to true we set the variable fLastSectionWasCData to true |
| //and just call fDispatche.next(). Since we have set the scanner state to |
| //SCANNER_STATE_CONTENT (super state) parser will automatically recover and |
| //behave appropriately. When isCoalesce is set to true we dont need to reportCDATA event |
| //2. Check if application has set for reporting CDATA event |
| //3. if the application has neither set the fIsCoalesce to true nor fReportCdataEvent |
| //return the cdata event as characters. |
| if(fIsCoalesce){ |
| fLastSectionWasCData = true ; |
| //there might be more data to coalesce. |
| continue; |
| }else if(fReportCdataEvent){ |
| return XMLEvent.CDATA; |
| } else{ |
| return XMLEvent.CHARACTERS; |
| } |
| } |
| |
| case SCANNER_STATE_REFERENCE :{ |
| fMarkupDepth++; |
| foundBuiltInRefs = false; |
| |
| //we should not clear the buffer only when the last state was either CDATA or |
| //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE |
| if(fIsCoalesce && ( fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData)){ |
| //fLastSectionWasEntityReference or fLastSectionWasCData are only |
| //used when fIsCoalesce is set to true. |
| fLastSectionWasEntityReference = true ; |
| fLastSectionWasCData = false; |
| fLastSectionWasCharacterData = false; |
| }//if we dont need to coalesce clear the buffer |
| else{ |
| fContentBuffer.clear(); |
| } |
| fUsebuffer = true ; |
| //take care of character reference |
| if (fEntityScanner.skipChar('#', NameType.REFERENCE)) { |
| scanCharReferenceValue(fContentBuffer, null); |
| fMarkupDepth--; |
| if(!fIsCoalesce){ |
| setScannerState(SCANNER_STATE_CONTENT); |
| return XMLEvent.CHARACTERS; |
| } |
| } else { |
| // this function also starts new entity |
| scanEntityReference(fContentBuffer); |
| //if there was built-in entity reference & coalesce is not true |
| //return CHARACTERS |
| if(fScannerState == SCANNER_STATE_BUILT_IN_REFS && !fIsCoalesce){ |
| setScannerState(SCANNER_STATE_CONTENT); |
| if (builtInRefCharacterHandled) { |
| builtInRefCharacterHandled = false; |
| return XMLEvent.ENTITY_REFERENCE; |
| } else { |
| return XMLEvent.CHARACTERS; |
| } |
| } |
| |
| //if there was a text declaration, call next() it will be taken care. |
| if(fScannerState == SCANNER_STATE_TEXT_DECL){ |
| fLastSectionWasEntityReference = true ; |
| continue; |
| } |
| |
| if(fScannerState == SCANNER_STATE_REFERENCE){ |
| setScannerState(SCANNER_STATE_CONTENT); |
| if (fReplaceEntityReferences && fEntityStore.isDeclaredEntity(fCurrentEntityName)) { |
| // Skip the entity reference, we don't care |
| continue; |
| } |
| return XMLEvent.ENTITY_REFERENCE; |
| } |
| } |
| //Wether it was character reference, entity reference or built-in entity |
| //set the next possible state to SCANNER_STATE_CONTENT |
| setScannerState(SCANNER_STATE_CONTENT); |
| fLastSectionWasEntityReference = true ; |
| continue; |
| } |
| |
| case SCANNER_STATE_TEXT_DECL: { |
| // scan text decl |
| if (fEntityScanner.skipString("<?xml")) { |
| fMarkupDepth++; |
| // NOTE: special case where entity starts with a PI |
| // whose name starts with "xml" (e.g. "xmlfoo") |
| if (isValidNameChar(fEntityScanner.peekChar())) { |
| fStringBuffer.clear(); |
| fStringBuffer.append("xml"); |
| |
| if (fNamespaces) { |
| while (isValidNCName(fEntityScanner.peekChar())) { |
| fStringBuffer.append((char)fEntityScanner.scanChar(null)); |
| } |
| } else { |
| while (isValidNameChar(fEntityScanner.peekChar())) { |
| fStringBuffer.append((char)fEntityScanner.scanChar(null)); |
| } |
| } |
| String target = fSymbolTable.addSymbol(fStringBuffer.ch, fStringBuffer.offset, fStringBuffer.length); |
| fContentBuffer.clear(); |
| scanPIData(target, fContentBuffer); |
| } |
| |
| // standard text declaration |
| else { |
| //xxx: this function gives callback |
| scanXMLDeclOrTextDecl(true); |
| } |
| } |
| // now that we've straightened out the readers, we can read in chunks: |
| fEntityManager.fCurrentEntity.mayReadChunks = true; |
| setScannerState(SCANNER_STATE_CONTENT); |
| //xxx: we don't return any state, so how do we get to know about TEXT declarations. |
| //it seems we have to careful when to allow function issue a callback |
| //and when to allow adapter issue a callback. |
| continue; |
| } |
| |
| |
| case SCANNER_STATE_ROOT_ELEMENT: { |
| if (scanRootElementHook()) { |
| fEmptyElement = true; |
| //rest would be taken care by fTrailingMiscDriver set by scanRootElementHook |
| return XMLEvent.START_ELEMENT; |
| } |
| setScannerState(SCANNER_STATE_CONTENT); |
| return XMLEvent.START_ELEMENT ; |
| } |
| case SCANNER_STATE_CHAR_REFERENCE : { |
| fContentBuffer.clear(); |
| scanCharReferenceValue(fContentBuffer, null); |
| fMarkupDepth--; |
| setScannerState(SCANNER_STATE_CONTENT); |
| return XMLEvent.CHARACTERS; |
| } |
| default: |
| throw new XNIException("Scanner State " + fScannerState + " not Recognized "); |
| |
| }//switch |
| } |
| // premature end of file |
| catch (EOFException e) { |
| endOfFileHook(e); |
| return -1; |
| } |
| } //while loop |
| }//next |
| |
| // |
| // Protected methods |
| // |
| |
| // hooks |
| |
| // NOTE: These hook methods are added so that the full document |
| // scanner can share the majority of code with this class. |
| |
| /** |
| * Scan for DOCTYPE hook. This method is a hook for subclasses |
| * to add code to handle scanning for a the "DOCTYPE" string |
| * after the string "<!" has been scanned. |
| * |
| * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE" |
| * was not scanned. |
| */ |
| protected boolean scanForDoctypeHook() |
| throws IOException, XNIException { |
| return false; |
| } // scanForDoctypeHook():boolean |
| |
| /** |
| * Element depth iz zero. This methos is a hook for subclasses |
| * to add code to handle when the element depth hits zero. When |
| * scanning a document fragment, an element depth of zero is |
| * normal. However, when scanning a full XML document, the |
| * scanner must handle the trailing miscellanous section of |
| * the document after the end of the document's root element. |
| * |
| * @return True if the caller should stop and return true which |
| * allows the scanner to switch to a new scanning |
| * driver. A return value of false indicates that |
| * the content driver should continue as normal. |
| */ |
| protected boolean elementDepthIsZeroHook() |
| throws IOException, XNIException { |
| return false; |
| } // elementDepthIsZeroHook():boolean |
| |
| /** |
| * Scan for root element hook. This method is a hook for |
| * subclasses to add code that handles scanning for the root |
| * element. When scanning a document fragment, there is no |
| * "root" element. However, when scanning a full XML document, |
| * the scanner must handle the root element specially. |
| * |
| * @return True if the caller should stop and return true which |
| * allows the scanner to switch to a new scanning |
| * driver. A return value of false indicates that |
| * the content driver should continue as normal. |
| */ |
| protected boolean scanRootElementHook() |
| throws IOException, XNIException { |
| return false; |
| } // scanRootElementHook():boolean |
| |
| /** |
| * End of file hook. This method is a hook for subclasses to |
| * add code that handles the end of file. The end of file in |
| * a document fragment is OK if the markup depth is zero. |
| * However, when scanning a full XML document, an end of file |
| * is always premature. |
| */ |
| protected void endOfFileHook(EOFException e) |
| throws IOException, XNIException { |
| |
| // NOTE: An end of file is only only an error if we were |
| // in the middle of scanning some markup. -Ac |
| if (fMarkupDepth != 0) { |
| reportFatalError("PrematureEOF", null); |
| } |
| |
| } // endOfFileHook() |
| |
| } // class FragmentContentDriver |
| |
| static void pr(String str) { |
| System.out.println(str) ; |
| } |
| |
| protected boolean fUsebuffer ; |
| |
| /** this function gets an XMLString (which is used to store the attribute value) from the special pool |
| * maintained for attributes. |
| * fAttributeCacheUsedCount tracks the number of attributes that has been consumed from the pool. |
| * if all the attributes has been consumed, it adds a new XMLString inthe pool and returns the same |
| * XMLString. |
| * |
| * @return XMLString XMLString used to store an attribute value. |
| */ |
| |
| protected XMLString getString(){ |
| if(fAttributeCacheUsedCount < initialCacheCount || fAttributeCacheUsedCount < attributeValueCache.size()){ |
| return attributeValueCache.get(fAttributeCacheUsedCount++); |
| } else{ |
| XMLString str = new XMLString(); |
| fAttributeCacheUsedCount++; |
| attributeValueCache.add(str); |
| return str; |
| } |
| } |
| |
| /** |
| * Implements XMLBufferListener interface. |
| */ |
| |
| public void refresh(){ |
| refresh(0); |
| } |
| |
| /** |
| * receives callbacks from {@link XMLEntityReader } when buffer |
| * is being changed. |
| * @param refreshPosition |
| */ |
| public void refresh(int refreshPosition){ |
| //If you are reading attributes and you got a callback |
| //cache available attributes. |
| if(fReadingAttributes){ |
| fAttributes.refresh(); |
| } |
| if(fScannerState == SCANNER_STATE_CHARACTER_DATA){ |
| //since fTempString directly matches to the underlying main buffer |
| //store the data into buffer |
| fContentBuffer.append(fTempString); |
| //clear the XMLString so that data can't be added again. |
| fTempString.length = 0; |
| fUsebuffer = true; |
| } |
| } |
| |
| } // class XMLDocumentFragmentScannerImpl |