| /* |
| * reserved comment block |
| * DO NOT REMOVE OR ALTER! |
| */ |
| /* |
| * Copyright 1999-2002,2004,2005 The Apache Software Foundation. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| |
| |
| // Sep 14, 2000: |
| // Fixed problem with namespace handling. Contributed by |
| // David Blondeau <blondeau@intalio.com> |
| // Sep 14, 2000: |
| // Fixed serializer to report IO exception directly, instead at |
| // the end of document processing. |
| // Reported by Patrick Higgins <phiggins@transzap.com> |
| // Aug 21, 2000: |
| // Fixed bug in startDocument not calling prepare. |
| // Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se> |
| // Aug 21, 2000: |
| // Added ability to omit DOCTYPE declaration. |
| |
| |
| package com.sun.org.apache.xml.internal.serialize; |
| |
| |
| import java.io.IOException; |
| import java.io.OutputStream; |
| import java.io.Writer; |
| |
| import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter; |
| import com.sun.org.apache.xerces.internal.impl.Constants; |
| import com.sun.org.apache.xerces.internal.util.NamespaceSupport; |
| import com.sun.org.apache.xerces.internal.util.SymbolTable; |
| import com.sun.org.apache.xerces.internal.util.XML11Char; |
| import com.sun.org.apache.xerces.internal.util.XMLChar; |
| import org.xml.sax.SAXException; |
| import org.w3c.dom.DOMError; |
| |
| /** |
| * Implements an XML serializer supporting both DOM and SAX pretty |
| * serializing. For usage instructions see {@link Serializer}. |
| * <p> |
| * If an output stream is used, the encoding is taken from the |
| * output format (defaults to <tt>UTF-8</tt>). If a writer is |
| * used, make sure the writer uses the same encoding (if applies) |
| * as specified in the output format. |
| * <p> |
| * The serializer supports both DOM and SAX. SAX serializing is done by firing |
| * SAX events and using the serializer as a document handler. DOM serializing is done |
| * by calling {@link #serialize(Document)} or by using DOM Level 3 |
| * {@link org.w3c.dom.ls.DOMSerializer} and |
| * serializing with {@link org.w3c.dom.ls.DOMSerializer#write}, |
| * {@link org.w3c.dom.ls.DOMSerializer#writeToString}. |
| * <p> |
| * If an I/O exception occurs while serializing, the serializer |
| * will not throw an exception directly, but only throw it |
| * at the end of serializing (either DOM or SAX's {@link |
| * org.xml.sax.DocumentHandler#endDocument}). |
| * <p> |
| * For elements that are not specified as whitespace preserving, |
| * the serializer will potentially break long text lines at space |
| * boundaries, indent lines, and serialize elements on separate |
| * lines. Line terminators will be regarded as spaces, and |
| * spaces at beginning of line will be stripped. |
| * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a> |
| * @author <a href="mailto:rahul.srivastava@sun.com">Rahul Srivastava</a> |
| * @author Elena Litani IBM |
| * @see Serializer |
| */ |
| public class XML11Serializer |
| extends XMLSerializer { |
| |
| // |
| // constants |
| // |
| |
| protected static final boolean DEBUG = false; |
| |
| // |
| // data |
| // |
| |
| // |
| // DOM Level 3 implementation: variables intialized in DOMSerializerImpl |
| // |
| |
| /** stores namespaces in scope */ |
| protected NamespaceSupport fNSBinder; |
| |
| /** stores all namespace bindings on the current element */ |
| protected NamespaceSupport fLocalNSBinder; |
| |
| /** symbol table for serialization */ |
| protected SymbolTable fSymbolTable; |
| |
| // is node dom level 1 node? |
| protected boolean fDOML1 = false; |
| // counter for new prefix names |
| protected int fNamespaceCounter = 1; |
| protected final static String PREFIX = "NS"; |
| |
| /** |
| * Controls whether namespace fixup should be performed during |
| * the serialization. |
| * NOTE: if this field is set to true the following |
| * fields need to be initialized: fNSBinder, fLocalNSBinder, fSymbolTable, |
| * XMLSymbols.EMPTY_STRING, fXmlSymbol, fXmlnsSymbol, fNamespaceCounter. |
| */ |
| protected boolean fNamespaces = false; |
| |
| |
| private boolean fPreserveSpace; |
| |
| |
| /** |
| * Constructs a new serializer. The serializer cannot be used without |
| * calling {@link #setOutputCharStream} or {@link #setOutputByteStream} |
| * first. |
| */ |
| public XML11Serializer() { |
| super( ); |
| _format.setVersion("1.1"); |
| } |
| |
| |
| /** |
| * Constructs a new serializer. The serializer cannot be used without |
| * calling {@link #setOutputCharStream} or {@link #setOutputByteStream} |
| * first. |
| */ |
| public XML11Serializer( OutputFormat format ) { |
| super( format ); |
| _format.setVersion("1.1"); |
| } |
| |
| |
| /** |
| * Constructs a new serializer that writes to the specified writer |
| * using the specified output format. If <tt>format</tt> is null, |
| * will use a default output format. |
| * |
| * @param writer The writer to use |
| * @param format The output format to use, null for the default |
| */ |
| public XML11Serializer( Writer writer, OutputFormat format ) { |
| super( writer, format ); |
| _format.setVersion("1.1"); |
| } |
| |
| |
| /** |
| * Constructs a new serializer that writes to the specified output |
| * stream using the specified output format. If <tt>format</tt> |
| * is null, will use a default output format. |
| * |
| * @param output The output stream to use |
| * @param format The output format to use, null for the default |
| */ |
| public XML11Serializer( OutputStream output, OutputFormat format ) { |
| super( output, format != null ? format : new OutputFormat( Method.XML, null, false ) ); |
| _format.setVersion("1.1"); |
| } |
| |
| //-----------------------------------------// |
| // SAX content handler serializing methods // |
| //-----------------------------------------// |
| |
| |
| public void characters( char[] chars, int start, int length ) |
| throws SAXException |
| { |
| ElementState state; |
| |
| try { |
| state = content(); |
| |
| // Check if text should be print as CDATA section or unescaped |
| // based on elements listed in the output format (the element |
| // state) or whether we are inside a CDATA section or entity. |
| |
| if ( state.inCData || state.doCData ) { |
| int saveIndent; |
| |
| // Print a CDATA section. The text is not escaped, but ']]>' |
| // appearing in the code must be identified and dealt with. |
| // The contents of a text node is considered space preserving. |
| if ( ! state.inCData ) { |
| _printer.printText( "<![CDATA[" ); |
| state.inCData = true; |
| } |
| saveIndent = _printer.getNextIndent(); |
| _printer.setNextIndent( 0 ); |
| char ch; |
| final int end = start + length; |
| for ( int index = start; index < end; ++index ) { |
| ch = chars[index]; |
| if ( ch == ']' && index + 2 < end && |
| chars[ index + 1 ] == ']' && chars[ index + 2 ] == '>' ) { |
| _printer.printText("]]]]><![CDATA[>"); |
| index +=2; |
| continue; |
| } |
| if (!XML11Char.isXML11Valid(ch)) { |
| // check if it is surrogate |
| if (++index < end) { |
| surrogates(ch, chars[index]); |
| } |
| else { |
| fatalError("The character '"+(char)ch+"' is an invalid XML character"); |
| } |
| continue; |
| } else { |
| if ( _encodingInfo.isPrintable((char)ch) && XML11Char.isXML11ValidLiteral(ch)) { |
| _printer.printText((char)ch); |
| } else { |
| // The character is not printable -- split CDATA section |
| _printer.printText("]]>&#x"); |
| _printer.printText(Integer.toHexString(ch)); |
| _printer.printText(";<![CDATA["); |
| } |
| } |
| } |
| _printer.setNextIndent( saveIndent ); |
| |
| } else { |
| |
| int saveIndent; |
| |
| if ( state.preserveSpace ) { |
| // If preserving space then hold of indentation so no |
| // excessive spaces are printed at line breaks, escape |
| // the text content without replacing spaces and print |
| // the text breaking only at line breaks. |
| saveIndent = _printer.getNextIndent(); |
| _printer.setNextIndent( 0 ); |
| printText( chars, start, length, true, state.unescaped ); |
| _printer.setNextIndent( saveIndent ); |
| } else { |
| printText( chars, start, length, false, state.unescaped ); |
| } |
| } |
| } catch ( IOException except ) { |
| throw new SAXException( except ); |
| } |
| } |
| |
| |
| // |
| // overwrite printing functions to make sure serializer prints out valid XML |
| // |
| protected void printEscaped( String source ) throws IOException { |
| int length = source.length(); |
| for ( int i = 0 ; i < length ; ++i ) { |
| int ch = source.charAt(i); |
| if (!XML11Char.isXML11Valid(ch)) { |
| if (++i <length) { |
| surrogates(ch, source.charAt(i)); |
| } else { |
| fatalError("The character '"+(char)ch+"' is an invalid XML character"); |
| } |
| continue; |
| } |
| if (ch == '\n' || ch == '\r' || ch == '\t' || ch == 0x0085 || ch == 0x2028){ |
| printHex(ch); |
| } else if (ch == '<') { |
| _printer.printText("<"); |
| } else if (ch == '&') { |
| _printer.printText("&"); |
| } else if (ch == '"') { |
| _printer.printText("""); |
| } else if ((ch >= ' ' && _encodingInfo.isPrintable((char) ch))) { |
| _printer.printText((char) ch); |
| } else { |
| printHex(ch); |
| } |
| } |
| } |
| |
| protected final void printCDATAText(String text) throws IOException { |
| int length = text.length(); |
| char ch; |
| |
| for (int index = 0; index < length; ++index) { |
| ch = text.charAt(index); |
| |
| if (ch == ']' |
| && index + 2 < length |
| && text.charAt(index + 1) == ']' |
| && text.charAt(index + 2) == '>') { // check for ']]>' |
| if (fDOMErrorHandler != null){ |
| // REVISIT: this means that if DOM Error handler is not registered we don't report any |
| // fatal errors and might serialize not wellformed document |
| if ((features & DOMSerializerImpl.SPLITCDATA) == 0 |
| && (features & DOMSerializerImpl.WELLFORMED) == 0) { |
| // issue fatal error |
| String msg = |
| DOMMessageFormatter.formatMessage( |
| DOMMessageFormatter.SERIALIZER_DOMAIN, |
| "EndingCDATA", |
| null); |
| modifyDOMError( |
| msg, |
| DOMError.SEVERITY_FATAL_ERROR, |
| null, fCurrentNode); |
| boolean continueProcess = |
| fDOMErrorHandler.handleError(fDOMError); |
| if (!continueProcess) { |
| throw new IOException(); |
| } |
| } else { |
| // issue warning |
| String msg = |
| DOMMessageFormatter.formatMessage( |
| DOMMessageFormatter.SERIALIZER_DOMAIN, |
| "SplittingCDATA", |
| null); |
| modifyDOMError( |
| msg, |
| DOMError.SEVERITY_WARNING, |
| null, fCurrentNode); |
| fDOMErrorHandler.handleError(fDOMError); |
| } |
| } |
| // split CDATA section |
| _printer.printText("]]]]><![CDATA[>"); |
| index += 2; |
| continue; |
| } |
| |
| if (!XML11Char.isXML11Valid(ch)) { |
| // check if it is surrogate |
| if (++index < length) { |
| surrogates(ch, text.charAt(index)); |
| } else { |
| fatalError( |
| "The character '" |
| + (char) ch |
| + "' is an invalid XML character"); |
| } |
| continue; |
| } else { |
| if (_encodingInfo.isPrintable((char) ch) |
| && XML11Char.isXML11ValidLiteral(ch)) { |
| _printer.printText((char) ch); |
| } else { |
| |
| // The character is not printable -- split CDATA section |
| _printer.printText("]]>&#x"); |
| _printer.printText(Integer.toHexString(ch)); |
| _printer.printText(";<![CDATA["); |
| } |
| } |
| } |
| } |
| |
| |
| // note that this "int" should, in all cases, be a char. |
| // REVISIT: make it a char... |
| protected final void printXMLChar( int ch ) throws IOException { |
| |
| if (ch == '\r' || ch == 0x0085 || ch == 0x2028) { |
| printHex(ch); |
| } else if ( ch == '<') { |
| _printer.printText("<"); |
| } else if (ch == '&') { |
| _printer.printText("&"); |
| } else if (ch == '>'){ |
| // character sequence "]]>" can't appear in content, therefore |
| // we should escape '>' |
| _printer.printText(">"); |
| } else if ( _encodingInfo.isPrintable((char)ch) && XML11Char.isXML11ValidLiteral(ch)) { |
| _printer.printText((char)ch); |
| } else { |
| printHex(ch); |
| } |
| } |
| |
| |
| |
| protected final void surrogates(int high, int low) throws IOException{ |
| if (XMLChar.isHighSurrogate(high)) { |
| if (!XMLChar.isLowSurrogate(low)) { |
| //Invalid XML |
| fatalError("The character '"+(char)low+"' is an invalid XML character"); |
| } |
| else { |
| int supplemental = XMLChar.supplemental((char)high, (char)low); |
| if (!XML11Char.isXML11Valid(supplemental)) { |
| //Invalid XML |
| fatalError("The character '"+(char)supplemental+"' is an invalid XML character"); |
| } |
| else { |
| if (content().inCData ) { |
| _printer.printText("]]>&#x"); |
| _printer.printText(Integer.toHexString(supplemental)); |
| _printer.printText(";<![CDATA["); |
| } |
| else { |
| printHex(supplemental); |
| } |
| } |
| } |
| } else { |
| fatalError("The character '"+(char)high+"' is an invalid XML character"); |
| } |
| |
| } |
| |
| |
| protected void printText( String text, boolean preserveSpace, boolean unescaped ) |
| throws IOException { |
| int index; |
| char ch; |
| int length = text.length(); |
| if ( preserveSpace ) { |
| // Preserving spaces: the text must print exactly as it is, |
| // without breaking when spaces appear in the text and without |
| // consolidating spaces. If a line terminator is used, a line |
| // break will occur. |
| for ( index = 0 ; index < length ; ++index ) { |
| ch = text.charAt( index ); |
| if (!XML11Char.isXML11Valid(ch)) { |
| // check if it is surrogate |
| if (++index <length) { |
| surrogates(ch, text.charAt(index)); |
| } else { |
| fatalError("The character '"+(char)ch+"' is an invalid XML character"); |
| } |
| continue; |
| } |
| if ( unescaped && XML11Char.isXML11ValidLiteral(ch)) { |
| _printer.printText( ch ); |
| } else |
| printXMLChar( ch ); |
| } |
| } else { |
| // Not preserving spaces: print one part at a time, and |
| // use spaces between parts to break them into different |
| // lines. Spaces at beginning of line will be stripped |
| // by printing mechanism. Line terminator is treated |
| // no different than other text part. |
| for ( index = 0 ; index < length ; ++index ) { |
| ch = text.charAt( index ); |
| if (!XML11Char.isXML11Valid(ch)) { |
| // check if it is surrogate |
| if (++index <length) { |
| surrogates(ch, text.charAt(index)); |
| } else { |
| fatalError("The character '"+(char)ch+"' is an invalid XML character"); |
| } |
| continue; |
| } |
| |
| if ( unescaped && XML11Char.isXML11ValidLiteral(ch) ) |
| _printer.printText( ch ); |
| else |
| printXMLChar( ch); |
| } |
| } |
| } |
| |
| |
| |
| protected void printText( char[] chars, int start, int length, |
| boolean preserveSpace, boolean unescaped ) throws IOException { |
| int index; |
| char ch; |
| |
| if ( preserveSpace ) { |
| // Preserving spaces: the text must print exactly as it is, |
| // without breaking when spaces appear in the text and without |
| // consolidating spaces. If a line terminator is used, a line |
| // break will occur. |
| while ( length-- > 0 ) { |
| ch = chars[start++]; |
| if (!XML11Char.isXML11Valid(ch)) { |
| // check if it is surrogate |
| if ( length-- > 0) { |
| surrogates(ch, chars[start++]); |
| } else { |
| fatalError("The character '"+(char)ch+"' is an invalid XML character"); |
| } |
| continue; |
| } |
| if ( unescaped && XML11Char.isXML11ValidLiteral(ch)) |
| _printer.printText( ch ); |
| else |
| printXMLChar( ch ); |
| } |
| } else { |
| // Not preserving spaces: print one part at a time, and |
| // use spaces between parts to break them into different |
| // lines. Spaces at beginning of line will be stripped |
| // by printing mechanism. Line terminator is treated |
| // no different than other text part. |
| while ( length-- > 0 ) { |
| ch = chars[start++]; |
| if (!XML11Char.isXML11Valid(ch)) { |
| // check if it is surrogate |
| if ( length-- > 0) { |
| surrogates(ch, chars[start++]); |
| } else { |
| fatalError("The character '"+(char)ch+"' is an invalid XML character"); |
| } |
| continue; |
| } |
| |
| if ( unescaped && XML11Char.isXML11ValidLiteral(ch)) |
| _printer.printText( ch ); |
| else |
| printXMLChar( ch ); |
| } |
| } |
| } |
| |
| |
| public boolean reset() { |
| super.reset(); |
| return true; |
| |
| } |
| |
| } |