/*
 * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


// Aug 21, 2000:
//   Added ability to omit DOCTYPE declaration.
//   Reported by Lars Martin <lars@smb-tec.com>
// Aug 25, 2000:
//   Added ability to omit comments.
//   Contributed by Anupam Bagchi <abagchi@jtcsv.com>


package com.sun.org.apache.xml.internal.serialize;


import java.io.UnsupportedEncodingException;

import org.w3c.dom.Document;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Node;


| /** |
| * Specifies an output format to control the serializer. Based on the |
| * XSLT specification for output format, plus additional parameters. |
| * Used to select the suitable serializer and determine how the |
| * document should be formatted on output. |
| * <p> |
| * The two interesting constructors are: |
| * <ul> |
| * <li>{@link #OutputFormat(String,String,boolean)} creates a format |
| * for the specified method (XML, HTML, Text, etc), encoding and indentation |
| * <li>{@link #OutputFormat(Document,String,boolean)} creates a format |
| * compatible with the document type (XML, HTML, Text, etc), encoding and |
| * indentation |
| * </ul> |
| * |
| * |
| * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a> |
| * <a href="mailto:visco@intalio.com">Keith Visco</a> |
| * @see Serializer |
| * @see Method |
| * @see LineSeparator |
| * |
| * @deprecated As of JDK 9, Xerces 2.9.0, Xerces DOM L3 Serializer implementation |
| * is replaced by that of Xalan. Main class |
| * {@link com.sun.org.apache.xml.internal.serialize.DOMSerializerImpl} is replaced |
| * by {@link com.sun.org.apache.xml.internal.serializer.dom3.LSSerializerImpl}. |
| */ |
| @Deprecated |
| public class OutputFormat |
| { |
| |
| |
| public static class DTD |
| { |
| |
| /** |
| * Public identifier for HTML 4.01 (Strict) document type. |
| */ |
| public static final String HTMLPublicId = "-//W3C//DTD HTML 4.01//EN"; |
| |
| /** |
| * System identifier for HTML 4.01 (Strict) document type. |
| */ |
| public static final String HTMLSystemId = |
| "http://www.w3.org/TR/html4/strict.dtd"; |
| |
| /** |
| * Public identifier for XHTML 1.0 (Strict) document type. |
| */ |
| public static final String XHTMLPublicId = |
| "-//W3C//DTD XHTML 1.0 Strict//EN"; |
| |
| /** |
| * System identifier for XHTML 1.0 (Strict) document type. |
| */ |
| public static final String XHTMLSystemId = |
| "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"; |
| |
| } |
| |
| |
| public static class Defaults |
| { |
| |
| /** |
| * If indentation is turned on, the default identation |
| * level is 4. |
| * |
| * @see #setIndenting(boolean) |
| */ |
| public static final int Indent = 4; |
| |
| /** |
| * The default encoding for Web documents it UTF-8. |
| * |
| * @see #getEncoding() |
| */ |
| public static final String Encoding = "UTF-8"; |
| |
| /** |
| * The default line width at which to break long lines |
| * when identing. This is set to 72. |
| */ |
| public static final int LineWidth = 72; |
| |
| } |
| |
| |
| /** |
| * Holds the output method specified for this document, |
| * or null if no method was specified. |
| */ |
| private String _method; |
| |
| |
| /** |
| * Specifies the version of the output method. |
| */ |
| private String _version; |
| |
| |
| /** |
| * The indentation level, or zero if no indentation |
| * was requested. |
| */ |
| private int _indent = 0; |
| |
| |
| /** |
| * The encoding to use, if an input stream is used. |
| * The default is always UTF-8. |
| */ |
| private String _encoding = Defaults.Encoding; |
| |
| /** |
| * The EncodingInfo instance for _encoding. |
| */ |
| private EncodingInfo _encodingInfo = null; |
| |
| // whether java names for encodings are permitted |
| private boolean _allowJavaNames = false; |
| |
| /** |
| * The specified media type or null. |
| */ |
| private String _mediaType; |
| |
| |
| /** |
| * The specified document type system identifier, or null. |
| */ |
| private String _doctypeSystem; |
| |
| |
| /** |
| * The specified document type public identifier, or null. |
| */ |
| private String _doctypePublic; |
| |
| |
| /** |
| * Ture if the XML declaration should be ommited; |
| */ |
| private boolean _omitXmlDeclaration = false; |
| |
| |
| /** |
| * Ture if the DOCTYPE declaration should be ommited; |
| */ |
| private boolean _omitDoctype = false; |
| |
| |
| /** |
| * Ture if comments should be ommited; |
| */ |
| private boolean _omitComments = false; |
| |
| |
| /** |
| * Ture if the comments should be ommited; |
| */ |
| private boolean _stripComments = false; |
| |
| |
| /** |
| * True if the document type should be marked as standalone. |
| */ |
| private boolean _standalone = false; |
| |
| |
| /** |
| * List of element tag names whose text node children must |
| * be output as CDATA. |
| */ |
| private String[] _cdataElements; |
| |
| |
| /** |
| * List of element tag names whose text node children must |
| * be output unescaped. |
| */ |
| private String[] _nonEscapingElements; |
| |
| |
| /** |
| * The selected line separator. |
| */ |
| private String _lineSeparator = LineSeparator.Web; |
| |
| |
| /** |
| * The line width at which to wrap long lines when indenting. |
| */ |
| private int _lineWidth = Defaults.LineWidth; |
| |
| |
| /** |
| * True if spaces should be preserved in elements that do not |
| * specify otherwise, or specify the default behavior. |
| */ |
| private boolean _preserve = false; |
| /** If true, an empty string valued attribute is output as "". If false and |
| * and we are using the HTMLSerializer, then only the attribute name is |
| * serialized. Defaults to false for backwards compatibility. |
| */ |
| private boolean _preserveEmptyAttributes = false; |
| |
| /** |
| * Constructs a new output format with the default values. |
| */ |
| public OutputFormat() |
| { |
| } |
| |
| |
| /** |
| * Constructs a new output format with the default values for |
| * the specified method and encoding. If <tt>indent</tt> |
| * is true, the document will be pretty printed with the default |
| * indentation level and default line wrapping. |
| * |
| * @param method The specified output method |
| * @param encoding The specified encoding |
| * @param indenting True for pretty printing |
| * @see #setEncoding |
| * @see #setIndenting |
| * @see #setMethod |
| */ |
| public OutputFormat( String method, String encoding, boolean indenting ) |
| { |
| setMethod( method ); |
| setEncoding( encoding ); |
| setIndenting( indenting ); |
| } |
| |
| /** |
| * Returns the method specified for this output format. |
| * Typically the method will be <tt>xml</tt>, <tt>html</tt> |
| * or <tt>text</tt>, but it might be other values. |
| * If no method was specified, null will be returned |
| * and the most suitable method will be determined for |
| * the document by calling {@link #whichMethod}. |
| * |
| * @return The specified output method, or null |
| */ |
| public String getMethod() |
| { |
| return _method; |
| } |
| |
| |
| /** |
| * Sets the method for this output format. |
| * |
| * @see #getMethod |
| * @param method The output method, or null |
| */ |
| public void setMethod( String method ) |
| { |
| _method = method; |
| } |
| |
| |
| /** |
| * Returns the version for this output method. |
| * If no version was specified, will return null |
| * and the default version number will be used. |
| * If the serializerr does not support that particular |
| * version, it should default to a supported version. |
| * |
| * @return The specified method version, or null |
| */ |
| public String getVersion() |
| { |
| return _version; |
| } |
| |
| |
| /** |
| * Sets the version for this output method. |
| * For XML the value would be "1.0", for HTML |
| * it would be "4.0". |
| * |
| * @see #getVersion |
| * @param version The output method version, or null |
| */ |
| public void setVersion( String version ) |
| { |
| _version = version; |
| } |
| |
| |
| /** |
| * Returns the indentation specified. If no indentation |
| * was specified, zero is returned and the document |
| * should not be indented. |
| * |
| * @return The indentation or zero |
| * @see #setIndenting |
| */ |
| public int getIndent() |
| { |
| return _indent; |
| } |
| |
| |
| /** |
| * Returns true if indentation was specified. |
| */ |
| public boolean getIndenting() |
| { |
| return ( _indent > 0 ); |
| } |
| |
| |
| /** |
| * Sets the indentation. The document will not be |
| * indented if the indentation is set to zero. |
| * Calling {@link #setIndenting} will reset this |
| * value to zero (off) or the default (on). |
| * |
| * @param indent The indentation, or zero |
| */ |
| public void setIndent( int indent ) |
| { |
| if ( indent < 0 ) |
| _indent = 0; |
| else |
| _indent = indent; |
| } |
| |
| |
| /** |
| * Sets the indentation on and off. When set on, the default |
| * indentation level and default line wrapping is used |
| * (see {@link Defaults#Indent} and {@link Defaults#LineWidth}). |
| * To specify a different indentation level or line wrapping, |
| * use {@link #setIndent} and {@link #setLineWidth}. |
| * |
| * @param on True if indentation should be on |
| */ |
| public void setIndenting( boolean on ) |
| { |
| if ( on ) { |
| _indent = Defaults.Indent; |
| _lineWidth = Defaults.LineWidth; |
| } else { |
| _indent = 0; |
| _lineWidth = 0; |
| } |
| } |
| |
| |
| /** |
| * Returns the specified encoding. If no encoding was |
| * specified, the default is always "UTF-8". |
| * |
| * @return The encoding |
| */ |
| public String getEncoding() |
| { |
| return _encoding; |
| } |
| |
| |
| /** |
| * Sets the encoding for this output method. If no |
| * encoding was specified, the default is always "UTF-8". |
| * Make sure the encoding is compatible with the one |
| * used by the {@link java.io.Writer}. |
| * |
| * @see #getEncoding |
| * @param encoding The encoding, or null |
| */ |
| public void setEncoding( String encoding ) |
| { |
| _encoding = encoding; |
| _encodingInfo = null; |
| } |
| |
| /** |
| * Sets the encoding for this output method with an <code>EncodingInfo</code> |
| * instance. |
| */ |
| public void setEncoding(EncodingInfo encInfo) { |
| _encoding = encInfo.getIANAName(); |
| _encodingInfo = encInfo; |
| } |
| |
| /** |
| * Returns an <code>EncodingInfo<code> instance for the encoding. |
| * |
| * @see #setEncoding |
| */ |
| public EncodingInfo getEncodingInfo() throws UnsupportedEncodingException { |
| if (_encodingInfo == null) |
| _encodingInfo = Encodings.getEncodingInfo(_encoding, _allowJavaNames); |
| return _encodingInfo; |
| } |
| |
| /** |
| * Sets whether java encoding names are permitted |
| */ |
| public void setAllowJavaNames (boolean allow) { |
| _allowJavaNames = allow; |
| } |
| |
| /** |
| * Returns whether java encoding names are permitted |
| */ |
| public boolean setAllowJavaNames () { |
| return _allowJavaNames; |
| } |
| |
| /** |
| * Returns the specified media type, or null. |
| * To determine the media type based on the |
| * document type, use {@link #whichMediaType}. |
| * |
| * @return The specified media type, or null |
| */ |
| public String getMediaType() |
| { |
| return _mediaType; |
| } |
| |
| |
| /** |
| * Sets the media type. |
| * |
| * @see #getMediaType |
| * @param mediaType The specified media type |
| */ |
| public void setMediaType( String mediaType ) |
| { |
| _mediaType = mediaType; |
| } |
| |
| |
| /** |
| * Sets the document type public and system identifiers. |
| * Required only if the DOM Document or SAX events do not |
| * specify the document type, and one must be present in |
| * the serialized document. Any document type specified |
| * by the DOM Document or SAX events will override these |
| * values. |
| * |
| * @param publicId The public identifier, or null |
| * @param systemId The system identifier, or null |
| */ |
| public void setDoctype( String publicId, String systemId ) |
| { |
| _doctypePublic = publicId; |
| _doctypeSystem = systemId; |
| } |
| |
| |
| /** |
| * Returns the specified document type public identifier, |
| * or null. |
| */ |
| public String getDoctypePublic() |
| { |
| return _doctypePublic; |
| } |
| |
| |
| /** |
| * Returns the specified document type system identifier, |
| * or null. |
| */ |
| public String getDoctypeSystem() |
| { |
| return _doctypeSystem; |
| } |
| |
| |
| /** |
| * Returns true if comments should be ommited. |
| * The default is false. |
| */ |
| public boolean getOmitComments() |
| { |
| return _omitComments; |
| } |
| |
| |
| /** |
| * Sets comment omitting on and off. |
| * |
| * @param omit True if comments should be ommited |
| */ |
| public void setOmitComments( boolean omit ) |
| { |
| _omitComments = omit; |
| } |
| |
| |
| /** |
| * Returns true if the DOCTYPE declaration should |
| * be ommited. The default is false. |
| */ |
| public boolean getOmitDocumentType() |
| { |
| return _omitDoctype; |
| } |
| |
| |
| /** |
| * Sets DOCTYPE declaration omitting on and off. |
| * |
| * @param omit True if DOCTYPE declaration should be ommited |
| */ |
| public void setOmitDocumentType( boolean omit ) |
| { |
| _omitDoctype = omit; |
| } |
| |
| |
| /** |
| * Returns true if the XML document declaration should |
| * be ommited. The default is false. |
| */ |
| public boolean getOmitXMLDeclaration() |
| { |
| return _omitXmlDeclaration; |
| } |
| |
| |
| /** |
| * Sets XML declaration omitting on and off. |
| * |
| * @param omit True if XML declaration should be ommited |
| */ |
| public void setOmitXMLDeclaration( boolean omit ) |
| { |
| _omitXmlDeclaration = omit; |
| } |
| |
| |
| /** |
| * Returns true if the document type is standalone. |
| * The default is false. |
| */ |
| public boolean getStandalone() |
| { |
| return _standalone; |
| } |
| |
| |
| /** |
| * Sets document DTD standalone. The public and system |
| * identifiers must be null for the document to be |
| * serialized as standalone. |
| * |
| * @param standalone True if document DTD is standalone |
| */ |
| public void setStandalone( boolean standalone ) |
| { |
| _standalone = standalone; |
| } |
| |
| |
| /** |
| * Returns a list of all the elements whose text node children |
| * should be output as CDATA, or null if no such elements were |
| * specified. |
| */ |
| public String[] getCDataElements() |
| { |
| return _cdataElements; |
| } |
| |
| |
| /** |
| * Returns true if the text node children of the given elements |
| * should be output as CDATA. |
| * |
| * @param tagName The element's tag name |
| * @return True if should serialize as CDATA |
| */ |
| public boolean isCDataElement( String tagName ) |
| { |
| int i; |
| |
| if ( _cdataElements == null ) |
| return false; |
| for ( i = 0 ; i < _cdataElements.length ; ++i ) |
| if ( _cdataElements[ i ].equals( tagName ) ) |
| return true; |
| return false; |
| } |
| |
| |
| /** |
| * Sets the list of elements for which text node children |
| * should be output as CDATA. |
| * |
| * @param cdataElements List of CDATA element tag names |
| */ |
| public void setCDataElements( String[] cdataElements ) |
| { |
| _cdataElements = cdataElements; |
| } |
| |
| |
| /** |
| * Returns a list of all the elements whose text node children |
| * should be output unescaped (no character references), or null |
| * if no such elements were specified. |
| */ |
| public String[] getNonEscapingElements() |
| { |
| return _nonEscapingElements; |
| } |
| |
| |
| /** |
| * Returns true if the text node children of the given elements |
| * should be output unescaped. |
| * |
| * @param tagName The element's tag name |
| * @return True if should serialize unescaped |
| */ |
| public boolean isNonEscapingElement( String tagName ) |
| { |
| int i; |
| |
| if ( _nonEscapingElements == null ) { |
| return false; |
| } |
| for ( i = 0 ; i < _nonEscapingElements.length ; ++i ) |
| if ( _nonEscapingElements[ i ].equals( tagName ) ) |
| return true; |
| return false; |
| } |
| |
| |
| /** |
| * Sets the list of elements for which text node children |
| * should be output unescaped (no character references). |
| * |
| * @param nonEscapingElements List of unescaped element tag names |
| */ |
| public void setNonEscapingElements( String[] nonEscapingElements ) |
| { |
| _nonEscapingElements = nonEscapingElements; |
| } |
| |
| |
| |
| /** |
| * Returns a specific line separator to use. The default is the |
| * Web line separator (<tt>\n</tt>). A string is returned to |
| * support double codes (CR + LF). |
| * |
| * @return The specified line separator |
| */ |
| public String getLineSeparator() |
| { |
| return _lineSeparator; |
| } |
| |
| |
| /** |
| * Sets the line separator. The default is the Web line separator |
| * (<tt>\n</tt>). The machine's line separator can be obtained |
| * from the system property <tt>line.separator</tt>, but is only |
| * useful if the document is edited on machines of the same type. |
| * For general documents, use the Web line separator. |
| * |
| * @param lineSeparator The specified line separator |
| */ |
| public void setLineSeparator( String lineSeparator ) |
| { |
| if ( lineSeparator == null ) |
| _lineSeparator = LineSeparator.Web; |
| else |
| _lineSeparator = lineSeparator; |
| } |
| |
| |
| /** |
| * Returns true if the default behavior for this format is to |
| * preserve spaces. All elements that do not specify otherwise |
| * or specify the default behavior will be formatted based on |
| * this rule. All elements that specify space preserving will |
| * always preserve space. |
| */ |
| public boolean getPreserveSpace() |
| { |
| return _preserve; |
| } |
| |
| |
| /** |
| * Sets space preserving as the default behavior. The default is |
| * space stripping and all elements that do not specify otherwise |
| * or use the default value will not preserve spaces. |
| * |
| * @param preserve True if spaces should be preserved |
| */ |
| public void setPreserveSpace( boolean preserve ) |
| { |
| _preserve = preserve; |
| } |
| |
| |
| /** |
| * Return the selected line width for breaking up long lines. |
| * When indenting, and only when indenting, long lines will be |
| * broken at space boundaries based on this line width. |
| * No line wrapping occurs if this value is zero. |
| */ |
| public int getLineWidth() |
| { |
| return _lineWidth; |
| } |
| |
| |
| /** |
| * Sets the line width. If zero then no line wrapping will |
| * occur. Calling {@link #setIndenting} will reset this |
| * value to zero (off) or the default (on). |
| * |
| * @param lineWidth The line width to use, zero for default |
| * @see #getLineWidth |
| * @see #setIndenting |
| */ |
| public void setLineWidth( int lineWidth ) |
| { |
| if ( lineWidth <= 0 ) |
| _lineWidth = 0; |
| else |
| _lineWidth = lineWidth; |
| } |
| /** |
| * Returns the preserveEmptyAttribute flag. If flag is false, then' |
| * attributes with empty string values are output as the attribute |
| * name only (in HTML mode). |
| * @return preserve the preserve flag |
| */ public boolean getPreserveEmptyAttributes () { return _preserveEmptyAttributes; } /** |
| * Sets the preserveEmptyAttribute flag. If flag is false, then' |
| * attributes with empty string values are output as the attribute |
| * name only (in HTML mode). |
| * @param preserve the preserve flag |
| */ public void setPreserveEmptyAttributes (boolean preserve) { _preserveEmptyAttributes = preserve; } |
| |
| /** |
| * Returns the last printable character based on the selected |
| * encoding. Control characters and non-printable characters |
| * are always printed as character references. |
| */ |
| public char getLastPrintable() |
| { |
| if ( getEncoding() != null && |
| ( getEncoding().equalsIgnoreCase( "ASCII" ) ) ) |
| return 0xFF; |
| else |
| return 0xFFFF; |
| } |
| |
| |
| /** |
| * Returns the suitable media format for a document |
| * output with the specified method. |
| */ |
| public static String whichMediaType( String method ) |
| { |
| if ( method.equalsIgnoreCase( Method.XML ) ) |
| return "text/xml"; |
| if ( method.equalsIgnoreCase( Method.HTML ) ) |
| return "text/html"; |
| if ( method.equalsIgnoreCase( Method.XHTML ) ) |
| return "text/html"; |
| if ( method.equalsIgnoreCase( Method.TEXT ) ) |
| return "text/plain"; |
| if ( method.equalsIgnoreCase( Method.FOP ) ) |
| return "application/pdf"; |
| return null; |
| } |
| |
| |
| } |