| /* |
| * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. |
| * Copyright (C) 2009, 2010 Google Inc. All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "config.h" |
| #include "MarkupAccumulator.h" |
| |
| #include "CDATASection.h" |
| #include "Comment.h" |
| #include "DocumentFragment.h" |
| #include "DocumentType.h" |
| #include "Editor.h" |
| #include "HTMLElement.h" |
| #include "HTMLNames.h" |
| #include "KURL.h" |
| #include "ProcessingInstruction.h" |
| #include "XMLNSNames.h" |
| #include <wtf/unicode/CharacterNames.h> |
| |
| namespace WebCore { |
| |
| using namespace HTMLNames; |
| |
| void appendCharactersReplacingEntities(Vector<UChar>& out, const UChar* content, size_t length, EntityMask entityMask) |
| { |
| DEFINE_STATIC_LOCAL(const String, ampReference, ("&")); |
| DEFINE_STATIC_LOCAL(const String, ltReference, ("<")); |
| DEFINE_STATIC_LOCAL(const String, gtReference, (">")); |
| DEFINE_STATIC_LOCAL(const String, quotReference, (""")); |
| DEFINE_STATIC_LOCAL(const String, nbspReference, (" ")); |
| |
| static const EntityDescription entityMaps[] = { |
| { '&', ampReference, EntityAmp }, |
| { '<', ltReference, EntityLt }, |
| { '>', gtReference, EntityGt }, |
| { '"', quotReference, EntityQuot }, |
| { noBreakSpace, nbspReference, EntityNbsp }, |
| }; |
| |
| size_t positionAfterLastEntity = 0; |
| for (size_t i = 0; i < length; ++i) { |
| for (size_t m = 0; m < WTF_ARRAY_LENGTH(entityMaps); ++m) { |
| if (content[i] == entityMaps[m].entity && entityMaps[m].mask & entityMask) { |
| out.append(content + positionAfterLastEntity, i - positionAfterLastEntity); |
| append(out, entityMaps[m].reference); |
| positionAfterLastEntity = i + 1; |
| break; |
| } |
| } |
| } |
| out.append(content + positionAfterLastEntity, length - positionAfterLastEntity); |
| } |
| |
| MarkupAccumulator::MarkupAccumulator(Vector<Node*>* nodes, EAbsoluteURLs shouldResolveURLs, const Range* range) |
| : m_nodes(nodes) |
| , m_range(range) |
| , m_shouldResolveURLs(shouldResolveURLs) |
| { |
| } |
| |
| MarkupAccumulator::~MarkupAccumulator() |
| { |
| } |
| |
| String MarkupAccumulator::serializeNodes(Node* node, Node* nodeToSkip, EChildrenOnly childrenOnly) |
| { |
| Vector<UChar> out; |
| serializeNodesWithNamespaces(node, nodeToSkip, childrenOnly, 0); |
| out.reserveInitialCapacity(length()); |
| concatenateMarkup(out); |
| return String::adopt(out); |
| } |
| |
| void MarkupAccumulator::serializeNodesWithNamespaces(Node* node, Node* nodeToSkip, EChildrenOnly childrenOnly, const Namespaces* namespaces) |
| { |
| if (node == nodeToSkip) |
| return; |
| |
| Namespaces namespaceHash; |
| if (namespaces) |
| namespaceHash = *namespaces; |
| |
| if (!childrenOnly) |
| appendStartTag(node, &namespaceHash); |
| |
| if (!(node->document()->isHTMLDocument() && elementCannotHaveEndTag(node))) { |
| for (Node* current = node->firstChild(); current; current = current->nextSibling()) |
| serializeNodesWithNamespaces(current, nodeToSkip, IncludeNode, &namespaceHash); |
| } |
| |
| if (!childrenOnly) |
| appendEndTag(node); |
| } |
| |
| void MarkupAccumulator::appendString(const String& string) |
| { |
| m_succeedingMarkup.append(string); |
| } |
| |
| void MarkupAccumulator::appendStartTag(Node* node, Namespaces* namespaces) |
| { |
| Vector<UChar> markup; |
| appendStartMarkup(markup, node, namespaces); |
| appendString(String::adopt(markup)); |
| if (m_nodes) |
| m_nodes->append(node); |
| } |
| |
| void MarkupAccumulator::appendEndTag(Node* node) |
| { |
| Vector<UChar> markup; |
| appendEndMarkup(markup, node); |
| appendString(String::adopt(markup)); |
| } |
| |
| size_t MarkupAccumulator::totalLength(const Vector<String>& strings) |
| { |
| size_t length = 0; |
| for (size_t i = 0; i < strings.size(); ++i) |
| length += strings[i].length(); |
| return length; |
| } |
| |
| // FIXME: This is a very inefficient way of accumulating the markup. |
| // We're converting results of appendStartMarkup and appendEndMarkup from Vector<UChar> to String |
| // and then back to Vector<UChar> and again to String here. |
| void MarkupAccumulator::concatenateMarkup(Vector<UChar>& out) |
| { |
| for (size_t i = 0; i < m_succeedingMarkup.size(); ++i) |
| append(out, m_succeedingMarkup[i]); |
| } |
| |
| void MarkupAccumulator::appendAttributeValue(Vector<UChar>& result, const String& attribute, bool documentIsHTML) |
| { |
| appendCharactersReplacingEntities(result, attribute.characters(), attribute.length(), |
| documentIsHTML ? EntityMaskInHTMLAttributeValue : EntityMaskInAttributeValue); |
| } |
| |
| void MarkupAccumulator::appendQuotedURLAttributeValue(Vector<UChar>& result, const String& urlString) |
| { |
| UChar quoteChar = '\"'; |
| String strippedURLString = urlString.stripWhiteSpace(); |
| if (protocolIsJavaScript(strippedURLString)) { |
| // minimal escaping for javascript urls |
| if (strippedURLString.contains('"')) { |
| if (strippedURLString.contains('\'')) |
| strippedURLString.replace('\"', """); |
| else |
| quoteChar = '\''; |
| } |
| result.append(quoteChar); |
| append(result, strippedURLString); |
| result.append(quoteChar); |
| return; |
| } |
| |
| // FIXME: This does not fully match other browsers. Firefox percent-escapes non-ASCII characters for innerHTML. |
| result.append(quoteChar); |
| appendAttributeValue(result, urlString, false); |
| result.append(quoteChar); |
| } |
| |
| void MarkupAccumulator::appendNodeValue(Vector<UChar>& out, const Node* node, const Range* range, EntityMask entityMask) |
| { |
| String str = node->nodeValue(); |
| const UChar* characters = str.characters(); |
| size_t length = str.length(); |
| |
| if (range) { |
| ExceptionCode ec; |
| if (node == range->endContainer(ec)) |
| length = range->endOffset(ec); |
| if (node == range->startContainer(ec)) { |
| size_t start = range->startOffset(ec); |
| characters += start; |
| length -= start; |
| } |
| } |
| |
| appendCharactersReplacingEntities(out, characters, length, entityMask); |
| } |
| |
| bool MarkupAccumulator::shouldAddNamespaceElement(const Element* element) |
| { |
| // Don't add namespace attribute if it is already defined for this elem. |
| const AtomicString& prefix = element->prefix(); |
| AtomicString attr = !prefix.isEmpty() ? "xmlns:" + prefix : "xmlns"; |
| return !element->hasAttribute(attr); |
| } |
| |
| bool MarkupAccumulator::shouldAddNamespaceAttribute(const Attribute& attribute, Namespaces& namespaces) |
| { |
| namespaces.checkConsistency(); |
| |
| // Don't add namespace attributes twice |
| if (attribute.name() == XMLNSNames::xmlnsAttr) { |
| namespaces.set(emptyAtom.impl(), attribute.value().impl()); |
| return false; |
| } |
| |
| QualifiedName xmlnsPrefixAttr(xmlnsAtom, attribute.localName(), XMLNSNames::xmlnsNamespaceURI); |
| if (attribute.name() == xmlnsPrefixAttr) { |
| namespaces.set(attribute.localName().impl(), attribute.value().impl()); |
| return false; |
| } |
| |
| return true; |
| } |
| |
| void MarkupAccumulator::appendNamespace(Vector<UChar>& result, const AtomicString& prefix, const AtomicString& namespaceURI, Namespaces& namespaces) |
| { |
| namespaces.checkConsistency(); |
| if (namespaceURI.isEmpty()) |
| return; |
| |
| // Use emptyAtoms's impl() for both null and empty strings since the HashMap can't handle 0 as a key |
| AtomicStringImpl* pre = prefix.isEmpty() ? emptyAtom.impl() : prefix.impl(); |
| AtomicStringImpl* foundNS = namespaces.get(pre); |
| if (foundNS != namespaceURI.impl()) { |
| namespaces.set(pre, namespaceURI.impl()); |
| result.append(' '); |
| append(result, xmlnsAtom.string()); |
| if (!prefix.isEmpty()) { |
| result.append(':'); |
| append(result, prefix); |
| } |
| |
| result.append('='); |
| result.append('"'); |
| appendAttributeValue(result, namespaceURI, false); |
| result.append('"'); |
| } |
| } |
| |
| EntityMask MarkupAccumulator::entityMaskForText(Text* text) const |
| { |
| const QualifiedName* parentName = 0; |
| if (text->parentElement()) |
| parentName = &static_cast<Element*>(text->parentElement())->tagQName(); |
| |
| if (parentName && (*parentName == scriptTag || *parentName == styleTag || *parentName == xmpTag)) |
| return EntityMaskInCDATA; |
| |
| return text->document()->isHTMLDocument() ? EntityMaskInHTMLPCDATA : EntityMaskInPCDATA; |
| } |
| |
| void MarkupAccumulator::appendText(Vector<UChar>& out, Text* text) |
| { |
| appendNodeValue(out, text, m_range, entityMaskForText(text)); |
| } |
| |
| void MarkupAccumulator::appendComment(Vector<UChar>& out, const String& comment) |
| { |
| // FIXME: Comment content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "-->". |
| append(out, "<!--"); |
| append(out, comment); |
| append(out, "-->"); |
| } |
| |
| void MarkupAccumulator::appendDocumentType(Vector<UChar>& result, const DocumentType* n) |
| { |
| if (n->name().isEmpty()) |
| return; |
| |
| append(result, "<!DOCTYPE "); |
| append(result, n->name()); |
| if (!n->publicId().isEmpty()) { |
| append(result, " PUBLIC \""); |
| append(result, n->publicId()); |
| append(result, "\""); |
| if (!n->systemId().isEmpty()) { |
| append(result, " \""); |
| append(result, n->systemId()); |
| append(result, "\""); |
| } |
| } else if (!n->systemId().isEmpty()) { |
| append(result, " SYSTEM \""); |
| append(result, n->systemId()); |
| append(result, "\""); |
| } |
| if (!n->internalSubset().isEmpty()) { |
| append(result, " ["); |
| append(result, n->internalSubset()); |
| append(result, "]"); |
| } |
| append(result, ">"); |
| } |
| |
| void MarkupAccumulator::appendProcessingInstruction(Vector<UChar>& out, const String& target, const String& data) |
| { |
| // FIXME: PI data is not escaped, but XMLSerializer (and possibly other callers) this should raise an exception if it includes "?>". |
| append(out, "<?"); |
| append(out, target); |
| append(out, " "); |
| append(out, data); |
| append(out, "?>"); |
| } |
| |
| void MarkupAccumulator::appendElement(Vector<UChar>& out, Element* element, Namespaces* namespaces) |
| { |
| appendOpenTag(out, element, namespaces); |
| |
| NamedNodeMap* attributes = element->attributes(); |
| unsigned length = attributes->length(); |
| for (unsigned int i = 0; i < length; i++) |
| appendAttribute(out, element, *attributes->attributeItem(i), namespaces); |
| |
| appendCloseTag(out, element); |
| } |
| |
| void MarkupAccumulator::appendOpenTag(Vector<UChar>& out, Element* element, Namespaces* namespaces) |
| { |
| out.append('<'); |
| append(out, element->nodeNamePreservingCase()); |
| if (!element->document()->isHTMLDocument() && namespaces && shouldAddNamespaceElement(element)) |
| appendNamespace(out, element->prefix(), element->namespaceURI(), *namespaces); |
| } |
| |
| void MarkupAccumulator::appendCloseTag(Vector<UChar>& out, Element* element) |
| { |
| if (shouldSelfClose(element)) { |
| if (element->isHTMLElement()) |
| out.append(' '); // XHTML 1.0 <-> HTML compatibility. |
| out.append('/'); |
| } |
| out.append('>'); |
| } |
| |
| void MarkupAccumulator::appendAttribute(Vector<UChar>& out, Element* element, const Attribute& attribute, Namespaces* namespaces) |
| { |
| bool documentIsHTML = element->document()->isHTMLDocument(); |
| |
| out.append(' '); |
| |
| if (documentIsHTML) |
| append(out, attribute.name().localName()); |
| else |
| append(out, attribute.name().toString()); |
| |
| out.append('='); |
| |
| if (element->isURLAttribute(const_cast<Attribute*>(&attribute))) { |
| // We don't want to complete file:/// URLs because it may contain sensitive information |
| // about the user's system. |
| if (shouldResolveURLs() && !element->document()->url().isLocalFile()) |
| appendQuotedURLAttributeValue(out, element->document()->completeURL(attribute.value()).string()); |
| else |
| appendQuotedURLAttributeValue(out, attribute.value()); |
| } else { |
| out.append('\"'); |
| appendAttributeValue(out, attribute.value(), documentIsHTML); |
| out.append('\"'); |
| } |
| |
| if (!documentIsHTML && namespaces && shouldAddNamespaceAttribute(attribute, *namespaces)) |
| appendNamespace(out, attribute.prefix(), attribute.namespaceURI(), *namespaces); |
| } |
| |
| void MarkupAccumulator::appendCDATASection(Vector<UChar>& out, const String& section) |
| { |
| // FIXME: CDATA content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "]]>". |
| append(out, "<![CDATA["); |
| append(out, section); |
| append(out, "]]>"); |
| } |
| |
| void MarkupAccumulator::appendStartMarkup(Vector<UChar>& result, const Node* node, Namespaces* namespaces) |
| { |
| if (namespaces) |
| namespaces->checkConsistency(); |
| |
| switch (node->nodeType()) { |
| case Node::TEXT_NODE: |
| appendText(result, static_cast<Text*>(const_cast<Node*>(node))); |
| break; |
| case Node::COMMENT_NODE: |
| appendComment(result, static_cast<const Comment*>(node)->data()); |
| break; |
| case Node::DOCUMENT_NODE: |
| case Node::DOCUMENT_FRAGMENT_NODE: |
| break; |
| case Node::DOCUMENT_TYPE_NODE: |
| appendDocumentType(result, static_cast<const DocumentType*>(node)); |
| break; |
| case Node::PROCESSING_INSTRUCTION_NODE: |
| appendProcessingInstruction(result, static_cast<const ProcessingInstruction*>(node)->target(), static_cast<const ProcessingInstruction*>(node)->data()); |
| break; |
| case Node::ELEMENT_NODE: |
| appendElement(result, static_cast<Element*>(const_cast<Node*>(node)), namespaces); |
| break; |
| case Node::CDATA_SECTION_NODE: |
| appendCDATASection(result, static_cast<const CDATASection*>(node)->data()); |
| break; |
| case Node::ATTRIBUTE_NODE: |
| case Node::ENTITY_NODE: |
| case Node::ENTITY_REFERENCE_NODE: |
| case Node::NOTATION_NODE: |
| case Node::XPATH_NAMESPACE_NODE: |
| ASSERT_NOT_REACHED(); |
| break; |
| } |
| } |
| |
| // Rules of self-closure |
| // 1. No elements in HTML documents use the self-closing syntax. |
| // 2. Elements w/ children never self-close because they use a separate end tag. |
| // 3. HTML elements which do not have a "forbidden" end tag will close with a separate end tag. |
| // 4. Other elements self-close. |
| bool MarkupAccumulator::shouldSelfClose(const Node* node) |
| { |
| if (node->document()->isHTMLDocument()) |
| return false; |
| if (node->hasChildNodes()) |
| return false; |
| if (node->isHTMLElement() && !elementCannotHaveEndTag(node)) |
| return false; |
| return true; |
| } |
| |
| bool MarkupAccumulator::elementCannotHaveEndTag(const Node* node) |
| { |
| if (!node->isHTMLElement()) |
| return false; |
| |
| // FIXME: ieForbidsInsertHTML may not be the right function to call here |
| // ieForbidsInsertHTML is used to disallow setting innerHTML/outerHTML |
| // or createContextualFragment. It does not necessarily align with |
| // which elements should be serialized w/o end tags. |
| return static_cast<const HTMLElement*>(node)->ieForbidsInsertHTML(); |
| } |
| |
| void MarkupAccumulator::appendEndMarkup(Vector<UChar>& result, const Node* node) |
| { |
| if (!node->isElementNode() || shouldSelfClose(node) || (!node->hasChildNodes() && elementCannotHaveEndTag(node))) |
| return; |
| |
| result.append('<'); |
| result.append('/'); |
| append(result, static_cast<const Element*>(node)->nodeNamePreservingCase()); |
| result.append('>'); |
| } |
| |
| } |