| from __future__ import absolute_import, division, unicode_literals |
| from pip._vendor.six import text_type |
| |
| from ..constants import scopingElements, tableInsertModeElements, namespaces |
| |
| # The scope markers are inserted when entering object elements, |
| # marquees, table cells, and table captions, and are used to prevent formatting |
| # from "leaking" into tables, object elements, and marquees. |
| Marker = None |
| |
| listElementsMap = { |
| None: (frozenset(scopingElements), False), |
| "button": (frozenset(scopingElements | set([(namespaces["html"], "button")])), False), |
| "list": (frozenset(scopingElements | set([(namespaces["html"], "ol"), |
| (namespaces["html"], "ul")])), False), |
| "table": (frozenset([(namespaces["html"], "html"), |
| (namespaces["html"], "table")]), False), |
| "select": (frozenset([(namespaces["html"], "optgroup"), |
| (namespaces["html"], "option")]), True) |
| } |
| |
| |
| class Node(object): |
| """Represents an item in the tree""" |
| def __init__(self, name): |
| """Creates a Node |
| |
| :arg name: The tag name associated with the node |
| |
| """ |
| # The tag name assocaited with the node |
| self.name = name |
| # The parent of the current node (or None for the document node) |
| self.parent = None |
| # The value of the current node (applies to text nodes and comments) |
| self.value = None |
| # A dict holding name -> value pairs for attributes of the node |
| self.attributes = {} |
| # A list of child nodes of the current node. This must include all |
| # elements but not necessarily other node types. |
| self.childNodes = [] |
| # A list of miscellaneous flags that can be set on the node. |
| self._flags = [] |
| |
| def __str__(self): |
| attributesStr = " ".join(["%s=\"%s\"" % (name, value) |
| for name, value in |
| self.attributes.items()]) |
| if attributesStr: |
| return "<%s %s>" % (self.name, attributesStr) |
| else: |
| return "<%s>" % (self.name) |
| |
| def __repr__(self): |
| return "<%s>" % (self.name) |
| |
| def appendChild(self, node): |
| """Insert node as a child of the current node |
| |
| :arg node: the node to insert |
| |
| """ |
| raise NotImplementedError |
| |
| def insertText(self, data, insertBefore=None): |
| """Insert data as text in the current node, positioned before the |
| start of node insertBefore or to the end of the node's text. |
| |
| :arg data: the data to insert |
| |
| :arg insertBefore: True if you want to insert the text before the node |
| and False if you want to insert it after the node |
| |
| """ |
| raise NotImplementedError |
| |
| def insertBefore(self, node, refNode): |
| """Insert node as a child of the current node, before refNode in the |
| list of child nodes. Raises ValueError if refNode is not a child of |
| the current node |
| |
| :arg node: the node to insert |
| |
| :arg refNode: the child node to insert the node before |
| |
| """ |
| raise NotImplementedError |
| |
| def removeChild(self, node): |
| """Remove node from the children of the current node |
| |
| :arg node: the child node to remove |
| |
| """ |
| raise NotImplementedError |
| |
| def reparentChildren(self, newParent): |
| """Move all the children of the current node to newParent. |
| This is needed so that trees that don't store text as nodes move the |
| text in the correct way |
| |
| :arg newParent: the node to move all this node's children to |
| |
| """ |
| # XXX - should this method be made more general? |
| for child in self.childNodes: |
| newParent.appendChild(child) |
| self.childNodes = [] |
| |
| def cloneNode(self): |
| """Return a shallow copy of the current node i.e. a node with the same |
| name and attributes but with no parent or child nodes |
| """ |
| raise NotImplementedError |
| |
| def hasContent(self): |
| """Return true if the node has children or text, false otherwise |
| """ |
| raise NotImplementedError |
| |
| |
| class ActiveFormattingElements(list): |
| def append(self, node): |
| equalCount = 0 |
| if node != Marker: |
| for element in self[::-1]: |
| if element == Marker: |
| break |
| if self.nodesEqual(element, node): |
| equalCount += 1 |
| if equalCount == 3: |
| self.remove(element) |
| break |
| list.append(self, node) |
| |
| def nodesEqual(self, node1, node2): |
| if not node1.nameTuple == node2.nameTuple: |
| return False |
| |
| if not node1.attributes == node2.attributes: |
| return False |
| |
| return True |
| |
| |
| class TreeBuilder(object): |
| """Base treebuilder implementation |
| |
| * documentClass - the class to use for the bottommost node of a document |
| * elementClass - the class to use for HTML Elements |
| * commentClass - the class to use for comments |
| * doctypeClass - the class to use for doctypes |
| |
| """ |
| # pylint:disable=not-callable |
| |
| # Document class |
| documentClass = None |
| |
| # The class to use for creating a node |
| elementClass = None |
| |
| # The class to use for creating comments |
| commentClass = None |
| |
| # The class to use for creating doctypes |
| doctypeClass = None |
| |
| # Fragment class |
| fragmentClass = None |
| |
| def __init__(self, namespaceHTMLElements): |
| """Create a TreeBuilder |
| |
| :arg namespaceHTMLElements: whether or not to namespace HTML elements |
| |
| """ |
| if namespaceHTMLElements: |
| self.defaultNamespace = "http://www.w3.org/1999/xhtml" |
| else: |
| self.defaultNamespace = None |
| self.reset() |
| |
| def reset(self): |
| self.openElements = [] |
| self.activeFormattingElements = ActiveFormattingElements() |
| |
| # XXX - rename these to headElement, formElement |
| self.headPointer = None |
| self.formPointer = None |
| |
| self.insertFromTable = False |
| |
| self.document = self.documentClass() |
| |
| def elementInScope(self, target, variant=None): |
| |
| # If we pass a node in we match that. if we pass a string |
| # match any node with that name |
| exactNode = hasattr(target, "nameTuple") |
| if not exactNode: |
| if isinstance(target, text_type): |
| target = (namespaces["html"], target) |
| assert isinstance(target, tuple) |
| |
| listElements, invert = listElementsMap[variant] |
| |
| for node in reversed(self.openElements): |
| if exactNode and node == target: |
| return True |
| elif not exactNode and node.nameTuple == target: |
| return True |
| elif (invert ^ (node.nameTuple in listElements)): |
| return False |
| |
| assert False # We should never reach this point |
| |
| def reconstructActiveFormattingElements(self): |
| # Within this algorithm the order of steps described in the |
| # specification is not quite the same as the order of steps in the |
| # code. It should still do the same though. |
| |
| # Step 1: stop the algorithm when there's nothing to do. |
| if not self.activeFormattingElements: |
| return |
| |
| # Step 2 and step 3: we start with the last element. So i is -1. |
| i = len(self.activeFormattingElements) - 1 |
| entry = self.activeFormattingElements[i] |
| if entry == Marker or entry in self.openElements: |
| return |
| |
| # Step 6 |
| while entry != Marker and entry not in self.openElements: |
| if i == 0: |
| # This will be reset to 0 below |
| i = -1 |
| break |
| i -= 1 |
| # Step 5: let entry be one earlier in the list. |
| entry = self.activeFormattingElements[i] |
| |
| while True: |
| # Step 7 |
| i += 1 |
| |
| # Step 8 |
| entry = self.activeFormattingElements[i] |
| clone = entry.cloneNode() # Mainly to get a new copy of the attributes |
| |
| # Step 9 |
| element = self.insertElement({"type": "StartTag", |
| "name": clone.name, |
| "namespace": clone.namespace, |
| "data": clone.attributes}) |
| |
| # Step 10 |
| self.activeFormattingElements[i] = element |
| |
| # Step 11 |
| if element == self.activeFormattingElements[-1]: |
| break |
| |
| def clearActiveFormattingElements(self): |
| entry = self.activeFormattingElements.pop() |
| while self.activeFormattingElements and entry != Marker: |
| entry = self.activeFormattingElements.pop() |
| |
| def elementInActiveFormattingElements(self, name): |
| """Check if an element exists between the end of the active |
| formatting elements and the last marker. If it does, return it, else |
| return false""" |
| |
| for item in self.activeFormattingElements[::-1]: |
| # Check for Marker first because if it's a Marker it doesn't have a |
| # name attribute. |
| if item == Marker: |
| break |
| elif item.name == name: |
| return item |
| return False |
| |
| def insertRoot(self, token): |
| element = self.createElement(token) |
| self.openElements.append(element) |
| self.document.appendChild(element) |
| |
| def insertDoctype(self, token): |
| name = token["name"] |
| publicId = token["publicId"] |
| systemId = token["systemId"] |
| |
| doctype = self.doctypeClass(name, publicId, systemId) |
| self.document.appendChild(doctype) |
| |
| def insertComment(self, token, parent=None): |
| if parent is None: |
| parent = self.openElements[-1] |
| parent.appendChild(self.commentClass(token["data"])) |
| |
| def createElement(self, token): |
| """Create an element but don't insert it anywhere""" |
| name = token["name"] |
| namespace = token.get("namespace", self.defaultNamespace) |
| element = self.elementClass(name, namespace) |
| element.attributes = token["data"] |
| return element |
| |
| def _getInsertFromTable(self): |
| return self._insertFromTable |
| |
| def _setInsertFromTable(self, value): |
| """Switch the function used to insert an element from the |
| normal one to the misnested table one and back again""" |
| self._insertFromTable = value |
| if value: |
| self.insertElement = self.insertElementTable |
| else: |
| self.insertElement = self.insertElementNormal |
| |
| insertFromTable = property(_getInsertFromTable, _setInsertFromTable) |
| |
| def insertElementNormal(self, token): |
| name = token["name"] |
| assert isinstance(name, text_type), "Element %s not unicode" % name |
| namespace = token.get("namespace", self.defaultNamespace) |
| element = self.elementClass(name, namespace) |
| element.attributes = token["data"] |
| self.openElements[-1].appendChild(element) |
| self.openElements.append(element) |
| return element |
| |
| def insertElementTable(self, token): |
| """Create an element and insert it into the tree""" |
| element = self.createElement(token) |
| if self.openElements[-1].name not in tableInsertModeElements: |
| return self.insertElementNormal(token) |
| else: |
| # We should be in the InTable mode. This means we want to do |
| # special magic element rearranging |
| parent, insertBefore = self.getTableMisnestedNodePosition() |
| if insertBefore is None: |
| parent.appendChild(element) |
| else: |
| parent.insertBefore(element, insertBefore) |
| self.openElements.append(element) |
| return element |
| |
| def insertText(self, data, parent=None): |
| """Insert text data.""" |
| if parent is None: |
| parent = self.openElements[-1] |
| |
| if (not self.insertFromTable or (self.insertFromTable and |
| self.openElements[-1].name |
| not in tableInsertModeElements)): |
| parent.insertText(data) |
| else: |
| # We should be in the InTable mode. This means we want to do |
| # special magic element rearranging |
| parent, insertBefore = self.getTableMisnestedNodePosition() |
| parent.insertText(data, insertBefore) |
| |
| def getTableMisnestedNodePosition(self): |
| """Get the foster parent element, and sibling to insert before |
| (or None) when inserting a misnested table node""" |
| # The foster parent element is the one which comes before the most |
| # recently opened table element |
| # XXX - this is really inelegant |
| lastTable = None |
| fosterParent = None |
| insertBefore = None |
| for elm in self.openElements[::-1]: |
| if elm.name == "table": |
| lastTable = elm |
| break |
| if lastTable: |
| # XXX - we should really check that this parent is actually a |
| # node here |
| if lastTable.parent: |
| fosterParent = lastTable.parent |
| insertBefore = lastTable |
| else: |
| fosterParent = self.openElements[ |
| self.openElements.index(lastTable) - 1] |
| else: |
| fosterParent = self.openElements[0] |
| return fosterParent, insertBefore |
| |
| def generateImpliedEndTags(self, exclude=None): |
| name = self.openElements[-1].name |
| # XXX td, th and tr are not actually needed |
| if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt")) and |
| name != exclude): |
| self.openElements.pop() |
| # XXX This is not entirely what the specification says. We should |
| # investigate it more closely. |
| self.generateImpliedEndTags(exclude) |
| |
| def getDocument(self): |
| """Return the final tree""" |
| return self.document |
| |
| def getFragment(self): |
| """Return the final fragment""" |
| # assert self.innerHTML |
| fragment = self.fragmentClass() |
| self.openElements[0].reparentChildren(fragment) |
| return fragment |
| |
| def testSerializer(self, node): |
| """Serialize the subtree of node in the format required by unit tests |
| |
| :arg node: the node from which to start serializing |
| |
| """ |
| raise NotImplementedError |