| from __future__ import absolute_import, division, unicode_literals |
| from pip._vendor.six import with_metaclass, viewkeys |
| |
| import types |
| from collections import OrderedDict |
| |
| from . import _inputstream |
| from . import _tokenizer |
| |
| from . import treebuilders |
| from .treebuilders.base import Marker |
| |
| from . import _utils |
| from .constants import ( |
| spaceCharacters, asciiUpper2Lower, |
| specialElements, headingElements, cdataElements, rcdataElements, |
| tokenTypes, tagTokenTypes, |
| namespaces, |
| htmlIntegrationPointElements, mathmlTextIntegrationPointElements, |
| adjustForeignAttributes as adjustForeignAttributesMap, |
| adjustMathMLAttributes, adjustSVGAttributes, |
| E, |
| _ReparseException |
| ) |
| |
| |
def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs):
    """Build a complete document tree from HTML markup

    :arg doc: the document to parse as a string or file-like object

    :arg treebuilder: name of the treebuilder, resolved through
        ``treebuilders.getTreeBuilder``

    :arg namespaceHTMLElements: whether or not to namespace HTML elements

    Any further keyword arguments are handed on to ``HTMLParser.parse``.

    :returns: parsed tree

    Example:

    >>> from html5lib.html5parser import parse
    >>> parse('<html><body><p>This is a doc</p></body></html>')
    <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>

    """
    builder_cls = treebuilders.getTreeBuilder(treebuilder)
    parser = HTMLParser(builder_cls, namespaceHTMLElements=namespaceHTMLElements)
    document = parser.parse(doc, **kwargs)
    return document
| |
| |
def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs):
    """Build a fragment tree from a piece of HTML markup

    :arg doc: the fragment to parse as a string or file-like object

    :arg container: the container context to parse the fragment in

    :arg treebuilder: name of the treebuilder, resolved through
        ``treebuilders.getTreeBuilder``

    :arg namespaceHTMLElements: whether or not to namespace HTML elements

    Any further keyword arguments are handed on to
    ``HTMLParser.parseFragment``.

    :returns: parsed tree

    Example:

    >>> from html5lib.html5parser import parseFragment
    >>> parseFragment('<b>this is a fragment</b>')
    <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>

    """
    builder_cls = treebuilders.getTreeBuilder(treebuilder)
    parser = HTMLParser(builder_cls, namespaceHTMLElements=namespaceHTMLElements)
    fragment = parser.parseFragment(doc, container=container, **kwargs)
    return fragment
| |
| |
def method_decorator_metaclass(function):
    """Create a metaclass that decorates every plain function of a class body

    :arg function: decorator applied to each ``types.FunctionType`` attribute
        found in the class namespace

    :returns: a ``type`` subclass suitable for use as a metaclass
    """
    class Decorated(type):
        def __new__(meta, classname, bases, classDict):
            # Work from a snapshot of the names; every entry is written back,
            # functions in their decorated form and everything else as-is.
            for attr_name in list(classDict):
                attr_value = classDict[attr_name]
                if isinstance(attr_value, types.FunctionType):
                    attr_value = function(attr_value)
                classDict[attr_name] = attr_value
            return type.__new__(meta, classname, bases, classDict)

    return Decorated
| |
| |
class HTMLParser(object):
    """HTML parser

    Generates a tree structure from a stream of (possibly malformed) HTML.

    """

    def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False):
        """
        :arg tree: a treebuilder class controlling the type of tree that will be
            returned. Built in treebuilders can be accessed through
            html5lib.treebuilders.getTreeBuilder(treeType)

        :arg strict: raise an exception when a parse error is encountered

        :arg namespaceHTMLElements: whether or not to namespace HTML elements

        :arg debug: whether or not to enable debug mode which logs things

        Example:

        >>> from html5lib import treebuilders
        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()                     # generates parser with etree builder
        >>> lxml_builder = treebuilders.getTreeBuilder('lxml')
        >>> parser = HTMLParser(lxml_builder, strict=True)  # lxml builder, strict parser

        """

        # Raise an exception on the first error encountered
        self.strict = strict

        if tree is None:
            tree = treebuilders.getTreeBuilder("etree")
        # ``tree`` is a class (or other callable): it is instantiated here.
        self.tree = tree(namespaceHTMLElements)
        self.errors = []

        # One phase object per insertion mode, all sharing this parser/tree.
        self.phases = {name: cls(self, self.tree)
                       for name, cls in getPhases(debug).items()}

    def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs):
        """Tokenize ``stream`` and run the tree construction loop.

        :arg stream: the input handed to ``_tokenizer.HTMLTokenizer``

        :arg innerHTML: whether a fragment (rather than a document) is parsed

        :arg container: context element name used in fragment mode

        :arg scripting: stored as ``self.scripting`` for the phases to consult

        Remaining keyword arguments go to the tokenizer constructor. If the
        main loop raises ``_ReparseException`` the parser state is reset and
        the loop is run once more.
        """
        self.innerHTMLMode = innerHTML
        self.container = container
        self.scripting = scripting
        self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs)
        self.reset()

        try:
            self.mainLoop()
        except _ReparseException:
            self.reset()
            self.mainLoop()

    def reset(self):
        """Return the parser (and its tree) to the state expected before parsing."""
        self.tree.reset()
        self.firstStartTag = False
        self.errors = []
        self.log = []  # only used with debug mode
        # "quirks" / "limited quirks" / "no quirks"
        self.compatMode = "no quirks"

        if self.innerHTMLMode:
            self.innerHTML = self.container.lower()

            # NOTE(review): the constant names read as swapped relative to the
            # tokenizer states selected here (cdataElements -> rcdataState,
            # rcdataElements -> rawtextState) -- confirm against the constants
            # module before "fixing" either side.
            if self.innerHTML in cdataElements:
                self.tokenizer.state = self.tokenizer.rcdataState
            elif self.innerHTML in rcdataElements:
                self.tokenizer.state = self.tokenizer.rawtextState
            elif self.innerHTML == 'plaintext':
                self.tokenizer.state = self.tokenizer.plaintextState
            else:
                # state already is data state
                # self.tokenizer.state = self.tokenizer.dataState
                pass
            self.phase = self.phases["beforeHtml"]
            self.phase.insertHtmlElement()
            self.resetInsertionMode()
        else:
            self.innerHTML = False  # pylint:disable=redefined-variable-type
            self.phase = self.phases["initial"]

        self.lastPhase = None

        self.beforeRCDataPhase = None

        self.framesetOK = True

    @property
    def documentEncoding(self):
        """Name of the character encoding that was used to decode the input stream, or
        :obj:`None` if that is not determined yet

        """
        if not hasattr(self, 'tokenizer'):
            return None
        return self.tokenizer.stream.charEncoding[0].name

    def isHTMLIntegrationPoint(self, element):
        """Tell whether ``element`` is an HTML integration point.

        A MathML ``annotation-xml`` element qualifies only when its
        ``encoding`` attribute, lower-cased, is ``text/html`` or
        ``application/xhtml+xml``; any other element is looked up in
        ``htmlIntegrationPointElements``.
        """
        if (element.name == "annotation-xml" and
                element.namespace == namespaces["mathml"]):
            return ("encoding" in element.attributes and
                    element.attributes["encoding"].translate(
                        asciiUpper2Lower) in
                    ("text/html", "application/xhtml+xml"))
        else:
            return (element.namespace, element.name) in htmlIntegrationPointElements

    def isMathMLTextIntegrationPoint(self, element):
        """Tell whether ``element`` is listed as a MathML text integration point."""
        return (element.namespace, element.name) in mathmlTextIntegrationPointElements

    def mainLoop(self):
        """Feed every token to the appropriate phase, then process EOF.

        A phase method may return a token, in which case that token is
        processed again (possibly by a different phase) until ``None`` is
        returned.
        """
        CharactersToken = tokenTypes["Characters"]
        SpaceCharactersToken = tokenTypes["SpaceCharacters"]
        StartTagToken = tokenTypes["StartTag"]
        EndTagToken = tokenTypes["EndTag"]
        CommentToken = tokenTypes["Comment"]
        DoctypeToken = tokenTypes["Doctype"]
        ParseErrorToken = tokenTypes["ParseError"]

        # Start tags that stay in foreign content even at a MathML text
        # integration point. Built once instead of once per token.
        mathmlTextExceptions = ("mglyph", "malignmark")

        for token in self.normalizedTokens():
            prev_token = None
            new_token = token
            while new_token is not None:
                prev_token = new_token
                currentNode = self.tree.openElements[-1] if self.tree.openElements else None
                currentNodeNamespace = currentNode.namespace if currentNode else None
                currentNodeName = currentNode.name if currentNode else None

                token_type = new_token["type"]

                if token_type == ParseErrorToken:
                    self.parseError(new_token["data"], new_token.get("datavars", {}))
                    new_token = None
                else:
                    # The name checks below look at ``new_token`` -- the token
                    # whose type was just read -- so that a reprocessed token
                    # is never judged by the name of the token that started
                    # this iteration.
                    if (len(self.tree.openElements) == 0 or
                        currentNodeNamespace == self.tree.defaultNamespace or
                        (self.isMathMLTextIntegrationPoint(currentNode) and
                         ((token_type == StartTagToken and
                           new_token["name"] not in mathmlTextExceptions) or
                          token_type in (CharactersToken, SpaceCharactersToken))) or
                        (currentNodeNamespace == namespaces["mathml"] and
                         currentNodeName == "annotation-xml" and
                         token_type == StartTagToken and
                         new_token["name"] == "svg") or
                        (self.isHTMLIntegrationPoint(currentNode) and
                         token_type in (StartTagToken, CharactersToken, SpaceCharactersToken))):
                        phase = self.phase
                    else:
                        phase = self.phases["inForeignContent"]

                    if token_type == CharactersToken:
                        new_token = phase.processCharacters(new_token)
                    elif token_type == SpaceCharactersToken:
                        new_token = phase.processSpaceCharacters(new_token)
                    elif token_type == StartTagToken:
                        new_token = phase.processStartTag(new_token)
                    elif token_type == EndTagToken:
                        new_token = phase.processEndTag(new_token)
                    elif token_type == CommentToken:
                        new_token = phase.processComment(new_token)
                    elif token_type == DoctypeToken:
                        new_token = phase.processDoctype(new_token)

            # ``token_type``/``prev_token`` describe the last token processed
            # in the loop above.
            if (token_type == StartTagToken and prev_token["selfClosing"] and
                    not prev_token["selfClosingAcknowledged"]):
                self.parseError("non-void-element-with-trailing-solidus",
                                {"name": prev_token["name"]})

        # When the loop finishes it's EOF
        reprocess = True
        phases = []
        while reprocess:
            phases.append(self.phase)
            reprocess = self.phase.processEOF()
            if reprocess:
                # A phase asking for reprocessing must have switched phase.
                assert self.phase not in phases

    def normalizedTokens(self):
        """Yield each tokenizer token after passing it through ``normalizeToken``."""
        for token in self.tokenizer:
            yield self.normalizeToken(token)

    def parse(self, stream, *args, **kwargs):
        """Parse a HTML document into a well-formed tree

        :arg stream: a file-like object or string containing the HTML to be parsed

            The optional encoding parameter must be a string that indicates
            the encoding.  If specified, that encoding will be used,
            regardless of any BOM or later declaration (such as in a meta
            element).

        :arg scripting: treat noscript elements as if JavaScript was turned on

        :returns: parsed tree

        Example:

        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()
        >>> parser.parse('<html><body><p>This is a doc</p></body></html>')
        <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>

        """
        self._parse(stream, False, None, *args, **kwargs)
        return self.tree.getDocument()

    def parseFragment(self, stream, *args, **kwargs):
        """Parse a HTML fragment into a well-formed tree fragment

        :arg container: name of the element we're setting the innerHTML
            property on; defaults to 'div'

        :arg stream: a file-like object or string containing the HTML to be parsed

            The optional encoding parameter must be a string that indicates
            the encoding.  If specified, that encoding will be used,
            regardless of any BOM or later declaration (such as in a meta
            element)

        :arg scripting: treat noscript elements as if JavaScript was turned on

        :returns: parsed tree

        Example:

        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()
        >>> parser.parseFragment('<b>this is a fragment</b>')
        <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>

        """
        self._parse(stream, True, *args, **kwargs)
        return self.tree.getFragment()

    def parseError(self, errorcode="XXX-undefined-error", datavars=None):
        """Record a parse error and, in strict mode, raise ``ParseError``.

        :arg errorcode: key into the ``E`` message table

        :arg datavars: mapping interpolated into the message; defaults to ``{}``
        """
        # XXX The idea is to make errorcode mandatory.
        if datavars is None:
            datavars = {}
        self.errors.append((self.tokenizer.stream.position(), errorcode, datavars))
        if self.strict:
            raise ParseError(E[errorcode] % datavars)

    def normalizeToken(self, token):
        """Turn a start tag's attribute list into an ``OrderedDict``.

        Tokens of any other type are returned untouched.
        """
        # HTML5 specific normalizations to the token stream
        if token["type"] == tokenTypes["StartTag"]:
            raw = token["data"]
            token["data"] = OrderedDict(raw)
            if len(raw) > len(token["data"]):
                # we had some duplicated attribute, fix so first wins
                token["data"].update(raw[::-1])

        return token

    def adjustMathMLAttributes(self, token):
        adjust_attributes(token, adjustMathMLAttributes)

    def adjustSVGAttributes(self, token):
        adjust_attributes(token, adjustSVGAttributes)

    def adjustForeignAttributes(self, token):
        adjust_attributes(token, adjustForeignAttributesMap)

    def reparseTokenNormal(self, token):
        # NOTE(review): nothing in this class assigns ``self.parser``, so this
        # call fails with AttributeError unless the attribute is set from
        # outside -- looks like a leftover; confirm before relying on it.
        # pylint:disable=unused-argument
        self.parser.phase()

    def resetInsertionMode(self):
        """Pick ``self.phase`` from the current stack of open elements.

        The stack is walked from the innermost element outwards; the first
        element whose name has an entry in the table below decides the phase.
        The outermost element is judged by the fragment context name
        (``self.innerHTML``) and falls back to ``inBody``.
        """
        # The name of this method is mostly historical. (It's also used in the
        # specification.)
        last = False
        newModes = {
            "select": "inSelect",
            "td": "inCell",
            "th": "inCell",
            "tr": "inRow",
            "tbody": "inTableBody",
            "thead": "inTableBody",
            "tfoot": "inTableBody",
            "caption": "inCaption",
            "colgroup": "inColumnGroup",
            "table": "inTable",
            "head": "inBody",
            "body": "inBody",
            "frameset": "inFrameset",
            "html": "beforeHead"
        }
        for node in self.tree.openElements[::-1]:
            nodeName = node.name
            new_phase = None
            if node == self.tree.openElements[0]:
                assert self.innerHTML
                last = True
                nodeName = self.innerHTML
            # Check for conditions that should only happen in the innerHTML
            # case
            if nodeName in ("select", "colgroup", "head", "html"):
                assert self.innerHTML

            # Foreign (non-default-namespace) elements never pick the phase.
            if not last and node.namespace != self.tree.defaultNamespace:
                continue

            if nodeName in newModes:
                new_phase = self.phases[newModes[nodeName]]
                break
            elif last:
                new_phase = self.phases["inBody"]
                break

        self.phase = new_phase

    def parseRCDataRawtext(self, token, contentType):
        """Insert ``token``'s element and switch to the ``text`` phase.

        :arg token: the start tag token to insert

        :arg contentType: ``"RAWTEXT"`` or ``"RCDATA"``; selects the tokenizer
            state used for the element's content
        """
        # Generic RCDATA/RAWTEXT Parsing algorithm
        assert contentType in ("RAWTEXT", "RCDATA")

        self.tree.insertElement(token)

        if contentType == "RAWTEXT":
            self.tokenizer.state = self.tokenizer.rawtextState
        else:
            self.tokenizer.state = self.tokenizer.rcdataState

        # Remembered so the phase active before the text can be restored.
        self.originalPhase = self.phase

        self.phase = self.phases["text"]
| |
| |
| @_utils.memoize |
| def getPhases(debug): |
def log(function):
    """Logger that records which phase processes each token"""
    # Reverse lookup: numeric token type -> its name in ``tokenTypes``.
    type_names = {value: key for key, value in tokenTypes.items()}

    def wrapped(self, *args, **kwargs):
        # Only ``process*`` methods called with a token get a log entry.
        if function.__name__.startswith("process") and args:
            token = args[0]
            info = {"type": type_names[token['type']]}
            if token['type'] in tagTokenTypes:
                info["name"] = token['name']

            entry = (self.parser.tokenizer.state.__name__,
                     self.parser.phase.__class__.__name__,
                     self.__class__.__name__,
                     function.__name__,
                     info)
            self.parser.log.append(entry)
        return function(self, *args, **kwargs)

    return wrapped
| |
def getMetaclass(use_metaclass, metaclass_func):
    """Choose the metaclass: a decorating one when requested, else ``type``

    :arg use_metaclass: when truthy, build a metaclass with
        ``method_decorator_metaclass``

    :arg metaclass_func: decorator handed to ``method_decorator_metaclass``
    """
    if not use_metaclass:
        return type
    return method_decorator_metaclass(metaclass_func)
| |
| # pylint:disable=unused-argument |
class Phase(with_metaclass(getMetaclass(debug, log))):
    """Shared behaviour for the helper objects implementing each parser phase

    Subclasses are expected to provide ``startTagHandler`` and
    ``endTagHandler`` (used by the tag dispatch methods below) and to
    override ``processEOF``.
    """

    def __init__(self, parser, tree):
        self.tree = tree
        self.parser = parser

    def processEOF(self):
        """Handle end of input; every concrete phase must override this."""
        raise NotImplementedError

    def processComment(self, token):
        """Attach the comment to the current (innermost open) element."""
        # For most phases the following is correct. Where it's not it will be
        # overridden.
        current = self.tree.openElements[-1]
        self.tree.insertComment(token, current)

    def processDoctype(self, token):
        """Report a doctype that shows up where none is expected."""
        self.parser.parseError("unexpected-doctype")

    def processCharacters(self, token):
        """Insert the token's text into the tree."""
        text = token["data"]
        self.tree.insertText(text)

    def processSpaceCharacters(self, token):
        """Insert the token's whitespace into the tree."""
        whitespace = token["data"]
        self.tree.insertText(whitespace)

    def processStartTag(self, token):
        """Dispatch a start tag through ``startTagHandler`` by tag name."""
        handler = self.startTagHandler[token["name"]]
        return handler(token)

    def startTagHtml(self, token):
        """Merge the attributes of a stray ``<html>`` tag into the root element.

        Attributes already present on the root are left alone.
        """
        if not self.parser.firstStartTag and token["name"] == "html":
            self.parser.parseError("non-html-root")
        # XXX Need a check here to see if the first start tag token emitted is
        # this token... If it's not, invoke self.parser.parseError().
        for attr, value in token["data"].items():
            if attr in self.tree.openElements[0].attributes:
                continue
            self.tree.openElements[0].attributes[attr] = value
        self.parser.firstStartTag = False

    def processEndTag(self, token):
        """Dispatch an end tag through ``endTagHandler`` by tag name."""
        handler = self.endTagHandler[token["name"]]
        return handler(token)
| |
class InitialPhase(Phase):
    """Phase in effect until a doctype (or anything else) has been seen"""

    def processSpaceCharacters(self, token):
        # Whitespace is ignored in this phase.
        pass

    def processComment(self, token):
        """Attach the comment directly to the document."""
        self.tree.insertComment(token, self.tree.document)

    def processDoctype(self, token):
        """Insert the doctype and derive the parser's compatibility mode.

        Sets ``parser.compatMode`` to ``"quirks"`` or ``"limited quirks"``
        when the doctype matches one of the tables below, then moves on to
        the ``beforeHtml`` phase.
        """
        # Lower-cased public identifier prefixes that select quirks mode.
        quirksPrefixes = (
            "+//silmaril//dtd html pro v0r11 19970101//",
            "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
            "-//as//dtd html 3.0 aswedit + extensions//",
            "-//ietf//dtd html 2.0 level 1//",
            "-//ietf//dtd html 2.0 level 2//",
            "-//ietf//dtd html 2.0 strict level 1//",
            "-//ietf//dtd html 2.0 strict level 2//",
            "-//ietf//dtd html 2.0 strict//",
            "-//ietf//dtd html 2.0//",
            "-//ietf//dtd html 2.1e//",
            "-//ietf//dtd html 3.0//",
            "-//ietf//dtd html 3.2 final//",
            "-//ietf//dtd html 3.2//",
            "-//ietf//dtd html 3//",
            "-//ietf//dtd html level 0//",
            "-//ietf//dtd html level 1//",
            "-//ietf//dtd html level 2//",
            "-//ietf//dtd html level 3//",
            "-//ietf//dtd html strict level 0//",
            "-//ietf//dtd html strict level 1//",
            "-//ietf//dtd html strict level 2//",
            "-//ietf//dtd html strict level 3//",
            "-//ietf//dtd html strict//",
            "-//ietf//dtd html//",
            "-//metrius//dtd metrius presentational//",
            "-//microsoft//dtd internet explorer 2.0 html strict//",
            "-//microsoft//dtd internet explorer 2.0 html//",
            "-//microsoft//dtd internet explorer 2.0 tables//",
            "-//microsoft//dtd internet explorer 3.0 html strict//",
            "-//microsoft//dtd internet explorer 3.0 html//",
            "-//microsoft//dtd internet explorer 3.0 tables//",
            "-//netscape comm. corp.//dtd html//",
            "-//netscape comm. corp.//dtd strict html//",
            "-//o'reilly and associates//dtd html 2.0//",
            "-//o'reilly and associates//dtd html extended 1.0//",
            "-//o'reilly and associates//dtd html extended relaxed 1.0//",
            "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
            "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
            "-//spyglass//dtd html 2.0 extended//",
            "-//sq//dtd html 2.0 hotmetal + extensions//",
            "-//sun microsystems corp.//dtd hotjava html//",
            "-//sun microsystems corp.//dtd hotjava strict html//",
            "-//w3c//dtd html 3 1995-03-24//",
            "-//w3c//dtd html 3.2 draft//",
            "-//w3c//dtd html 3.2 final//",
            "-//w3c//dtd html 3.2//",
            "-//w3c//dtd html 3.2s draft//",
            "-//w3c//dtd html 4.0 frameset//",
            "-//w3c//dtd html 4.0 transitional//",
            "-//w3c//dtd html experimental 19960712//",
            "-//w3c//dtd html experimental 970421//",
            "-//w3c//dtd w3 html//",
            "-//w3o//dtd w3 html 3.0//",
            "-//webtechs//dtd mozilla html 2.0//",
            "-//webtechs//dtd mozilla html//",
        )
        # Public identifiers that select quirks mode only on an exact match.
        quirksExact = (
            "-//w3o//dtd w3 html strict 3.0//en//",
            "-/w3c/dtd html 4.0 transitional/en",
            "html",
        )
        # Quirks without a system identifier, limited quirks with one.
        html401Prefixes = (
            "-//w3c//dtd html 4.01 frameset//",
            "-//w3c//dtd html 4.01 transitional//",
        )
        xhtml10Prefixes = (
            "-//w3c//dtd xhtml 1.0 frameset//",
            "-//w3c//dtd xhtml 1.0 transitional//",
        )
        ibmSystemId = "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"

        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]
        correct = token["correct"]

        if (name != "html" or publicId is not None or
                (systemId is not None and systemId != "about:legacy-compat")):
            self.parser.parseError("unknown-doctype")

        if publicId is None:
            publicId = ""

        self.tree.insertDoctype(token)

        if publicId != "":
            publicId = publicId.translate(asciiUpper2Lower)

        if (not correct or token["name"] != "html" or
                publicId.startswith(quirksPrefixes) or
                publicId in quirksExact or
                (publicId.startswith(html401Prefixes) and systemId is None) or
                (systemId and systemId.lower() == ibmSystemId)):
            self.parser.compatMode = "quirks"
        elif (publicId.startswith(xhtml10Prefixes) or
              (publicId.startswith(html401Prefixes) and systemId is not None)):
            self.parser.compatMode = "limited quirks"

        self.parser.phase = self.parser.phases["beforeHtml"]

    def anythingElse(self):
        """Assume quirks mode and move on to the ``beforeHtml`` phase."""
        self.parser.compatMode = "quirks"
        self.parser.phase = self.parser.phases["beforeHtml"]

    def processCharacters(self, token):
        """Report the missing doctype; the token is returned for reprocessing."""
        self.parser.parseError("expected-doctype-but-got-chars")
        self.anythingElse()
        return token

    def processStartTag(self, token):
        """Report the missing doctype; the token is returned for reprocessing."""
        details = {"name": token["name"]}
        self.parser.parseError("expected-doctype-but-got-start-tag", details)
        self.anythingElse()
        return token

    def processEndTag(self, token):
        """Report the missing doctype; the token is returned for reprocessing."""
        details = {"name": token["name"]}
        self.parser.parseError("expected-doctype-but-got-end-tag", details)
        self.anythingElse()
        return token

    def processEOF(self):
        """Report the missing doctype and ask for EOF to be reprocessed."""
        self.parser.parseError("expected-doctype-but-got-eof")
        self.anythingElse()
        return True
| |
class BeforeHtmlPhase(Phase):
    """Phase that makes sure a root ``html`` element exists"""

    # helper methods
    def insertHtmlElement(self):
        """Insert an implied ``<html>`` root and switch to ``beforeHead``."""
        root_token = impliedTagToken("html", "StartTag")
        self.tree.insertRoot(root_token)
        self.parser.phase = self.parser.phases["beforeHead"]

    # other
    def processEOF(self):
        """Create the root, then ask for EOF to be reprocessed."""
        self.insertHtmlElement()
        return True

    def processComment(self, token):
        """Attach the comment directly to the document."""
        self.tree.insertComment(token, self.tree.document)

    def processSpaceCharacters(self, token):
        # Whitespace is ignored in this phase.
        pass

    def processCharacters(self, token):
        """Create the root; the token is returned for reprocessing."""
        self.insertHtmlElement()
        return token

    def processStartTag(self, token):
        """Create the root; the token is returned for reprocessing.

        An explicit ``<html>`` tag is remembered via ``parser.firstStartTag``.
        """
        if token["name"] == "html":
            self.parser.firstStartTag = True
        self.insertHtmlElement()
        return token

    def processEndTag(self, token):
        """Imply the root for ``head``/``body``/``html``/``br`` end tags.

        Any other end tag is reported as a parse error and dropped.
        """
        if token["name"] in ("head", "body", "html", "br"):
            self.insertHtmlElement()
            return token
        self.parser.parseError("unexpected-end-tag-before-html",
                               {"name": token["name"]})
        return None
| |
class BeforeHeadPhase(Phase):
    """Phase that makes sure a ``head`` element exists"""

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        start_handlers = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("head", self.startTagHead)
        ])
        start_handlers.default = self.startTagOther
        self.startTagHandler = start_handlers

        end_handlers = _utils.MethodDispatcher([
            (("head", "body", "html", "br"), self.endTagImplyHead)
        ])
        end_handlers.default = self.endTagOther
        self.endTagHandler = end_handlers

    def _implyHead(self):
        # Shared by every path that needs a <head> nobody wrote.
        self.startTagHead(impliedTagToken("head", "StartTag"))

    def processEOF(self):
        """Imply ``<head>``, then ask for EOF to be reprocessed."""
        self._implyHead()
        return True

    def processSpaceCharacters(self, token):
        # Whitespace is ignored in this phase.
        pass

    def processCharacters(self, token):
        """Imply ``<head>``; the token is returned for reprocessing."""
        self._implyHead()
        return token

    def startTagHtml(self, token):
        """Defer to the ``inBody`` phase."""
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagHead(self, token):
        """Insert the head element, record it as head pointer, enter ``inHead``."""
        self.tree.insertElement(token)
        self.tree.headPointer = self.tree.openElements[-1]
        self.parser.phase = self.parser.phases["inHead"]

    def startTagOther(self, token):
        """Imply ``<head>``; the token is returned for reprocessing."""
        self._implyHead()
        return token

    def endTagImplyHead(self, token):
        """Imply ``<head>``; the token is returned for reprocessing."""
        self._implyHead()
        return token

    def endTagOther(self, token):
        """Report and drop any other end tag."""
        details = {"name": token["name"]}
        self.parser.parseError("end-tag-after-implied-root", details)
| |
class InHeadPhase(Phase):
    """Phase handling the content of the ``head`` element"""

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        start_handlers = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("title", self.startTagTitle),
            (("noframes", "style"), self.startTagNoFramesStyle),
            ("noscript", self.startTagNoscript),
            ("script", self.startTagScript),
            (("base", "basefont", "bgsound", "command", "link"),
             self.startTagBaseLinkCommand),
            ("meta", self.startTagMeta),
            ("head", self.startTagHead)
        ])
        start_handlers.default = self.startTagOther
        self.startTagHandler = start_handlers

        end_handlers = _utils.MethodDispatcher([
            ("head", self.endTagHead),
            (("br", "html", "body"), self.endTagHtmlBodyBr)
        ])
        end_handlers.default = self.endTagOther
        self.endTagHandler = end_handlers

    # the real thing
    def processEOF(self):
        """Close the head, then ask for EOF to be reprocessed."""
        self.anythingElse()
        return True

    def processCharacters(self, token):
        """Close the head; the token is returned for reprocessing."""
        self.anythingElse()
        return token

    def startTagHtml(self, token):
        """Defer to the ``inBody`` phase."""
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagHead(self, token):
        """Report a second ``<head>`` and ignore it."""
        self.parser.parseError("two-heads-are-not-better-than-one")

    def startTagBaseLinkCommand(self, token):
        """Insert the element and pop it again right away."""
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagMeta(self, token):
        """Insert a ``meta`` element and honour an encoding declaration.

        The stream encoding is only changed while it is still marked
        ``"tentative"``.
        """
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

        attributes = token["data"]
        stream = self.parser.tokenizer.stream
        if stream.charEncoding[1] != "tentative":
            return

        if "charset" in attributes:
            stream.changeEncoding(attributes["charset"])
        elif ("content" in attributes and
              "http-equiv" in attributes and
              attributes["http-equiv"].lower() == "content-type"):
            # Encoding it as UTF-8 here is a hack, as really we should pass
            # the abstract Unicode string, and just use the
            # ContentAttrParser on that, but using UTF-8 allows all chars
            # to be encoded and as a ASCII-superset works.
            content_bytes = attributes["content"].encode("utf-8")
            data = _inputstream.EncodingBytes(content_bytes)
            codec = _inputstream.ContentAttrParser(data).parse()
            stream.changeEncoding(codec)

    def startTagTitle(self, token):
        """Parse the element's content as RCDATA."""
        self.parser.parseRCDataRawtext(token, "RCDATA")

    def startTagNoFramesStyle(self, token):
        """Parse the element's content as RAWTEXT."""
        # Need to decide whether to implement the scripting-disabled case
        self.parser.parseRCDataRawtext(token, "RAWTEXT")

    def startTagNoscript(self, token):
        """Treat ``noscript`` as RAWTEXT with scripting on, as markup otherwise."""
        if not self.parser.scripting:
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases["inHeadNoscript"]
        else:
            self.parser.parseRCDataRawtext(token, "RAWTEXT")

    def startTagScript(self, token):
        """Insert ``script`` and switch tokenizer and parser to script text."""
        self.tree.insertElement(token)
        tokenizer = self.parser.tokenizer
        tokenizer.state = tokenizer.scriptDataState
        self.parser.originalPhase = self.parser.phase
        self.parser.phase = self.parser.phases["text"]

    def startTagOther(self, token):
        """Close the head; the token is returned for reprocessing."""
        self.anythingElse()
        return token

    def endTagHead(self, token):
        """Pop the head element and continue in ``afterHead``."""
        popped = self.parser.tree.openElements.pop()
        assert popped.name == "head", "Expected head got %s" % popped.name
        self.parser.phase = self.parser.phases["afterHead"]

    def endTagHtmlBodyBr(self, token):
        """Close the head; the token is returned for reprocessing."""
        self.anythingElse()
        return token

    def endTagOther(self, token):
        """Report and drop any other end tag."""
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def anythingElse(self):
        """Act as if ``</head>`` had been seen."""
        self.endTagHead(impliedTagToken("head"))
| |
class InHeadNoscriptPhase(Phase):
    """Phase handling a ``noscript`` element inside ``head``"""

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        start_handlers = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            (("basefont", "bgsound", "link", "meta", "noframes", "style"), self.startTagBaseLinkCommand),
            (("head", "noscript"), self.startTagHeadNoscript),
        ])
        start_handlers.default = self.startTagOther
        self.startTagHandler = start_handlers

        end_handlers = _utils.MethodDispatcher([
            ("noscript", self.endTagNoscript),
            ("br", self.endTagBr),
        ])
        end_handlers.default = self.endTagOther
        self.endTagHandler = end_handlers

    def processEOF(self):
        """Report the error, close ``noscript`` and ask for EOF reprocessing."""
        self.parser.parseError("eof-in-head-noscript")
        self.anythingElse()
        return True

    def processComment(self, token):
        """Defer to the ``inHead`` phase."""
        in_head = self.parser.phases["inHead"]
        return in_head.processComment(token)

    def processCharacters(self, token):
        """Report the error and close ``noscript``; the token is reprocessed."""
        self.parser.parseError("char-in-head-noscript")
        self.anythingElse()
        return token

    def processSpaceCharacters(self, token):
        """Defer to the ``inHead`` phase."""
        in_head = self.parser.phases["inHead"]
        return in_head.processSpaceCharacters(token)

    def startTagHtml(self, token):
        """Defer to the ``inBody`` phase."""
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagBaseLinkCommand(self, token):
        """Defer to the ``inHead`` phase."""
        in_head = self.parser.phases["inHead"]
        return in_head.processStartTag(token)

    def startTagHeadNoscript(self, token):
        """Report and ignore a nested ``head`` or ``noscript`` start tag."""
        self.parser.parseError("unexpected-start-tag", {"name": token["name"]})

    def startTagOther(self, token):
        """Report the error and close ``noscript``; the token is reprocessed."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-inhead-noscript-tag", details)
        self.anythingElse()
        return token

    def endTagNoscript(self, token):
        """Pop the ``noscript`` element and go back to ``inHead``."""
        popped = self.parser.tree.openElements.pop()
        assert popped.name == "noscript", "Expected noscript got %s" % popped.name
        self.parser.phase = self.parser.phases["inHead"]

    def endTagBr(self, token):
        """Report the error and close ``noscript``; the token is reprocessed."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-inhead-noscript-tag", details)
        self.anythingElse()
        return token

    def endTagOther(self, token):
        """Report and drop any other end tag."""
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def anythingElse(self):
        """Act as if ``</noscript>`` had been seen."""
        # Caller must raise parse error first!
        self.endTagNoscript(impliedTagToken("noscript"))
| |
class AfterHeadPhase(Phase):
    """Phase between the end of ``head`` and the start of ``body``/``frameset``"""

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        start_handlers = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("body", self.startTagBody),
            ("frameset", self.startTagFrameset),
            (("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
              "style", "title"),
             self.startTagFromHead),
            ("head", self.startTagHead)
        ])
        start_handlers.default = self.startTagOther
        self.startTagHandler = start_handlers

        end_handlers = _utils.MethodDispatcher([
            (("body", "html", "br"), self.endTagHtmlBodyBr)
        ])
        end_handlers.default = self.endTagOther
        self.endTagHandler = end_handlers

    def processEOF(self):
        """Imply ``<body>``, then ask for EOF to be reprocessed."""
        self.anythingElse()
        return True

    def processCharacters(self, token):
        """Imply ``<body>``; the token is returned for reprocessing."""
        self.anythingElse()
        return token

    def startTagHtml(self, token):
        """Defer to the ``inBody`` phase."""
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagBody(self, token):
        """Insert ``body``, clear ``framesetOK`` and enter ``inBody``."""
        self.parser.framesetOK = False
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inBody"]

    def startTagFrameset(self, token):
        """Insert ``frameset`` and enter ``inFrameset``."""
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inFrameset"]

    def startTagFromHead(self, token):
        """Process a head-only element that turned up after ``</head>``.

        The head element is pushed back on the stack, the token is handled
        by the ``inHead`` phase, and a ``head`` entry is then taken off the
        stack again.
        """
        self.parser.parseError("unexpected-start-tag-out-of-my-head",
                               {"name": token["name"]})
        self.tree.openElements.append(self.tree.headPointer)
        self.parser.phases["inHead"].processStartTag(token)
        # Search from the innermost element outwards for a "head" node.
        head_node = next((candidate
                          for candidate in self.tree.openElements[::-1]
                          if candidate.name == "head"), None)
        if head_node is not None:
            self.tree.openElements.remove(head_node)

    def startTagHead(self, token):
        """Report and ignore a ``<head>`` start tag."""
        self.parser.parseError("unexpected-start-tag", {"name": token["name"]})

    def startTagOther(self, token):
        """Imply ``<body>``; the token is returned for reprocessing."""
        self.anythingElse()
        return token

    def endTagHtmlBodyBr(self, token):
        """Imply ``<body>``; the token is returned for reprocessing."""
        self.anythingElse()
        return token

    def endTagOther(self, token):
        """Report and drop any other end tag."""
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def anythingElse(self):
        """Insert an implied ``<body>``, enter ``inBody``, set ``framesetOK``."""
        body_token = impliedTagToken("body", "StartTag")
        self.tree.insertElement(body_token)
        self.parser.phase = self.parser.phases["inBody"]
        self.parser.framesetOK = True
| |
| class InBodyPhase(Phase): |
| # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody |
| # the really-really-really-very crazy mode |
def __init__(self, parser, tree):
    """Set up the start-tag and end-tag dispatch tables for this phase.

    Tag names without an entry fall through to ``startTagOther`` and
    ``endTagOther`` respectively.
    """
    Phase.__init__(self, parser, tree)

    # Set this to the default handler
    self.processSpaceCharacters = self.processSpaceCharactersNonPre

    start_table = [
        ("html", self.startTagHtml),
        (("base", "basefont", "bgsound", "command", "link", "meta",
          "script", "style", "title"),
         self.startTagProcessInHead),
        ("body", self.startTagBody),
        ("frameset", self.startTagFrameset),
        (("address", "article", "aside", "blockquote", "center", "details",
          "dir", "div", "dl", "fieldset", "figcaption", "figure",
          "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
          "section", "summary", "ul"),
         self.startTagCloseP),
        (headingElements, self.startTagHeading),
        (("pre", "listing"), self.startTagPreListing),
        ("form", self.startTagForm),
        (("li", "dd", "dt"), self.startTagListItem),
        ("plaintext", self.startTagPlaintext),
        ("a", self.startTagA),
        (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
          "strong", "tt", "u"), self.startTagFormatting),
        ("nobr", self.startTagNobr),
        ("button", self.startTagButton),
        (("applet", "marquee", "object"), self.startTagAppletMarqueeObject),
        ("xmp", self.startTagXmp),
        ("table", self.startTagTable),
        (("area", "br", "embed", "img", "keygen", "wbr"),
         self.startTagVoidFormatting),
        (("param", "source", "track"), self.startTagParamSource),
        ("input", self.startTagInput),
        ("hr", self.startTagHr),
        ("image", self.startTagImage),
        ("isindex", self.startTagIsIndex),
        ("textarea", self.startTagTextarea),
        ("iframe", self.startTagIFrame),
        ("noscript", self.startTagNoscript),
        (("noembed", "noframes"), self.startTagRawtext),
        ("select", self.startTagSelect),
        (("rp", "rt"), self.startTagRpRt),
        (("option", "optgroup"), self.startTagOpt),
        ("math", self.startTagMath),
        ("svg", self.startTagSvg),
        (("caption", "col", "colgroup", "frame", "head",
          "tbody", "td", "tfoot", "th", "thead",
          "tr"), self.startTagMisplaced)
    ]
    start_handlers = _utils.MethodDispatcher(start_table)
    start_handlers.default = self.startTagOther
    self.startTagHandler = start_handlers

    end_table = [
        ("body", self.endTagBody),
        ("html", self.endTagHtml),
        (("address", "article", "aside", "blockquote", "button", "center",
          "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
          "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
          "section", "summary", "ul"), self.endTagBlock),
        ("form", self.endTagForm),
        ("p", self.endTagP),
        (("dd", "dt", "li"), self.endTagListItem),
        (headingElements, self.endTagHeading),
        (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
          "strike", "strong", "tt", "u"), self.endTagFormatting),
        (("applet", "marquee", "object"), self.endTagAppletMarqueeObject),
        ("br", self.endTagBr),
    ]
    end_handlers = _utils.MethodDispatcher(end_table)
    end_handlers.default = self.endTagOther
    self.endTagHandler = end_handlers
| |
def isMatchingFormattingElement(self, node1, node2):
    """Tell whether two nodes have equal name, namespace and attributes.

    :arg node1: first node to compare

    :arg node2: second node to compare

    :returns: True only when all three properties compare equal
    """
    if not (node1.name == node2.name):
        return False
    if not (node1.namespace == node2.namespace):
        return False
    return node1.attributes == node2.attributes
| |
| # helper |
def addFormattingElement(self, token):
    """Insert ``token`` and record it as an active formatting element.

    Entries after the last Marker that match the new element (see
    isMatchingFormattingElement) are collected while scanning the list
    backwards.  If three are found, the one found last in that scan is
    removed before the new element is appended.

    :arg token: the start tag token of the formatting element
    """
    tree = self.tree
    tree.insertElement(token)
    newElement = tree.openElements[-1]

    duplicates = []
    for candidate in reversed(tree.activeFormattingElements):
        if candidate is Marker:
            break
        if self.isMatchingFormattingElement(candidate, newElement):
            duplicates.append(candidate)

    assert len(duplicates) <= 3
    if len(duplicates) == 3:
        tree.activeFormattingElements.remove(duplicates[-1])
    tree.activeFormattingElements.append(newElement)
| |
| # the real deal |
def processEOF(self):
    """Handle end of input while in the body.

    Reports a single "expected-closing-tag-but-got-eof" parse error if
    any open element is not one of those allowed to remain open.
    """
    mayStayOpen = frozenset(("dd", "dt", "li", "p", "tbody", "td",
                             "tfoot", "th", "thead", "tr", "body",
                             "html"))
    openNodes = reversed(self.tree.openElements)
    if any(node.name not in mayStayOpen for node in openNodes):
        self.parser.parseError("expected-closing-tag-but-got-eof")
    # Stop parsing
| |
def processSpaceCharactersDropNewline(self, token):
    """Insert whitespace, dropping one leading newline where required.

    A leading newline is removed when the current node is a ``pre``,
    ``listing`` or ``textarea`` element that has no content yet.  The
    handler is one-shot: it reinstalls processSpaceCharactersNonPre
    before doing anything else.

    :arg token: the space characters token
    """
    text = token["data"]
    self.processSpaceCharacters = self.processSpaceCharactersNonPre
    if text.startswith("\n"):
        current = self.tree.openElements[-1]
        if (current.name in ("pre", "listing", "textarea") and
                not current.hasContent()):
            text = text[1:]
    if not text:
        return
    self.tree.reconstructActiveFormattingElements()
    self.tree.insertText(text)
| |
def processCharacters(self, token):
    """Insert character data into the body.

    A token consisting of a lone U+0000 is dropped.  Otherwise the active
    formatting elements are reconstructed, the text is inserted, and
    ``parser.framesetOK`` is cleared if the text contains any character
    that is not in ``spaceCharacters``.

    :arg token: the characters token
    """
    data = token["data"]
    if data == "\u0000":
        # The tokenizer should always emit null on its own
        return
    self.tree.reconstructActiveFormattingElements()
    self.tree.insertText(data)
    # A generator lets any() stop at the first non-space character
    # instead of first building a list covering the whole token.
    if (self.parser.framesetOK and
            any(char not in spaceCharacters for char in data)):
        self.parser.framesetOK = False
| |
def processSpaceCharactersNonPre(self, token):
    """Insert whitespace after reconstructing the active formatting elements.

    :arg token: the space characters token
    """
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    tree.insertText(token["data"])
| |
def startTagProcessInHead(self, token):
    """Hand the start tag to the "inHead" phase and return its result.

    :arg token: the start tag token
    """
    inHeadPhase = self.parser.phases["inHead"]
    return inHeadPhase.processStartTag(token)
| |
def startTagBody(self, token):
    """Handle a ``body`` start tag seen while already in the body.

    Always a parse error.  When the second open element is a ``body``,
    ``parser.framesetOK`` is cleared and any attribute of the token that
    the existing element lacks is copied onto it; existing attributes
    are not overwritten.

    :arg token: the start tag token
    """
    self.parser.parseError("unexpected-start-tag", {"name": "body"})
    openElements = self.tree.openElements
    if len(openElements) == 1 or openElements[1].name != "body":
        # No body element on the stack; the original asserts this only
        # happens when parser.innerHTML is set.
        assert self.parser.innerHTML
        return
    self.parser.framesetOK = False
    bodyElement = openElements[1]
    for attrName, attrValue in token["data"].items():
        if attrName not in bodyElement.attributes:
            bodyElement.attributes[attrName] = attrValue
| |
def startTagFrameset(self, token):
    """Handle a ``frameset`` start tag seen in the body.

    Always a parse error.  If ``parser.framesetOK`` is still set and the
    second open element is a ``body``, that element is detached from its
    parent, the stack is popped back to ``html``, the frameset is
    inserted and the parser switches to the "inFrameset" phase.
    Otherwise the token is ignored.

    :arg token: the start tag token
    """
    self.parser.parseError("unexpected-start-tag", {"name": "frameset"})
    openElements = self.tree.openElements
    if len(openElements) == 1 or openElements[1].name != "body":
        assert self.parser.innerHTML
        return
    if not self.parser.framesetOK:
        return

    bodyElement = openElements[1]
    if bodyElement.parent:
        bodyElement.parent.removeChild(bodyElement)
    while openElements[-1].name != "html":
        openElements.pop()
    self.tree.insertElement(token)
    self.parser.phase = self.parser.phases["inFrameset"]
| |
def startTagCloseP(self, token):
    """Close a ``p`` that is in button scope, then insert the element.

    :arg token: the start tag token
    """
    pInScope = self.tree.elementInScope("p", variant="button")
    if pInScope:
        self.endTagP(impliedTagToken("p"))
    self.tree.insertElement(token)
| |
def startTagPreListing(self, token):
    """Insert a ``pre``/``listing`` element and arm newline dropping.

    A ``p`` in button scope is closed first.  ``parser.framesetOK`` is
    cleared and the whitespace handler is switched to
    processSpaceCharactersDropNewline.

    :arg token: the start tag token
    """
    tree = self.tree
    if tree.elementInScope("p", variant="button"):
        self.endTagP(impliedTagToken("p"))
    tree.insertElement(token)
    self.parser.framesetOK = False
    self.processSpaceCharacters = self.processSpaceCharactersDropNewline
| |
def startTagForm(self, token):
    """Insert a ``form`` element unless a form pointer is already set.

    With a form pointer set the tag is a parse error and is ignored.
    Otherwise a ``p`` in button scope is closed, the element is inserted
    and it becomes the tree's form pointer.

    :arg token: the start tag token
    """
    tree = self.tree
    if tree.formPointer:
        self.parser.parseError("unexpected-start-tag", {"name": "form"})
        return
    if tree.elementInScope("p", variant="button"):
        self.endTagP(impliedTagToken("p"))
    tree.insertElement(token)
    tree.formPointer = tree.openElements[-1]
| |
def startTagListItem(self, token):
    """Insert an ``li``, ``dd`` or ``dt``, closing an open sibling first.

    The stack of open elements is walked from the current node upwards.
    The first element whose name is in the stop list for this tag is
    closed through the current phase's end tag handling.  The walk also
    stops at any special element other than ``address``, ``div`` and
    ``p``.  A ``p`` in button scope is then closed before inserting.

    :arg token: the start tag token
    """
    self.parser.framesetOK = False

    closesWhich = {"li": ("li",),
                   "dt": ("dt", "dd"),
                   "dd": ("dt", "dd")}
    stopNames = closesWhich[token["name"]]

    # Iterate over a copy: closing an element mutates the stack.
    for node in self.tree.openElements[::-1]:
        if node.name in stopNames:
            endToken = impliedTagToken(node.name, "EndTag")
            self.parser.phase.processEndTag(endToken)
            break
        isSpecial = node.nameTuple in specialElements
        if isSpecial and node.name not in ("address", "div", "p"):
            break

    if self.tree.elementInScope("p", variant="button"):
        self.parser.phase.processEndTag(impliedTagToken("p", "EndTag"))

    self.tree.insertElement(token)
| |
def startTagPlaintext(self, token):
    """Insert a ``plaintext`` element and put the tokenizer in plaintext state.

    A ``p`` in button scope is closed first.

    :arg token: the start tag token
    """
    tree = self.tree
    if tree.elementInScope("p", variant="button"):
        self.endTagP(impliedTagToken("p"))
    tree.insertElement(token)
    tokenizer = self.parser.tokenizer
    tokenizer.state = tokenizer.plaintextState
| |
def startTagHeading(self, token):
    """Insert a heading element.

    A ``p`` in button scope is closed first.  If the current node is
    itself a heading, that is a parse error and the node is popped
    before the new heading is inserted.

    :arg token: the start tag token
    """
    tree = self.tree
    if tree.elementInScope("p", variant="button"):
        self.endTagP(impliedTagToken("p"))
    currentNode = tree.openElements[-1]
    if currentNode.name in headingElements:
        self.parser.parseError("unexpected-start-tag",
                               {"name": token["name"]})
        tree.openElements.pop()
    tree.insertElement(token)
| |
def startTagA(self, token):
    """Insert an ``a`` element, first closing any still-active ``a``.

    If an ``a`` is found in the active formatting elements, this is a
    parse error: the formatting end tag handling is run for it, and any
    remaining trace of it is removed from both the stack of open
    elements and the active formatting list.

    :arg token: the start tag token
    """
    tree = self.tree
    activeAnchor = tree.elementInActiveFormattingElements("a")
    if activeAnchor:
        self.parser.parseError("unexpected-start-tag-implies-end-tag",
                               {"startName": "a", "endName": "a"})
        self.endTagFormatting(impliedTagToken("a"))
        if activeAnchor in tree.openElements:
            tree.openElements.remove(activeAnchor)
        if activeAnchor in tree.activeFormattingElements:
            tree.activeFormattingElements.remove(activeAnchor)
    tree.reconstructActiveFormattingElements()
    self.addFormattingElement(token)
| |
def startTagFormatting(self, token):
    """Reconstruct active formatting, then add the new formatting element.

    :arg token: the start tag token
    """
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    self.addFormattingElement(token)
| |
def startTagNobr(self, token):
    """Insert a ``nobr`` formatting element.

    A ``nobr`` already in scope is a parse error: it is closed through
    this phase's end tag handling and the active formatting elements
    are reconstructed again before the new element is added.

    :arg token: the start tag token
    """
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    if tree.elementInScope("nobr"):
        self.parser.parseError("unexpected-start-tag-implies-end-tag",
                               {"startName": "nobr", "endName": "nobr"})
        self.processEndTag(impliedTagToken("nobr"))
        # XXX Need tests that trigger the following
        tree.reconstructActiveFormattingElements()
    self.addFormattingElement(token)
| |
def startTagButton(self, token):
    """Insert a ``button`` element, or close one that is still in scope.

    :arg token: the start tag token

    :returns: the token when an open ``button`` had to be closed first
        (nothing is inserted in that case); otherwise None
    """
    if not self.tree.elementInScope("button"):
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertElement(token)
        self.parser.framesetOK = False
        return None

    self.parser.parseError("unexpected-start-tag-implies-end-tag",
                           {"startName": "button", "endName": "button"})
    self.processEndTag(impliedTagToken("button"))
    return token
| |
def startTagAppletMarqueeObject(self, token):
    """Insert an ``applet``/``marquee``/``object`` element.

    A Marker is pushed onto the active formatting elements after the
    insertion and ``parser.framesetOK`` is cleared.

    :arg token: the start tag token
    """
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    tree.insertElement(token)
    tree.activeFormattingElements.append(Marker)
    self.parser.framesetOK = False
| |
def startTagXmp(self, token):
    """Start an ``xmp`` element, parsed as RAWTEXT.

    A ``p`` in button scope is closed first, the active formatting
    elements are reconstructed and ``parser.framesetOK`` is cleared.

    :arg token: the start tag token
    """
    tree = self.tree
    if tree.elementInScope("p", variant="button"):
        self.endTagP(impliedTagToken("p"))
    tree.reconstructActiveFormattingElements()
    parser = self.parser
    parser.framesetOK = False
    parser.parseRCDataRawtext(token, "RAWTEXT")
| |
def startTagTable(self, token):
    """Insert a ``table`` element and switch to the "inTable" phase.

    Unless the parser's compatMode is "quirks", a ``p`` in button scope
    is closed first.  ``parser.framesetOK`` is cleared.

    :arg token: the start tag token
    """
    parser = self.parser
    notQuirks = parser.compatMode != "quirks"
    if notQuirks and self.tree.elementInScope("p", variant="button"):
        self.processEndTag(impliedTagToken("p"))
    self.tree.insertElement(token)
    parser.framesetOK = False
    parser.phase = parser.phases["inTable"]
| |
def startTagVoidFormatting(self, token):
    """Insert a void element and pop it again straight away.

    The token's self-closing flag is acknowledged and
    ``parser.framesetOK`` is cleared.

    :arg token: the start tag token
    """
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    tree.insertElement(token)
    tree.openElements.pop()
    token["selfClosingAcknowledged"] = True
    self.parser.framesetOK = False
| |
def startTagInput(self, token):
    """Insert an ``input`` element as a void element.

    ``parser.framesetOK`` is left as it was when the ``type`` attribute
    is "hidden" (compared after ASCII lower-casing); otherwise the void
    handler's clearing of the flag stands.

    :arg token: the start tag token
    """
    savedFramesetOK = self.parser.framesetOK
    self.startTagVoidFormatting(token)
    attributes = token["data"]
    isHidden = ("type" in attributes and
                attributes["type"].translate(asciiUpper2Lower) == "hidden")
    if isHidden:
        # input type=hidden doesn't change framesetOK
        self.parser.framesetOK = savedFramesetOK
| |
def startTagParamSource(self, token):
    """Insert a ``param``/``source``/``track`` element and pop it at once.

    The token's self-closing flag is acknowledged.

    :arg token: the start tag token
    """
    tree = self.tree
    tree.insertElement(token)
    tree.openElements.pop()
    token["selfClosingAcknowledged"] = True
| |
def startTagHr(self, token):
    """Insert an ``hr`` element as a void element.

    A ``p`` in button scope is closed first.  The self-closing flag is
    acknowledged and ``parser.framesetOK`` is cleared.

    :arg token: the start tag token
    """
    tree = self.tree
    if tree.elementInScope("p", variant="button"):
        self.endTagP(impliedTagToken("p"))
    tree.insertElement(token)
    tree.openElements.pop()
    token["selfClosingAcknowledged"] = True
    self.parser.framesetOK = False
| |
def startTagImage(self, token):
    """Treat an ``image`` start tag as ``img``.

    A parse error is reported, then an implied ``img`` start tag that
    carries the same attributes and self-closing flag is processed.

    :arg token: the start tag token
    """
    # No really...
    self.parser.parseError("unexpected-start-tag-treated-as",
                           {"originalName": "image", "newName": "img"})
    imgToken = impliedTagToken("img", "StartTag",
                               attributes=token["data"],
                               selfClosing=token["selfClosing"])
    self.processStartTag(imgToken)
| |
def startTagIsIndex(self, token):
    """Expand the deprecated ``isindex`` tag into equivalent form markup.

    Always a parse error.  When no form pointer is set, this processes
    an implied ``form`` (with the token's ``action``, if any), ``hr``,
    ``label`` with the prompt text, an ``input`` named "isindex" that
    carries the remaining attributes, the ``label`` end tag, another
    ``hr`` and the ``form`` end tag.

    :arg token: the start tag token
    """
    self.parser.parseError("deprecated-tag", {"name": "isindex"})
    if self.tree.formPointer:
        return

    tokenAttrs = token["data"]
    formAttrs = {}
    if "action" in tokenAttrs:
        formAttrs["action"] = tokenAttrs["action"]
    self.processStartTag(impliedTagToken("form", "StartTag",
                                         attributes=formAttrs))
    self.processStartTag(impliedTagToken("hr", "StartTag"))
    self.processStartTag(impliedTagToken("label", "StartTag"))

    # XXX Localization ...
    promptText = tokenAttrs.get(
        "prompt", "This is a searchable index. Enter search keywords: ")
    self.processCharacters(
        {"type": tokenTypes["Characters"], "data": promptText})

    # The input receives every attribute except "action" and "prompt".
    inputAttrs = token["data"].copy()
    for consumed in ("action", "prompt"):
        if consumed in inputAttrs:
            del inputAttrs[consumed]
    inputAttrs["name"] = "isindex"
    self.processStartTag(impliedTagToken("input", "StartTag",
                                         attributes=inputAttrs,
                                         selfClosing=token["selfClosing"]))

    self.processEndTag(impliedTagToken("label"))
    self.processStartTag(impliedTagToken("hr", "StartTag"))
    self.processEndTag(impliedTagToken("form"))
| |
def startTagTextarea(self, token):
    """Insert a ``textarea`` element and switch the tokenizer to RCDATA.

    The whitespace handler is switched to
    processSpaceCharactersDropNewline and ``parser.framesetOK`` is
    cleared.

    :arg token: the start tag token
    """
    self.tree.insertElement(token)
    tokenizer = self.parser.tokenizer
    tokenizer.state = tokenizer.rcdataState
    self.processSpaceCharacters = self.processSpaceCharactersDropNewline
    self.parser.framesetOK = False
| |
def startTagIFrame(self, token):
    """Clear ``parser.framesetOK`` and handle ``iframe`` as RAWTEXT.

    :arg token: the start tag token
    """
    parser = self.parser
    parser.framesetOK = False
    self.startTagRawtext(token)
| |
def startTagNoscript(self, token):
    """Handle ``noscript`` as RAWTEXT when scripting is on, else as a plain element.

    :arg token: the start tag token
    """
    if self.parser.scripting:
        handler = self.startTagRawtext
    else:
        handler = self.startTagOther
    handler(token)
| |
def startTagRawtext(self, token):
    """Parse the element's content as RAWTEXT.

    Used for iframe, noembed, noframes, and noscript (if scripting
    enabled).

    :arg token: the start tag token
    """
    contentModel = "RAWTEXT"
    self.parser.parseRCDataRawtext(token, contentModel)
| |
def startTagOpt(self, token):
    """Insert an ``option``/``optgroup``, closing an open ``option`` first.

    :arg token: the start tag token
    """
    currentNode = self.tree.openElements[-1]
    if currentNode.name == "option":
        self.parser.phase.processEndTag(impliedTagToken("option"))
    self.tree.reconstructActiveFormattingElements()
    self.parser.tree.insertElement(token)
| |
def startTagSelect(self, token):
    """Insert a ``select`` element and pick the matching select phase.

    ``parser.framesetOK`` is cleared.  If the parser's current phase is
    one of the table-related phases, the next phase is
    "inSelectInTable"; otherwise it is "inSelect".

    :arg token: the start tag token
    """
    self.tree.reconstructActiveFormattingElements()
    self.tree.insertElement(token)

    parser = self.parser
    parser.framesetOK = False
    phases = parser.phases
    tablePhases = [phases[name] for name in ("inTable",
                                             "inCaption",
                                             "inColumnGroup",
                                             "inTableBody",
                                             "inRow",
                                             "inCell")]
    if parser.phase in tablePhases:
        parser.phase = phases["inSelectInTable"]
    else:
        parser.phase = phases["inSelect"]
| |
def startTagRpRt(self, token):
    """Insert an ``rp``/``rt`` element.

    When a ``ruby`` element is in scope, implied end tags are generated
    first; a current node other than ``ruby`` afterwards is a parse
    error (reported without an error code, as in the original).

    :arg token: the start tag token
    """
    tree = self.tree
    if tree.elementInScope("ruby"):
        tree.generateImpliedEndTags()
        currentNode = tree.openElements[-1]
        if currentNode.name != "ruby":
            self.parser.parseError()
    tree.insertElement(token)
| |
def startTagMath(self, token):
    """Insert a ``math`` element in the MathML namespace.

    The parser's MathML and foreign attribute adjustments are applied
    to the token first.  A self-closing token has its element popped
    again and its flag acknowledged.

    :arg token: the start tag token
    """
    self.tree.reconstructActiveFormattingElements()
    parser = self.parser
    parser.adjustMathMLAttributes(token)
    parser.adjustForeignAttributes(token)
    token["namespace"] = namespaces["mathml"]
    self.tree.insertElement(token)
    # Need to get the parse error right for the case where the token
    # has a namespace not equal to the xmlns attribute
    if not token["selfClosing"]:
        return
    self.tree.openElements.pop()
    token["selfClosingAcknowledged"] = True
| |
def startTagSvg(self, token):
    """Insert an ``svg`` element in the SVG namespace.

    The parser's SVG and foreign attribute adjustments are applied to
    the token first.  A self-closing token has its element popped again
    and its flag acknowledged.

    :arg token: the start tag token
    """
    self.tree.reconstructActiveFormattingElements()
    parser = self.parser
    parser.adjustSVGAttributes(token)
    parser.adjustForeignAttributes(token)
    token["namespace"] = namespaces["svg"]
    self.tree.insertElement(token)
    # Need to get the parse error right for the case where the token
    # has a namespace not equal to the xmlns attribute
    if not token["selfClosing"]:
        return
    self.tree.openElements.pop()
    token["selfClosingAcknowledged"] = True
| |
def startTagMisplaced(self, token):
    """Ignore a start tag that has no place in the body, with a parse error.

    The dispatch table in ``__init__`` routes these tags here:
    "caption", "col", "colgroup", "frame", "head", "tbody", "td",
    "tfoot", "th", "thead", "tr".

    :arg token: the start tag token
    """
    errorArgs = {"name": token["name"]}
    self.parser.parseError("unexpected-start-tag-ignored", errorArgs)
| |
def startTagOther(self, token):
    """Insert any other element after reconstructing active formatting.

    :arg token: the start tag token
    """
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    tree.insertElement(token)
| |
def endTagP(self, token):
    """Close the nearest ``p`` element in button scope.

    With no such ``p``, an implied ``p`` start tag is processed, a parse
    error is reported, and the handler runs again with an implied end
    tag.  Otherwise implied end tags (except ``p``) are generated, a
    current node other than ``p`` is reported, and the stack is popped
    up to and including the ``p``.

    :arg token: the end tag token (its contents are not inspected)
    """
    tree = self.tree
    if not tree.elementInScope("p", variant="button"):
        self.startTagCloseP(impliedTagToken("p", "StartTag"))
        self.parser.parseError("unexpected-end-tag", {"name": "p"})
        self.endTagP(impliedTagToken("p", "EndTag"))
        return

    tree.generateImpliedEndTags("p")
    if tree.openElements[-1].name != "p":
        self.parser.parseError("unexpected-end-tag", {"name": "p"})
    # Pop everything above the <p>, then the <p> itself.
    while tree.openElements.pop().name != "p":
        pass
| |
def endTagBody(self, token):
    """Handle the ``body`` end tag.

    With no ``body`` in scope this is a parse error and the token is
    ignored.  Otherwise, if the current node is not ``body``, the open
    elements from the third entry on are checked and the first one that
    is not allowed to be open at this point is reported.  The parser
    then switches to the "afterBody" phase.

    :arg token: the end tag token (its contents are not inspected)
    """
    if not self.tree.elementInScope("body"):
        self.parser.parseError()
        return

    openElements = self.tree.openElements
    if openElements[-1].name != "body":
        # Built once here rather than on every loop iteration.
        allowedOpen = frozenset(("dd", "dt", "li", "optgroup",
                                 "option", "p", "rp", "rt",
                                 "tbody", "td", "tfoot",
                                 "th", "thead", "tr", "body",
                                 "html"))
        for node in openElements[2:]:
            if node.name not in allowedOpen:
                # Not sure this is the correct name for the parse error
                self.parser.parseError(
                    "expected-one-end-tag-but-got-another",
                    {"gotName": "body", "expectedName": node.name})
                break
    self.parser.phase = self.parser.phases["afterBody"]
| |
def endTagHtml(self, token):
    """Handle the ``html`` end tag by first closing the body.

    :arg token: the end tag token

    :returns: the token when a ``body`` was in scope and the body end
        tag handling ran; otherwise None
    """
    # We repeat the test for the body end tag token being ignored here
    if not self.tree.elementInScope("body"):
        return None
    self.endTagBody(impliedTagToken("body"))
    return token
| |
def endTagBlock(self, token):
    """Close a block-level element named by the end tag.

    If the element is in scope, implied end tags are generated and the
    stack is popped up to and including it.  A current node with a
    different name is reported as "end-tag-too-early" whether or not
    the element is in scope.

    :arg token: the end tag token
    """
    name = token["name"]
    # Put us back in the right whitespace handling mode
    if name == "pre":
        self.processSpaceCharacters = self.processSpaceCharactersNonPre

    tree = self.tree
    inScope = tree.elementInScope(name)
    if inScope:
        tree.generateImpliedEndTags()
    if tree.openElements[-1].name != name:
        self.parser.parseError("end-tag-too-early", {"name": name})
    if inScope:
        while tree.openElements.pop().name != name:
            pass
| |
def endTagForm(self, token):
    """Close the form element recorded in the tree's form pointer.

    The pointer is always cleared.  If it was unset or its element is
    not in scope, the end tag is a parse error and is ignored.
    Otherwise implied end tags are generated, a current node other than
    the form is reported, and the form is removed from the stack.

    :arg token: the end tag token (its contents are not inspected)
    """
    tree = self.tree
    formElement = tree.formPointer
    tree.formPointer = None

    if formElement is None or not tree.elementInScope(formElement):
        self.parser.parseError("unexpected-end-tag",
                               {"name": "form"})
        return

    tree.generateImpliedEndTags()
    if tree.openElements[-1] != formElement:
        self.parser.parseError("end-tag-too-early-ignored",
                               {"name": "form"})
    tree.openElements.remove(formElement)
| |
def endTagListItem(self, token):
    """Close an ``li``, ``dd`` or ``dt`` element.

    ``li`` is looked up in "list" scope, the others in the default
    scope.  An element that is not in scope makes the end tag a parse
    error that is ignored.  Otherwise implied end tags (except for the
    tag itself) are generated, a mismatching current node is reported,
    and the stack is popped up to and including the element.

    :arg token: the end tag token
    """
    name = token["name"]
    variant = "list" if name == "li" else None
    tree = self.tree

    if not tree.elementInScope(name, variant=variant):
        self.parser.parseError("unexpected-end-tag", {"name": name})
        return

    tree.generateImpliedEndTags(exclude=name)
    if tree.openElements[-1].name != name:
        self.parser.parseError(
            "end-tag-too-early",
            {"name": name})
    while tree.openElements.pop().name != name:
        pass
| |
def endTagHeading(self, token):
    """Close the nearest heading element, whatever its level.

    If any heading is in scope, implied end tags are generated.  A
    current node whose name differs from the token's is reported.  Then,
    if any heading is in scope, the stack is popped up to and including
    the first heading element encountered.

    :arg token: the end tag token
    """
    tree = self.tree

    if any(tree.elementInScope(level) for level in headingElements):
        tree.generateImpliedEndTags()

    if tree.openElements[-1].name != token["name"]:
        self.parser.parseError("end-tag-too-early",
                               {"name": token["name"]})

    if any(tree.elementInScope(level) for level in headingElements):
        while tree.openElements.pop().name not in headingElements:
            pass
| |
def endTagFormatting(self, token):
    """Handle a formatting end tag with the adoption agency algorithm.

    Up to eight rounds are run.  In each round the active formatting
    element named by the token is looked up; the round then falls back
    to endTagOther(), drops a stale list entry, pops the stack down to
    the element, or moves the "furthest block" below it under a fresh
    clone of the element.  Step numbers in the comments are the ones
    used by the implementation this was derived from.

    :arg token: the end tag token
    """
    # http://svn.whatwg.org/webapps/complete.html#adoptionAgency revision 7867
    # XXX Better parseError messages appreciated.
    tree = self.tree
    tagName = token["name"]

    # Steps 1-3: the outer loop runs at most eight times.
    for _outerPass in range(8):

        # Step 4: pick the formatting element, i.e. the entry that
        # elementInActiveFormattingElements() returns for this tag name.
        fmtElement = tree.elementInActiveFormattingElements(tagName)

        if (not fmtElement or
                (fmtElement in tree.openElements and
                 not tree.elementInScope(fmtElement.name))):
            # No usable formatting element: behave like any other end tag.
            self.endTagOther(token)
            return

        if fmtElement not in tree.openElements:
            # Still listed as active but no longer open: parse error,
            # forget the entry and stop.
            self.parser.parseError("adoption-agency-1.2",
                                   {"name": tagName})
            tree.activeFormattingElements.remove(fmtElement)
            return

        if not tree.elementInScope(fmtElement.name):
            # Open but not in scope: parse error, ignore the token.
            self.parser.parseError("adoption-agency-4.4",
                                   {"name": tagName})
            return

        if fmtElement != tree.openElements[-1]:
            # Open and in scope but not the current node: parse error,
            # then carry on with the algorithm.
            self.parser.parseError("adoption-agency-1.3",
                                   {"name": tagName})

        # Step 5: the furthest block is the first special-category
        # element found scanning the stack from the formatting element
        # towards the current node.  There might not be one.
        fmtIndex = tree.openElements.index(fmtElement)
        furthestBlock = None
        for candidate in tree.openElements[fmtIndex:]:
            if candidate.nameTuple in specialElements:
                furthestBlock = candidate
                break

        # Step 6: without a furthest block, pop the stack up to and
        # including the formatting element, drop it from the active
        # formatting elements, and stop.
        if furthestBlock is None:
            popped = tree.openElements.pop()
            while popped != fmtElement:
                popped = tree.openElements.pop()
            tree.activeFormattingElements.remove(popped)
            return

        # Step 7: the common ancestor sits just before the formatting
        # element on the stack.
        commonAncestor = tree.openElements[fmtIndex - 1]

        # Step 8: the bookmark records where the clone created in step
        # 11 goes back into the active formatting elements.  It can
        # move in step 9.7.
        bookmark = tree.activeFormattingElements.index(fmtElement)

        # Step 9: walk up the stack from the furthest block, at most
        # three times.
        lastNode = node = furthestBlock
        position = tree.openElements.index(node)
        for _innerPass in range(3):
            # node becomes the element before the previous node on the
            # stack of open elements.
            position -= 1
            node = tree.openElements[position]
            if node not in tree.activeFormattingElements:
                tree.openElements.remove(node)
                continue
            # Step 9.6
            if node == fmtElement:
                break
            # Step 9.7
            if lastNode == furthestBlock:
                bookmark = tree.activeFormattingElements.index(node) + 1
            # Step 9.8: swap node for a clone in both lists.
            replacement = node.cloneNode()
            tree.activeFormattingElements[
                tree.activeFormattingElements.index(node)] = replacement
            tree.openElements[
                tree.openElements.index(node)] = replacement
            node = replacement
            # Step 9.9: move lastNode under node.
            if lastNode.parent:
                lastNode.parent.removeChild(lastNode)
            node.appendChild(lastNode)
            # Step 9.10
            lastNode = node

        # Step 10: attach lastNode to the common ancestor, using the
        # table misnesting position when the ancestor is a table,
        # tbody, tfoot, thead or tr.
        if lastNode.parent:
            lastNode.parent.removeChild(lastNode)

        tableNames = frozenset(("table", "tbody", "tfoot", "thead", "tr"))
        if commonAncestor.name in tableNames:
            parent, insertBefore = tree.getTableMisnestedNodePosition()
            parent.insertBefore(lastNode, insertBefore)
        else:
            commonAncestor.appendChild(lastNode)

        # Step 11
        fmtClone = fmtElement.cloneNode()

        # Step 12
        furthestBlock.reparentChildren(fmtClone)

        # Step 13
        furthestBlock.appendChild(fmtClone)

        # Step 14
        tree.activeFormattingElements.remove(fmtElement)
        tree.activeFormattingElements.insert(bookmark, fmtClone)

        # Step 15
        tree.openElements.remove(fmtElement)
        tree.openElements.insert(
            tree.openElements.index(furthestBlock) + 1, fmtClone)
| |
def endTagAppletMarqueeObject(self, token):
    """Close an ``applet``/``marquee``/``object`` element.

    If the element is in scope, implied end tags are generated.  A
    mismatching current node is reported.  If the element is in scope,
    the stack is popped up to and including it and
    ``tree.clearActiveFormattingElements()`` is called.

    :arg token: the end tag token
    """
    name = token["name"]
    tree = self.tree

    if tree.elementInScope(name):
        tree.generateImpliedEndTags()
    if tree.openElements[-1].name != name:
        self.parser.parseError("end-tag-too-early", {"name": name})

    if tree.elementInScope(name):
        while tree.openElements.pop().name != name:
            pass
        tree.clearActiveFormattingElements()
| |
def endTagBr(self, token):
    """Treat a ``br`` end tag as a ``br`` element, with a parse error.

    An implied ``br`` start tag is inserted and popped again.

    :arg token: the end tag token (its contents are not inspected)
    """
    self.parser.parseError("unexpected-end-tag-treated-as",
                           {"originalName": "br", "newName": "br element"})
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    brToken = impliedTagToken("br", "StartTag")
    tree.insertElement(brToken)
    tree.openElements.pop()
| |
def endTagOther(self, token):
    """Handle any other end tag by closing the nearest matching element.

    The stack of open elements is walked from the current node upwards.
    On a name match, implied end tags (except for that name) are
    generated, a mismatching current node is reported, and the stack is
    popped up to and including the matched node.  Reaching a
    special-category element first is a parse error and the token is
    ignored.

    :arg token: the end tag token
    """
    name = token["name"]
    tree = self.tree
    # Walk a reversed copy: the stack is modified once a match is found.
    for node in list(reversed(tree.openElements)):
        if node.name == name:
            tree.generateImpliedEndTags(exclude=name)
            if tree.openElements[-1].name != name:
                self.parser.parseError("unexpected-end-tag",
                                       {"name": name})
            while tree.openElements.pop() != node:
                pass
            break
        if node.nameTuple in specialElements:
            self.parser.parseError("unexpected-end-tag", {"name": name})
            break
| |
class TextPhase(Phase):
    """Phase used while text-only element content is being consumed.

    Character data is inserted as-is; an end tag or end of input pops
    the current node and returns to ``parser.originalPhase``.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startDispatcher = _utils.MethodDispatcher([])
        startDispatcher.default = self.startTagOther
        self.startTagHandler = startDispatcher

        endDispatcher = _utils.MethodDispatcher([
            ("script", self.endTagScript)])
        endDispatcher.default = self.endTagOther
        self.endTagHandler = endDispatcher

    def processCharacters(self, token):
        """Insert the token's character data into the tree."""
        text = token["data"]
        self.tree.insertText(text)

    def processEOF(self):
        """Report the unclosed element, pop it and restore the original phase.

        :returns: True
        """
        unclosed = self.tree.openElements[-1]
        self.parser.parseError("expected-named-closing-tag-but-got-eof",
                               {"name": unclosed.name})
        self.tree.openElements.pop()
        self.parser.phase = self.parser.originalPhase
        return True

    def startTagOther(self, token):
        """Start tags are not expected in this phase; always asserts."""
        assert False, "Tried to process start tag %s in RCDATA/RAWTEXT mode" % token['name']

    def endTagScript(self, token):
        """Pop the ``script`` element and restore the original phase."""
        popped = self.tree.openElements.pop()
        assert popped.name == "script"
        self.parser.phase = self.parser.originalPhase
        # The rest of this method is all stuff that only happens if
        # document.write works

    def endTagOther(self, token):
        """Pop the current node and restore the original phase."""
        self.tree.openElements.pop()
        self.parser.phase = self.parser.originalPhase
| |
| class InTablePhase(Phase): |
| # http://www.whatwg.org/specs/web-apps/current-work/#in-table |
| def __init__(self, parser, tree): |
| Phase.__init__(self, parser, tree) |
| self.startTagHandler = _utils.MethodDispatcher([ |
| ("html", self.startTagHtml), |
| ("caption", self.startTagCaption), |
| ("colgroup", self.startTagColgroup), |
| ("col", self.startTagCol), |
| (("tbody", "tfoot", "thead"), self.startTagRowGroup), |
| (("td", "th", "tr"), self.startTagImplyTbody), |
| ("table", self.startTagTable), |
| (("style", "script"), self.startTagStyleScript), |
| ("input", self.startTagInput), |
| ("form", self.startTagForm) |
| ]) |
| self.startTagHandler.default = self.startTagOther |
| |
| self.endTagHandler = _utils.MethodDispatcher([ |
| ("table", self.endTagTable), |
| (("body", "caption", "col", "colgroup", "html", "tbody", "td", |
| "tfoot", "th", "thead", "tr"), self.endTagIgnore) |
| ]) |
| self.endTagHandler.default = self.endTagOther |
| |
| # helper methods |
| def clearStackToTableContext(self): |
| # "clear the stack back to a table context" |
| while self.tree.openElements[-1].name not in ("table", "html"): |
| # self.parser.parseError("unexpected-implied-end-tag-in-table", |
| # {"name": self.tree.openElements[-1].name}) |
| self.tree.openElements.pop() |
| # When the current node is <html> it's an innerHTML case |
| |
| # processing methods |
| def processEOF(self): |
| if self.tree.openElements[-1].name != "html": |
| self.parser.parseError("eof-in-table") |
| else: |
| assert self.parser.innerHTML |
| # Stop parsing |
| |
| def processSpaceCharacters(self, token): |
| originalPhase = self.parser.phase |
| self.parser.phase = self.parser.phases["inTableText"] |
| self.parser.phase.originalPhase = originalPhase |
| self.parser.phase.processSpaceCharacters(token) |
| |
| def processCharacters(self, token): |
| originalPhase = self.parser.phase |
| self.parser.phase = self.parser.phases["inTableText"] |
| self.parser.phase.originalPhase = originalPhase |
| self.parser.phase.processCharacters(token) |
| |
| def insertText(self, token): |
| # If we get here there must be at least one non-whitespace character |
| # Do the table magic! |
| self.tree.insertFromTable = True |
| self.parser.phases["inBody"].processCharacters(token) |
| self.tree.insertFromTable = False |
| |
| def startTagCaption(self, token): |
| self.clearStackToTableContext() |
| self.tree.activeFormattingElements.append(Marker) |
| self.tree.insertElement(token) |
| self.parser.phase = self.parser.phases["inCaption"] |
| |
| def startTagColgroup(self, token): |
| self.clearStackToTableContext() |
| self.tree.insertElement(token) |
| self.parser.phase = self.parser.phases["inColumnGroup"] |
| |
| def startTagCol(self, token): |
| self.startTagColgroup(impliedTagToken("colgroup", "StartTag")) |
| return token |
| |
| def startTagRowGroup(self, token): |
| self.clearStackToTableContext() |
| self.tree.insertElement(token) |
| self.parser.phase = self.parser.phases["inTableBody"] |
| |
| def startTagImplyTbody(self, token): |
| self.startTagRowGroup(impliedTagToken("tbody", "StartTag")) |
| return token |
| |
| def startTagTable(self, token): |
| self.parser.parseError("unexpected-start-tag-implies-end-tag", |
| {"startName": "table", "endName": "table"}) |
| self.parser.phase.processEndTag(impliedTagToken("table")) |
| if not self.parser.innerHTML: |
| return token |
| |
| def startTagStyleScript(self, token): |
| return self.parser.phases["inHead"].processStartTag(token) |
| |
| def startTagInput(self, token): |
| if ("type" in token["data"] and |
| token["data"]["type"].translate(asciiUpper2Lower) == "hidden"): |
| self.parser.parseError("unexpected-hidden-input-in-table") |
| self.tree.insertElement(token) |
| # XXX associate with form |
| self.tree.openElements.pop() |
| else: |
| self.startTagOther(token) |
| |
| def startTagForm(self, token): |
| self.parser.parseError("unexpected-form-in-table") |
| if self.tree.formPointer is None: |
| self.tree.insertElement(token) |
| self.tree.formPointer = self.tree.openElements[-1] |
| self.tree.openElements.pop() |
| |
| def startTagOther(self, token): |
| self.parser.parseError("unexpected-start-tag-implies-table-voodoo", {"name": token["name"]}) |
| # Do the table magic! |
| self.tree.insertFromTable = True |
| self.parser.phases["inBody"].processStartTag(token) |
| self.tree.insertFromTable = False |
| |
| def endTagTable(self, token): |
| if self.tree.elementInScope("table", variant="table"): |
| self.tree.generateImpliedEndTags() |
| if self.tree.openElements[-1].name != "table": |
| self.parser.parseError("end-tag-too-early-named", |
| {"gotName": "table", |
| "expectedName": self.tree.openElements[-1].name}) |
| while self.tree.openElements[-1].name != "table": |
| self.tree.openElements.pop() |
| self.tree.openElements.pop() |
| self.parser.resetInsertionMode() |
| else: |
| # innerHTML case |
| assert self.parser.innerHTML |
| self.parser.parseError() |
| |
| def endTagIgnore(self, token): |
| self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) |
| |
def endTagOther(self, token):
    """Handle any end tag without a dedicated handler.

    Reports a parse error and lets the "inBody" phase process the token
    while ``tree.insertFromTable`` is switched on; the flag is switched
    off again afterwards.

    :arg token: the end tag token being processed
    """
    details = {"name": token["name"]}
    self.parser.parseError("unexpected-end-tag-implies-table-voodoo", details)
    # Do the table magic!
    tree = self.tree
    tree.insertFromTable = True
    body_phase = self.parser.phases["inBody"]
    body_phase.processEndTag(token)
    tree.insertFromTable = False
| |
class InTableTextPhase(Phase):
    """Phase registered under the "inTableText" key.

    Character tokens are buffered in ``characterTokens``.  Any other kind
    of token flushes the buffer, restores ``originalPhase`` as the parser's
    phase and hands the token back for reprocessing.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)
        # Phase to restore once a non-character token arrives.
        self.originalPhase = None
        # Character tokens collected since the last flush.
        self.characterTokens = []

    def flushCharacters(self):
        """Emit the buffered character data and empty the buffer.

        Data containing at least one non-space character is routed through
        the "inTable" phase's ``insertText``; non-empty space-only data is
        inserted directly into the tree.
        """
        pending = "".join(tok["data"] for tok in self.characterTokens)
        if any(char not in spaceCharacters for char in pending):
            combined = {"type": tokenTypes["Characters"], "data": pending}
            self.parser.phases["inTable"].insertText(combined)
        elif pending:
            self.tree.insertText(pending)
        self.characterTokens = []

    def processComment(self, token):
        """Flush, restore the original phase and return the token."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return token

    def processEOF(self):
        """Flush, restore the original phase and return ``True``."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return True

    def processCharacters(self, token):
        """Buffer the token unless its data is exactly U+0000."""
        if token["data"] != "\u0000":
            self.characterTokens.append(token)

    def processSpaceCharacters(self, token):
        """Buffer a space-character token."""
        # pretty sure we should never reach here
        self.characterTokens.append(token)
        # assert False

    def processStartTag(self, token):
        """Flush, restore the original phase and return the token."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return token

    def processEndTag(self, token):
        """Flush, restore the original phase and return the token."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return token
| |
class InCaptionPhase(Phase):
    """Phase registered under the "inCaption" key.

    Table-structure start tags and ``</table>`` close the caption first;
    everything without a dedicated handler is delegated to "inBody".
    """
    # http://www.whatwg.org/specs/web-apps/current-work/#in-caption

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        table_start_tags = ("caption", "col", "colgroup", "tbody", "td",
                            "tfoot", "th", "thead", "tr")
        start_dispatch = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            (table_start_tags, self.startTagTableElement)
        ])
        start_dispatch.default = self.startTagOther
        self.startTagHandler = start_dispatch

        ignored_end_tags = ("body", "col", "colgroup", "html", "tbody", "td",
                            "tfoot", "th", "thead", "tr")
        end_dispatch = _utils.MethodDispatcher([
            ("caption", self.endTagCaption),
            ("table", self.endTagTable),
            (ignored_end_tags, self.endTagIgnore)
        ])
        end_dispatch.default = self.endTagOther
        self.endTagHandler = end_dispatch

    def ignoreEndTagCaption(self):
        """Return True when no caption element is in table scope."""
        caption_in_scope = self.tree.elementInScope("caption", variant="table")
        return not caption_in_scope

    def processEOF(self):
        """Let the "inBody" phase handle end of file."""
        body_phase = self.parser.phases["inBody"]
        body_phase.processEOF()

    def processCharacters(self, token):
        """Let the "inBody" phase handle character tokens."""
        body_phase = self.parser.phases["inBody"]
        return body_phase.processCharacters(token)

    def startTagTableElement(self, token):
        """Close the caption via an implied end tag, then reprocess.

        The token is returned only when the implied ``</caption>`` was not
        going to be ignored.
        """
        self.parser.parseError()
        # XXX Have to duplicate logic here to find out if the tag is ignored
        caption_open = not self.ignoreEndTagCaption()
        self.parser.phase.processEndTag(impliedTagToken("caption"))
        return token if caption_open else None

    def startTagOther(self, token):
        """Let the "inBody" phase handle any other start tag."""
        body_phase = self.parser.phases["inBody"]
        return body_phase.processStartTag(token)

    def endTagCaption(self, token):
        """Close the caption element and switch to the "inTable" phase.

        When no caption is in table scope (asserted to be the innerHTML
        case) only a parse error is reported.
        """
        if self.ignoreEndTagCaption():
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return

        # AT this code is quite similar to endTagTable in "InTable"
        self.tree.generateImpliedEndTags()
        current_name = self.tree.openElements[-1].name
        if current_name != "caption":
            self.parser.parseError("expected-one-end-tag-but-got-another",
                                   {"gotName": "caption",
                                    "expectedName": current_name})
        # Discard everything above the caption, then the caption itself.
        while self.tree.openElements[-1].name != "caption":
            self.tree.openElements.pop()
        self.tree.openElements.pop()
        self.tree.clearActiveFormattingElements()
        self.parser.phase = self.parser.phases["inTable"]

    def endTagTable(self, token):
        """Close the caption via an implied end tag, then reprocess.

        The token is returned only when the implied ``</caption>`` was not
        going to be ignored.
        """
        self.parser.parseError()
        caption_open = not self.ignoreEndTagCaption()
        self.parser.phase.processEndTag(impliedTagToken("caption"))
        return token if caption_open else None

    def endTagIgnore(self, token):
        """Report the end tag as unexpected and drop it."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag", details)

    def endTagOther(self, token):
        """Let the "inBody" phase handle any other end tag."""
        body_phase = self.parser.phases["inBody"]
        return body_phase.processEndTag(token)
| |
class InColumnGroupPhase(Phase):
    """Phase registered under the "inColumnGroup" key.

    ``<col>`` is inserted directly; most other tokens first close the
    column group through ``endTagColgroup`` and are then handed back for
    reprocessing.
    """
    # http://www.whatwg.org/specs/web-apps/current-work/#in-column

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        start_dispatch = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("col", self.startTagCol)
        ])
        start_dispatch.default = self.startTagOther
        self.startTagHandler = start_dispatch

        end_dispatch = _utils.MethodDispatcher([
            ("colgroup", self.endTagColgroup),
            ("col", self.endTagCol)
        ])
        end_dispatch.default = self.endTagOther
        self.endTagHandler = end_dispatch

    def ignoreEndTagColgroup(self):
        """Return True when the current node is the html element."""
        current_node = self.tree.openElements[-1]
        return current_node.name == "html"

    def processEOF(self):
        """Handle end of file.

        Returns ``None`` when the current node is html (asserted to be the
        innerHTML case); otherwise closes the column group and returns
        ``True`` if that close was not ignored.
        """
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML
            return None

        colgroup_closable = not self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        if colgroup_closable:
            return True
        return None

    def processCharacters(self, token):
        """Close the column group; return the token unless that was ignored."""
        colgroup_closable = not self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        return token if colgroup_closable else None

    def startTagCol(self, token):
        """Insert the col element, pop it at once and acknowledge self-closing."""
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagOther(self, token):
        """Close the column group; return the token unless that was ignored."""
        colgroup_closable = not self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        return token if colgroup_closable else None

    def endTagColgroup(self, token):
        """Pop the current node and switch to the "inTable" phase.

        When the current node is html (asserted to be the innerHTML case)
        only a parse error is reported.
        """
        if not self.ignoreEndTagColgroup():
            self.tree.openElements.pop()
            self.parser.phase = self.parser.phases["inTable"]
            return
        # innerHTML case
        assert self.parser.innerHTML
        self.parser.parseError()

    def endTagCol(self, token):
        """Report that col has no end tag."""
        self.parser.parseError("no-end-tag", {"name": "col"})

    def endTagOther(self, token):
        """Close the column group; return the token unless that was ignored."""
        colgroup_closable = not self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        return token if colgroup_closable else None
| |
class InTableBodyPhase(Phase):
    """Phase registered under the "inTableBody" key.

    Handles tokens seen while a tbody/thead/tfoot is the table section in
    progress; anything without a dedicated handler is delegated to the
    "inTable" phase.
    """
    # http://www.whatwg.org/specs/web-apps/current-work/#in-table0

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        section_closing_tags = ("caption", "col", "colgroup", "tbody",
                                "tfoot", "thead")
        start_dispatch = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("tr", self.startTagTr),
            (("td", "th"), self.startTagTableCell),
            (section_closing_tags, self.startTagTableOther)
        ])
        start_dispatch.default = self.startTagOther
        self.startTagHandler = start_dispatch

        ignored_end_tags = ("body", "caption", "col", "colgroup", "html",
                            "td", "th", "tr")
        end_dispatch = _utils.MethodDispatcher([
            (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
            ("table", self.endTagTable),
            (ignored_end_tags, self.endTagIgnore)
        ])
        end_dispatch.default = self.endTagOther
        self.endTagHandler = end_dispatch

    # helper methods
    def clearStackToTableBodyContext(self):
        """Pop open elements until the current node is a row group or html.

        Popped elements are discarded without reporting a parse error.
        Ending on html is asserted to be the innerHTML case.
        """
        stop_names = ("tbody", "tfoot", "thead", "html")
        while self.tree.openElements[-1].name not in stop_names:
            self.tree.openElements.pop()
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML

    # the rest
    def processEOF(self):
        """Let the "inTable" phase handle end of file."""
        table_phase = self.parser.phases["inTable"]
        table_phase.processEOF()

    def processSpaceCharacters(self, token):
        """Let the "inTable" phase handle space characters."""
        table_phase = self.parser.phases["inTable"]
        return table_phase.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Let the "inTable" phase handle character tokens."""
        table_phase = self.parser.phases["inTable"]
        return table_phase.processCharacters(token)

    def startTagTr(self, token):
        """Clear back to the row group, insert the row, switch to "inRow"."""
        self.clearStackToTableBodyContext()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inRow"]

    def startTagTableCell(self, token):
        """Imply a ``<tr>`` for a stray cell and return the cell token."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-cell-in-table-body", details)
        self.startTagTr(impliedTagToken("tr", "StartTag"))
        return token

    def startTagTableOther(self, token):
        """Close the open row group, then return the token for reprocessing.

        When no tbody/thead/tfoot is in table scope (asserted to be the
        innerHTML case) only a parse error is reported.
        """
        # XXX AT Any ideas on how to share this with endTagTable?
        group_in_scope = any(
            self.tree.elementInScope(group, variant="table")
            for group in ("tbody", "thead", "tfoot"))
        if not group_in_scope:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return None

        self.clearStackToTableBodyContext()
        current_name = self.tree.openElements[-1].name
        self.endTagTableRowGroup(impliedTagToken(current_name))
        return token

    def startTagOther(self, token):
        """Let the "inTable" phase handle any other start tag."""
        table_phase = self.parser.phases["inTable"]
        return table_phase.processStartTag(token)

    def endTagTableRowGroup(self, token):
        """Close the named row group and switch to the "inTable" phase.

        Reports a parse error instead when that element is not in table
        scope.
        """
        group_name = token["name"]
        if not self.tree.elementInScope(group_name, variant="table"):
            self.parser.parseError("unexpected-end-tag-in-table-body",
                                   {"name": group_name})
            return
        self.clearStackToTableBodyContext()
        self.tree.openElements.pop()
        self.parser.phase = self.parser.phases["inTable"]

    def endTagTable(self, token):
        """Close the open row group, then return the token for reprocessing.

        When no tbody/thead/tfoot is in table scope (asserted to be the
        innerHTML case) only a parse error is reported.
        """
        group_in_scope = any(
            self.tree.elementInScope(group, variant="table")
            for group in ("tbody", "thead", "tfoot"))
        if not group_in_scope:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return None

        self.clearStackToTableBodyContext()
        current_name = self.tree.openElements[-1].name
        self.endTagTableRowGroup(impliedTagToken(current_name))
        return token

    def endTagIgnore(self, token):
        """Report the end tag as unexpected and drop it."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-in-table-body", details)

    def endTagOther(self, token):
        """Let the "inTable" phase handle any other end tag."""
        table_phase = self.parser.phases["inTable"]
        return table_phase.processEndTag(token)
| |
class InRowPhase(Phase):
    """Phase registered under the "inRow" key.

    Cells open the "inCell" phase; table-structure tags close the row
    first; anything without a dedicated handler is delegated to the
    "inTable" phase.
    """
    # http://www.whatwg.org/specs/web-apps/current-work/#in-row

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        row_closing_tags = ("caption", "col", "colgroup", "tbody", "tfoot",
                            "thead", "tr")
        start_dispatch = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            (("td", "th"), self.startTagTableCell),
            (row_closing_tags, self.startTagTableOther)
        ])
        start_dispatch.default = self.startTagOther
        self.startTagHandler = start_dispatch

        ignored_end_tags = ("body", "caption", "col", "colgroup", "html",
                            "td", "th")
        end_dispatch = _utils.MethodDispatcher([
            ("tr", self.endTagTr),
            ("table", self.endTagTable),
            (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
            (ignored_end_tags, self.endTagIgnore)
        ])
        end_dispatch.default = self.endTagOther
        self.endTagHandler = end_dispatch

    # helper methods (XXX unify this with other table helper methods)
    def clearStackToTableRowContext(self):
        """Pop open elements until the current node is tr or html.

        A parse error is reported for every element popped.
        """
        stack = self.tree.openElements
        while stack[-1].name not in ("tr", "html"):
            self.parser.parseError("unexpected-implied-end-tag-in-table-row",
                                   {"name": stack[-1].name})
            stack.pop()

    def ignoreEndTagTr(self):
        """Return True when no tr element is in table scope."""
        row_in_scope = self.tree.elementInScope("tr", variant="table")
        return not row_in_scope

    # the rest
    def processEOF(self):
        """Let the "inTable" phase handle end of file."""
        table_phase = self.parser.phases["inTable"]
        table_phase.processEOF()

    def processSpaceCharacters(self, token):
        """Let the "inTable" phase handle space characters."""
        table_phase = self.parser.phases["inTable"]
        return table_phase.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Let the "inTable" phase handle character tokens."""
        table_phase = self.parser.phases["inTable"]
        return table_phase.processCharacters(token)

    def startTagTableCell(self, token):
        """Insert the cell, switch to "inCell" and push a formatting marker."""
        self.clearStackToTableRowContext()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inCell"]
        self.tree.activeFormattingElements.append(Marker)

    def startTagTableOther(self, token):
        """Close the row; return the token unless that close was ignored."""
        row_closable = not self.ignoreEndTagTr()
        self.endTagTr(impliedTagToken("tr"))
        # XXX how are we sure it's always ignored in the innerHTML case?
        return token if row_closable else None

    def startTagOther(self, token):
        """Let the "inTable" phase handle any other start tag."""
        table_phase = self.parser.phases["inTable"]
        return table_phase.processStartTag(token)

    def endTagTr(self, token):
        """Close the row and switch to the "inTableBody" phase.

        When no tr is in table scope (asserted to be the innerHTML case)
        only a parse error is reported.
        """
        if self.ignoreEndTagTr():
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return
        self.clearStackToTableRowContext()
        self.tree.openElements.pop()
        self.parser.phase = self.parser.phases["inTableBody"]

    def endTagTable(self, token):
        """Close the row; return the token unless that close was ignored."""
        row_closable = not self.ignoreEndTagTr()
        self.endTagTr(impliedTagToken("tr"))
        # Reprocess the current tag if the tr end tag was not ignored
        # XXX how are we sure it's always ignored in the innerHTML case?
        return token if row_closable else None

    def endTagTableRowGroup(self, token):
        """Close the row and return the token when the group is in scope.

        Reports a parse error instead when the named element is not in
        table scope.
        """
        if not self.tree.elementInScope(token["name"], variant="table"):
            self.parser.parseError()
            return None
        self.endTagTr(impliedTagToken("tr"))
        return token

    def endTagIgnore(self, token):
        """Report the end tag as unexpected and drop it."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-in-table-row", details)

    def endTagOther(self, token):
        """Let the "inTable" phase handle any other end tag."""
        table_phase = self.parser.phases["inTable"]
        return table_phase.processEndTag(token)
| |
class InCellPhase(Phase):
    """Phase registered under the "inCell" key.

    Table-structure tags close the open cell first; anything without a
    dedicated handler is delegated to the "inBody" phase.
    """
    # http://www.whatwg.org/specs/web-apps/current-work/#in-cell

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        cell_closing_tags = ("caption", "col", "colgroup", "tbody", "td",
                             "tfoot", "th", "thead", "tr")
        start_dispatch = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            (cell_closing_tags, self.startTagTableOther)
        ])
        start_dispatch.default = self.startTagOther
        self.startTagHandler = start_dispatch

        end_dispatch = _utils.MethodDispatcher([
            (("td", "th"), self.endTagTableCell),
            (("body", "caption", "col", "colgroup", "html"), self.endTagIgnore),
            (("table", "tbody", "tfoot", "thead", "tr"), self.endTagImply)
        ])
        end_dispatch.default = self.endTagOther
        self.endTagHandler = end_dispatch

    # helper
    def closeCell(self):
        """Close the td in table scope, or failing that the th, if any."""
        for cell_name in ("td", "th"):
            if self.tree.elementInScope(cell_name, variant="table"):
                self.endTagTableCell(impliedTagToken(cell_name))
                break

    # the rest
    def processEOF(self):
        """Let the "inBody" phase handle end of file."""
        body_phase = self.parser.phases["inBody"]
        body_phase.processEOF()

    def processCharacters(self, token):
        """Let the "inBody" phase handle character tokens."""
        body_phase = self.parser.phases["inBody"]
        return body_phase.processCharacters(token)

    def startTagTableOther(self, token):
        """Close the open cell, then return the token for reprocessing.

        When neither td nor th is in table scope (asserted to be the
        innerHTML case) only a parse error is reported.
        """
        cell_in_scope = any(
            self.tree.elementInScope(cell_name, variant="table")
            for cell_name in ("td", "th"))
        if not cell_in_scope:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return None
        self.closeCell()
        return token

    def startTagOther(self, token):
        """Let the "inBody" phase handle any other start tag."""
        body_phase = self.parser.phases["inBody"]
        return body_phase.processStartTag(token)

    def endTagTableCell(self, token):
        """Close the named cell and switch to the "inRow" phase.

        Reports a parse error instead when the cell is not in table scope.
        """
        cell_name = token["name"]
        if not self.tree.elementInScope(cell_name, variant="table"):
            self.parser.parseError("unexpected-end-tag", {"name": cell_name})
            return

        self.tree.generateImpliedEndTags(cell_name)
        if self.tree.openElements[-1].name == cell_name:
            self.tree.openElements.pop()
        else:
            self.parser.parseError("unexpected-cell-end-tag",
                                   {"name": cell_name})
            # Pop until the cell itself has been removed from the stack.
            while self.tree.openElements.pop().name != cell_name:
                pass
        self.tree.clearActiveFormattingElements()
        self.parser.phase = self.parser.phases["inRow"]

    def endTagIgnore(self, token):
        """Report the end tag as unexpected and drop it."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag", details)

    def endTagImply(self, token):
        """Close the cell and return the token when its element is in scope.

        Reports a parse error instead when the named element is not in
        table scope.
        """
        if not self.tree.elementInScope(token["name"], variant="table"):
            # sometimes innerHTML case
            self.parser.parseError()
            return None
        self.closeCell()
        return token

    def endTagOther(self, token):
        """Let the "inBody" phase handle any other end tag."""
        body_phase = self.parser.phases["inBody"]
        return body_phase.processEndTag(token)
| |
class InSelectPhase(Phase):
    """Phase registered under the "inSelect" key.

    Only option, optgroup, select, input-like and script tags have
    dedicated handling; every other tag is reported as a parse error.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        start_dispatch = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("option", self.startTagOption),
            ("optgroup", self.startTagOptgroup),
            ("select", self.startTagSelect),
            (("input", "keygen", "textarea"), self.startTagInput),
            ("script", self.startTagScript)
        ])
        start_dispatch.default = self.startTagOther
        self.startTagHandler = start_dispatch

        end_dispatch = _utils.MethodDispatcher([
            ("option", self.endTagOption),
            ("optgroup", self.endTagOptgroup),
            ("select", self.endTagSelect)
        ])
        end_dispatch.default = self.endTagOther
        self.endTagHandler = end_dispatch

    # http://www.whatwg.org/specs/web-apps/current-work/#in-select
    def processEOF(self):
        """Report "eof-in-select" unless the current node is html.

        The html case is asserted to be the innerHTML case.
        """
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML
        else:
            self.parser.parseError("eof-in-select")

    def processCharacters(self, token):
        """Insert the text unless the data is exactly U+0000."""
        text = token["data"]
        if text == "\u0000":
            return
        self.tree.insertText(text)

    def startTagOption(self, token):
        """Insert an option, first popping an option that is the current node."""
        # We need to imply </option> if <option> is the current node.
        stack = self.tree.openElements
        if stack[-1].name == "option":
            stack.pop()
        self.tree.insertElement(token)

    def startTagOptgroup(self, token):
        """Insert an optgroup, first popping a current option and/or optgroup."""
        stack = self.tree.openElements
        for implied_close in ("option", "optgroup"):
            if stack[-1].name == implied_close:
                stack.pop()
        self.tree.insertElement(token)

    def startTagSelect(self, token):
        """Treat a nested ``<select>`` as an implied ``</select>``."""
        self.parser.parseError("unexpected-select-in-select")
        self.endTagSelect(impliedTagToken("select"))

    def startTagInput(self, token):
        """Close the select and return the token for reprocessing.

        When no select is in select scope, the innerHTML case is asserted
        and nothing is returned.
        """
        self.parser.parseError("unexpected-input-in-select")
        if not self.tree.elementInScope("select", variant="select"):
            assert self.parser.innerHTML
            return None
        self.endTagSelect(impliedTagToken("select"))
        return token

    def startTagScript(self, token):
        """Let the "inHead" phase handle a script start tag."""
        head_phase = self.parser.phases["inHead"]
        return head_phase.processStartTag(token)

    def startTagOther(self, token):
        """Report any other start tag as a parse error and drop it."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag-in-select", details)

    def endTagOption(self, token):
        """Pop the current node if it is an option, else report an error."""
        stack = self.tree.openElements
        if stack[-1].name != "option":
            self.parser.parseError("unexpected-end-tag-in-select",
                                   {"name": "option"})
            return
        stack.pop()

    def endTagOptgroup(self, token):
        """Close an optgroup, together with an option directly inside it."""
        stack = self.tree.openElements
        # </optgroup> implicitly closes <option>
        option_inside_group = (stack[-1].name == "option" and
                               stack[-2].name == "optgroup")
        if option_inside_group:
            stack.pop()
        # It also closes </optgroup>
        if stack[-1].name == "optgroup":
            stack.pop()
        # But nothing else
        else:
            self.parser.parseError("unexpected-end-tag-in-select",
                                   {"name": "optgroup"})

    def endTagSelect(self, token):
        """Pop through the select element and reset the insertion mode.

        When no select is in select scope (asserted to be the innerHTML
        case) only a parse error is reported.
        """
        if not self.tree.elementInScope("select", variant="select"):
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return
        # Pop until the select element itself has been removed.
        while self.tree.openElements.pop().name != "select":
            pass
        self.parser.resetInsertionMode()

    def endTagOther(self, token):
        """Report any other end tag as a parse error and drop it."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-in-select", details)
| |
class InSelectInTablePhase(Phase):
    """Phase registered under the "inSelectInTable" key.

    Table-structure tags get special handling here; everything else is
    delegated to the "inSelect" phase.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        table_tags = ("caption", "table", "tbody", "tfoot", "thead", "tr",
                      "td", "th")

        start_dispatch = _utils.MethodDispatcher([
            (table_tags, self.startTagTable)
        ])
        start_dispatch.default = self.startTagOther
        self.startTagHandler = start_dispatch

        end_dispatch = _utils.MethodDispatcher([
            (table_tags, self.endTagTable)
        ])
        end_dispatch.default = self.endTagOther
        self.endTagHandler = end_dispatch

    def processEOF(self):
        """Let the "inSelect" phase handle end of file."""
        select_phase = self.parser.phases["inSelect"]
        select_phase.processEOF()

    def processCharacters(self, token):
        """Let the "inSelect" phase handle character tokens."""
        select_phase = self.parser.phases["inSelect"]
        return select_phase.processCharacters(token)

    def startTagTable(self, token):
        """Send an implied ``</select>`` and return the token for reprocessing."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-table-element-start-tag-in-select-in-table", details)
        self.endTagOther(impliedTagToken("select"))
        return token

    def startTagOther(self, token):
        """Let the "inSelect" phase handle any other start tag."""
        select_phase = self.parser.phases["inSelect"]
        return select_phase.processStartTag(token)

    def endTagTable(self, token):
        """Report an error; close the select if the named element is in scope.

        The token is returned for reprocessing only when the named element
        is in table scope; otherwise it is dropped.
        """
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-table-element-end-tag-in-select-in-table", details)
        if not self.tree.elementInScope(token["name"], variant="table"):
            return None
        self.endTagOther(impliedTagToken("select"))
        return token

    def endTagOther(self, token):
        """Let the "inSelect" phase handle any other end tag."""
        select_phase = self.parser.phases["inSelect"]
        return select_phase.processEndTag(token)
| |
class InForeignContentPhase(Phase):
    """Phase registered under the "inForeignContent" key.

    Handles tokens while the current node is in a namespace other than the
    tree's default one.
    """

    # Start tag names that force a return to the default-namespace context.
    breakoutElements = frozenset([
        "b", "big", "blockquote", "body", "br",
        "center", "code", "dd", "div", "dl", "dt",
        "em", "embed", "h1", "h2", "h3",
        "h4", "h5", "h6", "head", "hr", "i", "img",
        "li", "listing", "menu", "meta", "nobr",
        "ol", "p", "pre", "ruby", "s", "small",
        "span", "strong", "strike", "sub", "sup",
        "table", "tt", "u", "ul", "var"])

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

    def adjustSVGTagNames(self, token):
        """Rewrite a lowercased SVG tag name to its mixed-case form in place.

        Names without an entry in the table are left untouched.
        """
        replacements = {
            "altglyph": "altGlyph",
            "altglyphdef": "altGlyphDef",
            "altglyphitem": "altGlyphItem",
            "animatecolor": "animateColor",
            "animatemotion": "animateMotion",
            "animatetransform": "animateTransform",
            "clippath": "clipPath",
            "feblend": "feBlend",
            "fecolormatrix": "feColorMatrix",
            "fecomponenttransfer": "feComponentTransfer",
            "fecomposite": "feComposite",
            "feconvolvematrix": "feConvolveMatrix",
            "fediffuselighting": "feDiffuseLighting",
            "fedisplacementmap": "feDisplacementMap",
            "fedistantlight": "feDistantLight",
            "feflood": "feFlood",
            "fefunca": "feFuncA",
            "fefuncb": "feFuncB",
            "fefuncg": "feFuncG",
            "fefuncr": "feFuncR",
            "fegaussianblur": "feGaussianBlur",
            "feimage": "feImage",
            "femerge": "feMerge",
            "femergenode": "feMergeNode",
            "femorphology": "feMorphology",
            "feoffset": "feOffset",
            "fepointlight": "fePointLight",
            "fespecularlighting": "feSpecularLighting",
            "fespotlight": "feSpotLight",
            "fetile": "feTile",
            "feturbulence": "feTurbulence",
            "foreignobject": "foreignObject",
            "glyphref": "glyphRef",
            "lineargradient": "linearGradient",
            "radialgradient": "radialGradient",
            "textpath": "textPath",
        }

        adjusted = replacements.get(token["name"])
        if adjusted is not None:
            token["name"] = adjusted

    def processCharacters(self, token):
        """Handle character data in foreign content.

        Data that is exactly U+0000 is replaced with U+FFFD; otherwise any
        non-space character clears ``parser.framesetOK``.  The token is
        then passed to ``Phase.processCharacters``.
        """
        text = token["data"]
        if text == "\u0000":
            token["data"] = "\uFFFD"
        elif (self.parser.framesetOK and
              any(char not in spaceCharacters for char in text)):
            self.parser.framesetOK = False
        Phase.processCharacters(self, token)

    def processStartTag(self, token):
        """Handle a start tag in foreign content.

        Breakout elements (and ``font`` carrying a color, face or size
        attribute) pop the stack back to a default-namespace element or an
        integration point and return the token for reprocessing.  Any other
        tag is adjusted and inserted in the current node's namespace.
        """
        currentNode = self.tree.openElements[-1]
        tag_name = token["name"]
        breaks_out = (
            tag_name in self.breakoutElements or
            (tag_name == "font" and
             any(attr in token["data"]
                 for attr in ("color", "face", "size"))))

        if breaks_out:
            self.parser.parseError("unexpected-html-element-in-foreign-content",
                                   {"name": token["name"]})
            while (self.tree.openElements[-1].namespace !=
                   self.tree.defaultNamespace and
                   not self.parser.isHTMLIntegrationPoint(self.tree.openElements[-1]) and
                   not self.parser.isMathMLTextIntegrationPoint(self.tree.openElements[-1])):
                self.tree.openElements.pop()
            return token

        node_namespace = currentNode.namespace
        if node_namespace == namespaces["mathml"]:
            self.parser.adjustMathMLAttributes(token)
        elif node_namespace == namespaces["svg"]:
            self.adjustSVGTagNames(token)
            self.parser.adjustSVGAttributes(token)
        self.parser.adjustForeignAttributes(token)
        token["namespace"] = currentNode.namespace
        self.tree.insertElement(token)
        if token["selfClosing"]:
            self.tree.openElements.pop()
            token["selfClosingAcknowledged"] = True
        return None

    def processEndTag(self, token):
        """Handle an end tag in foreign content.

        Walks down the stack of open elements from the current node.  If a
        node whose ASCII-lowercased name equals the tag name is found, the
        stack is popped through that node and ``None`` is returned.  If a
        default-namespace node is reached first, the token is passed to the
        parser's current phase and that result is returned.
        """
        nodeIndex = len(self.tree.openElements) - 1
        node = self.tree.openElements[-1]
        if node.name.translate(asciiUpper2Lower) != token["name"]:
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

        while True:
            if node.name.translate(asciiUpper2Lower) == token["name"]:
                # XXX this isn't in the spec but it seems necessary
                if self.parser.phase == self.parser.phases["inTableText"]:
                    self.parser.phase.flushCharacters()
                    self.parser.phase = self.parser.phase.originalPhase
                # Pop everything above the matched node and the node itself.
                while self.tree.openElements.pop() != node:
                    assert self.tree.openElements
                return None

            nodeIndex -= 1
            node = self.tree.openElements[nodeIndex]
            if node.namespace == self.tree.defaultNamespace:
                return self.parser.phase.processEndTag(token)
| |
class AfterBodyPhase(Phase):
    """Phase registered under the "afterBody" key.

    Apart from comments, ``<html>`` and ``</html>``, every token is a parse
    error that switches the parser back to the "inBody" phase and is then
    reprocessed.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        start_dispatch = _utils.MethodDispatcher([
            ("html", self.startTagHtml)
        ])
        start_dispatch.default = self.startTagOther
        self.startTagHandler = start_dispatch

        end_dispatch = _utils.MethodDispatcher([("html", self.endTagHtml)])
        end_dispatch.default = self.endTagOther
        self.endTagHandler = end_dispatch

    def processEOF(self):
        """Do nothing at end of file."""
        # Stop parsing
        pass

    def processComment(self, token):
        """Insert the comment under the first element on the open stack."""
        # This is needed because data is to be appended to the <html> element
        # here and not to whatever is currently open.
        root_element = self.tree.openElements[0]
        self.tree.insertComment(token, root_element)

    def processCharacters(self, token):
        """Report an error, switch to "inBody" and return the token."""
        self.parser.parseError("unexpected-char-after-body")
        self.parser.phase = self.parser.phases["inBody"]
        return token

    def startTagHtml(self, token):
        """Let the "inBody" phase handle an html start tag."""
        body_phase = self.parser.phases["inBody"]
        return body_phase.processStartTag(token)

    def startTagOther(self, token):
        """Report an error, switch to "inBody" and return the token."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag-after-body", details)
        self.parser.phase = self.parser.phases["inBody"]
        return token

    def endTagHtml(self, name):
        """Switch to "afterAfterBody", or report an error in innerHTML mode."""
        if not self.parser.innerHTML:
            self.parser.phase = self.parser.phases["afterAfterBody"]
            return
        self.parser.parseError("unexpected-end-tag-after-body-innerhtml")

    def endTagOther(self, token):
        """Report an error, switch to "inBody" and return the token."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-after-body", details)
        self.parser.phase = self.parser.phases["inBody"]
        return token
| |
class InFramesetPhase(Phase):
    """Phase registered under the "inFrameset" key.

    Only frameset, frame, noframes and html tags are acted on; character
    data and every other tag are reported as parse errors and dropped.
    """
    # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        start_dispatch = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("frameset", self.startTagFrameset),
            ("frame", self.startTagFrame),
            ("noframes", self.startTagNoframes)
        ])
        start_dispatch.default = self.startTagOther
        self.startTagHandler = start_dispatch

        end_dispatch = _utils.MethodDispatcher([
            ("frameset", self.endTagFrameset)
        ])
        end_dispatch.default = self.endTagOther
        self.endTagHandler = end_dispatch

    def processEOF(self):
        """Report "eof-in-frameset" unless the current node is html.

        The html case is asserted to be the innerHTML case.
        """
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML
        else:
            self.parser.parseError("eof-in-frameset")

    def processCharacters(self, token):
        """Report character data as a parse error and drop it."""
        self.parser.parseError("unexpected-char-in-frameset")

    def startTagFrameset(self, token):
        """Insert the frameset element."""
        self.tree.insertElement(token)

    def startTagFrame(self, token):
        """Insert the frame element and pop it straight off the stack."""
        self.tree.insertElement(token)
        self.tree.openElements.pop()

    def startTagNoframes(self, token):
        """Let the "inBody" phase handle a noframes start tag."""
        body_phase = self.parser.phases["inBody"]
        return body_phase.processStartTag(token)

    def startTagOther(self, token):
        """Report any other start tag as a parse error and drop it."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag-in-frameset", details)

    def endTagFrameset(self, token):
        """Pop the current frameset and possibly move to "afterFrameset".

        When the current node is html only a parse error is reported and
        nothing is popped.  The phase check below runs in both cases.
        """
        stack = self.tree.openElements
        if stack[-1].name != "html":
            stack.pop()
        else:
            # innerHTML case
            self.parser.parseError("unexpected-frameset-in-frameset-innerhtml")

        # If we're not in innerHTML mode and the current node is not a
        # "frameset" element (anymore) then switch.
        if not self.parser.innerHTML and stack[-1].name != "frameset":
            self.parser.phase = self.parser.phases["afterFrameset"]

    def endTagOther(self, token):
        """Report any other end tag as a parse error and drop it."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-in-frameset", details)
| |
class AfterFramesetPhase(Phase):
    """Phase registered under the "afterFrameset" key.

    ``</html>`` moves to "afterAfterFrameset" and ``<noframes>`` goes to
    "inHead"; character data and other tags are parse errors and dropped.
    """
    # http://www.whatwg.org/specs/web-apps/current-work/#after3

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        start_dispatch = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("noframes", self.startTagNoframes)
        ])
        start_dispatch.default = self.startTagOther
        self.startTagHandler = start_dispatch

        end_dispatch = _utils.MethodDispatcher([
            ("html", self.endTagHtml)
        ])
        end_dispatch.default = self.endTagOther
        self.endTagHandler = end_dispatch

    def processEOF(self):
        """Do nothing at end of file."""
        # Stop parsing
        pass

    def processCharacters(self, token):
        """Report character data as a parse error and drop it."""
        self.parser.parseError("unexpected-char-after-frameset")

    def startTagNoframes(self, token):
        """Let the "inHead" phase handle a noframes start tag."""
        head_phase = self.parser.phases["inHead"]
        return head_phase.processStartTag(token)

    def startTagOther(self, token):
        """Report any other start tag as a parse error and drop it."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag-after-frameset", details)

    def endTagHtml(self, token):
        """Switch the parser to the "afterAfterFrameset" phase."""
        self.parser.phase = self.parser.phases["afterAfterFrameset"]

    def endTagOther(self, token):
        """Report any other end tag as a parse error and drop it."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-after-frameset", details)
| |
class AfterAfterBodyPhase(Phase):
    """Phase registered under the "afterAfterBody" key.

    Comments are attached to the document; non-space characters and any
    tag other than ``<html>`` are parse errors that switch back to the
    "inBody" phase and are then reprocessed.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        start_dispatch = _utils.MethodDispatcher([
            ("html", self.startTagHtml)
        ])
        start_dispatch.default = self.startTagOther
        self.startTagHandler = start_dispatch

    def processEOF(self):
        """Do nothing at end of file."""
        pass

    def processComment(self, token):
        """Insert the comment as a child of the document."""
        self.tree.insertComment(token, self.tree.document)

    def processSpaceCharacters(self, token):
        """Let the "inBody" phase handle space characters."""
        body_phase = self.parser.phases["inBody"]
        return body_phase.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Report an error, switch to "inBody" and return the token."""
        self.parser.parseError("expected-eof-but-got-char")
        self.parser.phase = self.parser.phases["inBody"]
        return token

    def startTagHtml(self, token):
        """Let the "inBody" phase handle an html start tag."""
        body_phase = self.parser.phases["inBody"]
        return body_phase.processStartTag(token)

    def startTagOther(self, token):
        """Report an error, switch to "inBody" and return the token."""
        details = {"name": token["name"]}
        self.parser.parseError("expected-eof-but-got-start-tag", details)
        self.parser.phase = self.parser.phases["inBody"]
        return token

    def processEndTag(self, token):
        """Report an error, switch to "inBody" and return the token."""
        details = {"name": token["name"]}
        self.parser.parseError("expected-eof-but-got-end-tag", details)
        self.parser.phase = self.parser.phases["inBody"]
        return token
| |
class AfterAfterFramesetPhase(Phase):
    """Phase registered under the "afterAfterFrameset" key.

    Comments are attached to the document and ``<noframes>`` goes to
    "inHead"; non-space characters, other start tags and all end tags are
    parse errors and dropped.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        start_dispatch = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("noframes", self.startTagNoFrames)
        ])
        start_dispatch.default = self.startTagOther
        self.startTagHandler = start_dispatch

    def processEOF(self):
        """Do nothing at end of file."""
        pass

    def processComment(self, token):
        """Insert the comment as a child of the document."""
        self.tree.insertComment(token, self.tree.document)

    def processSpaceCharacters(self, token):
        """Let the "inBody" phase handle space characters."""
        body_phase = self.parser.phases["inBody"]
        return body_phase.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Report character data as a parse error and drop it."""
        self.parser.parseError("expected-eof-but-got-char")

    def startTagHtml(self, token):
        """Let the "inBody" phase handle an html start tag."""
        body_phase = self.parser.phases["inBody"]
        return body_phase.processStartTag(token)

    def startTagNoFrames(self, token):
        """Let the "inHead" phase handle a noframes start tag."""
        head_phase = self.parser.phases["inHead"]
        return head_phase.processStartTag(token)

    def startTagOther(self, token):
        """Report any other start tag as a parse error and drop it."""
        details = {"name": token["name"]}
        self.parser.parseError("expected-eof-but-got-start-tag", details)

    def processEndTag(self, token):
        """Report any end tag as a parse error and drop it."""
        details = {"name": token["name"]}
        self.parser.parseError("expected-eof-but-got-end-tag", details)
| # pylint:enable=unused-argument |
| |
| return { |
| "initial": InitialPhase, |
| "beforeHtml": BeforeHtmlPhase, |
| "beforeHead": BeforeHeadPhase, |
| "inHead": InHeadPhase, |
| "inHeadNoscript": InHeadNoscriptPhase, |
| "afterHead": AfterHeadPhase, |
| "inBody": InBodyPhase, |
| "text": TextPhase, |
| "inTable": InTablePhase, |
| "inTableText": InTableTextPhase, |
| "inCaption": InCaptionPhase, |
| "inColumnGroup": InColumnGroupPhase, |
| "inTableBody": InTableBodyPhase, |
| "inRow": InRowPhase, |
| "inCell": InCellPhase, |
| "inSelect": InSelectPhase, |
| "inSelectInTable": InSelectInTablePhase, |
| "inForeignContent": InForeignContentPhase, |
| "afterBody": AfterBodyPhase, |
| "inFrameset": InFramesetPhase, |
| "afterFrameset": AfterFramesetPhase, |
| "afterAfterBody": AfterAfterBodyPhase, |
| "afterAfterFrameset": AfterAfterFramesetPhase, |
| # XXX after after frameset |
| } |
| |
| |
def adjust_attributes(token, replacements):
    """Rename a token's attributes according to a replacement table.

    ``token['data']`` is rebuilt as an ``OrderedDict`` only when at least
    one of its keys appears in ``replacements``; otherwise the token is
    left untouched.  Attribute order is preserved.

    :arg token: token dict whose ``'data'`` entry maps attribute names to
        values

    :arg replacements: mapping from attribute name to its replacement name
    """
    attributes = token['data']
    if not (viewkeys(attributes) & viewkeys(replacements)):
        return

    renamed = OrderedDict()
    for key, value in attributes.items():
        renamed[replacements.get(key, key)] = value
    token['data'] = renamed
| |
| |
def impliedTagToken(name, type="EndTag", attributes=None,
                    selfClosing=False):
    """Build a tag token dict for a tag that was not in the input.

    :arg name: the tag name

    :arg type: key into ``tokenTypes`` selecting the token type

    :arg attributes: attribute mapping stored under ``"data"``; a fresh
        empty dict is used when this is ``None``

    :arg selfClosing: value stored under ``"selfClosing"``

    :returns: dict with the keys ``"type"``, ``"name"``, ``"data"`` and
        ``"selfClosing"``
    """
    token = {
        "type": tokenTypes[type],
        "name": name,
        "data": {} if attributes is None else attributes,
        "selfClosing": selfClosing,
    }
    return token
| |
| |
class ParseError(Exception):
    """Exception type representing an error in a parsed document."""