| From 46046c0f7125911ff8205f09a7574573bb953105 Mon Sep 17 00:00:00 2001 |
| From: Geoffrey Sneddon <geoffers@gmail.com> |
| Date: Mon, 23 Nov 2015 15:17:07 +0000 |
| Subject: [PATCH 1/3] Make lxml tree-builder coerce comments to work with lxml |
| 3.5. |
| |
| --- |
| html5lib/ihatexml.py | 2 ++ |
| html5lib/treebuilders/etree_lxml.py | 2 +- |
| 2 files changed, 3 insertions(+), 1 deletion(-) |
| |
| diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py |
| index 0fc7930..b5b2e98 100644 |
| --- a/html5lib/ihatexml.py |
| +++ b/html5lib/ihatexml.py |
| @@ -225,6 +225,8 @@ def coerceComment(self, data): |
| while "--" in data: |
| warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning) |
| data = data.replace("--", "- -") |
| + if data.endswith("-"): |
| + data += " " |
| return data |
| |
| def coerceCharacters(self, data): |
| diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py |
| index 35d08ef..17007e3 100644 |
| --- a/html5lib/treebuilders/etree_lxml.py |
| +++ b/html5lib/treebuilders/etree_lxml.py |
| @@ -189,7 +189,7 @@ class TreeBuilder(_base.TreeBuilder): |
| |
| def __init__(self, namespaceHTMLElements, fullTree=False): |
| builder = etree_builders.getETreeModule(etree, fullTree=fullTree) |
| - infosetFilter = self.infosetFilter = ihatexml.InfosetFilter() |
| + infosetFilter = self.infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True) |
| self.namespaceHTMLElements = namespaceHTMLElements |
| |
| class Attributes(dict): |
| |
| From 1c22e1ce93dd4acc81a66cfa03cf9720fbd741c7 Mon Sep 17 00:00:00 2001 |
| From: Geoffrey Sneddon <geoffers@gmail.com> |
| Date: Mon, 23 Nov 2015 15:35:21 +0000 |
| Subject: [PATCH 2/3] fixup! Make lxml tree-builder coerce comments to work |
| with lxml 3.5. |
| |
| --- |
| html5lib/ihatexml.py | 1 + |
| html5lib/treebuilders/etree_lxml.py | 7 ++++--- |
| 2 files changed, 5 insertions(+), 3 deletions(-) |
| |
| diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py |
| index b5b2e98..5a81a12 100644 |
| --- a/html5lib/ihatexml.py |
| +++ b/html5lib/ihatexml.py |
| @@ -226,6 +226,7 @@ def coerceComment(self, data): |
| warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning) |
| data = data.replace("--", "- -") |
| if data.endswith("-"): |
| + warnings.warn("Comments cannot contain end in a dash", DataLossWarning) |
| data += " " |
| return data |
| |
| diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py |
| index 17007e3..c6c981f 100644 |
| --- a/html5lib/treebuilders/etree_lxml.py |
| +++ b/html5lib/treebuilders/etree_lxml.py |
| @@ -54,7 +54,7 @@ def _getChildNodes(self): |
| def testSerializer(element): |
| rv = [] |
| finalText = None |
| - infosetFilter = ihatexml.InfosetFilter() |
| + infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True) |
| |
| def serializeElement(element, indent=0): |
| if not hasattr(element, "tag"): |
| @@ -257,7 +257,7 @@ def _getData(self): |
| data = property(_getData, _setData) |
| |
| self.elementClass = Element |
| - self.commentClass = builder.Comment |
| + self.commentClass = Comment |
| # self.fragmentClass = builder.DocumentFragment |
| _base.TreeBuilder.__init__(self, namespaceHTMLElements) |
| |
| @@ -344,7 +344,8 @@ def insertRoot(self, token): |
| |
| # Append the initial comments: |
| for comment_token in self.initial_comments: |
| - root.addprevious(etree.Comment(comment_token["data"])) |
| + comment = self.commentClass(comment_token["data"]) |
| + root.addprevious(comment._element) |
| |
| # Create the root document and add the ElementTree to it |
| self.document = self.documentClass() |
| |
| From 235a6d7ac7e0a3e2b431766e051094c2d3110ba3 Mon Sep 17 00:00:00 2001 |
| From: Geoffrey Sneddon <geoffers@gmail.com> |
| Date: Mon, 23 Nov 2015 15:42:12 +0000 |
| Subject: [PATCH 3/3] fixup! Make lxml tree-builder coerce comments to work |
| with lxml 3.5. |
| |
| --- |
| html5lib/ihatexml.py | 2 +- |
| 1 file changed, 1 insertion(+), 1 deletion(-) |
| |
| diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py |
| index 5a81a12..5da5d93 100644 |
| --- a/html5lib/ihatexml.py |
| +++ b/html5lib/ihatexml.py |
| @@ -226,7 +226,7 @@ def coerceComment(self, data): |
| warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning) |
| data = data.replace("--", "- -") |
| if data.endswith("-"): |
| - warnings.warn("Comments cannot contain end in a dash", DataLossWarning) |
| + warnings.warn("Comments cannot end in a dash", DataLossWarning) |
| data += " " |
| return data |
| |