blob: fecfab9a4fb4fe11e306afe45e111a92d45bc7e3 [file] [log] [blame]
From 46046c0f7125911ff8205f09a7574573bb953105 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Mon, 23 Nov 2015 15:17:07 +0000
Subject: [PATCH 1/3] Make lxml tree-builder coerce comments to work with lxml
3.5.
---
html5lib/ihatexml.py | 2 ++
html5lib/treebuilders/etree_lxml.py | 2 +-
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py
index 0fc7930..b5b2e98 100644
--- a/html5lib/ihatexml.py
+++ b/html5lib/ihatexml.py
@@ -225,6 +225,8 @@ def coerceComment(self, data):
while "--" in data:
warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
data = data.replace("--", "- -")
+ if data.endswith("-"):
+ data += " "
return data
def coerceCharacters(self, data):
diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
index 35d08ef..17007e3 100644
--- a/html5lib/treebuilders/etree_lxml.py
+++ b/html5lib/treebuilders/etree_lxml.py
@@ -189,7 +189,7 @@ class TreeBuilder(_base.TreeBuilder):
def __init__(self, namespaceHTMLElements, fullTree=False):
builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
- infosetFilter = self.infosetFilter = ihatexml.InfosetFilter()
+ infosetFilter = self.infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
self.namespaceHTMLElements = namespaceHTMLElements
class Attributes(dict):
From 1c22e1ce93dd4acc81a66cfa03cf9720fbd741c7 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Mon, 23 Nov 2015 15:35:21 +0000
Subject: [PATCH 2/3] fixup! Make lxml tree-builder coerce comments to work
with lxml 3.5.
---
html5lib/ihatexml.py | 1 +
html5lib/treebuilders/etree_lxml.py | 7 ++++---
2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py
index b5b2e98..5a81a12 100644
--- a/html5lib/ihatexml.py
+++ b/html5lib/ihatexml.py
@@ -226,6 +226,7 @@ def coerceComment(self, data):
warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
data = data.replace("--", "- -")
if data.endswith("-"):
+ warnings.warn("Comments cannot contain end in a dash", DataLossWarning)
data += " "
return data
diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
index 17007e3..c6c981f 100644
--- a/html5lib/treebuilders/etree_lxml.py
+++ b/html5lib/treebuilders/etree_lxml.py
@@ -54,7 +54,7 @@ def _getChildNodes(self):
def testSerializer(element):
rv = []
finalText = None
- infosetFilter = ihatexml.InfosetFilter()
+ infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
def serializeElement(element, indent=0):
if not hasattr(element, "tag"):
@@ -257,7 +257,7 @@ def _getData(self):
data = property(_getData, _setData)
self.elementClass = Element
- self.commentClass = builder.Comment
+ self.commentClass = Comment
# self.fragmentClass = builder.DocumentFragment
_base.TreeBuilder.__init__(self, namespaceHTMLElements)
@@ -344,7 +344,8 @@ def insertRoot(self, token):
# Append the initial comments:
for comment_token in self.initial_comments:
- root.addprevious(etree.Comment(comment_token["data"]))
+ comment = self.commentClass(comment_token["data"])
+ root.addprevious(comment._element)
# Create the root document and add the ElementTree to it
self.document = self.documentClass()
From 235a6d7ac7e0a3e2b431766e051094c2d3110ba3 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Mon, 23 Nov 2015 15:42:12 +0000
Subject: [PATCH 3/3] fixup! Make lxml tree-builder coerce comments to work
with lxml 3.5.
---
html5lib/ihatexml.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py
index 5a81a12..5da5d93 100644
--- a/html5lib/ihatexml.py
+++ b/html5lib/ihatexml.py
@@ -226,7 +226,7 @@ def coerceComment(self, data):
warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
data = data.replace("--", "- -")
if data.endswith("-"):
- warnings.warn("Comments cannot contain end in a dash", DataLossWarning)
+ warnings.warn("Comments cannot end in a dash", DataLossWarning)
data += " "
return data