blob: 285559a872a65fbcac886f4aa0f1c3b4bf871ee2 [file] [log] [blame]
# IMPORTANT: the same tests are run from "test_xml_etree_c" in order
# to ensure consistency between the C implementation and the Python
# implementation.
#
# For this purpose, the module-level "ET" symbol is temporarily
# monkey-patched when running the "test_xml_etree_c" test suite.
Florent Xiclunaf15351d2010-03-13 23:24:31 +00007
import copy
import functools
import html
import io
import itertools
import locale
import operator
import os
import pickle
import sys
import textwrap
import types
import unittest
import warnings
import weakref

from functools import partial
from itertools import product, islice
from test import support
from test.support import os_helper
from test.support import warnings_helper
from test.support import findfile, gc_collect, swap_attr, swap_item
from test.support.import_helper import import_fresh_module
from test.support.os_helper import TESTFN
32
Armin Rigo9ed73062005-12-14 18:10:45 +000033
# pyET is the pure-Python implementation.
#
# ET is pyET in test_xml_etree and is the C accelerated version in
# test_xml_etree_c.
pyET = None
ET = None

# Paths of the XML fixture files, looked up in the "xmltestdata" directory.
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
try:
    SIMPLE_XMLFILE.encode("utf-8")
except UnicodeEncodeError:
    # Skip the whole module when the fixture path cannot be encoded as UTF-8.
    raise unittest.SkipTest("filename is not encodable to utf8")
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata")

# NOTE(review): the nesting indentation inside the XML literals below was
# reconstructed as two spaces per level; the whitespace ends up in the
# text/tail of the parsed elements, so confirm it against the pristine file.

# Small document with nested <tag> elements carrying class/id attributes.
SAMPLE_XML = """\
<body>
  <tag class='a'>text</tag>
  <tag class='b' />
  <section>
    <tag class='b' id='inner'>subtext</tag>
  </section>
</body>
"""

# Stand-alone <section> fragment.
SAMPLE_SECTION = """\
<section>
  <tag class='b' id='inner'>subtext</tag>
  <nexttag />
  <nextsection>
    <tag />
  </nextsection>
</section>
"""

# Same shape as SAMPLE_XML but placed in a default namespace.
SAMPLE_XML_NS = """
<body xmlns="http://effbot.org/ns">
  <tag>text</tag>
  <tag />
  <section>
    <tag>subtext</tag>
  </section>
</body>
"""

# Two sibling subtrees, each declaring its own namespace prefix.
SAMPLE_XML_NS_ELEMS = """
<root>
<h:table xmlns:h="hello">
  <h:tr>
    <h:td>Apples</h:td>
    <h:td>Bananas</h:td>
  </h:tr>
</h:table>

<f:table xmlns:f="foo">
  <f:name>African Coffee Table</f:name>
  <f:width>80</f:width>
  <f:length>120</f:length>
</f:table>
</root>
"""

# Document that pulls in an external parameter entity and then references
# an entity (&entity;) that the document itself never declares.
ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
%user-entities;
]>
<document>&entity;</document>
"""

# Document whose general entity is declared as an external SYSTEM resource.
EXTERNAL_ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY entity SYSTEM "file:///non-existing-file.xml">
]>
<document>&entity;</document>
"""

# Document with an ATTLIST default attribute and an internal entity.
ATTLIST_XML = """\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE Foo [
<!ELEMENT foo (bar*)>
<!ELEMENT bar (#PCDATA)*>
<!ATTLIST bar xml:lang CDATA "eng">
<!ENTITY qux "quux">
]>
<foo>
<bar>&qux;</bar>
</foo>
"""

def checkwarnings(*filters, quiet=False):
    """Return a decorator that runs a test inside ``check_warnings``.

    *filters* and *quiet* are forwarded unchanged to
    ``warnings_helper.check_warnings``; that context manager is entered
    around every call of the decorated test.  The wrapper keeps the
    metadata of the wrapped test and passes its return value through.
    """
    def decorator(test):
        @functools.wraps(test)
        def newtest(*args, **kwargs):
            with warnings_helper.check_warnings(*filters, quiet=quiet):
                return test(*args, **kwargs)
        return newtest
    return decorator


class ModuleTest(unittest.TestCase):
    def test_sanity(self):
        # Import sanity: the test passes as long as these imports succeed,
        # so the imported names are intentionally left unused.

        from xml.etree import ElementTree
        from xml.etree import ElementInclude
        from xml.etree import ElementPath

    def test_all(self):
        # Check __all__ of the module under test (ET), treating the listed
        # module names as its origin and excluding HTML_EMPTY.
        names = ("xml.etree.ElementTree", "_elementtree")
        support.check__all__(self, ET, names, not_exported=("HTML_EMPTY",))


def serialize(elem, to_string=True, encoding='unicode', **options):
    """Write *elem* through ``ET.ElementTree.write`` into an in-memory buffer.

    A text buffer (``io.StringIO``) is used when *encoding* is ``'unicode'``
    and a bytes buffer (``io.BytesIO``) for any other encoding.  Extra
    keyword *options* are forwarded to ``write()``.

    Returns the buffer contents when *to_string* is true; otherwise returns
    the buffer object itself, rewound to position 0.
    """
    buffer = io.StringIO() if encoding == 'unicode' else io.BytesIO()
    ET.ElementTree(elem).write(buffer, encoding=encoding, **options)
    if to_string:
        return buffer.getvalue()
    buffer.seek(0)
    return buffer

def summarize_list(seq):
    """Return a list with the ``tag`` attribute of every item in *seq*, in order."""
    return [elem.tag for elem in seq]


class ElementTestCase:
    # Helper base for element tests.  It does not derive from
    # unittest.TestCase but calls assert* methods on self, so it is
    # presumably meant to be combined with a TestCase subclass.

    @classmethod
    def setUpClass(cls):
        # A set: collapses to a single entry when ET is pyET.
        cls.modules = {pyET, ET}

    def pickleRoundTrip(self, obj, name, dumper, loader, proto):
        """Pickle *obj* with one module and unpickle it with another.

        While dumping, ``sys.modules[name]`` is temporarily replaced by
        *dumper*; while loading, by *loader*.  *proto* is the pickle
        protocol.  Returns the unpickled object.  A ``PicklingError`` is
        re-raised as ``support.TestFailed`` naming both modules.
        """
        try:
            with swap_item(sys.modules, name, dumper):
                temp = pickle.dumps(obj, proto)
            with swap_item(sys.modules, name, loader):
                result = pickle.loads(temp)
        except pickle.PicklingError as pe:
            # pyET must be second, because pyET may be (equal to) ET.
            human = {ET: "cET", pyET: "pyET"}
            raise support.TestFailed("Failed to round-trip %r from %r to %r"
                                     % (obj,
                                        human.get(dumper, dumper),
                                        human.get(loader, loader))) from pe
        return result

    def assertEqualElements(self, alice, bob):
        """Assert that two elements are recursively equal.

        Both must be instances of ``ET.Element`` or ``pyET.Element``, have
        the same number of children, pairwise-equal children, and equal
        ``tag``, ``tail``, ``text`` and ``attrib``.
        """
        self.assertIsInstance(alice, (ET.Element, pyET.Element))
        self.assertIsInstance(bob, (ET.Element, pyET.Element))
        self.assertEqual(len(list(alice)), len(list(bob)))
        for x, y in zip(alice, bob):
            self.assertEqualElements(x, y)
        properties = operator.attrgetter('tag', 'tail', 'text', 'attrib')
        self.assertEqual(properties(alice), properties(bob))

# --------------------------------------------------------------------
# element tree tests

Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200196class ElementTreeTest(unittest.TestCase):
Armin Rigo9ed73062005-12-14 18:10:45 +0000197
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200198 def serialize_check(self, elem, expected):
199 self.assertEqual(serialize(elem), expected)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000200
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200201 def test_interface(self):
202 # Test element tree interface.
Armin Rigo9ed73062005-12-14 18:10:45 +0000203
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200204 def check_string(string):
205 len(string)
206 for char in string:
207 self.assertEqual(len(char), 1,
208 msg="expected one-character string, got %r" % char)
209 new_string = string + ""
210 new_string = string + " "
211 string[:0]
Armin Rigo9ed73062005-12-14 18:10:45 +0000212
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200213 def check_mapping(mapping):
214 len(mapping)
215 keys = mapping.keys()
216 items = mapping.items()
217 for key in keys:
218 item = mapping[key]
219 mapping["key"] = "value"
220 self.assertEqual(mapping["key"], "value",
221 msg="expected value string, got %r" % mapping["key"])
Armin Rigo9ed73062005-12-14 18:10:45 +0000222
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200223 def check_element(element):
224 self.assertTrue(ET.iselement(element), msg="not an element")
Serhiy Storchakadde08152015-11-25 15:28:13 +0200225 direlem = dir(element)
226 for attr in 'tag', 'attrib', 'text', 'tail':
227 self.assertTrue(hasattr(element, attr),
228 msg='no %s member' % attr)
229 self.assertIn(attr, direlem,
230 msg='no %s visible by dir' % attr)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000231
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200232 check_string(element.tag)
233 check_mapping(element.attrib)
234 if element.text is not None:
235 check_string(element.text)
236 if element.tail is not None:
237 check_string(element.tail)
238 for elem in element:
239 check_element(elem)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000240
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200241 element = ET.Element("tag")
242 check_element(element)
243 tree = ET.ElementTree(element)
244 check_element(tree.getroot())
245 element = ET.Element("t\xe4g", key="value")
246 tree = ET.ElementTree(element)
247 self.assertRegex(repr(element), r"^<Element 't\xe4g' at 0x.*>$")
248 element = ET.Element("tag", key="value")
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000249
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200250 # Make sure all standard element methods exist.
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000251
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200252 def check_method(method):
253 self.assertTrue(hasattr(method, '__call__'),
254 msg="%s not callable" % method)
Armin Rigo9ed73062005-12-14 18:10:45 +0000255
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200256 check_method(element.append)
257 check_method(element.extend)
258 check_method(element.insert)
259 check_method(element.remove)
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200260 check_method(element.find)
261 check_method(element.iterfind)
262 check_method(element.findall)
263 check_method(element.findtext)
264 check_method(element.clear)
265 check_method(element.get)
266 check_method(element.set)
267 check_method(element.keys)
268 check_method(element.items)
269 check_method(element.iter)
270 check_method(element.itertext)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000271
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200272 # These methods return an iterable. See bug 6472.
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000273
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200274 def check_iter(it):
275 check_method(it.__next__)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000276
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200277 check_iter(element.iterfind("tag"))
278 check_iter(element.iterfind("*"))
279 check_iter(tree.iterfind("tag"))
280 check_iter(tree.iterfind("*"))
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000281
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200282 # These aliases are provided:
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000283
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200284 self.assertEqual(ET.XML, ET.fromstring)
285 self.assertEqual(ET.PI, ET.ProcessingInstruction)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000286
Serhiy Storchakab6aa5372015-11-23 08:42:25 +0200287 def test_set_attribute(self):
288 element = ET.Element('tag')
289
290 self.assertEqual(element.tag, 'tag')
291 element.tag = 'Tag'
292 self.assertEqual(element.tag, 'Tag')
293 element.tag = 'TAG'
294 self.assertEqual(element.tag, 'TAG')
295
296 self.assertIsNone(element.text)
297 element.text = 'Text'
298 self.assertEqual(element.text, 'Text')
299 element.text = 'TEXT'
300 self.assertEqual(element.text, 'TEXT')
301
302 self.assertIsNone(element.tail)
303 element.tail = 'Tail'
304 self.assertEqual(element.tail, 'Tail')
305 element.tail = 'TAIL'
306 self.assertEqual(element.tail, 'TAIL')
307
308 self.assertEqual(element.attrib, {})
309 element.attrib = {'a': 'b', 'c': 'd'}
310 self.assertEqual(element.attrib, {'a': 'b', 'c': 'd'})
311 element.attrib = {'A': 'B', 'C': 'D'}
312 self.assertEqual(element.attrib, {'A': 'B', 'C': 'D'})
313
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200314 def test_simpleops(self):
315 # Basic method sanity checks.
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000316
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200317 elem = ET.XML("<body><tag/></body>")
318 self.serialize_check(elem, '<body><tag /></body>')
319 e = ET.Element("tag2")
320 elem.append(e)
321 self.serialize_check(elem, '<body><tag /><tag2 /></body>')
322 elem.remove(e)
323 self.serialize_check(elem, '<body><tag /></body>')
324 elem.insert(0, e)
325 self.serialize_check(elem, '<body><tag2 /><tag /></body>')
326 elem.remove(e)
327 elem.extend([e])
328 self.serialize_check(elem, '<body><tag /><tag2 /></body>')
329 elem.remove(e)
Alex Prengère51a85dd2021-03-30 23:11:29 +0200330 elem.extend(iter([e]))
331 self.serialize_check(elem, '<body><tag /><tag2 /></body>')
332 elem.remove(e)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000333
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200334 element = ET.Element("tag", key="value")
335 self.serialize_check(element, '<tag key="value" />') # 1
336 subelement = ET.Element("subtag")
337 element.append(subelement)
338 self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 2
339 element.insert(0, subelement)
340 self.serialize_check(element,
341 '<tag key="value"><subtag /><subtag /></tag>') # 3
342 element.remove(subelement)
343 self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 4
344 element.remove(subelement)
345 self.serialize_check(element, '<tag key="value" />') # 5
346 with self.assertRaises(ValueError) as cm:
347 element.remove(subelement)
348 self.assertEqual(str(cm.exception), 'list.remove(x): x not in list')
349 self.serialize_check(element, '<tag key="value" />') # 6
350 element[0:0] = [subelement, subelement, subelement]
351 self.serialize_check(element[1], '<subtag />')
352 self.assertEqual(element[1:9], [element[1], element[2]])
353 self.assertEqual(element[:9:2], [element[0], element[2]])
354 del element[1:2]
355 self.serialize_check(element,
356 '<tag key="value"><subtag /><subtag /></tag>')
Florent Xiclunaa72a98f2012-02-13 11:03:30 +0100357
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200358 def test_cdata(self):
359 # Test CDATA handling (etc).
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000360
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200361 self.serialize_check(ET.XML("<tag>hello</tag>"),
362 '<tag>hello</tag>')
363 self.serialize_check(ET.XML("<tag>&#104;&#101;&#108;&#108;&#111;</tag>"),
364 '<tag>hello</tag>')
365 self.serialize_check(ET.XML("<tag><![CDATA[hello]]></tag>"),
366 '<tag>hello</tag>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000367
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200368 def test_file_init(self):
369 stringfile = io.BytesIO(SAMPLE_XML.encode("utf-8"))
370 tree = ET.ElementTree(file=stringfile)
371 self.assertEqual(tree.find("tag").tag, 'tag')
372 self.assertEqual(tree.find("section/tag").tag, 'tag')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000373
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200374 tree = ET.ElementTree(file=SIMPLE_XMLFILE)
375 self.assertEqual(tree.find("element").tag, 'element')
376 self.assertEqual(tree.find("element/../empty-element").tag,
377 'empty-element')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000378
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200379 def test_path_cache(self):
380 # Check that the path cache behaves sanely.
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000381
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200382 from xml.etree import ElementPath
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000383
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200384 elem = ET.XML(SAMPLE_XML)
385 for i in range(10): ET.ElementTree(elem).find('./'+str(i))
386 cache_len_10 = len(ElementPath._cache)
387 for i in range(10): ET.ElementTree(elem).find('./'+str(i))
388 self.assertEqual(len(ElementPath._cache), cache_len_10)
389 for i in range(20): ET.ElementTree(elem).find('./'+str(i))
390 self.assertGreater(len(ElementPath._cache), cache_len_10)
391 for i in range(600): ET.ElementTree(elem).find('./'+str(i))
392 self.assertLess(len(ElementPath._cache), 500)
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000393
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200394 def test_copy(self):
395 # Test copy handling (etc).
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000396
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200397 import copy
398 e1 = ET.XML("<tag>hello<foo/></tag>")
399 e2 = copy.copy(e1)
400 e3 = copy.deepcopy(e1)
401 e1.find("foo").tag = "bar"
402 self.serialize_check(e1, '<tag>hello<bar /></tag>')
403 self.serialize_check(e2, '<tag>hello<bar /></tag>')
404 self.serialize_check(e3, '<tag>hello<foo /></tag>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000405
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200406 def test_attrib(self):
407 # Test attribute handling.
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000408
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200409 elem = ET.Element("tag")
410 elem.get("key") # 1.1
411 self.assertEqual(elem.get("key", "default"), 'default') # 1.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000412
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200413 elem.set("key", "value")
414 self.assertEqual(elem.get("key"), 'value') # 1.3
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000415
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200416 elem = ET.Element("tag", key="value")
417 self.assertEqual(elem.get("key"), 'value') # 2.1
418 self.assertEqual(elem.attrib, {'key': 'value'}) # 2.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000419
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200420 attrib = {"key": "value"}
421 elem = ET.Element("tag", attrib)
422 attrib.clear() # check for aliasing issues
423 self.assertEqual(elem.get("key"), 'value') # 3.1
424 self.assertEqual(elem.attrib, {'key': 'value'}) # 3.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000425
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200426 attrib = {"key": "value"}
427 elem = ET.Element("tag", **attrib)
428 attrib.clear() # check for aliasing issues
429 self.assertEqual(elem.get("key"), 'value') # 4.1
430 self.assertEqual(elem.attrib, {'key': 'value'}) # 4.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000431
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200432 elem = ET.Element("tag", {"key": "other"}, key="value")
433 self.assertEqual(elem.get("key"), 'value') # 5.1
434 self.assertEqual(elem.attrib, {'key': 'value'}) # 5.2
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000435
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200436 elem = ET.Element('test')
437 elem.text = "aa"
438 elem.set('testa', 'testval')
439 elem.set('testb', 'test2')
440 self.assertEqual(ET.tostring(elem),
441 b'<test testa="testval" testb="test2">aa</test>')
442 self.assertEqual(sorted(elem.keys()), ['testa', 'testb'])
443 self.assertEqual(sorted(elem.items()),
444 [('testa', 'testval'), ('testb', 'test2')])
445 self.assertEqual(elem.attrib['testb'], 'test2')
446 elem.attrib['testb'] = 'test1'
447 elem.attrib['testc'] = 'test2'
448 self.assertEqual(ET.tostring(elem),
449 b'<test testa="testval" testb="test1" testc="test2">aa</test>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000450
mefistotelis5fd81232020-04-12 14:51:58 +0200451 # Test preserving white space chars in attributes
Raymond Hettinger076366c2016-09-11 23:18:03 -0700452 elem = ET.Element('test')
453 elem.set('a', '\r')
454 elem.set('b', '\r\n')
455 elem.set('c', '\t\n\r ')
mefistotelis5fd81232020-04-12 14:51:58 +0200456 elem.set('d', '\n\n\r\r\t\t ')
Raymond Hettinger076366c2016-09-11 23:18:03 -0700457 self.assertEqual(ET.tostring(elem),
mefistotelis5fd81232020-04-12 14:51:58 +0200458 b'<test a="&#13;" b="&#13;&#10;" c="&#09;&#10;&#13; " d="&#10;&#10;&#13;&#13;&#09;&#09; " />')
Raymond Hettinger076366c2016-09-11 23:18:03 -0700459
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200460 def test_makeelement(self):
461 # Test makeelement handling.
Antoine Pitroub86680e2010-10-14 21:15:17 +0000462
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200463 elem = ET.Element("tag")
464 attrib = {"key": "value"}
465 subelem = elem.makeelement("subtag", attrib)
466 self.assertIsNot(subelem.attrib, attrib, msg="attrib aliasing")
467 elem.append(subelem)
468 self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000469
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200470 elem.clear()
471 self.serialize_check(elem, '<tag />')
472 elem.append(subelem)
473 self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
474 elem.extend([subelem, subelem])
475 self.serialize_check(elem,
476 '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>')
477 elem[:] = [subelem]
478 self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
479 elem[:] = tuple([subelem])
480 self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000481
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200482 def test_parsefile(self):
483 # Test parsing from file.
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000484
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200485 tree = ET.parse(SIMPLE_XMLFILE)
486 stream = io.StringIO()
487 tree.write(stream, encoding='unicode')
488 self.assertEqual(stream.getvalue(),
489 '<root>\n'
490 ' <element key="value">text</element>\n'
491 ' <element>text</element>tail\n'
492 ' <empty-element />\n'
493 '</root>')
494 tree = ET.parse(SIMPLE_NS_XMLFILE)
495 stream = io.StringIO()
496 tree.write(stream, encoding='unicode')
497 self.assertEqual(stream.getvalue(),
498 '<ns0:root xmlns:ns0="namespace">\n'
499 ' <ns0:element key="value">text</ns0:element>\n'
500 ' <ns0:element>text</ns0:element>tail\n'
501 ' <ns0:empty-element />\n'
502 '</ns0:root>')
Armin Rigo9ed73062005-12-14 18:10:45 +0000503
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200504 with open(SIMPLE_XMLFILE) as f:
505 data = f.read()
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000506
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200507 parser = ET.XMLParser()
508 self.assertRegex(parser.version, r'^Expat ')
509 parser.feed(data)
510 self.serialize_check(parser.close(),
511 '<root>\n'
512 ' <element key="value">text</element>\n'
513 ' <element>text</element>tail\n'
514 ' <empty-element />\n'
515 '</root>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000516
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200517 target = ET.TreeBuilder()
518 parser = ET.XMLParser(target=target)
519 parser.feed(data)
520 self.serialize_check(parser.close(),
521 '<root>\n'
522 ' <element key="value">text</element>\n'
523 ' <element>text</element>tail\n'
524 ' <empty-element />\n'
525 '</root>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000526
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200527 def test_parseliteral(self):
528 element = ET.XML("<html><body>text</body></html>")
529 self.assertEqual(ET.tostring(element, encoding='unicode'),
530 '<html><body>text</body></html>')
531 element = ET.fromstring("<html><body>text</body></html>")
532 self.assertEqual(ET.tostring(element, encoding='unicode'),
533 '<html><body>text</body></html>')
534 sequence = ["<html><body>", "text</bo", "dy></html>"]
535 element = ET.fromstringlist(sequence)
536 self.assertEqual(ET.tostring(element),
537 b'<html><body>text</body></html>')
538 self.assertEqual(b"".join(ET.tostringlist(element)),
539 b'<html><body>text</body></html>')
540 self.assertEqual(ET.tostring(element, "ascii"),
541 b"<?xml version='1.0' encoding='ascii'?>\n"
542 b"<html><body>text</body></html>")
543 _, ids = ET.XMLID("<html><body>text</body></html>")
544 self.assertEqual(len(ids), 0)
545 _, ids = ET.XMLID("<html><body id='body'>text</body></html>")
546 self.assertEqual(len(ids), 1)
547 self.assertEqual(ids["body"].tag, 'body')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000548
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200549 def test_iterparse(self):
550 # Test iterparse interface.
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000551
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200552 iterparse = ET.iterparse
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000553
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200554 context = iterparse(SIMPLE_XMLFILE)
555 action, elem = next(context)
556 self.assertEqual((action, elem.tag), ('end', 'element'))
557 self.assertEqual([(action, elem.tag) for action, elem in context], [
558 ('end', 'element'),
559 ('end', 'empty-element'),
560 ('end', 'root'),
561 ])
562 self.assertEqual(context.root.tag, 'root')
563
564 context = iterparse(SIMPLE_NS_XMLFILE)
565 self.assertEqual([(action, elem.tag) for action, elem in context], [
566 ('end', '{namespace}element'),
567 ('end', '{namespace}element'),
568 ('end', '{namespace}empty-element'),
569 ('end', '{namespace}root'),
570 ])
571
572 events = ()
573 context = iterparse(SIMPLE_XMLFILE, events)
574 self.assertEqual([(action, elem.tag) for action, elem in context], [])
575
576 events = ()
577 context = iterparse(SIMPLE_XMLFILE, events=events)
578 self.assertEqual([(action, elem.tag) for action, elem in context], [])
579
580 events = ("start", "end")
581 context = iterparse(SIMPLE_XMLFILE, events)
582 self.assertEqual([(action, elem.tag) for action, elem in context], [
583 ('start', 'root'),
584 ('start', 'element'),
585 ('end', 'element'),
586 ('start', 'element'),
587 ('end', 'element'),
588 ('start', 'empty-element'),
589 ('end', 'empty-element'),
590 ('end', 'root'),
591 ])
592
593 events = ("start", "end", "start-ns", "end-ns")
594 context = iterparse(SIMPLE_NS_XMLFILE, events)
Eli Bendersky23687042013-02-26 05:53:23 -0800595 self.assertEqual([(action, elem.tag) if action in ("start", "end")
596 else (action, elem)
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200597 for action, elem in context], [
598 ('start-ns', ('', 'namespace')),
599 ('start', '{namespace}root'),
600 ('start', '{namespace}element'),
601 ('end', '{namespace}element'),
602 ('start', '{namespace}element'),
603 ('end', '{namespace}element'),
604 ('start', '{namespace}empty-element'),
605 ('end', '{namespace}empty-element'),
606 ('end', '{namespace}root'),
607 ('end-ns', None),
608 ])
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000609
Eli Bendersky5dd40e52013-11-28 06:31:58 -0800610 events = ('start-ns', 'end-ns')
611 context = iterparse(io.StringIO(r"<root xmlns=''/>"), events)
612 res = [action for action, elem in context]
613 self.assertEqual(res, ['start-ns', 'end-ns'])
614
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200615 events = ("start", "end", "bogus")
Serhiy Storchakae3d4ec42015-11-23 15:44:03 +0200616 with open(SIMPLE_XMLFILE, "rb") as f:
617 with self.assertRaises(ValueError) as cm:
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200618 iterparse(f, events)
Serhiy Storchakae3d4ec42015-11-23 15:44:03 +0200619 self.assertFalse(f.closed)
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200620 self.assertEqual(str(cm.exception), "unknown event 'bogus'")
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000621
Hai Shideb01622020-07-06 20:29:49 +0800622 with warnings_helper.check_no_resource_warning(self):
Serhiy Storchakae3d4ec42015-11-23 15:44:03 +0200623 with self.assertRaises(ValueError) as cm:
624 iterparse(SIMPLE_XMLFILE, events)
625 self.assertEqual(str(cm.exception), "unknown event 'bogus'")
626 del cm
Serhiy Storchakae3d4ec42015-11-23 15:44:03 +0200627
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200628 source = io.BytesIO(
629 b"<?xml version='1.0' encoding='iso-8859-1'?>\n"
630 b"<body xmlns='http://&#233;ffbot.org/ns'\n"
631 b" xmlns:cl\xe9='http://effbot.org/ns'>text</body>\n")
632 events = ("start-ns",)
633 context = iterparse(source, events)
634 self.assertEqual([(action, elem) for action, elem in context], [
635 ('start-ns', ('', 'http://\xe9ffbot.org/ns')),
636 ('start-ns', ('cl\xe9', 'http://effbot.org/ns')),
637 ])
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000638
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200639 source = io.StringIO("<document />junk")
640 it = iterparse(source)
641 action, elem = next(it)
642 self.assertEqual((action, elem.tag), ('end', 'document'))
643 with self.assertRaises(ET.ParseError) as cm:
644 next(it)
645 self.assertEqual(str(cm.exception),
646 'junk after document element: line 1, column 12')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000647
Hai Shideb01622020-07-06 20:29:49 +0800648 self.addCleanup(os_helper.unlink, TESTFN)
Serhiy Storchakae3d4ec42015-11-23 15:44:03 +0200649 with open(TESTFN, "wb") as f:
650 f.write(b"<document />junk")
651 it = iterparse(TESTFN)
652 action, elem = next(it)
653 self.assertEqual((action, elem.tag), ('end', 'document'))
Hai Shideb01622020-07-06 20:29:49 +0800654 with warnings_helper.check_no_resource_warning(self):
Serhiy Storchakae3d4ec42015-11-23 15:44:03 +0200655 with self.assertRaises(ET.ParseError) as cm:
656 next(it)
657 self.assertEqual(str(cm.exception),
658 'junk after document element: line 1, column 12')
659 del cm, it
Serhiy Storchakae3d4ec42015-11-23 15:44:03 +0200660
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200661 def test_writefile(self):
662 elem = ET.Element("tag")
663 elem.text = "text"
664 self.serialize_check(elem, '<tag>text</tag>')
665 ET.SubElement(elem, "subtag").text = "subtext"
666 self.serialize_check(elem, '<tag>text<subtag>subtext</subtag></tag>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000667
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200668 # Test tag suppression
669 elem.tag = None
670 self.serialize_check(elem, 'text<subtag>subtext</subtag>')
671 elem.insert(0, ET.Comment("comment"))
672 self.serialize_check(elem,
673 'text<!--comment--><subtag>subtext</subtag>') # assumes 1.3
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000674
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200675 elem[0] = ET.PI("key", "value")
676 self.serialize_check(elem, 'text<?key value?><subtag>subtext</subtag>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000677
def test_custom_builder(self):
    # Test parser w. custom builder.
    #
    # XMLParser is given a list subclass as its target; each callback
    # appends a tuple, so the list ends up holding the ordered sequence of
    # parser events that the target chose to record.

    with open(SIMPLE_XMLFILE) as f:
        data = f.read()
    # Minimal target: records element start/end only and discards
    # character data.
    class Builder(list):
        def start(self, tag, attrib):
            self.append(("start", tag))
        def end(self, tag):
            self.append(("end", tag))
        def data(self, text):
            pass
    builder = Builder()
    parser = ET.XMLParser(target=builder)
    parser.feed(data)
    self.assertEqual(builder, [
        ('start', 'root'),
        ('start', 'element'),
        ('end', 'element'),
        ('start', 'element'),
        ('end', 'element'),
        ('start', 'empty-element'),
        ('end', 'empty-element'),
        ('end', 'root'),
        ])

    with open(SIMPLE_NS_XMLFILE) as f:
        data = f.read()
    # Extended target: additionally records processing instructions,
    # comments and namespace declaration start/end callbacks.
    class Builder(list):
        def start(self, tag, attrib):
            self.append(("start", tag))
        def end(self, tag):
            self.append(("end", tag))
        def data(self, text):
            pass
        def pi(self, target, data):
            self.append(("pi", target, data))
        def comment(self, data):
            self.append(("comment", data))
        def start_ns(self, prefix, uri):
            self.append(("start-ns", prefix, uri))
        def end_ns(self, prefix):
            self.append(("end-ns", prefix))
    builder = Builder()
    parser = ET.XMLParser(target=builder)
    parser.feed(data)
    self.assertEqual(builder, [
        ('pi', 'pi', 'data'),
        ('comment', ' comment '),
        ('start-ns', '', 'namespace'),
        ('start', '{namespace}root'),
        ('start', '{namespace}element'),
        ('end', '{namespace}element'),
        ('start', '{namespace}element'),
        ('end', '{namespace}element'),
        ('start', '{namespace}empty-element'),
        ('end', '{namespace}empty-element'),
        ('end', '{namespace}root'),
        ('end-ns', ''),
        ])
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000738
Stefan Behneldde3eeb2019-05-01 21:49:58 +0200739 def test_custom_builder_only_end_ns(self):
740 class Builder(list):
741 def end_ns(self, prefix):
742 self.append(("end-ns", prefix))
743
744 builder = Builder()
745 parser = ET.XMLParser(target=builder)
746 parser.feed(textwrap.dedent("""\
747 <?pi data?>
748 <!-- comment -->
749 <root xmlns='namespace' xmlns:p='pns' xmlns:a='ans'>
750 <a:element key='value'>text</a:element>
751 <p:element>text</p:element>tail
752 <empty-element/>
753 </root>
754 """))
755 self.assertEqual(builder, [
756 ('end-ns', 'a'),
757 ('end-ns', 'p'),
758 ('end-ns', ''),
759 ])
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000760
def test_children(self):
    # Test Element children iteration

    with open(SIMPLE_XMLFILE, "rb") as f:
        tree = ET.parse(f)
    # Iterating from the root element and from the tree itself must
    # visit the same elements, each summarized by its child tags.
    self.assertEqual([summarize_list(elem)
                      for elem in tree.getroot().iter()], [
            ['element', 'element', 'empty-element'],
            [],
            [],
            [],
        ])
    self.assertEqual([summarize_list(elem)
                      for elem in tree.iter()], [
            ['element', 'element', 'empty-element'],
            [],
            [],
            [],
        ])

    # Sequence protocol: length, slicing, slice deletion and slice
    # assignment operate on the element's direct children.
    elem = ET.XML(SAMPLE_XML)
    self.assertEqual(len(list(elem)), 3)
    self.assertEqual(len(list(elem[2])), 1)
    self.assertEqual(elem[:], list(elem))
    child1 = elem[0]
    child2 = elem[2]
    del elem[1:2]
    self.assertEqual(len(list(elem)), 2)
    self.assertEqual(child1, elem[0])
    self.assertEqual(child2, elem[1])
    # Swap the two remaining children through slice assignment.
    elem[0:2] = [child2, child1]
    self.assertEqual(child2, elem[0])
    self.assertEqual(child1, elem[1])
    self.assertNotEqual(child1, elem[0])
    elem.clear()
    self.assertEqual(list(elem), [])
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000797
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +0200798 def test_writestring(self):
799 elem = ET.XML("<html><body>text</body></html>")
800 self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
801 elem = ET.fromstring("<html><body>text</body></html>")
802 self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +0000803
Stefan Behnelb5d3cee2019-08-23 16:44:25 +0200804 def test_indent(self):
805 elem = ET.XML("<root></root>")
806 ET.indent(elem)
807 self.assertEqual(ET.tostring(elem), b'<root />')
808
809 elem = ET.XML("<html><body>text</body></html>")
810 ET.indent(elem)
811 self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>\n</html>')
812
813 elem = ET.XML("<html> <body>text</body> </html>")
814 ET.indent(elem)
815 self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>\n</html>')
816
817 elem = ET.XML("<html><body>text</body>tail</html>")
818 ET.indent(elem)
819 self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>tail</html>')
820
821 elem = ET.XML("<html><body><p>par</p>\n<p>text</p>\t<p><br/></p></body></html>")
822 ET.indent(elem)
823 self.assertEqual(
824 ET.tostring(elem),
825 b'<html>\n'
826 b' <body>\n'
827 b' <p>par</p>\n'
828 b' <p>text</p>\n'
829 b' <p>\n'
830 b' <br />\n'
831 b' </p>\n'
832 b' </body>\n'
833 b'</html>'
834 )
835
836 elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
837 ET.indent(elem)
838 self.assertEqual(
839 ET.tostring(elem),
840 b'<html>\n'
841 b' <body>\n'
842 b' <p>pre<br />post</p>\n'
843 b' <p>text</p>\n'
844 b' </body>\n'
845 b'</html>'
846 )
847
848 def test_indent_space(self):
849 elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
850 ET.indent(elem, space='\t')
851 self.assertEqual(
852 ET.tostring(elem),
853 b'<html>\n'
854 b'\t<body>\n'
855 b'\t\t<p>pre<br />post</p>\n'
856 b'\t\t<p>text</p>\n'
857 b'\t</body>\n'
858 b'</html>'
859 )
860
861 elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
862 ET.indent(elem, space='')
863 self.assertEqual(
864 ET.tostring(elem),
865 b'<html>\n'
866 b'<body>\n'
867 b'<p>pre<br />post</p>\n'
868 b'<p>text</p>\n'
869 b'</body>\n'
870 b'</html>'
871 )
872
873 def test_indent_space_caching(self):
874 elem = ET.XML("<html><body><p>par</p><p>text</p><p><br/></p><p /></body></html>")
875 ET.indent(elem)
876 self.assertEqual(
877 {el.tail for el in elem.iter()},
878 {None, "\n", "\n ", "\n "}
879 )
880 self.assertEqual(
881 {el.text for el in elem.iter()},
882 {None, "\n ", "\n ", "\n ", "par", "text"}
883 )
884 self.assertEqual(
885 len({el.tail for el in elem.iter()}),
886 len({id(el.tail) for el in elem.iter()}),
887 )
888
889 def test_indent_level(self):
890 elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
891 with self.assertRaises(ValueError):
892 ET.indent(elem, level=-1)
893 self.assertEqual(
894 ET.tostring(elem),
895 b"<html><body><p>pre<br />post</p><p>text</p></body></html>"
896 )
897
898 ET.indent(elem, level=2)
899 self.assertEqual(
900 ET.tostring(elem),
901 b'<html>\n'
902 b' <body>\n'
903 b' <p>pre<br />post</p>\n'
904 b' <p>text</p>\n'
905 b' </body>\n'
906 b' </html>'
907 )
908
909 elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
910 ET.indent(elem, level=1, space=' ')
911 self.assertEqual(
912 ET.tostring(elem),
913 b'<html>\n'
914 b' <body>\n'
915 b' <p>pre<br />post</p>\n'
916 b' <p>text</p>\n'
917 b' </body>\n'
918 b' </html>'
919 )
920
Bernt Røskar Brennaffca16e2019-04-14 10:07:02 +0200921 def test_tostring_default_namespace(self):
922 elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')
923 self.assertEqual(
924 ET.tostring(elem, encoding='unicode'),
925 '<ns0:body xmlns:ns0="http://effbot.org/ns"><ns0:tag /></ns0:body>'
926 )
927 self.assertEqual(
928 ET.tostring(elem, encoding='unicode', default_namespace='http://effbot.org/ns'),
929 '<body xmlns="http://effbot.org/ns"><tag /></body>'
930 )
931
932 def test_tostring_default_namespace_different_namespace(self):
933 elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')
934 self.assertEqual(
935 ET.tostring(elem, encoding='unicode', default_namespace='foobar'),
936 '<ns1:body xmlns="foobar" xmlns:ns1="http://effbot.org/ns"><ns1:tag /></ns1:body>'
937 )
938
939 def test_tostring_default_namespace_original_no_namespace(self):
940 elem = ET.XML('<body><tag/></body>')
941 EXPECTED_MSG = '^cannot use non-qualified names with default_namespace option$'
942 with self.assertRaisesRegex(ValueError, EXPECTED_MSG):
943 ET.tostring(elem, encoding='unicode', default_namespace='foobar')
944
945 def test_tostring_no_xml_declaration(self):
946 elem = ET.XML('<body><tag/></body>')
947 self.assertEqual(
948 ET.tostring(elem, encoding='unicode'),
949 '<body><tag /></body>'
950 )
951
952 def test_tostring_xml_declaration(self):
953 elem = ET.XML('<body><tag/></body>')
954 self.assertEqual(
955 ET.tostring(elem, encoding='utf8', xml_declaration=True),
956 b"<?xml version='1.0' encoding='utf8'?>\n<body><tag /></body>"
957 )
958
959 def test_tostring_xml_declaration_unicode_encoding(self):
960 elem = ET.XML('<body><tag/></body>')
961 preferredencoding = locale.getpreferredencoding()
962 self.assertEqual(
963 f"<?xml version='1.0' encoding='{preferredencoding}'?>\n<body><tag /></body>",
964 ET.tostring(elem, encoding='unicode', xml_declaration=True)
965 )
966
967 def test_tostring_xml_declaration_cases(self):
968 elem = ET.XML('<body><tag>ø</tag></body>')
969 preferredencoding = locale.getpreferredencoding()
970 TESTCASES = [
971 # (expected_retval, encoding, xml_declaration)
972 # ... xml_declaration = None
973 (b'<body><tag>&#248;</tag></body>', None, None),
974 (b'<body><tag>\xc3\xb8</tag></body>', 'UTF-8', None),
975 (b'<body><tag>&#248;</tag></body>', 'US-ASCII', None),
976 (b"<?xml version='1.0' encoding='ISO-8859-1'?>\n"
977 b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', None),
978 ('<body><tag>ø</tag></body>', 'unicode', None),
979
980 # ... xml_declaration = False
981 (b"<body><tag>&#248;</tag></body>", None, False),
982 (b"<body><tag>\xc3\xb8</tag></body>", 'UTF-8', False),
983 (b"<body><tag>&#248;</tag></body>", 'US-ASCII', False),
984 (b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', False),
985 ("<body><tag>ø</tag></body>", 'unicode', False),
986
987 # ... xml_declaration = True
988 (b"<?xml version='1.0' encoding='us-ascii'?>\n"
989 b"<body><tag>&#248;</tag></body>", None, True),
990 (b"<?xml version='1.0' encoding='UTF-8'?>\n"
991 b"<body><tag>\xc3\xb8</tag></body>", 'UTF-8', True),
992 (b"<?xml version='1.0' encoding='US-ASCII'?>\n"
993 b"<body><tag>&#248;</tag></body>", 'US-ASCII', True),
994 (b"<?xml version='1.0' encoding='ISO-8859-1'?>\n"
995 b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', True),
996 (f"<?xml version='1.0' encoding='{preferredencoding}'?>\n"
997 "<body><tag>ø</tag></body>", 'unicode', True),
998
999 ]
1000 for expected_retval, encoding, xml_declaration in TESTCASES:
1001 with self.subTest(f'encoding={encoding} '
1002 f'xml_declaration={xml_declaration}'):
1003 self.assertEqual(
1004 ET.tostring(
1005 elem,
1006 encoding=encoding,
1007 xml_declaration=xml_declaration
1008 ),
1009 expected_retval
1010 )
1011
1012 def test_tostringlist_default_namespace(self):
1013 elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')
1014 self.assertEqual(
1015 ''.join(ET.tostringlist(elem, encoding='unicode')),
1016 '<ns0:body xmlns:ns0="http://effbot.org/ns"><ns0:tag /></ns0:body>'
1017 )
1018 self.assertEqual(
1019 ''.join(ET.tostringlist(elem, encoding='unicode', default_namespace='http://effbot.org/ns')),
1020 '<body xmlns="http://effbot.org/ns"><tag /></body>'
1021 )
1022
1023 def test_tostringlist_xml_declaration(self):
1024 elem = ET.XML('<body><tag/></body>')
1025 self.assertEqual(
1026 ''.join(ET.tostringlist(elem, encoding='unicode')),
1027 '<body><tag /></body>'
1028 )
1029 self.assertEqual(
1030 b''.join(ET.tostringlist(elem, xml_declaration=True)),
1031 b"<?xml version='1.0' encoding='us-ascii'?>\n<body><tag /></body>"
1032 )
1033
1034 preferredencoding = locale.getpreferredencoding()
1035 stringlist = ET.tostringlist(elem, encoding='unicode', xml_declaration=True)
1036 self.assertEqual(
1037 ''.join(stringlist),
1038 f"<?xml version='1.0' encoding='{preferredencoding}'?>\n<body><tag /></body>"
1039 )
1040 self.assertRegex(stringlist[0], r"^<\?xml version='1.0' encoding='.+'?>")
1041 self.assertEqual(['<body', '>', '<tag', ' />', '</body>'], stringlist[1:])
1042
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03001043 def test_encoding(self):
1044 def check(encoding, body=''):
1045 xml = ("<?xml version='1.0' encoding='%s'?><xml>%s</xml>" %
1046 (encoding, body))
1047 self.assertEqual(ET.XML(xml.encode(encoding)).text, body)
1048 self.assertEqual(ET.XML(xml).text, body)
1049 check("ascii", 'a')
1050 check("us-ascii", 'a')
1051 check("iso-8859-1", '\xbd')
1052 check("iso-8859-15", '\u20ac')
1053 check("cp437", '\u221a')
1054 check("mac-roman", '\u02da')
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001055
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001056 def xml(encoding):
1057 return "<?xml version='1.0' encoding='%s'?><xml />" % encoding
1058 def bxml(encoding):
1059 return xml(encoding).encode(encoding)
1060 supported_encodings = [
1061 'ascii', 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le',
1062 'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5',
1063 'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
1064 'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16',
1065 'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852',
1066 'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862',
Serhiy Storchakabe0c3252013-11-23 18:52:23 +02001067 'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1125',
1068 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
1069 'cp1256', 'cp1257', 'cp1258',
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001070 'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2',
1071 'mac-roman', 'mac-turkish',
1072 'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004',
1073 'iso2022-jp-3', 'iso2022-jp-ext',
Serhiy Storchakaf0eeedf2015-05-12 23:24:19 +03001074 'koi8-r', 'koi8-t', 'koi8-u', 'kz1048',
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001075 'hz', 'ptcp154',
1076 ]
1077 for encoding in supported_encodings:
1078 self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'<xml />')
1079
1080 unsupported_ascii_compatible_encodings = [
1081 'big5', 'big5hkscs',
1082 'cp932', 'cp949', 'cp950',
1083 'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr',
1084 'gb2312', 'gbk', 'gb18030',
1085 'iso2022-kr', 'johab',
1086 'shift-jis', 'shift-jis-2004', 'shift-jisx0213',
1087 'utf-7',
1088 ]
1089 for encoding in unsupported_ascii_compatible_encodings:
1090 self.assertRaises(ValueError, ET.XML, bxml(encoding))
1091
1092 unsupported_ascii_incompatible_encodings = [
1093 'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140',
1094 'utf_32', 'utf_32_be', 'utf_32_le',
1095 ]
1096 for encoding in unsupported_ascii_incompatible_encodings:
1097 self.assertRaises(ET.ParseError, ET.XML, bxml(encoding))
1098
1099 self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii'))
1100 self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii'))
1101
def test_methods(self):
    # Test serialization methods.
    #
    # The same tree is serialized with the default method, method=None,
    # "xml", "html" and "text"; the first three are expected to agree.

    e = ET.XML("<html><link/><script>1 &lt; 2</script></html>")
    e.tail = "\n"
    self.assertEqual(serialize(e),
            '<html><link /><script>1 &lt; 2</script></html>\n')
    self.assertEqual(serialize(e, method=None),
            '<html><link /><script>1 &lt; 2</script></html>\n')
    self.assertEqual(serialize(e, method="xml"),
            '<html><link /><script>1 &lt; 2</script></html>\n')
    # HTML output: <link> has no end tag and <script> content is not
    # escaped.
    self.assertEqual(serialize(e, method="html"),
            '<html><link><script>1 < 2</script></html>\n')
    # Text output keeps character data (and the tail) only.
    self.assertEqual(serialize(e, method="text"), '1 < 2\n')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001116
def test_issue18347(self):
    # Tag case must be preserved by both the XML and the HTML serializer.
    e = ET.XML('<html><CamelCase>text</CamelCase></html>')
    self.assertEqual(serialize(e),
            '<html><CamelCase>text</CamelCase></html>')
    self.assertEqual(serialize(e, method="html"),
            '<html><CamelCase>text</CamelCase></html>')
1123
def test_entity(self):
    # Test entity handling.

    # 1) good entities
    #    A numeric character reference is resolved on parsing and written
    #    back as a decimal reference when the output encoding lacks it.

    e = ET.XML("<document title='&#x8230;'>test</document>")
    self.assertEqual(serialize(e, encoding="us-ascii"),
            b'<document title="&#33328;">test</document>')
    self.serialize_check(e, '<document title="\u8230">test</document>')

    # 2) bad entities
    #    The error message carries the position of the offending reference.

    with self.assertRaises(ET.ParseError) as cm:
        ET.XML("<document>&entity;</document>")
    self.assertEqual(str(cm.exception),
            'undefined entity: line 1, column 10')

    with self.assertRaises(ET.ParseError) as cm:
        ET.XML(ENTITY_XML)
    self.assertEqual(str(cm.exception),
            'undefined entity &entity;: line 5, column 10')

    # 3) custom entity
    #    Entities can be supplied through the parser's 'entity' mapping.

    parser = ET.XMLParser()
    parser.entity["entity"] = "text"
    parser.feed(ENTITY_XML)
    root = parser.close()
    self.serialize_check(root, '<document>text</document>')

    # 4) external (SYSTEM) entity
    #    Reported as undefined, like the unknown entity in case 2.

    with self.assertRaises(ET.ParseError) as cm:
        ET.XML(EXTERNAL_ENTITY_XML)
    self.assertEqual(str(cm.exception),
            'undefined entity &entity;: line 4, column 10')
1160
def test_namespace(self):
    # Test namespace issues.

    # 1) xml namespace

    elem = ET.XML("<tag xml:lang='en' />")
    self.serialize_check(elem, '<tag xml:lang="en" />') # 1.1

    # 2) other "well-known" namespaces
    #    Known URIs keep their conventional prefix; the SOAP URI used in
    #    2.3 falls back to a generated ns0 prefix.

    elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />")
    self.serialize_check(elem,
        '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />') # 2.1

    elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />")
    self.serialize_check(elem,
        '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />') # 2.2

    elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />")
    self.serialize_check(elem,
        '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />') # 2.3

    # 3) unknown namespaces
    # NOTE(review): the whitespace inside these literals has to match the
    # SAMPLE_XML_NS constant byte for byte; it is reproduced here exactly
    # as found - confirm against the definition of SAMPLE_XML_NS.
    elem = ET.XML(SAMPLE_XML_NS)
    self.serialize_check(elem,
        '<ns0:body xmlns:ns0="http://effbot.org/ns">\n'
        ' <ns0:tag>text</ns0:tag>\n'
        ' <ns0:tag />\n'
        ' <ns0:section>\n'
        ' <ns0:tag>subtext</ns0:tag>\n'
        ' </ns0:section>\n'
        '</ns0:body>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001193
def test_qname(self):
    # Test QName handling.

    # 1) decorated tags
    #    "{uri}tag" strings and QName objects are interchangeable as tags.

    elem = ET.Element("{uri}tag")
    self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.1
    elem = ET.Element(ET.QName("{uri}tag"))
    self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.2
    elem = ET.Element(ET.QName("uri", "tag"))
    self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.3
    elem = ET.Element(ET.QName("uri", "tag"))
    # Only the side effect of attaching the children is needed here.
    ET.SubElement(elem, ET.QName("uri", "tag1"))
    ET.SubElement(elem, ET.QName("uri", "tag2"))
    self.serialize_check(elem,
        '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>') # 1.4

    # 2) decorated attributes

    elem.clear()
    elem.attrib["{uri}key"] = "value"
    self.serialize_check(elem,
        '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.1

    elem.clear()
    elem.attrib[ET.QName("{uri}key")] = "value"
    self.serialize_check(elem,
        '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.2

    # 3) decorated values are not converted by default, but the
    # QName wrapper can be used for values

    elem.clear()
    elem.attrib["{uri}key"] = "{uri}value"
    self.serialize_check(elem,
        '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />') # 3.1

    elem.clear()
    elem.attrib["{uri}key"] = ET.QName("{uri}value")
    self.serialize_check(elem,
        '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />') # 3.2

    elem.clear()
    subelem = ET.Element("tag")
    subelem.attrib["{uri1}key"] = ET.QName("{uri2}value")
    # The same element object is appended twice on purpose, so it is
    # serialized twice.
    elem.append(subelem)
    elem.append(subelem)
    self.serialize_check(elem,
        '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2">'
        '<tag ns1:key="ns2:value" />'
        '<tag ns1:key="ns2:value" />'
        '</ns0:tag>') # 3.3

    # 4) Direct QName tests
    #    String conversion, and equality against QNames and plain strings.

    self.assertEqual(str(ET.QName('ns', 'tag')), '{ns}tag')
    self.assertEqual(str(ET.QName('{ns}tag')), '{ns}tag')
    q1 = ET.QName('ns', 'tag')
    q2 = ET.QName('ns', 'tag')
    self.assertEqual(q1, q2)
    q2 = ET.QName('ns', 'other-tag')
    self.assertNotEqual(q1, q2)
    self.assertNotEqual(q1, 'ns:tag')
    self.assertEqual(q1, '{ns}tag')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001258
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +02001259 def test_doctype_public(self):
1260 # Test PUBLIC doctype.
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001261
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +02001262 elem = ET.XML('<!DOCTYPE html PUBLIC'
1263 ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
1264 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
1265 '<html>text</html>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001266
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +02001267 def test_xpath_tokenizer(self):
1268 # Test the XPath tokenizer.
1269 from xml.etree import ElementPath
Stefan Behnel47541682019-05-03 20:58:16 +02001270 def check(p, expected, namespaces=None):
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +02001271 self.assertEqual([op or tag
Stefan Behnel47541682019-05-03 20:58:16 +02001272 for op, tag in ElementPath.xpath_tokenizer(p, namespaces)],
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +02001273 expected)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001274
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +02001275 # tests from the xml specification
1276 check("*", ['*'])
1277 check("text()", ['text', '()'])
1278 check("@name", ['@', 'name'])
1279 check("@*", ['@', '*'])
1280 check("para[1]", ['para', '[', '1', ']'])
1281 check("para[last()]", ['para', '[', 'last', '()', ']'])
1282 check("*/para", ['*', '/', 'para'])
1283 check("/doc/chapter[5]/section[2]",
1284 ['/', 'doc', '/', 'chapter', '[', '5', ']',
1285 '/', 'section', '[', '2', ']'])
1286 check("chapter//para", ['chapter', '//', 'para'])
1287 check("//para", ['//', 'para'])
1288 check("//olist/item", ['//', 'olist', '/', 'item'])
1289 check(".", ['.'])
1290 check(".//para", ['.', '//', 'para'])
1291 check("..", ['..'])
1292 check("../@lang", ['..', '/', '@', 'lang'])
1293 check("chapter[title]", ['chapter', '[', 'title', ']'])
1294 check("employee[@secretary and @assistant]", ['employee',
1295 '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']'])
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001296
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +02001297 # additional tests
Stefan Behnel88db8bd2019-05-09 07:22:47 +02001298 check("@{ns}attr", ['@', '{ns}attr'])
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +02001299 check("{http://spam}egg", ['{http://spam}egg'])
1300 check("./spam.egg", ['.', '/', 'spam.egg'])
1301 check(".//{http://spam}egg", ['.', '//', '{http://spam}egg'])
Stefan Behnel88db8bd2019-05-09 07:22:47 +02001302
1303 # wildcard tags
1304 check("{ns}*", ['{ns}*'])
1305 check("{}*", ['{}*'])
1306 check("{*}tag", ['{*}tag'])
1307 check("{*}*", ['{*}*'])
1308 check(".//{*}tag", ['.', '//', '{*}tag'])
1309
1310 # namespace prefix resolution
Stefan Behnel47541682019-05-03 20:58:16 +02001311 check("./xsd:type", ['.', '/', '{http://www.w3.org/2001/XMLSchema}type'],
1312 {'xsd': 'http://www.w3.org/2001/XMLSchema'})
Stefan Behnel88db8bd2019-05-09 07:22:47 +02001313 check("type", ['{http://www.w3.org/2001/XMLSchema}type'],
1314 {'': 'http://www.w3.org/2001/XMLSchema'})
1315 check("@xsd:type", ['@', '{http://www.w3.org/2001/XMLSchema}type'],
1316 {'xsd': 'http://www.w3.org/2001/XMLSchema'})
1317 check("@type", ['@', 'type'],
1318 {'': 'http://www.w3.org/2001/XMLSchema'})
1319 check("@{*}type", ['@', '{*}type'],
1320 {'': 'http://www.w3.org/2001/XMLSchema'})
1321 check("@{ns}attr", ['@', '{ns}attr'],
1322 {'': 'http://www.w3.org/2001/XMLSchema',
1323 'ns': 'http://www.w3.org/2001/XMLSchema'})
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001324
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +02001325 def test_processinginstruction(self):
1326 # Test ProcessingInstruction directly
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001327
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +02001328 self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')),
1329 b'<?test instruction?>')
1330 self.assertEqual(ET.tostring(ET.PI('test', 'instruction')),
1331 b'<?test instruction?>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001332
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +02001333 # Issue #2746
Antoine Pitrou99f69ee2010-02-09 17:25:47 +00001334
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +02001335 self.assertEqual(ET.tostring(ET.PI('test', '<testing&>')),
1336 b'<?test <testing&>?>')
1337 self.assertEqual(ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1'),
1338 b"<?xml version='1.0' encoding='latin-1'?>\n"
1339 b"<?test <testing&>\xe3?>")
Antoine Pitrou99f69ee2010-02-09 17:25:47 +00001340
def test_html_empty_elems_serialization(self):
    # issue 15970
    # from http://www.w3.org/TR/html401/index/elements.html
    #
    # Each listed element, upper- and lower-case, must serialize with
    # method='html' as a lone start tag, whether the source used the
    # self-closing or the explicit start/end form.
    for element in ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME', 'HR',
                    'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM']:
        for elem in [element, element.lower()]:
            expected = '<%s>' % elem
            serialized = serialize(ET.XML('<%s />' % elem), method='html')
            self.assertEqual(serialized, expected)
            serialized = serialize(ET.XML('<%s></%s>' % (elem,elem)),
                                   method='html')
            self.assertEqual(serialized, expected)
Antoine Pitrou99f69ee2010-02-09 17:25:47 +00001353
def test_dump_attribute_order(self):
    # See BPO 34160
    # ET.dump() must write attributes in the order they were passed
    # ('status' before 'company'), not sorted by name.
    e = ET.Element('cirriculum', status='public', company='example')
    with support.captured_stdout() as stdout:
        ET.dump(e)
    self.assertEqual(stdout.getvalue(),
                     '<cirriculum status="public" company="example" />\n')
1361
def test_tree_write_attribute_order(self):
    # See BPO 34160
    # Attribute order must be preserved by both the XML and the HTML
    # serializer.
    root = ET.Element('cirriculum', status='public', company='example')
    self.assertEqual(serialize(root),
                     '<cirriculum status="public" company="example" />')
    self.assertEqual(serialize(root, method='html'),
            '<cirriculum status="public" company="example"></cirriculum>')
Raymond Hettingere3685fd2018-10-28 11:18:22 -07001369
def test_attlist_default(self):
    # Test default attribute values; See BPO 42151.
    # The first child of the parsed root is expected to carry an
    # xml:lang attribute with the value 'eng'.
    root = ET.fromstring(ATTLIST_XML)
    self.assertEqual(root[0].attrib,
                     {'{http://www.w3.org/XML/1998/namespace}lang': 'eng'})
1375
Fredrik Lundh8911ca3d2005-12-16 22:07:17 +00001376
Eli Benderskyb5869342013-08-30 05:51:20 -07001377class XMLPullParserTest(unittest.TestCase):
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001378
1379 def _feed(self, parser, data, chunk_size=None):
1380 if chunk_size is None:
Eli Benderskyb5869342013-08-30 05:51:20 -07001381 parser.feed(data)
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001382 else:
1383 for i in range(0, len(data), chunk_size):
Eli Benderskyb5869342013-08-30 05:51:20 -07001384 parser.feed(data[i:i+chunk_size])
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001385
def assert_events(self, parser, expected, max_events=None):
    """Assert on the pending events as (event, (tag, text)) pairs.

    At most *max_events* events are consumed from parser.read_events()
    (all of them when it is None) and compared with *expected*.
    """
    self.assertEqual(
        [(event, (elem.tag, elem.text))
         for event, elem in islice(parser.read_events(), max_events)],
        expected)
1391
def assert_event_tuples(self, parser, expected, max_events=None):
    """Assert on the pending events exactly as the parser yields them.

    At most *max_events* items are consumed from parser.read_events()
    (all of them when it is None) and compared with *expected* without
    any transformation.
    """
    self.assertEqual(
        list(islice(parser.read_events(), max_events)),
        expected)
1396
def assert_event_tags(self, parser, expected, max_events=None):
    """Assert on the pending events as (action, tag) pairs.

    At most *max_events* events are consumed from parser.read_events()
    (all of them when it is None); only each element's tag is compared.
    """
    events = islice(parser.read_events(), max_events)
    self.assertEqual([(action, elem.tag) for action, elem in events],
                     expected)
1401
1402 def test_simple_xml(self):
1403 for chunk_size in (None, 1, 5):
1404 with self.subTest(chunk_size=chunk_size):
Eli Benderskyb5869342013-08-30 05:51:20 -07001405 parser = ET.XMLPullParser()
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001406 self.assert_event_tags(parser, [])
1407 self._feed(parser, "<!-- comment -->\n", chunk_size)
1408 self.assert_event_tags(parser, [])
1409 self._feed(parser,
1410 "<root>\n <element key='value'>text</element",
1411 chunk_size)
1412 self.assert_event_tags(parser, [])
1413 self._feed(parser, ">\n", chunk_size)
1414 self.assert_event_tags(parser, [('end', 'element')])
1415 self._feed(parser, "<element>text</element>tail\n", chunk_size)
1416 self._feed(parser, "<empty-element/>\n", chunk_size)
1417 self.assert_event_tags(parser, [
1418 ('end', 'element'),
1419 ('end', 'empty-element'),
1420 ])
1421 self._feed(parser, "</root>\n", chunk_size)
1422 self.assert_event_tags(parser, [('end', 'root')])
Nick Coghlan4cc2afa2013-09-28 23:50:35 +10001423 self.assertIsNone(parser.close())
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001424
Eli Benderskyb5869342013-08-30 05:51:20 -07001425 def test_feed_while_iterating(self):
1426 parser = ET.XMLPullParser()
1427 it = parser.read_events()
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001428 self._feed(parser, "<root>\n <element key='value'>text</element>\n")
1429 action, elem = next(it)
1430 self.assertEqual((action, elem.tag), ('end', 'element'))
1431 self._feed(parser, "</root>\n")
1432 action, elem = next(it)
1433 self.assertEqual((action, elem.tag), ('end', 'root'))
1434 with self.assertRaises(StopIteration):
1435 next(it)
1436
1437 def test_simple_xml_with_ns(self):
Eli Benderskyb5869342013-08-30 05:51:20 -07001438 parser = ET.XMLPullParser()
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001439 self.assert_event_tags(parser, [])
1440 self._feed(parser, "<!-- comment -->\n")
1441 self.assert_event_tags(parser, [])
1442 self._feed(parser, "<root xmlns='namespace'>\n")
1443 self.assert_event_tags(parser, [])
1444 self._feed(parser, "<element key='value'>text</element")
1445 self.assert_event_tags(parser, [])
1446 self._feed(parser, ">\n")
1447 self.assert_event_tags(parser, [('end', '{namespace}element')])
1448 self._feed(parser, "<element>text</element>tail\n")
1449 self._feed(parser, "<empty-element/>\n")
1450 self.assert_event_tags(parser, [
1451 ('end', '{namespace}element'),
1452 ('end', '{namespace}empty-element'),
1453 ])
1454 self._feed(parser, "</root>\n")
1455 self.assert_event_tags(parser, [('end', '{namespace}root')])
Nick Coghlan4cc2afa2013-09-28 23:50:35 +10001456 self.assertIsNone(parser.close())
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001457
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07001458 def test_ns_events(self):
Eli Benderskyb5869342013-08-30 05:51:20 -07001459 parser = ET.XMLPullParser(events=('start-ns', 'end-ns'))
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07001460 self._feed(parser, "<!-- comment -->\n")
1461 self._feed(parser, "<root xmlns='namespace'>\n")
1462 self.assertEqual(
Eli Benderskyb5869342013-08-30 05:51:20 -07001463 list(parser.read_events()),
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07001464 [('start-ns', ('', 'namespace'))])
1465 self._feed(parser, "<element key='value'>text</element")
1466 self._feed(parser, ">\n")
1467 self._feed(parser, "<element>text</element>tail\n")
1468 self._feed(parser, "<empty-element/>\n")
1469 self._feed(parser, "</root>\n")
Eli Benderskyb5869342013-08-30 05:51:20 -07001470 self.assertEqual(list(parser.read_events()), [('end-ns', None)])
Nick Coghlan4cc2afa2013-09-28 23:50:35 +10001471 self.assertIsNone(parser.close())
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07001472
Stefan Behneldde3eeb2019-05-01 21:49:58 +02001473 def test_ns_events_start(self):
1474 parser = ET.XMLPullParser(events=('start-ns', 'start', 'end'))
1475 self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
1476 self.assert_event_tuples(parser, [
1477 ('start-ns', ('', 'abc')),
1478 ('start-ns', ('p', 'xyz')),
1479 ], max_events=2)
1480 self.assert_event_tags(parser, [
1481 ('start', '{abc}tag'),
1482 ], max_events=1)
1483
1484 self._feed(parser, "<child />\n")
1485 self.assert_event_tags(parser, [
1486 ('start', '{abc}child'),
1487 ('end', '{abc}child'),
1488 ])
1489
1490 self._feed(parser, "</tag>\n")
1491 parser.close()
1492 self.assert_event_tags(parser, [
1493 ('end', '{abc}tag'),
1494 ])
1495
1496 def test_ns_events_start_end(self):
1497 parser = ET.XMLPullParser(events=('start-ns', 'start', 'end', 'end-ns'))
1498 self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
1499 self.assert_event_tuples(parser, [
1500 ('start-ns', ('', 'abc')),
1501 ('start-ns', ('p', 'xyz')),
1502 ], max_events=2)
1503 self.assert_event_tags(parser, [
1504 ('start', '{abc}tag'),
1505 ], max_events=1)
1506
1507 self._feed(parser, "<child />\n")
1508 self.assert_event_tags(parser, [
1509 ('start', '{abc}child'),
1510 ('end', '{abc}child'),
1511 ])
1512
1513 self._feed(parser, "</tag>\n")
1514 parser.close()
1515 self.assert_event_tags(parser, [
1516 ('end', '{abc}tag'),
1517 ], max_events=1)
1518 self.assert_event_tuples(parser, [
1519 ('end-ns', None),
1520 ('end-ns', None),
1521 ])
1522
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001523 def test_events(self):
Eli Benderskyb5869342013-08-30 05:51:20 -07001524 parser = ET.XMLPullParser(events=())
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001525 self._feed(parser, "<root/>\n")
1526 self.assert_event_tags(parser, [])
1527
Eli Benderskyb5869342013-08-30 05:51:20 -07001528 parser = ET.XMLPullParser(events=('start', 'end'))
Stefan Behnel43851a22019-05-01 21:20:38 +02001529 self._feed(parser, "<!-- text here -->\n")
1530 self.assert_events(parser, [])
1531
1532 parser = ET.XMLPullParser(events=('start', 'end'))
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001533 self._feed(parser, "<root>\n")
1534 self.assert_event_tags(parser, [('start', 'root')])
1535 self._feed(parser, "<element key='value'>text</element")
1536 self.assert_event_tags(parser, [('start', 'element')])
1537 self._feed(parser, ">\n")
1538 self.assert_event_tags(parser, [('end', 'element')])
1539 self._feed(parser,
1540 "<element xmlns='foo'>text<empty-element/></element>tail\n")
1541 self.assert_event_tags(parser, [
1542 ('start', '{foo}element'),
1543 ('start', '{foo}empty-element'),
1544 ('end', '{foo}empty-element'),
1545 ('end', '{foo}element'),
1546 ])
1547 self._feed(parser, "</root>")
Nick Coghlan4cc2afa2013-09-28 23:50:35 +10001548 self.assertIsNone(parser.close())
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001549 self.assert_event_tags(parser, [('end', 'root')])
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001550
Eli Benderskyb5869342013-08-30 05:51:20 -07001551 parser = ET.XMLPullParser(events=('start',))
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001552 self._feed(parser, "<!-- comment -->\n")
1553 self.assert_event_tags(parser, [])
1554 self._feed(parser, "<root>\n")
1555 self.assert_event_tags(parser, [('start', 'root')])
1556 self._feed(parser, "<element key='value'>text</element")
1557 self.assert_event_tags(parser, [('start', 'element')])
1558 self._feed(parser, ">\n")
1559 self.assert_event_tags(parser, [])
1560 self._feed(parser,
1561 "<element xmlns='foo'>text<empty-element/></element>tail\n")
1562 self.assert_event_tags(parser, [
1563 ('start', '{foo}element'),
1564 ('start', '{foo}empty-element'),
1565 ])
1566 self._feed(parser, "</root>")
Nick Coghlan4cc2afa2013-09-28 23:50:35 +10001567 self.assertIsNone(parser.close())
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001568
Stefan Behnel43851a22019-05-01 21:20:38 +02001569 def test_events_comment(self):
1570 parser = ET.XMLPullParser(events=('start', 'comment', 'end'))
1571 self._feed(parser, "<!-- text here -->\n")
1572 self.assert_events(parser, [('comment', (ET.Comment, ' text here '))])
1573 self._feed(parser, "<!-- more text here -->\n")
1574 self.assert_events(parser, [('comment', (ET.Comment, ' more text here '))])
1575 self._feed(parser, "<root-tag>text")
1576 self.assert_event_tags(parser, [('start', 'root-tag')])
1577 self._feed(parser, "<!-- inner comment-->\n")
1578 self.assert_events(parser, [('comment', (ET.Comment, ' inner comment'))])
1579 self._feed(parser, "</root-tag>\n")
1580 self.assert_event_tags(parser, [('end', 'root-tag')])
1581 self._feed(parser, "<!-- outer comment -->\n")
1582 self.assert_events(parser, [('comment', (ET.Comment, ' outer comment '))])
1583
1584 parser = ET.XMLPullParser(events=('comment',))
1585 self._feed(parser, "<!-- text here -->\n")
1586 self.assert_events(parser, [('comment', (ET.Comment, ' text here '))])
1587
1588 def test_events_pi(self):
1589 parser = ET.XMLPullParser(events=('start', 'pi', 'end'))
1590 self._feed(parser, "<?pitarget?>\n")
1591 self.assert_events(parser, [('pi', (ET.PI, 'pitarget'))])
1592 parser = ET.XMLPullParser(events=('pi',))
1593 self._feed(parser, "<?pitarget some text ?>\n")
1594 self.assert_events(parser, [('pi', (ET.PI, 'pitarget some text '))])
1595
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07001596 def test_events_sequence(self):
1597 # Test that events can be some sequence that's not just a tuple or list
1598 eventset = {'end', 'start'}
Eli Benderskyb5869342013-08-30 05:51:20 -07001599 parser = ET.XMLPullParser(events=eventset)
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07001600 self._feed(parser, "<foo>bar</foo>")
1601 self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])
1602
1603 class DummyIter:
1604 def __init__(self):
1605 self.events = iter(['start', 'end', 'start-ns'])
1606 def __iter__(self):
1607 return self
1608 def __next__(self):
1609 return next(self.events)
1610
Eli Benderskyb5869342013-08-30 05:51:20 -07001611 parser = ET.XMLPullParser(events=DummyIter())
Eli Bendersky3a4fbd82013-05-19 09:01:49 -07001612 self._feed(parser, "<foo>bar</foo>")
1613 self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])
1614
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001615 def test_unknown_event(self):
1616 with self.assertRaises(ValueError):
Eli Benderskyb5869342013-08-30 05:51:20 -07001617 ET.XMLPullParser(events=('start', 'end', 'bogus'))
Antoine Pitrou5b235d02013-04-18 19:37:06 +02001618
1619
#
# xinclude tests (samples from appendix C of the xinclude specification)

# Maps a resource name (as used in href attributes) to its content.
# XIncludeTest.xinclude_loader serves include requests from this dict.
XINCLUDE = {}

XINCLUDE["C1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>120 Mz is adequate for an average home user.</p>
 <xi:include href="disclaimer.xml"/>
</document>
"""

XINCLUDE["disclaimer.xml"] = """\
<?xml version='1.0'?>
<disclaimer>
 <p>The opinions represented herein represent those of the individual
 and should not be interpreted as official policy endorsed by this
 organization.</p>
</disclaimer>
"""

XINCLUDE["C2.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>This document has been accessed
 <xi:include href="count.txt" parse="text"/> times.</p>
</document>
"""

XINCLUDE["count.txt"] = "324387"

# Variant of C2 where the text include follows a sibling element.
XINCLUDE["C2b.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>This document has been <em>accessed</em>
 <xi:include href="count.txt" parse="text"/> times.</p>
</document>
"""

XINCLUDE["C3.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>The following is the source of the "data.xml" resource:</p>
 <example><xi:include href="data.xml" parse="text"/></example>
</document>
"""

XINCLUDE["data.xml"] = """\
<?xml version='1.0'?>
<data>
 <item><![CDATA[Brooks & Shields]]></item>
</data>
"""

# Neither "example.txt" nor "fallback-example.txt" is defined in XINCLUDE,
# so loading this document's includes through xinclude_loader fails.
XINCLUDE["C5.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
 <xi:include href="example.txt" parse="text">
 <xi:fallback>
 <xi:include href="fallback-example.txt" parse="text">
 <xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback>
 </xi:include>
 </xi:fallback>
 </xi:include>
</div>
"""

# The href is the (HTML-escaped) path of the on-disk sample XML file.
XINCLUDE["default.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>Example.</p>
 <xi:include href="{}"/>
</document>
""".format(html.escape(SIMPLE_XMLFILE, True))

# Includes C1.xml four times.  The <p> text mentions Recursive1.xml, but
# test_xinclude_repeated only counts <p> elements, so the wording is
# irrelevant to the test.
XINCLUDE["include_c1_repeated.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>The following is the source code of Recursive1.xml:</p>
 <xi:include href="C1.xml"/>
 <xi:include href="C1.xml"/>
 <xi:include href="C1.xml"/>
 <xi:include href="C1.xml"/>
</document>
"""

#
# badly formatted xi:include tags

XINCLUDE_BAD = {}

# Unknown value for the "parse" attribute.
XINCLUDE_BAD["B1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>120 Mz is adequate for an average home user.</p>
 <xi:include href="disclaimer.xml" parse="BAD_TYPE"/>
</document>
"""

# xi:fallback that is not a child of xi:include.
XINCLUDE_BAD["B2.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
 <xi:fallback></xi:fallback>
</div>
"""

# Recursive1 -> Recursive2 -> Recursive3 -> Recursive1: an include cycle.
# These live in XINCLUDE (not XINCLUDE_BAD) so that xinclude_loader can
# resolve them.
XINCLUDE["Recursive1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>The following is the source code of Recursive2.xml:</p>
 <xi:include href="Recursive2.xml"/>
</document>
"""

XINCLUDE["Recursive2.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>The following is the source code of Recursive3.xml:</p>
 <xi:include href="Recursive3.xml"/>
</document>
"""

XINCLUDE["Recursive3.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>The following is the source code of Recursive1.xml:</p>
 <xi:include href="Recursive1.xml"/>
</document>
"""
1750
1751
class XIncludeTest(unittest.TestCase):
    # Tests for xml.etree.ElementInclude, driven by the in-memory XINCLUDE
    # and XINCLUDE_BAD sample documents.

    def xinclude_loader(self, href, parse="xml", encoding=None):
        # Loader serving resources from the XINCLUDE dict.  Returns a parsed
        # element when parse == "xml", otherwise the raw string.  Unknown
        # resources raise OSError; the KeyError is an implementation detail
        # and is deliberately not chained.
        try:
            data = XINCLUDE[href]
        except KeyError:
            raise OSError("resource not found") from None
        if parse == "xml":
            data = ET.XML(data)
        return data

    def none_loader(self, href, parser, encoding=None):
        # Loader that never finds anything.
        return None

    def _my_loader(self, href, parse):
        # Used to avoid a test-dependency problem where the default loader
        # of ElementInclude uses the pyET parser for cET tests.
        if parse == 'xml':
            with open(href, 'rb') as f:
                return ET.parse(f).getroot()
        else:
            return None

    def test_xinclude_default(self):
        from xml.etree import ElementInclude
        doc = self.xinclude_loader('default.xml')
        ElementInclude.include(doc, self._my_loader)
        self.assertEqual(serialize(doc),
            '<document>\n'
            ' <p>Example.</p>\n'
            ' <root>\n'
            ' <element key="value">text</element>\n'
            ' <element>text</element>tail\n'
            ' <empty-element />\n'
            '</root>\n'
            '</document>')

    def test_xinclude(self):
        from xml.etree import ElementInclude

        # Basic inclusion example (XInclude C.1)
        document = self.xinclude_loader("C1.xml")
        ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(serialize(document),
            '<document>\n'
            ' <p>120 Mz is adequate for an average home user.</p>\n'
            ' <disclaimer>\n'
            ' <p>The opinions represented herein represent those of the individual\n'
            ' and should not be interpreted as official policy endorsed by this\n'
            ' organization.</p>\n'
            '</disclaimer>\n'
            '</document>') # C1

        # Textual inclusion example (XInclude C.2)
        document = self.xinclude_loader("C2.xml")
        ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(serialize(document),
            '<document>\n'
            ' <p>This document has been accessed\n'
            ' 324387 times.</p>\n'
            '</document>') # C2

        # Textual inclusion after sibling element (based on modified XInclude C.2)
        document = self.xinclude_loader("C2b.xml")
        ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(serialize(document),
            '<document>\n'
            ' <p>This document has been <em>accessed</em>\n'
            ' 324387 times.</p>\n'
            '</document>') # C2b

        # Textual inclusion of XML example (XInclude C.3)
        document = self.xinclude_loader("C3.xml")
        ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(serialize(document),
            '<document>\n'
            ' <p>The following is the source of the "data.xml" resource:</p>\n'
            " <example>&lt;?xml version='1.0'?&gt;\n"
            '&lt;data&gt;\n'
            ' &lt;item&gt;&lt;![CDATA[Brooks &amp; Shields]]&gt;&lt;/item&gt;\n'
            '&lt;/data&gt;\n'
            '</example>\n'
            '</document>') # C3

        # Fallback example (XInclude C.5)
        # Note! Fallback support is not yet implemented
        document = self.xinclude_loader("C5.xml")
        with self.assertRaises(OSError) as cm:
            ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(str(cm.exception), 'resource not found')
        # The failed include must leave the document untouched.
        self.assertEqual(serialize(document),
            '<div xmlns:ns0="http://www.w3.org/2001/XInclude">\n'
            ' <ns0:include href="example.txt" parse="text">\n'
            ' <ns0:fallback>\n'
            ' <ns0:include href="fallback-example.txt" parse="text">\n'
            ' <ns0:fallback><a href="mailto:bob@example.org">Report error</a></ns0:fallback>\n'
            ' </ns0:include>\n'
            ' </ns0:fallback>\n'
            ' </ns0:include>\n'
            '</div>') # C5

    def test_xinclude_repeated(self):
        from xml.etree import ElementInclude

        document = self.xinclude_loader("include_c1_repeated.xml")
        ElementInclude.include(document, self.xinclude_loader)
        # One <p> in the outer document, plus two per included C1.xml
        # (its own <p> and the one from disclaimer.xml), four times.
        self.assertEqual(1+4*2, len(document.findall(".//p")))

    def test_xinclude_failures(self):
        from xml.etree import ElementInclude

        # Test failure to locate included XML file.
        document = ET.XML(XINCLUDE["C1.xml"])
        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
            ElementInclude.include(document, loader=self.none_loader)
        self.assertEqual(str(cm.exception),
                "cannot load 'disclaimer.xml' as 'xml'")

        # Test failure to locate included text file.
        document = ET.XML(XINCLUDE["C2.xml"])
        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
            ElementInclude.include(document, loader=self.none_loader)
        self.assertEqual(str(cm.exception),
                "cannot load 'count.txt' as 'text'")

        # Test bad parse type.
        document = ET.XML(XINCLUDE_BAD["B1.xml"])
        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
            ElementInclude.include(document, loader=self.none_loader)
        self.assertEqual(str(cm.exception),
                "unknown parse type in xi:include tag ('BAD_TYPE')")

        # Test xi:fallback outside xi:include.
        document = ET.XML(XINCLUDE_BAD["B2.xml"])
        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
            ElementInclude.include(document, loader=self.none_loader)
        self.assertEqual(str(cm.exception),
                "xi:fallback tag must be child of xi:include "
                "('{http://www.w3.org/2001/XInclude}fallback')")

        # Test infinitely recursive includes.
        document = self.xinclude_loader("Recursive1.xml")
        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
            ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(str(cm.exception),
                "recursive include of Recursive2.xml")

        # Test 'max_depth' limitation.
        document = self.xinclude_loader("Recursive1.xml")
        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
            ElementInclude.include(document, self.xinclude_loader, max_depth=None)
        self.assertEqual(str(cm.exception),
                "recursive include of Recursive2.xml")

        document = self.xinclude_loader("Recursive1.xml")
        with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm:
            ElementInclude.include(document, self.xinclude_loader, max_depth=0)
        self.assertEqual(str(cm.exception),
                "maximum xinclude depth reached when including file Recursive2.xml")

        document = self.xinclude_loader("Recursive1.xml")
        with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm:
            ElementInclude.include(document, self.xinclude_loader, max_depth=1)
        self.assertEqual(str(cm.exception),
                "maximum xinclude depth reached when including file Recursive3.xml")

        document = self.xinclude_loader("Recursive1.xml")
        with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm:
            ElementInclude.include(document, self.xinclude_loader, max_depth=2)
        self.assertEqual(str(cm.exception),
                "maximum xinclude depth reached when including file Recursive1.xml")

        # With enough depth budget the cycle itself is detected first.
        document = self.xinclude_loader("Recursive1.xml")
        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
            ElementInclude.include(document, self.xinclude_loader, max_depth=3)
        self.assertEqual(str(cm.exception),
                "recursive include of Recursive2.xml")
1929
1930
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001931# --------------------------------------------------------------------
1932# reported bugs
1933
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +02001934class BugsTest(unittest.TestCase):
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001935
def test_bug_xmltoolkit21(self):
    # marshaller gives obscure errors for non-string values
    #
    # Each case below places the int 123 in a different part of an element
    # and expects the same explicit TypeError from serialization.

    def check(elem):
        with self.assertRaises(TypeError) as cm:
            serialize(elem)
        self.assertEqual(str(cm.exception),
                         'cannot serialize 123 (type int)')

    elem = ET.Element(123)
    check(elem) # tag

    elem = ET.Element("elem")
    elem.text = 123
    check(elem) # text

    elem = ET.Element("elem")
    elem.tail = 123
    check(elem) # tail

    elem = ET.Element("elem")
    elem.set(123, "123")
    check(elem) # attribute key

    elem = ET.Element("elem")
    elem.set("123", 123)
    check(elem) # attribute value
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001963
def test_bug_xmltoolkit25(self):
    # typo in ElementTree.findtext
    #
    # findtext() on an ElementTree must resolve both a direct child path
    # and a nested path.

    elem = ET.XML(SAMPLE_XML)
    tree = ET.ElementTree(elem)
    self.assertEqual(tree.findtext("tag"), 'text')
    self.assertEqual(tree.findtext("section/tag"), 'subtext')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001971
def test_bug_xmltoolkit28(self):
    # .//tag causes exceptions
    #
    # A descendant search must return an empty list for a missing tag and
    # find a tag nested two levels deep.

    tree = ET.XML("<doc><table><tbody/></table></doc>")
    self.assertEqual(summarize_list(tree.findall(".//thead")), [])
    self.assertEqual(summarize_list(tree.findall(".//tbody")), ['tbody'])
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001978
def test_bug_xmltoolkitX1(self):
    # dump() doesn't flush the output buffer
    #
    # The complete serialization, including the trailing newline, must be
    # present on stdout once dump() has returned.

    tree = ET.XML("<doc><table><tbody/></table></doc>")
    with support.captured_stdout() as stdout:
        ET.dump(tree)
        self.assertEqual(stdout.getvalue(), '<doc><table><tbody /></table></doc>\n')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00001986
def test_bug_xmltoolkit39(self):
    # non-ascii element and attribute names don't work
    #
    # The first three cases parse ISO-8859-1 input; the last two build
    # elements from str.  All are re-serialized as UTF-8.

    tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />")
    self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />')

    tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
                  b"<tag \xe4ttr='v&#228;lue' />")
    self.assertEqual(tree.attrib, {'\xe4ttr': 'v\xe4lue'})
    self.assertEqual(ET.tostring(tree, "utf-8"),
            b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')

    tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
                  b'<t\xe4g>text</t\xe4g>')
    self.assertEqual(ET.tostring(tree, "utf-8"),
            b'<t\xc3\xa4g>text</t\xc3\xa4g>')

    tree = ET.Element("t\u00e4g")
    self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />')

    tree = ET.Element("tag")
    tree.set("\u00e4ttr", "v\u00e4lue")
    self.assertEqual(ET.tostring(tree, "utf-8"),
            b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002011
def test_bug_xmltoolkit54(self):
    # problems handling internally defined entities
    #
    # &ldots; is defined in the internal DTD subset as U+8230; it must
    # expand to that character and be written as a decimal character
    # reference (0x8230 == 33328) when serialized as ASCII.

    e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '&#x8230;'>]>"
               '<doc>&ldots;</doc>')
    self.assertEqual(serialize(e, encoding="us-ascii"),
            b'<doc>&#33328;</doc>')
    self.assertEqual(serialize(e), '<doc>\u8230</doc>')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002020
def test_bug_xmltoolkit55(self):
    # make sure we're reporting the first error, not the last
    #
    # The document references three undefined entities; the error must
    # name the first one (&ldots;).

    with self.assertRaises(ET.ParseError) as cm:
        ET.XML(b"<!DOCTYPE doc SYSTEM 'doc.dtd'>"
               b'<doc>&ldots;&ndots;&rdots;</doc>')
    self.assertEqual(str(cm.exception),
            'undefined entity &ldots;: line 1, column 36')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002029
def test_bug_xmltoolkit60(self):
    # Handle crash in stream source.
    #
    # An exception raised by the source's read() must propagate out of
    # ET.parse() unchanged.

    class ExceptionFile:
        def read(self, x):
            raise OSError

    self.assertRaises(OSError, ET.parse, ExceptionFile())
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002038
def test_bug_xmltoolkit62(self):
    # Don't crash when using custom entities.
    #
    # The entities are registered on the parser's ``entity`` mapping before
    # a document that references them is fed.

    ENTITIES = {'rsquo': '\u2019', 'lsquo': '\u2018'}
    parser = ET.XMLParser()
    parser.entity.update(ENTITIES)
    parser.feed("""<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []>
<patent-application-publication>
<subdoc-abstract>
<paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named &lsquo;BCT9801BEG&rsquo;.</paragraph>
</subdoc-abstract>
</patent-application-publication>""")
    t = parser.close()
    self.assertEqual(t.find('.//paragraph').text,
        'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002055
@unittest.skipIf(sys.gettrace(), "Skips under coverage.")
def test_bug_xmltoolkit63(self):
    # Check reference leak.
    #
    # Building a small tree repeatedly must not change the reference
    # count of None.
    def xmltoolkit63():
        tree = ET.TreeBuilder()
        tree.start("tag", {})
        tree.data("text")
        tree.end("tag")

    # Warm-up call, so that one-time allocations are not counted.
    xmltoolkit63()
    count = sys.getrefcount(None)
    for _ in range(1000):
        xmltoolkit63()
    self.assertEqual(sys.getrefcount(None), count)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002070
def test_bug_200708_newline(self):
    # Preserve newlines in attributes.
    #
    # Newlines in an attribute value are written as &#10; so that they
    # survive a serialize/parse/serialize round trip.

    e = ET.Element('SomeTag', text="def _f():\n return 3\n")
    self.assertEqual(ET.tostring(e),
            b'<SomeTag text="def _f():&#10; return 3&#10;" />')
    self.assertEqual(ET.XML(ET.tostring(e)).get("text"),
            'def _f():\n return 3\n')
    self.assertEqual(ET.tostring(ET.XML(ET.tostring(e))),
            b'<SomeTag text="def _f():&#10; return 3&#10;" />')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002081
def test_bug_200708_close(self):
    # parser.close() must return whatever the target's close() returns,
    # for both the default builder and a custom target.

    # Test default builder.
    parser = ET.XMLParser() # default
    parser.feed("<element>some text</element>")
    self.assertEqual(parser.close().tag, 'element')

    # Test custom builder.
    class EchoTarget:
        def close(self):
            return ET.Element("element") # simulate root
    parser = ET.XMLParser(target=EchoTarget())
    parser.feed("<element>some text</element>")
    self.assertEqual(parser.close().tag, 'element')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002095
def test_bug_200709_default_namespace(self):
    # Serialization with the default_namespace option.

    # 1: every name is in the default namespace.
    e = ET.Element("{default}elem")
    ET.SubElement(e, "{default}elem")
    self.assertEqual(serialize(e, default_namespace="default"), # 1
            '<elem xmlns="default"><elem /></elem>')

    # 2: a name in another namespace gets a generated prefix.
    e = ET.Element("{default}elem")
    ET.SubElement(e, "{default}elem")
    ET.SubElement(e, "{not-default}elem")
    self.assertEqual(serialize(e, default_namespace="default"), # 2
        '<elem xmlns="default" xmlns:ns1="not-default">'
        '<elem />'
        '<ns1:elem />'
        '</elem>')

    # 3: a non-qualified name cannot be combined with default_namespace.
    e = ET.Element("{default}elem")
    ET.SubElement(e, "{default}elem")
    ET.SubElement(e, "elem") # unprefixed name
    with self.assertRaises(ValueError) as cm:
        serialize(e, default_namespace="default") # 3
    self.assertEqual(str(cm.exception),
            'cannot use non-qualified names with default_namespace option')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002118
def test_bug_200709_register_namespace(self):
    # An unregistered namespace is serialized with a generated prefix;
    # after register_namespace() the registered prefix is used instead.
    qualified_tag = "{http://namespace.invalid/does/not/exist/}title"

    before = ET.Element(qualified_tag)
    self.assertEqual(
        ET.tostring(before),
        b'<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />')

    ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/")
    after = ET.Element(qualified_tag)
    self.assertEqual(
        ET.tostring(after),
        b'<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />')

    # And the Dublin Core namespace is in the default list:
    dublin_core = ET.Element("{http://purl.org/dc/elements/1.1/}title")
    self.assertEqual(
        ET.tostring(dublin_core),
        b'<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002133
def test_bug_200709_element_comment(self):
    # Not sure if this can be fixed, really (since the serializer needs
    # ET.Comment, not cET.comment).
    #
    # A node created by ET.Comment / ET.PI must report that same factory
    # as its tag once appended to a parent.
    for factory in (ET.Comment, ET.PI):
        parent = ET.Element('a')
        parent.append(factory('foo'))
        self.assertEqual(parent[0].tag, factory)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002145
def test_bug_200709_element_insert(self):
    # insert() with index 0 prepends; index -1 places the new child just
    # before the current last child.
    parent = ET.Element('a')
    ET.SubElement(parent, 'b')
    ET.SubElement(parent, 'c')
    extra = ET.Element('d')

    parent.insert(0, extra)
    self.assertEqual(summarize_list(parent), ['d', 'b', 'c'])

    parent.insert(-1, extra)
    self.assertEqual(summarize_list(parent), ['d', 'b', 'd', 'c'])
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002155
def test_bug_200709_iter_comment(self):
    # iter(ET.Comment) finds a comment nested below the root.
    root = ET.Element('a')
    child = ET.SubElement(root, 'b')
    child.append(ET.Comment("TEST-b"))

    comments = root.iter(ET.Comment)
    self.assertEqual(summarize_list(comments), [ET.Comment])
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002162
Serhiy Storchakaf8cf59e2013-02-25 17:20:59 +02002163 # --------------------------------------------------------------------
2164 # reported on bugs.python.org
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002165
def test_bug_1534630(self):
    # Data fed to a TreeBuilder before the first start() call does not
    # show up in the serialized result.
    builder = ET.TreeBuilder()
    builder.data("data")
    builder.start("tag", {})
    builder.end("tag")
    root = builder.close()
    self.assertEqual(serialize(root), '<tag />')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002173
def test_issue6233(self):
    # The same non-ASCII character, supplied in two different input
    # encodings, is written as a character reference in ASCII output.
    expected = (b"<?xml version='1.0' encoding='ascii'?>\n"
                b'<body>t&#227;g</body>')
    documents = (
        b"<?xml version='1.0' encoding='utf-8'?>"
        b'<body>t\xc3\xa3g</body>',
        b"<?xml version='1.0' encoding='iso-8859-1'?>"
        b'<body>t\xe3g</body>',
    )
    for document in documents:
        root = ET.XML(document)
        self.assertEqual(ET.tostring(root, 'ascii'), expected)
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002185
def test_issue3151(self):
    # A namespace URI containing '$', '{' and '}' is kept verbatim in the
    # tag and in the serialized output.
    root = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>')
    self.assertEqual(root.tag, '{${stuff}}localname')

    # The original test also wraps the element in an ElementTree; the
    # wrapper itself is not inspected.
    ET.ElementTree(root)

    self.assertEqual(ET.tostring(root),
                     b'<ns0:localname xmlns:ns0="${stuff}" />')
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002191
def test_issue6565(self):
    # Slice assignment replaces all children with those of another tree.
    target = ET.XML("<body><tag/></body>")
    self.assertEqual(summarize_list(target), ['tag'])

    donor = ET.XML(SAMPLE_XML)
    target[:] = donor[:]
    self.assertEqual(summarize_list(target), ['tag', 'tag', 'section'])
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002198
def test_issue10777(self):
    # Registering a namespace twice caused a "dictionary changed size during
    # iteration" bug.
    for _ in range(2):
        ET.register_namespace('test10777', 'http://myuri/')
Georg Brandl90b20672010-12-28 10:38:33 +00002205
def test_lost_text(self):
    # Issue #25902: Borrowed text can disappear
    class Text:
        # Truth-testing this object replaces the text of the element
        # that currently holds it.
        def __bool__(self):
            elem.text = 'changed'
            return True

    elem = ET.Element('tag')
    elem.text = Text()
    texts = elem.itertext()
    first = next(texts)

    # The iterator still yields the original object, while the element
    # now holds the replacement string.
    self.assertIsInstance(first, Text)
    self.assertIsInstance(elem.text, str)
    self.assertEqual(elem.text, 'changed')
2220
def test_lost_tail(self):
    # Issue #25902: Borrowed tail can disappear
    class Text:
        # Truth-testing this object replaces the tail of the child
        # element that currently holds it.
        def __bool__(self):
            root[0].tail = 'changed'
            return True

    root = ET.Element('root')
    root.append(ET.Element('tag'))
    root[0].tail = Text()
    texts = root.itertext()
    first = next(texts)

    # The iterator still yields the original object, while the child
    # now holds the replacement string.
    self.assertIsInstance(first, Text)
    self.assertIsInstance(root[0].tail, str)
    self.assertEqual(root[0].tail, 'changed')
2236
def test_lost_elem(self):
    # Issue #25902: Borrowed element can disappear
    class Tag:
        # Comparing this tag replaces the first child of the root and
        # re-enters the iterator that triggered the comparison.
        def __eq__(self, other):
            root[0] = ET.Element('changed')
            next(walker)
            return True

    root = ET.Element('root')
    root.append(ET.Element(Tag()))
    root.append(ET.Element('tag'))
    walker = root.iter('tag')
    try:
        found = next(walker)
    except ValueError:
        self.skipTest('generators are not reentrant')

    self.assertIsInstance(found.tag, Tag)
    self.assertIsInstance(root[0].tag, str)
    self.assertEqual(root[0].tag, 'changed')
2256
def check_expat224_utf8_bug(self, text):
    # Helper: embed the UTF-8 encoded bytes *text* as the value of
    # attribute 'b', parse the document, and check that the attribute
    # decodes back to the same text.
    document = b'<a b="%s"/>' % text
    parsed = ET.XML(document)
    expected = text.decode('utf-8')
    self.assertEqual(parsed.get('b'), expected)
2261
def test_expat224_utf8_bug(self):
    # bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder.
    # Check that Expat 2.2.4 fixed the bug.
    #
    # Test buffer bounds at odd and even positions: the second payload
    # is the first one shifted by a single ASCII byte.
    two_byte_run = b'\xc3\xa0' * 1024
    for payload in (two_byte_run, b'x' + two_byte_run):
        self.check_expat224_utf8_bug(payload)
2273
def test_expat224_utf8_bug_file(self):
    # Parse the UTF-8 regression file and compare the parsed value of
    # attribute 'b' with the value extracted from the raw file text.
    with open(UTF8_BUG_XMLFILE, 'rb') as fp:
        raw = fp.read()
    root = ET.fromstring(raw)
    xmlattr = root.get('b')

    # "Parse" manually the XML file to extract the value of the 'b'
    # attribute of the <a b='xxx' /> XML element
    text = raw.decode('utf-8').strip()
    # NOTE(review): presumably mirrors the parser turning line breaks
    # inside an attribute value into spaces -- confirm against the file.
    text = text.replace('\r\n', ' ')
    # Drop the leading "<a b='" (6 characters) and the trailing "' />"
    # (4 characters), per the element shape described above.
    text = text[6:-4]

    # Compare against the value fetched above instead of querying the
    # element a second time (the stored value used to be left unused).
    self.assertEqual(xmlattr, text)
2286
def test_39495_treebuilder_start(self):
    # TreeBuilder.start() must reject a missing attrs argument as well as
    # attrs=None with a TypeError.
    for bad_args in (("tag",), ("tag", None)):
        start = ET.TreeBuilder().start
        self.assertRaises(TypeError, start, *bad_args)
2290
Victor Stinnere6d9fcb2017-09-25 01:27:34 -07002291
Antoine Pitrou5b235d02013-04-18 19:37:06 +02002292
Florent Xiclunaf15351d2010-03-13 23:24:31 +00002293# --------------------------------------------------------------------
2294
2295
class BasicElementTest(ElementTestCase, unittest.TestCase):
    # Basic Element behaviour: construction, shallow and deep copies,
    # type checks on mutation, garbage collection, weak references,
    # get() keywords and pickling.

    def test___init__(self):
        tag = "foo"
        attrib = {"zix": "wyp"}

        elem = ET.Element(tag, attrib)

        # traits of an element
        self.assertIsInstance(elem, ET.Element)
        for name in ("tag", "attrib", "text", "tail"):
            self.assertIn(name, dir(elem))

        # string attributes have expected values
        self.assertEqual(elem.tag, tag)
        self.assertIsNone(elem.text)
        self.assertIsNone(elem.tail)

        # attrib is a copy
        self.assertIsNot(elem.attrib, attrib)
        self.assertEqual(elem.attrib, attrib)

        # attrib isn't linked
        attrib["bar"] = "baz"
        self.assertIsNot(elem.attrib, attrib)
        self.assertNotEqual(elem.attrib, attrib)

    def _assert_is_shallow_copy(self, source, duplicate):
        # Shared checks for Element.copy() and copy.copy().

        # elements are not the same
        self.assertIsNot(duplicate, source)

        # string attributes are equal
        self.assertEqual(duplicate.tag, source.tag)
        self.assertEqual(duplicate.text, source.text)
        self.assertEqual(duplicate.tail, source.tail)

        # number of children is the same
        self.assertEqual(len(duplicate), len(source))

        # children are the same
        for child1, child2 in itertools.zip_longest(source, duplicate):
            self.assertIs(child1, child2)

        # attrib is a copy
        self.assertEqual(duplicate.attrib, source.attrib)

    def test_copy(self):
        # Only run this test if Element.copy() is defined.
        if "copy" not in dir(ET.Element):
            self.skipTest("Element.copy() not present")

        source = ET.Element("foo", {"zix": "wyp"})
        source.append(ET.Element("bar", {"baz": "qix"}))

        with self.assertWarns(DeprecationWarning):
            duplicate = source.copy()

        self._assert_is_shallow_copy(source, duplicate)

    def test___copy__(self):
        source = ET.Element("foo", {"zix": "wyp"})
        source.append(ET.Element("bar", {"baz": "qix"}))

        duplicate = copy.copy(source)

        self._assert_is_shallow_copy(source, duplicate)

    def test___deepcopy__(self):
        source = ET.Element("foo", {"zix": "wyp"})
        source.append(ET.Element("bar", {"baz": "qix"}))

        duplicate = copy.deepcopy(source)

        # elements are not the same
        self.assertIsNot(duplicate, source)

        # string attributes are equal
        self.assertEqual(duplicate.tag, source.tag)
        self.assertEqual(duplicate.text, source.text)
        self.assertEqual(duplicate.tail, source.tail)

        # number of children is the same
        self.assertEqual(len(duplicate), len(source))

        # children are not the same
        for child1, child2 in itertools.zip_longest(source, duplicate):
            self.assertIsNot(child1, child2)

        # attrib is a copy
        self.assertIsNot(duplicate.attrib, source.attrib)
        self.assertEqual(duplicate.attrib, source.attrib)

        # attrib isn't linked
        source.attrib["bar"] = "baz"
        self.assertIsNot(duplicate.attrib, source.attrib)
        self.assertNotEqual(duplicate.attrib, source.attrib)

    def test_augmentation_type_errors(self):
        # Every way of adding a child must reject non-Element objects.
        parent = ET.Element('joe')
        with self.assertRaises(TypeError):
            parent.append('b')
        with self.assertRaises(TypeError):
            parent.extend([ET.Element('bar'), 'foo'])
        with self.assertRaises(TypeError):
            parent.insert(0, 'foo')

        parent[:] = [ET.Element('bar')]
        with self.assertRaises(TypeError):
            parent[0] = 'foo'
        with self.assertRaises(TypeError):
            parent[:] = [ET.Element('bar'), 'foo']

        if hasattr(parent, '__setstate__'):
            bad_state = {
                'tag': 'tag',
                '_children': [None],  # non-Element
                'attrib': 'attr',
                'tail': 'tail',
                'text': 'text',
            }
            with self.assertRaises(TypeError):
                parent.__setstate__(bad_state)

        if hasattr(parent, '__deepcopy__'):
            class E(ET.Element):
                def __deepcopy__(self, memo):
                    return None  # non-Element
            parent[:] = [E('bar')]
            with self.assertRaises(TypeError):
                copy.deepcopy(parent)

    def test_cyclic_gc(self):
        class Dummy:
            pass

        # Test the shortest cycle: d->element->d
        holder = Dummy()
        holder.dummyref = ET.Element('joe', attr=holder)
        probe = weakref.ref(holder)
        del holder
        gc_collect()
        self.assertIsNone(probe())

        # A longer cycle: d->e->e2->d
        outer = ET.Element('joe')
        holder = Dummy()
        holder.dummyref = outer
        probe = weakref.ref(holder)
        inner = ET.SubElement(outer, 'foo', attr=holder)
        del holder, outer, inner
        gc_collect()
        self.assertIsNone(probe())

        # A cycle between Element objects as children of one another
        # e1->e2->e3->e1
        first = ET.Element('e1')
        second = ET.Element('e2')
        third = ET.Element('e3')
        third.append(first)
        second.append(third)
        first.append(second)
        probe = weakref.ref(first)
        del first, second, third
        gc_collect()
        self.assertIsNone(probe())

    def test_weakref(self):
        # Elements support weak references, and the callback fires once
        # the element has been collected.
        callback_ran = False

        def on_collect(ref):
            nonlocal callback_ran
            callback_ran = True

        elem = ET.Element('e')
        probe = weakref.ref(elem, on_collect)
        self.assertEqual(probe().tag, 'e')
        del elem
        gc_collect()  # For PyPy or other GCs.
        self.assertEqual(callback_ran, True)
        self.assertEqual(probe(), None)

    def test_get_keyword_args(self):
        # get() accepts its fallback value as the keyword 'default'.
        elem = ET.Element('foo', x=1, y=2, z=3)
        self.assertEqual(elem.get('x', default=7), 1)
        self.assertEqual(elem.get('w', default=7), 7)

    def test_pickle(self):
        # issue #16076: the C implementation wasn't pickleable.
        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
            for dumper, loader in product(self.modules, repeat=2):
                original = dumper.Element('foo', bar=42)
                original.text = "text goes here"
                original.tail = "opposite of head"
                first_child = dumper.SubElement(original, 'child')
                first_child.append(dumper.Element('grandchild'))
                original.append(dumper.Element('child'))
                grandchild = original.findall('.//grandchild')[0]
                grandchild.set('attr', 'other value')

                restored = self.pickleRoundTrip(
                    original, 'xml.etree.ElementTree',
                    dumper, loader, proto)

                self.assertEqual(restored.tag, 'foo')
                self.assertEqual(restored.attrib['bar'], 42)
                self.assertEqual(len(restored), 2)
                self.assertEqualElements(original, restored)

    def test_pickle_issue18997(self):
        # A parsed (rather than hand-built) tree must survive pickling.
        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
            for dumper, loader in product(self.modules, repeat=2):
                XMLTEXT = """<?xml version="1.0"?>
                    <group><dogs>4</dogs>
                    </group>"""
                parsed = dumper.fromstring(XMLTEXT)
                if hasattr(parsed, '__getstate__'):
                    state = parsed.__getstate__()
                    self.assertEqual(state['tag'], 'group')
                restored = self.pickleRoundTrip(
                    parsed, 'xml.etree.ElementTree',
                    dumper, loader, proto)
                self.assertEqual(restored.tag, 'group')
                self.assertEqual(restored[0].tag, 'dogs')
Eli Benderskydd3661e2013-09-13 06:24:25 -07002521
Eli Bendersky23687042013-02-26 05:53:23 -08002522
class BadElementTest(ElementTestCase, unittest.TestCase):
    # Hostile inputs: objects that mutate the tree (or the argument list)
    # from inside special methods while an Element operation is running.
    # Most tests only require that the operation does not crash.

    def test_extend_mutable_list(self):
        class X:
            # Reading __class__ replaces the contents of the list that
            # is currently being passed to extend().
            @property
            def __class__(self):
                items[:] = [ET.Element('baz')]
                return ET.Element
        items = [X()]
        target = ET.Element('foo')
        try:
            target.extend(items)
        except TypeError:
            pass

        class Y(X, ET.Element):
            pass
        items = [Y('x')]
        target = ET.Element('foo')
        target.extend(items)

    def test_extend_mutable_list2(self):
        class X:
            # Reading __class__ empties the list that is currently being
            # passed to extend().
            @property
            def __class__(self):
                del items[:]
                return ET.Element
        items = [X(), ET.Element('baz')]
        target = ET.Element('foo')
        try:
            target.extend(items)
        except TypeError:
            pass

        class Y(X, ET.Element):
            pass
        items = [Y('bar'), ET.Element('baz')]
        target = ET.Element('foo')
        target.extend(items)

    def test_remove_with_mutating(self):
        class X(ET.Element):
            # Comparing this element empties the tree being searched.
            def __eq__(self, o):
                del tree[:]
                return False

        # The mutating element is a child of the tree.
        tree = ET.Element('foo')
        tree.extend([X('bar')])
        with self.assertRaises(ValueError):
            tree.remove(ET.Element('baz'))

        # The mutating element is the one being removed.
        tree = ET.Element('foo')
        tree.extend([ET.Element('bar')])
        with self.assertRaises(ValueError):
            tree.remove(X('baz'))

    def test_recursive_repr(self):
        # Issue #25455
        elem = ET.Element('foo')
        with swap_attr(elem, 'tag', elem):
            with self.assertRaises(RuntimeError):
                repr(elem)  # Should not crash

    def test_element_get_text(self):
        # Issue #27863
        class X(str):
            # Touch the element's text while this string is destroyed;
            # the element name may not be bound yet at that point.
            def __del__(self):
                try:
                    root.text
                except NameError:
                    pass

        builder = ET.TreeBuilder()
        builder.start('tag', {})
        builder.data('ABCD')
        builder.data(X('EFGH'))
        builder.data('IJKL')
        builder.end('tag')

        root = builder.close()
        self.assertEqual(root.text, 'ABCDEFGHIJKL')

    def test_element_get_tail(self):
        # Issue #27863
        class X(str):
            # Touch the child's tail while this string is destroyed;
            # the element name may not be bound yet at that point.
            def __del__(self):
                try:
                    root[0].tail
                except NameError:
                    pass

        builder = ET.TreeBuilder()
        builder.start('root', {})
        builder.start('tag', {})
        builder.end('tag')
        builder.data('ABCD')
        builder.data(X('EFGH'))
        builder.data('IJKL')
        builder.end('root')

        root = builder.close()
        self.assertEqual(root[0].tail, 'ABCDEFGHIJKL')

    def test_subscr(self):
        # Issue #27863
        class X:
            # Converting to an index empties the element being sliced.
            def __index__(self):
                del parent[:]
                return 1

        parent = ET.Element('elem')
        parent.append(ET.Element('child'))
        parent[:X()]  # shouldn't crash

        parent.append(ET.Element('child'))
        parent[0:10:X()]  # shouldn't crash

    def test_ass_subscr(self):
        # Issue #27863
        class X:
            # Converting to an index empties the element being assigned.
            def __index__(self):
                parent[:] = []
                return 1

        parent = ET.Element('elem')
        for _ in range(10):
            parent.insert(0, ET.Element('child'))

        parent[0:10:X()] = []  # shouldn't crash

    def test_treebuilder_start(self):
        # Issue #27863
        def element_factory(tag, attrib):
            # Deliberately not an Element: a plain list.
            return []
        builder = ET.TreeBuilder(element_factory=element_factory)

        builder.start('tag', {})
        builder.data('ABCD')
        with self.assertRaises(AttributeError):
            builder.start('tag2', {})
        del builder
        gc_collect()

    def test_treebuilder_end(self):
        # Issue #27863
        def element_factory(tag, attrib):
            # Deliberately not an Element: a plain list.
            return []
        builder = ET.TreeBuilder(element_factory=element_factory)

        builder.start('tag', {})
        builder.data('ABCD')
        with self.assertRaises(AttributeError):
            builder.end('tag')
        del builder
        gc_collect()
2672
2673
class MutatingElementPath(str):
    # A str subclass that remembers an element (or any object supporting
    # slice deletion) and empties it whenever the path is compared for
    # equality; the comparison itself always reports True.

    def __new__(cls, elem, *args):
        path = super().__new__(cls, *args)
        path.elem = elem
        return path

    def __eq__(self, o):
        del self.elem[:]
        return True

    # Defining __eq__ would otherwise leave the class unhashable, so
    # reuse the hash of the underlying string.
    __hash__ = str.__hash__
2683
class BadElementPath(str):
    # A str subclass whose equality comparison always fails with
    # ZeroDivisionError; used to check that errors raised while a path is
    # being compared are propagated to the caller.

    def __eq__(self, o):
        # Raise the error explicitly. The previous spelling, `raise 1/0`,
        # only worked because evaluating the operand failed first; the
        # `raise` itself could never have raised anything but TypeError.
        raise ZeroDivisionError('division by zero')

    # Defining __eq__ would otherwise leave the class unhashable, so
    # reuse the hash of the underlying string.
    __hash__ = str.__hash__
2688
class BadElementPathTest(ElementTestCase, unittest.TestCase):
    # find(), findtext() and findall() called with path objects whose
    # equality comparison either mutates the tree or raises.

    def setUp(self):
        super().setUp()
        from xml.etree import ElementPath
        # Run each test against an empty path cache; the previous cache
        # object is kept so that tearDown() can put it back.
        self.path_cache = ElementPath._cache
        ElementPath._cache = {}

    def tearDown(self):
        from xml.etree import ElementPath
        ElementPath._cache = self.path_cache
        super().tearDown()

    @staticmethod
    def _tree_with_child():
        # Build <foo><bar/></foo>, the tree shared by every test below.
        root = ET.Element('foo')
        root.extend([ET.Element('bar')])
        return root

    def test_find_with_mutating(self):
        root = self._tree_with_child()
        root.find(MutatingElementPath(root, 'x'))

    def test_find_with_error(self):
        root = self._tree_with_child()
        try:
            root.find(BadElementPath('x'))
        except ZeroDivisionError:
            pass

    def test_findtext_with_mutating(self):
        root = self._tree_with_child()
        root.findtext(MutatingElementPath(root, 'x'))

    def test_findtext_with_error(self):
        root = self._tree_with_child()
        try:
            root.findtext(BadElementPath('x'))
        except ZeroDivisionError:
            pass

    def test_findall_with_mutating(self):
        root = self._tree_with_child()
        root.findall(MutatingElementPath(root, 'x'))

    def test_findall_with_error(self):
        root = self._tree_with_child()
        try:
            root.findall(BadElementPath('x'))
        except ZeroDivisionError:
            pass
2739
2740
class ElementTreeTypeTest(unittest.TestCase):
    # The public names of the module are real types, and Element can be
    # subclassed in the usual ways.

    def test_istype(self):
        public_classes = (
            ET.ParseError,
            ET.QName,
            ET.ElementTree,
            ET.Element,
            ET.TreeBuilder,
            ET.XMLParser,
        )
        for cls in public_classes:
            self.assertIsInstance(cls, type)

    def test_Element_subclass_trivial(self):
        class MyElement(ET.Element):
            pass

        instance = MyElement('foo')
        self.assertIsInstance(instance, ET.Element)
        self.assertIsInstance(instance, MyElement)
        self.assertEqual(instance.tag, 'foo')

        # test that attribute assignment works (issue 14849)
        instance.text = "joe"
        self.assertEqual(instance.text, "joe")

    def test_Element_subclass_constructor(self):
        class MyElement(ET.Element):
            # Same signature as the base class; only the tag is altered.
            def __init__(self, tag, attrib={}, **extra):
                super().__init__(tag + '__', attrib, **extra)

        instance = MyElement('foo', {'a': 1, 'b': 2}, c=3, d=4)
        self.assertEqual(instance.tag, 'foo__')
        expected_items = [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
        self.assertEqual(sorted(instance.items()), expected_items)

    def test_Element_subclass_new_method(self):
        class MyElement(ET.Element):
            def newmethod(self):
                return self.tag

        instance = MyElement('joe')
        self.assertEqual(instance.newmethod(), 'joe')

    def test_Element_subclass_find(self):
        class MyElement(ET.Element):
            pass

        # A subclass instance used as a child is found like any other.
        parent = ET.Element('foo')
        parent.text = 'text'
        child = MyElement('bar')
        child.text = 'subtext'
        parent.append(child)

        self.assertEqual(parent.findtext('bar'), 'subtext')
        self.assertEqual(parent.find('bar').tag, 'bar')
        matches = list(parent.findall('bar'))
        self.assertEqual(len(matches), 1, matches)
        self.assertEqual(matches[0].tag, 'bar')
2795
Eli Benderskyceab1a92013-01-12 07:42:46 -08002796
2797class ElementFindTest(unittest.TestCase):
def test_find_simple(self):
    # Basic find() / findtext() lookups on the sample document.
    root = ET.XML(SAMPLE_XML)
    self.assertEqual(root.find('tag').tag, 'tag')
    self.assertEqual(root.find('section/tag').tag, 'tag')
    self.assertEqual(root.find('./tag').tag, 'tag')

    root[2] = ET.XML(SAMPLE_SECTION)
    nexttag = root.find('section/nexttag')
    self.assertEqual(nexttag.tag, 'nexttag')

    self.assertEqual(root.findtext('./tag'), 'text')
    self.assertEqual(root.findtext('section/tag'), 'subtext')

    # section/nexttag is found but has no text
    self.assertEqual(root.findtext('section/nexttag'), '')
    self.assertEqual(root.findtext('section/nexttag', 'default'), '')

    # tog doesn't exist and 'default' kicks in
    self.assertIsNone(root.findtext('tog'))
    self.assertEqual(root.findtext('tog', 'default'), 'default')

    # Issue #16922
    with_empty_child = ET.XML('<tag><empty /></tag>')
    self.assertEqual(with_empty_child.findtext('empty'), '')
2820
def test_find_xpath(self):
    # Positional predicates: [N], [last()] and [last()-N].
    LINEAR_XML = '''
    <body>
        <tag class='a'/>
        <tag class='b'/>
        <tag class='c'/>
        <tag class='d'/>
    </body>'''
    root = ET.XML(LINEAR_XML)

    def class_at(path):
        return root.find(path).attrib['class']

    # Test for numeric indexing and last()
    self.assertEqual(class_at('./tag[1]'), 'a')
    self.assertEqual(class_at('./tag[2]'), 'b')
    self.assertEqual(class_at('./tag[last()]'), 'd')
    self.assertEqual(class_at('./tag[last()-1]'), 'c')
    self.assertEqual(class_at('./tag[last()-2]'), 'b')

    # Positions below 1 and offsets that are not a positive subtraction
    # from last() are rejected.
    self.assertRaisesRegex(SyntaxError, 'XPath', root.find, './tag[0]')
    self.assertRaisesRegex(SyntaxError, 'XPath', root.find, './tag[-1]')
    self.assertRaisesRegex(SyntaxError, 'XPath', root.find, './tag[last()-0]')
    self.assertRaisesRegex(SyntaxError, 'XPath', root.find, './tag[last()+1]')
2842
def test_findall(self):
    # findall() across the path syntax: child and descendant steps,
    # wildcards, '.' and '..', and attribute / child-text / own-text
    # predicates together with their '!=' forms.
    root = ET.XML(SAMPLE_XML)
    root[2] = ET.XML(SAMPLE_SECTION)

    def check(path, expected):
        self.assertEqual(summarize_list(root.findall(path)), expected)

    # Plain steps and wildcards.
    check('.', ['body'])
    check('tag', ['tag', 'tag'])
    check('tog', [])
    check('tog/foo', [])
    check('*', ['tag', 'tag', 'section'])
    check('.//tag', ['tag'] * 4)
    check('section/tag', ['tag'])
    check('section//tag', ['tag'] * 2)
    check('section/*', ['tag', 'nexttag', 'nextsection'])
    check('section//*', ['tag', 'nexttag', 'nextsection', 'tag'])
    check('section/.//*', ['tag', 'nexttag', 'nextsection', 'tag'])
    check('*/*', ['tag', 'nexttag', 'nextsection'])
    check('*//*', ['tag', 'nexttag', 'nextsection', 'tag'])
    check('*/tag', ['tag'])
    check('*/./tag', ['tag'])
    check('./tag', ['tag'] * 2)
    check('././tag', ['tag'] * 2)

    # Attribute and child-element predicates, and parent steps.
    check('.//tag[@class]', ['tag'] * 3)
    check('.//tag[@class="a"]', ['tag'])
    check('.//tag[@class!="a"]', ['tag'] * 2)
    check('.//tag[@class="b"]', ['tag'] * 2)
    check('.//tag[@class!="b"]', ['tag'])
    check('.//tag[@id]', ['tag'])
    check('.//section[tag]', ['section'])
    check('.//section[element]', [])
    check('../tag', [])
    check('section/../tag', ['tag'] * 2)
    self.assertEqual(root.findall('section//'), root.findall('section//*'))

    # Child-text predicate, with whitespace variations.
    check(".//section[tag='subtext']", ['section'])
    check(".//section[tag ='subtext']", ['section'])
    check(".//section[tag= 'subtext']", ['section'])
    check(".//section[tag = 'subtext']", ['section'])
    check(".//section[ tag = 'subtext' ]", ['section'])

    # Negations of above tests. They match nothing because the sole section
    # tag has subtext.
    check(".//section[tag!='subtext']", [])
    check(".//section[tag !='subtext']", [])
    check(".//section[tag!= 'subtext']", [])
    check(".//section[tag != 'subtext']", [])
    check(".//section[ tag != 'subtext' ]", [])

    # Own-text predicate, with whitespace variations.
    check(".//tag[.='subtext']", ['tag'])
    check(".//tag[. ='subtext']", ['tag'])
    check('.//tag[.= "subtext"]', ['tag'])
    check('.//tag[ . = "subtext" ]', ['tag'])
    check(".//tag[. = 'subtext']", ['tag'])
    check(".//tag[. = 'subtext ']", [])
    check(".//tag[.= ' subtext']", [])

    # Negations of above tests.
    # Matches everything but the tag containing subtext
    check(".//tag[.!='subtext']", ['tag'] * 3)
    check(".//tag[. !='subtext']", ['tag'] * 3)
    check('.//tag[.!= "subtext"]', ['tag'] * 3)
    check('.//tag[ . != "subtext" ]', ['tag'] * 3)
    check(".//tag[. != 'subtext']", ['tag'] * 3)
    # Matches all tags.
    check(".//tag[. != 'subtext ']", ['tag'] * 4)
    check(".//tag[.!= ' subtext']", ['tag'] * 4)

    # duplicate section => 2x tag matches
    root[1] = root[2]
    check(".//section[tag = 'subtext']", ['section', 'section'])
    check(".//tag[. = 'subtext']", ['tag', 'tag'])
2954
def test_test_find_with_ns(self):
    # With the namespaced sample document an unqualified 'tag' is expected
    # to match nothing, while the fully qualified name is expected to match
    # two direct children and three elements in the whole tree.
    root = ET.XML(SAMPLE_XML_NS)
    qualified = '{http://effbot.org/ns}tag'
    expectations = (
        ('tag', []),
        (qualified, [qualified] * 2),
        ('.//' + qualified, [qualified] * 3),
    )
    for path, expected in expectations:
        self.assertEqual(summarize_list(root.findall(path)), expected)
2964
Eli Bendersky2acc5252013-08-03 17:47:47 -07002965 def test_findall_different_nsmaps(self):
2966 root = ET.XML('''
2967 <a xmlns:x="X" xmlns:y="Y">
2968 <x:b><c/></x:b>
2969 <b/>
2970 <c><x:b/><b/></c><y:b/>
2971 </a>''')
2972 nsmap = {'xx': 'X'}
2973 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
2974 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
2975 nsmap = {'xx': 'Y'}
2976 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
2977 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
Stefan Behnele8113f52019-04-18 19:05:03 +02002978 nsmap = {'xx': 'X', '': 'Y'}
Stefan Behnele9927e12019-04-14 10:09:09 +02002979 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
2980 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1)
Eli Bendersky2acc5252013-08-03 17:47:47 -07002981
def test_findall_wildcard(self):
    # Covers the '{*}name', '{ns}*', '{}name' and '{*}*' wildcard forms,
    # first for direct children and then for the './/' descendant axis.
    doc = ET.XML('''
        <a xmlns:x="X" xmlns:y="Y">
            <x:b><c/></x:b>
            <b/>
            <c><x:b/><b/></c><y:b/>
        </a>''')
    doc.append(ET.Comment('test'))

    def tags(path):
        return summarize_list(doc.findall(path))

    # Direct children of the root.
    self.assertEqual(tags("{*}b"), ['{X}b', 'b', '{Y}b'])
    self.assertEqual(tags("{*}c"), ['c'])
    self.assertEqual(tags("{X}*"), ['{X}b'])
    self.assertEqual(tags("{Y}*"), ['{Y}b'])
    self.assertEqual(tags("{}*"), ['b', 'c'])
    self.assertEqual(tags("{}b"), ['b'])  # only for consistency
    self.assertEqual(tags("{}b"), tags("b"))
    self.assertEqual(tags("{*}*"), ['{X}b', 'b', 'c', '{Y}b'])
    # This is an unfortunate difference, but that's how find('*') works:
    # '*' also returns the trailing comment node, '{*}*' does not.
    self.assertEqual(summarize_list(doc.findall("{*}*") + [doc[-1]]),
                     tags("*"))

    # All descendants.
    self.assertEqual(tags(".//{*}b"), ['{X}b', 'b', '{X}b', 'b', '{Y}b'])
    self.assertEqual(tags(".//{*}c"), ['c', 'c'])
    self.assertEqual(tags(".//{X}*"), ['{X}b', '{X}b'])
    self.assertEqual(tags(".//{Y}*"), ['{Y}b'])
    self.assertEqual(tags(".//{}*"), ['c', 'b', 'c', 'b'])
    self.assertEqual(tags(".//{}b"), ['b', 'b'])  # only for consistency
    self.assertEqual(tags(".//{}b"), tags(".//b"))
3025
def test_bad_find(self):
    # Searching an element with a path that starts with '/' must raise
    # SyntaxError mentioning the absolute path.
    root = ET.XML(SAMPLE_XML)
    self.assertRaisesRegex(
        SyntaxError, 'cannot use absolute path', root.findall, '/tag')
Eli Benderskyc31f7732013-01-12 07:44:32 -08003030
def test_find_through_ElementTree(self):
    # find(), findtext() and findall() called on an ElementTree wrapper
    # must give the results expected for the wrapped root element.
    root = ET.XML(SAMPLE_XML)
    self.assertEqual(ET.ElementTree(root).find('tag').tag, 'tag')
    self.assertEqual(ET.ElementTree(root).findtext('tag'), 'text')
    self.assertEqual(summarize_list(ET.ElementTree(root).findall('tag')),
                     ['tag'] * 2)
    # A path starting with '/' produces a FutureWarning.  The pattern below
    # is used as a regular expression, so its whitespace has to agree with
    # the emitted message exactly: note the two spaces after "version.".
    # The trailing '.+' stands for the repr of the suggested path.
    msg = ("This search is broken in 1.3 and earlier, and will be fixed "
           "in a future version.  If you rely on the current behaviour, "
           "change it to '.+'")
    with self.assertWarnsRegex(FutureWarning, msg):
        it = ET.ElementTree(root).findall('//tag')
    self.assertEqual(summarize_list(it), ['tag'] * 3)
Eli Benderskyc31f7732013-01-12 07:44:32 -08003044
Eli Benderskyceab1a92013-01-12 07:42:46 -08003045
class ElementIterTest(unittest.TestCase):
    """Tests for Element.iter(), Element.itertext() and iterparse()."""

    def _ilist(self, elem, tag=None):
        # Summarize the elements produced by elem.iter(tag).
        found = elem.iter(tag)
        return summarize_list(found)

    def test_basic(self):
        html = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
        body = html.find('body')
        self.assertEqual(self._ilist(html), ['html', 'body', 'i'])
        self.assertEqual(self._ilist(body), ['body', 'i'])
        first = next(html.iter())
        self.assertEqual(first.tag, 'html')
        self.assertEqual(''.join(html.itertext()), 'this is a paragraph...')
        self.assertEqual(''.join(html.find('body').itertext()),
                         'this is a paragraph.')
        self.assertEqual(next(html.itertext()), 'this is a ')

        # iterparse should return an iterator
        stream = serialize(html, to_string=False)
        event, _ = next(ET.iterparse(stream))[:2]
        self.assertEqual(event, 'end')

        # With an explicit parser too (issue #9708)
        stream = serialize(html, to_string=False)
        explicit = ET.XMLParser(target=ET.TreeBuilder())
        first_item = next(ET.iterparse(stream, parser=explicit))
        self.assertEqual(first_item[0], 'end')

        # A tree built without a root element cannot be iterated.
        rootless = ET.ElementTree(None)
        with self.assertRaises(AttributeError):
            rootless.iter()

        # Issue #16913
        escaped = ET.XML("<root>a&amp;<sub>b&amp;</sub>c&amp;</root>")
        self.assertEqual(''.join(escaped.itertext()), 'a&b&c&')

    def test_corners(self):
        # single root, no subelements
        top = ET.Element('a')
        self.assertEqual(self._ilist(top), ['a'])

        # one child
        child = ET.SubElement(top, 'b')
        self.assertEqual(self._ilist(top), ['a', 'b'])

        # one child and one grandchild
        ET.SubElement(child, 'c')
        self.assertEqual(self._ilist(top), ['a', 'b', 'c'])

        # two children, only first with grandchild
        ET.SubElement(top, 'd')
        self.assertEqual(self._ilist(top), ['a', 'b', 'c', 'd'])

        # replace first child by second
        top[0] = top[1]
        del top[1]
        self.assertEqual(self._ilist(top), ['a', 'd'])

    def test_iter_by_tag(self):
        document = ET.XML('''
            <document>
                <house>
                    <room>bedroom1</room>
                    <room>bedroom2</room>
                </house>
                <shed>nothing here
                </shed>
                <house>
                    <room>bedroom8</room>
                </house>
            </document>''')

        for wanted, count in (('room', 3), ('house', 2)):
            self.assertEqual(self._ilist(document, wanted), [wanted] * count)

        # test that iter also accepts 'tag' as a keyword arg
        by_keyword = document.iter(tag='room')
        self.assertEqual(summarize_list(by_keyword), ['room'] * 3)

        # make sure both tag=None and tag='*' return all tags
        everything = ['document', 'house', 'room', 'room',
                      'shed', 'house', 'room']
        self.assertEqual(summarize_list(document.iter()), everything)
        self.assertEqual(self._ilist(document), everything)
        self.assertEqual(self._ilist(document, '*'), everything)

    def test_copy(self):
        # Copying an element iterator is expected to raise TypeError.
        iterator = ET.Element('a').iter()
        self.assertRaises(TypeError, copy.copy, iterator)

    def test_pickle(self):
        # Pickling an element iterator is expected to fail for every
        # protocol version.
        iterator = ET.Element('a').iter()
        expected_errors = (TypeError, pickle.PicklingError)
        for protocol in range(pickle.HIGHEST_PROTOCOL + 1):
            self.assertRaises(expected_errors,
                              pickle.dumps, iterator, protocol)
3140
Eli Bendersky64d11e62012-06-15 07:42:50 +03003141
class TreeBuilderTest(unittest.TestCase):
    """Tests for TreeBuilder and for custom target objects of XMLParser."""

    # Document with a DOCTYPE, root text, one child element and a tail.
    sample1 = ('<!DOCTYPE html PUBLIC'
               ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
               ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
               '<html>text<div>subtext</div>tail</html>')

    sample2 = '''<toplevel>sometext</toplevel>'''

    def _check_sample1_element(self, e):
        # Assert that *e* is the tree expected from parsing sample1.
        self.assertEqual(e.tag, 'html')
        self.assertEqual(e.text, 'text')
        self.assertEqual(e.tail, None)
        self.assertEqual(e.attrib, {})
        children = list(e)
        self.assertEqual(len(children), 1)
        child = children[0]
        self.assertEqual(child.tag, 'div')
        self.assertEqual(child.text, 'subtext')
        self.assertEqual(child.tail, 'tail')
        self.assertEqual(child.attrib, {})

    def test_dummy_builder(self):
        # Targets may implement any subset of the handler methods;
        # parser.close() returns whatever the target's close() returns.
        class BaseDummyBuilder:
            def close(self):
                return 42

        class DummyBuilder(BaseDummyBuilder):
            data = start = end = lambda *a: None

        for target in (DummyBuilder(), BaseDummyBuilder()):
            parser = ET.XMLParser(target=target)
            parser.feed(self.sample1)
            self.assertEqual(parser.close(), 42)

        # A target without close() makes parser.close() return None.
        parser = ET.XMLParser(target=object())
        parser.feed(self.sample1)
        self.assertIsNone(parser.close())

    def test_treebuilder_comment(self):
        # The default factory and an explicit ET.Comment behave the same.
        for b in (ET.TreeBuilder(),
                  ET.TreeBuilder(comment_factory=ET.Comment)):
            self.assertEqual(b.comment('ctext').tag, ET.Comment)
            self.assertEqual(b.comment('ctext').text, 'ctext')

        # Any callable can be used; its return value is passed through.
        b = ET.TreeBuilder(comment_factory=len)
        self.assertEqual(b.comment('ctext'), len('ctext'))

    def test_treebuilder_pi(self):
        b = ET.TreeBuilder()
        self.assertEqual(b.pi('target', None).tag, ET.PI)
        self.assertEqual(b.pi('target', None).text, 'target')

        b = ET.TreeBuilder(pi_factory=ET.PI)
        self.assertEqual(b.pi('target').tag, ET.PI)
        self.assertEqual(b.pi('target').text, "target")
        self.assertEqual(b.pi('pitarget', ' text ').tag, ET.PI)
        # ET.PI joins target and text with one space; the text passed here
        # already starts with a space, hence two spaces in the result.
        self.assertEqual(b.pi('pitarget', ' text ').text, "pitarget  text ")

        # A custom factory receives (target, text) and its result is
        # returned unchanged.
        b = ET.TreeBuilder(pi_factory=lambda target, text: (len(target), text))
        self.assertEqual(b.pi('target'), (len('target'), None))
        self.assertEqual(b.pi('pitarget', ' text '),
                         (len('pitarget'), ' text '))

    def test_late_tail(self):
        # Issue #37399: The tail of an ignored comment could overwrite the
        # text before it.
        class TreeBuilderSubclass(ET.TreeBuilder):
            pass

        for xml in ("<a>text<!-- comment -->tail</a>",
                    "<a>text<?pi data?>tail</a>"):
            a = ET.fromstring(xml)
            self.assertEqual(a.text, "texttail")

            parser = ET.XMLParser(target=TreeBuilderSubclass())
            parser.feed(xml)
            a = parser.close()
            self.assertEqual(a.text, "texttail")

    def test_late_tail_mix_pi_comments(self):
        # Issue #37399: The tail of an ignored comment could overwrite the
        # text before it.
        # Test appending tails to comments/pis.
        class TreeBuilderSubclass(ET.TreeBuilder):
            pass

        # (document, TreeBuilder options, inserted node text,
        #  inserted node tail, text of the root)
        scenarios = (
            ("<a>text<?pi1?> <!-- comment -->\n<?pi2?>tail</a>",
             {'insert_comments': True}, ' comment ', '\ntail', "text "),
            ("<a>text<!-- comment -->\n<?pi data?>tail</a>",
             {'insert_pis': True}, 'pi data', 'tail', "text\n"),
        )
        for xml, options, node_text, node_tail, root_text in scenarios:
            for builder_class in (ET.TreeBuilder, TreeBuilderSubclass):
                parser = ET.XMLParser(target=builder_class(**options))
                parser.feed(xml)
                a = parser.close()
                self.assertEqual(a[0].text, node_text)
                self.assertEqual(a[0].tail, node_tail)
                self.assertEqual(a.text, root_text)

    def test_treebuilder_elementfactory_none(self):
        # element_factory=None must behave like the default factory.
        parser = ET.XMLParser(target=ET.TreeBuilder(element_factory=None))
        parser.feed(self.sample1)
        e = parser.close()
        self._check_sample1_element(e)

    def test_subclass(self):
        class MyTreeBuilder(ET.TreeBuilder):
            def foobar(self, x):
                return x * 2

        tb = MyTreeBuilder()
        self.assertEqual(tb.foobar(10), 20)

        parser = ET.XMLParser(target=tb)
        parser.feed(self.sample1)

        e = parser.close()
        self._check_sample1_element(e)

    def test_subclass_comment_pi(self):
        class MyTreeBuilder(ET.TreeBuilder):
            def foobar(self, x):
                return x * 2

        tb = MyTreeBuilder(comment_factory=ET.Comment, pi_factory=ET.PI)
        self.assertEqual(tb.foobar(10), 20)

        parser = ET.XMLParser(target=tb)
        parser.feed(self.sample1)
        # A comment and a PI after the root element must not alter the tree.
        parser.feed('<!-- a comment--><?and a pi?>')

        e = parser.close()
        self._check_sample1_element(e)

    def test_element_factory(self):
        # The factory is called once per element with (tag, attrib).
        lst = []

        def myfactory(tag, attrib):
            lst.append(tag)
            return ET.Element(tag, attrib)

        tb = ET.TreeBuilder(element_factory=myfactory)
        parser = ET.XMLParser(target=tb)
        parser.feed(self.sample2)
        parser.close()

        self.assertEqual(lst, ['toplevel'])

    def _check_element_factory_class(self, cls):
        # Parse sample1 with *cls* as element factory and check that the
        # root is an instance of *cls* with the expected content.
        tb = ET.TreeBuilder(element_factory=cls)

        parser = ET.XMLParser(target=tb)
        parser.feed(self.sample1)
        e = parser.close()
        self.assertIsInstance(e, cls)
        self._check_sample1_element(e)

    def test_element_factory_subclass(self):
        class MyElement(ET.Element):
            pass
        self._check_element_factory_class(MyElement)

    def test_element_factory_pure_python_subclass(self):
        # Mimic SimpleTAL's behaviour (issue #16089): both versions of
        # TreeBuilder should be able to cope with a subclass of the
        # pure Python Element class.
        base = ET._Element_Py
        # Not from a C extension
        self.assertEqual(base.__module__, 'xml.etree.ElementTree')
        # Force some multiple inheritance with a C class to make things
        # more interesting.
        class MyElement(base, ValueError):
            pass
        self._check_element_factory_class(MyElement)

    def test_doctype(self):
        # A target's doctype() method receives (name, pubid, system).
        class DoctypeParser:
            _doctype = None

            def doctype(self, name, pubid, system):
                self._doctype = (name, pubid, system)

            def close(self):
                return self._doctype

        parser = ET.XMLParser(target=DoctypeParser())
        parser.feed(self.sample1)

        self.assertEqual(
            parser.close(),
            ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
             'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))

    def test_builder_lookup_errors(self):
        # Target whose attribute lookup raises for one chosen handler name
        # and yields a do-nothing handler for every other name.
        class RaisingBuilder:
            def __init__(self, raise_in=None, what=ValueError):
                self.raise_in = raise_in
                self.what = what

            def __getattr__(self, name):
                if name == self.raise_in:
                    raise self.what(self.raise_in)
                def handle(*args):
                    pass
                return handle

        events = ('start', 'data', 'end', 'comment', 'pi')

        ET.XMLParser(target=RaisingBuilder())
        # cET also checks for 'close' and 'doctype', PyET does it only at need
        for event in events:
            with self.assertRaisesRegex(ValueError, event):
                ET.XMLParser(target=RaisingBuilder(event))

        # AttributeError means "handler not provided" and must be tolerated.
        ET.XMLParser(target=RaisingBuilder(what=AttributeError))
        for event in events:
            parser = ET.XMLParser(
                target=RaisingBuilder(event, what=AttributeError))
            parser.feed(self.sample1)
            self.assertIsNone(parser.close())
3387
Eli Bendersky175fada2012-06-15 08:37:08 +03003388
class XMLParserTest(unittest.TestCase):
    """Tests for XMLParser construction, subclassing and doctype handling."""

    sample1 = b'<file><line>22</line></file>'
    sample2 = (b'<!DOCTYPE html PUBLIC'
               b' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
               b' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
               b'<html>text</html>')
    sample3 = ('<?xml version="1.0" encoding="iso-8859-1"?>\n'
               '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>')

    def _check_sample_element(self, e):
        # Assert that *e* is the tree expected from parsing sample1.
        line = e[0]
        self.assertEqual(e.tag, 'file')
        self.assertEqual(line.tag, 'line')
        self.assertEqual(line.text, '22')

    def test_constructor_args(self):
        # Both keyword arguments of the constructor are accepted together.
        builder = ET.TreeBuilder()
        parser2 = ET.XMLParser(encoding='utf-8', target=builder)
        parser2.feed(self.sample1)
        root = parser2.close()
        self._check_sample_element(root)

    def test_subclass(self):
        class MyParser(ET.XMLParser):
            pass

        derived = MyParser()
        derived.feed(self.sample1)
        root = derived.close()
        self._check_sample_element(root)

    def test_doctype_warning(self):
        # Parsing a document with a DOCTYPE using a plain parser must not
        # emit a DeprecationWarning (it would be raised as an error here).
        with warnings.catch_warnings():
            warnings.simplefilter('error', DeprecationWarning)
            plain = ET.XMLParser()
            plain.feed(self.sample2)
            plain.close()

    def test_subclass_doctype(self):
        expected_doctype = (
            'html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')
        # What each doctype() implementation was last called with.
        seen = {'parser': None, 'target': None}

        class MyParserWithDoctype(ET.XMLParser):
            def doctype(self, *args, **kwargs):
                seen['parser'] = (args, kwargs)

        # Without a target that handles doctype, the parser's own doctype()
        # is ignored and a RuntimeWarning is emitted instead.
        parser = MyParserWithDoctype()
        with self.assertWarnsRegex(RuntimeWarning, 'doctype'):
            parser.feed(self.sample2)
        parser.close()
        self.assertIsNone(seen['parser'])

        seen['parser'] = seen['target'] = None
        with warnings.catch_warnings():
            warnings.simplefilter('error', DeprecationWarning)
            warnings.simplefilter('error', RuntimeWarning)

            class DoctypeParser:
                def doctype(self, name, pubid, system):
                    seen['target'] = (name, pubid, system)

            # With a target that defines doctype(), the target is called
            # and no warning is emitted.
            parser = MyParserWithDoctype(target=DoctypeParser())
            parser.feed(self.sample2)
            parser.close()
            self.assertIsNone(seen['parser'])
            self.assertEqual(seen['target'], expected_doctype)

    def test_inherited_doctype(self):
        '''Ensure that ordinary usage is not deprecated (Issue 19176)'''
        with warnings.catch_warnings():
            for category in (DeprecationWarning, RuntimeWarning):
                warnings.simplefilter('error', category)

            class MyParserWithoutDoctype(ET.XMLParser):
                pass

            derived = MyParserWithoutDoctype()
            derived.feed(self.sample2)
            derived.close()

    def test_parse_string(self):
        # A str document is parsed as text: the non-Latin-1 characters
        # survive even though the declaration names iso-8859-1.
        money = '$\xa3\u20ac\U0001017b'
        parser = ET.XMLParser(target=ET.TreeBuilder())
        parser.feed(self.sample3)
        root = parser.close()
        self.assertEqual(root.tag, 'money')
        self.assertEqual(root.attrib['value'], money)
        self.assertEqual(root.text, money)
3471
Eli Bendersky52467b12012-06-01 07:13:08 +03003472
class NamespaceParseTest(unittest.TestCase):
    """Searching a namespaced document with fully qualified names."""

    def test_find_with_namespace(self):
        prefixes = {'h': 'hello', 'f': 'foo'}
        document = ET.fromstring(SAMPLE_XML_NS_ELEMS)

        # (path using {uri}name notation, expected number of matches)
        expectations = (
            ('{hello}table', 1),
            ('.//{hello}td', 2),
            ('.//{foo}name', 1),
        )
        for path, count in expectations:
            self.assertEqual(len(document.findall(path, prefixes)), count)
3481
3482
class ElementSlicingTest(unittest.TestCase):
    """Indexing, slicing, slice assignment and slice deletion of children."""

    def _elem_tags(self, elemlist):
        # Tags of the given elements, in order.
        return [item.tag for item in elemlist]

    def _subelem_tags(self, elem):
        # Tags of the direct children of *elem*, in order.
        return [child.tag for child in list(elem)]

    def _make_elem_with_children(self, numchildren):
        """Return an Element tagged 'a' that has *numchildren* children
        tagged 'a0', 'a1', ... in that order.

        """
        parent = ET.Element('a')
        for index in range(numchildren):
            ET.SubElement(parent, 'a%s' % index)
        return parent

    def test_getslice_single_index(self):
        e = self._make_elem_with_children(10)

        self.assertEqual(e[1].tag, 'a1')
        self.assertEqual(e[-2].tag, 'a8')

        # Out-of-range indexes fail in both directions.
        for index in (12, -12):
            with self.assertRaises(IndexError):
                e[index]

    def test_getslice_range(self):
        e = self._make_elem_with_children(6)

        cases = (
            (slice(3, None), ['a3', 'a4', 'a5']),
            (slice(3, 6), ['a3', 'a4', 'a5']),
            (slice(3, 16), ['a3', 'a4', 'a5']),
            (slice(3, 5), ['a3', 'a4']),
            (slice(3, -1), ['a3', 'a4']),
            (slice(None, 2), ['a0', 'a1']),
        )
        for window, expected in cases:
            self.assertEqual(self._elem_tags(e[window]), expected)

    def test_getslice_steps(self):
        e = self._make_elem_with_children(10)

        cases = (
            (slice(8, 10, 1), ['a8', 'a9']),
            (slice(None, None, 3), ['a0', 'a3', 'a6', 'a9']),
            (slice(None, None, 8), ['a0', 'a8']),
            (slice(1, None, 8), ['a1', 'a9']),
            # Huge steps, including one far beyond sys.maxsize.
            (slice(3, None, sys.maxsize), ['a3']),
            (slice(3, None, sys.maxsize << 64), ['a3']),
        )
        for window, expected in cases:
            self.assertEqual(self._elem_tags(e[window]), expected)

    def test_getslice_negative_steps(self):
        e = self._make_elem_with_children(4)

        cases = (
            (slice(None, None, -1), ['a3', 'a2', 'a1', 'a0']),
            (slice(None, None, -2), ['a3', 'a1']),
            (slice(3, None, -sys.maxsize), ['a3']),
            (slice(3, None, -sys.maxsize - 1), ['a3']),
            (slice(3, None, -sys.maxsize << 64), ['a3']),
        )
        for window, expected in cases:
            self.assertEqual(self._elem_tags(e[window]), expected)

    def test_delslice(self):
        # (number of children, slice to delete, tags left afterwards)
        cases = (
            (4, slice(0, 2), ['a2', 'a3']),
            (4, slice(0, None), []),
            (4, slice(None, None, -1), []),
            (4, slice(None, None, -2), ['a0', 'a2']),
            (4, slice(1, None, 2), ['a0', 'a2']),
            (2, slice(None, None, 2), ['a1']),
        )
        for count, window, remaining in cases:
            e = self._make_elem_with_children(count)
            del e[window]
            self.assertEqual(self._subelem_tags(e), remaining)

    def test_setslice_single_index(self):
        e = self._make_elem_with_children(4)
        e[1] = ET.Element('b')
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])

        e[-2] = ET.Element('c')
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'c', 'a3'])

        # Failed assignments must leave the children untouched.
        for index in (5, -5):
            with self.assertRaises(IndexError):
                e[index] = ET.Element('d')
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'c', 'a3'])

    def test_setslice_range(self):
        # A contiguous slice may be replaced by the same number of
        # elements, by fewer, or by more.
        e = self._make_elem_with_children(4)
        e[1:3] = [ET.Element('b%s' % i) for i in range(2)]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'b1', 'a3'])

        e = self._make_elem_with_children(4)
        e[1:3] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a3'])

        e = self._make_elem_with_children(4)
        e[1:3] = [ET.Element('b%s' % i) for i in range(3)]
        self.assertEqual(self._subelem_tags(e),
                         ['a0', 'b0', 'b1', 'b2', 'a3'])

    def test_setslice_steps(self):
        e = self._make_elem_with_children(6)
        e[1:5:2] = [ET.Element('b%s' % i) for i in range(2)]
        self.assertEqual(self._subelem_tags(e),
                         ['a0', 'b0', 'a2', 'b1', 'a4', 'a5'])

        # An extended slice only accepts a replacement of the same length.
        e = self._make_elem_with_children(6)
        with self.assertRaises(ValueError):
            e[1:5:2] = [ET.Element('b')]
        with self.assertRaises(ValueError):
            e[1:5:2] = [ET.Element('b%s' % i) for i in range(3)]
        with self.assertRaises(ValueError):
            e[1:5:2] = []
        self.assertEqual(self._subelem_tags(e),
                         ['a0', 'a1', 'a2', 'a3', 'a4', 'a5'])

        # Huge steps select just the start index.
        e = self._make_elem_with_children(4)
        e[1::sys.maxsize] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])
        e[1::sys.maxsize << 64] = [ET.Element('c')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3'])

    def test_setslice_negative_steps(self):
        e = self._make_elem_with_children(4)
        e[2:0:-1] = [ET.Element('b%s' % i) for i in range(2)]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b1', 'b0', 'a3'])

        # Length mismatches are rejected and leave the children untouched.
        e = self._make_elem_with_children(4)
        with self.assertRaises(ValueError):
            e[2:0:-1] = [ET.Element('b')]
        with self.assertRaises(ValueError):
            e[2:0:-1] = [ET.Element('b%s' % i) for i in range(3)]
        with self.assertRaises(ValueError):
            e[2:0:-1] = []
        self.assertEqual(self._subelem_tags(e), ['a0', 'a1', 'a2', 'a3'])

        # Huge negative steps select just the start index.
        e = self._make_elem_with_children(4)
        e[1::-sys.maxsize] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])
        e[1::-sys.maxsize - 1] = [ET.Element('c')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3'])
        e[1::-sys.maxsize << 64] = [ET.Element('d')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'd', 'a2', 'a3'])
3631
Eli Benderskyf996e772012-03-16 05:53:30 +02003632
Eli Bendersky00f402b2012-07-15 06:02:22 +03003633class IOTest(unittest.TestCase):
def test_encoding(self):
    # Test encoding issues.
    declaration = "<?xml version='1.0' encoding='%s'?>\n"
    # Encodings for which the expected output starts with a declaration.
    declared = ("iso-8859-1", "utf-16", "utf-32")

    def with_declaration(body, codec, label=None):
        # Expected bytes: declaration naming *label* (default: *codec*)
        # followed by *body*, all encoded with *codec*.
        if label is None:
            label = codec
        return (declaration % label + body).encode(codec)

    # Plain ASCII text; encoding names are accepted in either case.
    node = ET.Element("tag")
    node.text = "abc"
    self.assertEqual(serialize(node), '<tag>abc</tag>')
    for enc in ("utf-8", "us-ascii"):
        with self.subTest(enc):
            for label in (enc, enc.upper()):
                self.assertEqual(serialize(node, encoding=label),
                                 b'<tag>abc</tag>')
    for enc in declared:
        with self.subTest(enc):
            for label in (enc, enc.upper()):
                self.assertEqual(
                    serialize(node, encoding=label),
                    with_declaration("<tag>abc</tag>", enc, label))

    # Markup characters in text.
    node = ET.Element("tag")
    node.text = "<&\"\'>"
    self.assertEqual(serialize(node), '<tag>&lt;&amp;"\'&gt;</tag>')
    for enc in ("utf-8", "us-ascii"):
        self.assertEqual(serialize(node, encoding=enc),
                         b'<tag>&lt;&amp;"\'&gt;</tag>')
    for enc in declared:
        self.assertEqual(
            serialize(node, encoding=enc),
            with_declaration("<tag>&lt;&amp;\"'&gt;</tag>", enc))

    # Markup characters in an attribute value.
    node = ET.Element("tag")
    node.attrib["key"] = "<&\"\'>"
    self.assertEqual(serialize(node),
                     '<tag key="&lt;&amp;&quot;\'&gt;" />')
    for enc in ("utf-8", "us-ascii"):
        self.assertEqual(serialize(node, encoding=enc),
                         b'<tag key="&lt;&amp;&quot;\'&gt;" />')
    for enc in declared:
        self.assertEqual(
            serialize(node, encoding=enc),
            with_declaration("<tag key=\"&lt;&amp;&quot;'&gt;\" />", enc))

    # Non-ASCII text: us-ascii output uses character references.
    node = ET.Element("tag")
    node.text = '\xe5\xf6\xf6<>'
    self.assertEqual(serialize(node), '<tag>\xe5\xf6\xf6&lt;&gt;</tag>')
    self.assertEqual(serialize(node, encoding="utf-8"),
                     b'<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>')
    self.assertEqual(serialize(node, encoding="us-ascii"),
                     b'<tag>&#229;&#246;&#246;&lt;&gt;</tag>')
    for enc in declared:
        self.assertEqual(
            serialize(node, encoding=enc),
            with_declaration("<tag>åöö&lt;&gt;</tag>", enc))

    # Non-ASCII attribute value.
    node = ET.Element("tag")
    node.attrib["key"] = '\xe5\xf6\xf6<>'
    self.assertEqual(serialize(node),
                     '<tag key="\xe5\xf6\xf6&lt;&gt;" />')
    self.assertEqual(serialize(node, encoding="utf-8"),
                     b'<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;" />')
    self.assertEqual(serialize(node, encoding="us-ascii"),
                     b'<tag key="&#229;&#246;&#246;&lt;&gt;" />')
    for enc in ("iso-8859-1", "utf-16", "utf-16le", "utf-16be", "utf-32"):
        self.assertEqual(
            serialize(node, encoding=enc),
            with_declaration("<tag key=\"åöö&lt;&gt;\" />", enc))
3702
def test_write_to_filename(self):
    # Hand write() a file name rather than a file object, then read the
    # file back in binary mode and compare the raw bytes.
    self.addCleanup(os_helper.unlink, TESTFN)
    document = ET.ElementTree(ET.XML('''<site />'''))
    document.write(TESTFN)
    with open(TESTFN, 'rb') as stream:
        written = stream.read()
    self.assertEqual(written, b'''<site />''')
3709
def test_write_to_text_file(self):
    # Serialize as str (encoding='unicode') into a text-mode file that the
    # test opened itself; write() must leave that file object open.
    self.addCleanup(os_helper.unlink, TESTFN)
    document = ET.ElementTree(ET.XML('''<site />'''))
    with open(TESTFN, 'w', encoding='utf-8') as text_file:
        document.write(text_file, encoding='unicode')
        self.assertFalse(text_file.closed)
    with open(TESTFN, 'rb') as stream:
        written = stream.read()
    self.assertEqual(written, b'''<site />''')
3718
def test_write_to_binary_file(self):
    # Serialize with the default encoding into a binary-mode file that the
    # test opened itself; write() must leave that file object open.
    self.addCleanup(os_helper.unlink, TESTFN)
    document = ET.ElementTree(ET.XML('''<site />'''))
    with open(TESTFN, 'wb') as binary_file:
        document.write(binary_file)
        self.assertFalse(binary_file.closed)
    with open(TESTFN, 'rb') as stream:
        written = stream.read()
    self.assertEqual(written, b'''<site />''')
3727
def test_write_to_binary_file_with_bom(self):
    self.addCleanup(os_helper.unlink, TESTFN)
    document = ET.ElementTree(ET.XML('''<site />'''))
    # The whole document (declaration + root) as produced by the utf-16
    # codec; both passes below must write exactly these bytes.
    expected = (
        '''<?xml version='1.0' encoding='utf-16'?>\n'''
        '''<site />''').encode("utf-16")
    # test BOM writing to buffered file, then to non-buffered file
    for open_options in ({}, {'buffering': 0}):
        with open(TESTFN, 'wb', **open_options) as binary_file:
            document.write(binary_file, encoding='utf-16')
            self.assertFalse(binary_file.closed)
        with open(TESTFN, 'rb') as stream:
            written = stream.read()
        self.assertEqual(written, expected)
3747
def test_read_from_stringio(self):
    # parse() must accept an in-memory text stream.
    source = io.StringIO('''<?xml version="1.0"?><site></site>''')
    document = ET.ElementTree()
    document.parse(source)
    root = document.getroot()
    self.assertEqual(root.tag, 'site')
3753
def test_write_to_stringio(self):
    # write() with encoding='unicode' must emit str into a text stream.
    sink = io.StringIO()
    document = ET.ElementTree(ET.XML('''<site />'''))
    document.write(sink, encoding='unicode')
    produced = sink.getvalue()
    self.assertEqual(produced, '''<site />''')
3759
def test_read_from_bytesio(self):
    # parse() must accept an in-memory binary stream.
    source = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
    document = ET.ElementTree()
    document.parse(source)
    root = document.getroot()
    self.assertEqual(root.tag, 'site')
3765
def test_write_to_bytesio(self):
    # write() with the default encoding must emit bytes into a binary stream.
    sink = io.BytesIO()
    document = ET.ElementTree(ET.XML('''<site />'''))
    document.write(sink)
    produced = sink.getvalue()
    self.assertEqual(produced, b'''<site />''')
3771
# Attribute bag: the "user reader/writer" tests instantiate it and attach
# only the stream methods (read, write, seekable, tell) each case needs.
class dummy:
    pass
3774
def test_read_from_user_text_reader(self):
    # The source object exposes nothing but a read() that returns str.
    backing = io.StringIO('''<?xml version="1.0"?><site></site>''')
    source = self.dummy()
    source.read = backing.read
    document = ET.ElementTree()
    document.parse(source)
    root = document.getroot()
    self.assertEqual(root.tag, 'site')
3782
def test_write_to_user_text_writer(self):
    # The target object exposes nothing but a write() that accepts str.
    backing = io.StringIO()
    sink = self.dummy()
    sink.write = backing.write
    document = ET.ElementTree(ET.XML('''<site />'''))
    document.write(sink, encoding='unicode')
    produced = backing.getvalue()
    self.assertEqual(produced, '''<site />''')
3790
def test_read_from_user_binary_reader(self):
    # The source object exposes nothing but a read() that returns bytes.
    raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
    reader = self.dummy()
    reader.read = raw.read
    tree = ET.ElementTree()
    tree.parse(reader)
    self.assertEqual(tree.getroot().tag, 'site')
    # The original ended by rebinding `tree` to a fresh ElementTree that
    # was never used; that dead assignment has been removed.
3799
def test_write_to_user_binary_writer(self):
    # The target object exposes nothing but a write() that accepts bytes.
    backing = io.BytesIO()
    sink = self.dummy()
    sink.write = backing.write
    document = ET.ElementTree(ET.XML('''<site />'''))
    document.write(sink)
    produced = backing.getvalue()
    self.assertEqual(produced, b'''<site />''')
3807
def test_write_to_user_binary_writer_with_bom(self):
    # Besides write(), this writer also offers seekable() and tell();
    # the utf-16 output must equal a one-shot encode of the document.
    backing = io.BytesIO()
    sink = self.dummy()
    sink.write = backing.write
    sink.seekable = lambda: True
    sink.tell = backing.tell
    document = ET.ElementTree(ET.XML('''<site />'''))
    document.write(sink, encoding="utf-16")
    expected = (
        '''<?xml version='1.0' encoding='utf-16'?>\n'''
        '''<site />''').encode("utf-16")
    self.assertEqual(backing.getvalue(), expected)
3819
def test_tostringlist_invariant(self):
    # Joining the pieces from tostringlist() must reproduce tostring(),
    # for str output ('unicode') and for bytes output ('utf-16') alike.
    root = ET.fromstring('<tag>foo</tag>')
    for encoding, empty in (('unicode', ''), ('utf-16', b'')):
        whole = ET.tostring(root, encoding)
        pieces = ET.tostringlist(root, encoding)
        self.assertEqual(whole, empty.join(pieces))
3828
def test_short_empty_elements(self):
    root = ET.fromstring('<tag>a<x />b<y></y>c</tag>')
    collapsed = '<tag>a<x />b<y />c</tag>'
    expanded = '<tag>a<x></x>b<y></y>c</tag>'
    # Default and explicit True both give the self-closing form.
    self.assertEqual(ET.tostring(root, 'unicode'), collapsed)
    self.assertEqual(
        ET.tostring(root, 'unicode', short_empty_elements=True),
        collapsed)
    # False spells out start and end tags for empty elements.
    self.assertEqual(
        ET.tostring(root, 'unicode', short_empty_elements=False),
        expanded)
3840
Eli Benderskyf996e772012-03-16 05:53:30 +02003841
class ParseErrorTest(unittest.TestCase):
    # Checks the type of ET.ParseError and the position/code details
    # attached to it when malformed input is parsed.

    def test_subclass(self):
        self.assertIsInstance(ET.ParseError(), SyntaxError)

    def _get_error(self, s):
        # Parse *s*, which is expected to be malformed, and return the
        # ParseError that was raised.  Going through assertRaises means an
        # input that unexpectedly parses makes the test fail right here,
        # rather than returning None and surfacing later as an unrelated
        # AttributeError in the caller.
        with self.assertRaises(ET.ParseError) as caught:
            ET.fromstring(s)
        return caught.exception

    def test_error_position(self):
        # position is compared against (1, 0), (1, 5) and (1, 6) for the
        # three malformed inputs below.
        self.assertEqual(self._get_error('foo').position, (1, 0))
        self.assertEqual(self._get_error('<tag>&foo;</tag>').position, (1, 5))
        self.assertEqual(self._get_error('foobar<').position, (1, 6))

    def test_error_code(self):
        # Imported locally: only this test needs the expat error tables.
        import xml.parsers.expat.errors as ERRORS
        self.assertEqual(self._get_error('foo').code,
                         ERRORS.codes[ERRORS.XML_ERROR_SYNTAX])
3861
3862
class KeywordArgsTest(unittest.TestCase):
    # Test various issues with keyword arguments passed to ET.Element
    # constructor and methods
    def test_issue14818(self):
        root = ET.XML("<a>foo</a>")
        # Each search method must give the same answer whether its
        # arguments are passed positionally or by keyword.
        by_position = root.find('a', None)
        by_keyword = root.find(path='a', namespaces=None)
        self.assertEqual(by_position, by_keyword)
        by_position = root.findtext('a', None, None)
        by_keyword = root.findtext(path='a', default=None, namespaces=None)
        self.assertEqual(by_position, by_keyword)
        by_position = root.findall('a', None)
        by_keyword = root.findall(path='a', namespaces=None)
        self.assertEqual(by_position, by_keyword)
        by_position = list(root.iterfind('a', None))
        by_keyword = list(root.iterfind(path='a', namespaces=None))
        self.assertEqual(by_position, by_keyword)

        self.assertEqual(ET.Element('a').attrib, {})
        # Every way of supplying the same two attributes must end up with
        # the same attrib mapping.
        variants = [
            ET.Element('a', dict(href="#", id="foo")),
            ET.Element('a', attrib=dict(href="#", id="foo")),
            ET.Element('a', dict(href="#"), id="foo"),
            ET.Element('a', href="#", id="foo"),
            ET.Element('a', dict(href="#", id="foo"), href="#", id="foo"),
        ]
        for element in variants:
            self.assertEqual(element.tag, 'a')
            self.assertEqual(element.attrib, dict(href="#", id="foo"))

        child = ET.SubElement(variants[0], 'foobar', attrib={'key1': 'value1'})
        self.assertEqual(child.attrib['key1'], 'value1')

        # A non-dict attrib is rejected, positionally and by keyword.
        with self.assertRaisesRegex(TypeError, 'must be dict, not str'):
            ET.Element('a', "I'm not a dict")
        with self.assertRaisesRegex(TypeError, 'must be dict, not str'):
            ET.Element('a', attrib="I'm not a dict")
3896
# --------------------------------------------------------------------
3898
class NoAcceleratorTest(unittest.TestCase):
    def setUp(self):
        # Nothing to check unless the module-level pyET has been filled in.
        if pyET:
            return
        raise unittest.SkipTest('only for the Python version')

    # Test that the C accelerator was not imported for pyET
    def test_correct_import_pyET(self):
        # The type of methods defined in Python code is types.FunctionType,
        # while the type of methods defined inside _elementtree is
        # <class 'wrapper_descriptor'>
        python_function = types.FunctionType
        self.assertIsInstance(pyET.Element.__init__, python_function)
        self.assertIsInstance(pyET.XMLParser.__init__, python_function)
Eli Bendersky64d11e62012-06-15 07:42:50 +03003911
Stefan Behnele1d5dd62019-05-01 22:34:13 +02003912
# --------------------------------------------------------------------
3914
def c14n_roundtrip(xml, **options):
    # Canonicalize the XML text with pyET (not the module-level ET),
    # forwarding any keyword options unchanged.
    canonical = pyET.canonicalize(xml, **options)
    return canonical
3917
3918
class C14NTest(unittest.TestCase):
    maxDiff = None

    #
    # simple roundtrip tests (from c14n.py)

    def test_simple_roundtrip(self):
        # Basics
        self.assertEqual(c14n_roundtrip("<doc/>"), '<doc></doc>')
        self.assertEqual(c14n_roundtrip("<doc xmlns='uri'/>"),  # FIXME
                         '<doc xmlns="uri"></doc>')
        self.assertEqual(c14n_roundtrip("<prefix:doc xmlns:prefix='uri'/>"),
                         '<prefix:doc xmlns:prefix="uri"></prefix:doc>')
        self.assertEqual(c14n_roundtrip("<doc xmlns:prefix='uri'><prefix:bar/></doc>"),
                         '<doc><prefix:bar xmlns:prefix="uri"></prefix:bar></doc>')
        self.assertEqual(c14n_roundtrip("<elem xmlns:wsu='http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd' xmlns:SOAP-ENV='http://schemas.xmlsoap.org/soap/envelope/' />"),
                         '<elem></elem>')

        # C14N spec
        self.assertEqual(c14n_roundtrip("<doc>Hello, world!<!-- Comment 1 --></doc>"),
                         '<doc>Hello, world!</doc>')
        self.assertEqual(c14n_roundtrip("<value>&#x32;</value>"),
                         '<value>2</value>')
        self.assertEqual(c14n_roundtrip('<compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>'),
                         '<compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute>')
        self.assertEqual(c14n_roundtrip('''<compute expr='value>"0" &amp;&amp; value&lt;"10" ?"valid":"error"'>valid</compute>'''),
                         '<compute expr="value>&quot;0&quot; &amp;&amp; value&lt;&quot;10&quot; ?&quot;valid&quot;:&quot;error&quot;">valid</compute>')
        # The runs of spaces in the next three pairs are significant.  Each
        # expected value is the input's attribute value with the &#x20;
        # reference turned into one more literal space (hence 4 spaces
        # before the escaped CR/LF/TAB and 3 after) and the CR, LF and TAB
        # characters written back as character references.
        self.assertEqual(c14n_roundtrip("<norm attr=' &apos;   &#x20;&#13;&#xa;&#9;   &apos; '/>"),
                         '<norm attr=" \'    &#xD;&#xA;&#x9;   \' "></norm>')
        self.assertEqual(c14n_roundtrip("<normNames attr='   A   &#x20;&#13;&#xa;&#9;   B   '/>"),
                         '<normNames attr="   A    &#xD;&#xA;&#x9;   B   "></normNames>')
        self.assertEqual(c14n_roundtrip("<normId id=' &apos;   &#x20;&#13;&#xa;&#9;   &apos; '/>"),
                         '<normId id=" \'    &#xD;&#xA;&#x9;   \' "></normId>')

        # fragments from PJ's tests
        #self.assertEqual(c14n_roundtrip("<doc xmlns:x='http://example.com/x' xmlns='http://example.com/default'><b y:a1='1' xmlns='http://example.com/default' a3='3' xmlns:y='http://example.com/y' y:a2='2'/></doc>"),
        #'<doc xmlns:x="http://example.com/x"><b xmlns:y="http://example.com/y" a3="3" y:a1="1" y:a2="2"></b></doc>')

        # Namespace issues: these documents must come back unchanged.
        xml = '<X xmlns="http://nps/a"><Y targets="abc,xyz"></Y></X>'
        self.assertEqual(c14n_roundtrip(xml), xml)
        xml = '<X xmlns="http://nps/a"><Y xmlns="http://nsp/b" targets="abc,xyz"></Y></X>'
        self.assertEqual(c14n_roundtrip(xml), xml)
        xml = '<X xmlns="http://nps/a"><Y xmlns:b="http://nsp/b" b:targets="abc,xyz"></Y></X>'
        self.assertEqual(c14n_roundtrip(xml), xml)

    def test_c14n_exclusion(self):
        # After dedent() the children of <root> are indented by 4 spaces
        # and grandchildren by 8.  The expected strings of the cases that
        # do not pass strip_text=True spell out that whitespace exactly.
        xml = textwrap.dedent("""\
        <root xmlns:x="http://example.com/x">
            <a x:attr="attrx">
                <b>abtext</b>
            </a>
            <b>btext</b>
            <c>
                <x:d>dtext</x:d>
            </c>
        </root>
        """)
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True),
            '<root>'
            '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>'
            '<b>btext</b>'
            '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr']),
            '<root>'
            '<a><b>abtext</b></a>'
            '<b>btext</b>'
            '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d']),
            '<root>'
            '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>'
            '<b>btext</b>'
            '<c></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr'],
                           exclude_tags=['{http://example.com/x}d']),
            '<root>'
            '<a><b>abtext</b></a>'
            '<b>btext</b>'
            '<c></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True, exclude_tags=['a', 'b']),
            '<root>'
            '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
            '</root>')
        # Without strip_text, the whitespace that followed each excluded
        # element stays in the output, leaving whitespace-only lines.
        self.assertEqual(
            c14n_roundtrip(xml, exclude_tags=['a', 'b']),
            '<root>\n'
            '    \n'
            '    \n'
            '    <c>\n'
            '        <x:d xmlns:x="http://example.com/x">dtext</x:d>\n'
            '    </c>\n'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d', 'b']),
            '<root>'
            '<a xmlns:x="http://example.com/x" x:attr="attrx"></a>'
            '<c></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, exclude_tags=['{http://example.com/x}d', 'b']),
            '<root>\n'
            '    <a xmlns:x="http://example.com/x" x:attr="attrx">\n'
            '        \n'
            '    </a>\n'
            '    \n'
            '    <c>\n'
            '        \n'
            '    </c>\n'
            '</root>')

    #
    # basic method=c14n tests from the c14n 2.0 specification.  uses
    # test files under xmltestdata/c14n-20.

    # note that this uses generated C14N versions of the standard ET.write
    # output, not roundtripped C14N (see above).

    def test_xml_c14n2(self):
        datadir = findfile("c14n-20", subdir="xmltestdata")
        full_path = partial(os.path.join, datadir)

        # Base names (".xml" removed) of every XML file in the data directory.
        files = [filename[:-4] for filename in sorted(os.listdir(datadir))
                 if filename.endswith('.xml')]
        input_files = [
            filename for filename in files
            if filename.startswith('in')
        ]
        # Per config file: option local name -> (stripped text, option element).
        configs = {
            filename: {
                # <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
                option.tag.split('}')[-1]: ((option.text or '').strip(), option)
                for option in ET.parse(full_path(filename) + ".xml").getroot()
            }
            for filename in files
            if filename.startswith('c14n')
        }

        # Per input file: the "out_<input>_<config>" files whose trailing
        # component names a known config, paired with that config.
        tests = {
            input_file: [
                (filename, configs[filename.rsplit('_', 1)[-1]])
                for filename in files
                if filename.startswith(f'out_{input_file}_')
                and filename.rsplit('_', 1)[-1] in configs
            ]
            for input_file in input_files
        }

        # Make sure we found all test cases.
        self.assertEqual(
            30, sum(len(output_files) for output_files in tests.values()))

        def get_option(config, option_name, default=None):
            # Text value of an option, or *default* when it is not configured.
            return config.get(option_name, (default, ()))[0]

        for input_file, output_files in tests.items():
            for output_file, config in output_files:
                keep_comments = get_option(
                    config, 'IgnoreComments') == 'true'  # no, it's right :)
                strip_text = get_option(
                    config, 'TrimTextNodes') == 'true'
                rewrite_prefixes = get_option(
                    config, 'PrefixRewrite') == 'sequential'
                if 'QNameAware' in config:
                    qname_config = config['QNameAware'][1]
                    qattrs = [
                        f"{{{el.get('NS')}}}{el.get('Name')}"
                        for el in qname_config.findall(
                            '{http://www.w3.org/2010/xml-c14n2}QualifiedAttr')
                    ]
                    qtags = [
                        f"{{{el.get('NS')}}}{el.get('Name')}"
                        for el in qname_config.findall(
                            '{http://www.w3.org/2010/xml-c14n2}Element')
                    ]
                else:
                    qtags = qattrs = None

                # Build subtest description from config.
                config_descr = ','.join(
                    f"{name}={value or ','.join(c.tag.split('}')[-1] for c in children)}"
                    for name, (value, children) in sorted(config.items())
                )

                with self.subTest(f"{output_file}({config_descr})"):
                    # Both inputs are skipped with the same message.
                    if (input_file in ('inNsRedecl', 'inNsSuperfluous')
                            and not rewrite_prefixes):
                        self.skipTest(
                            f"Redeclared namespace handling is not supported in {output_file}")
                    if 'QNameAware' in config and config['QNameAware'][1].find(
                            '{http://www.w3.org/2010/xml-c14n2}XPathElement') is not None:
                        self.skipTest(
                            f"QName rewriting in XPath text is not supported in {output_file}")

                    # `source` is a path, or for inC14N5 an in-memory stream;
                    # distinct names keep the path from being shadowed by
                    # the file object opened on it.
                    source = full_path(input_file + ".xml")
                    if input_file == 'inC14N5':
                        # Hack: avoid setting up external entity resolution in the parser.
                        with open(full_path('world.txt'), 'rb') as entity_file:
                            with open(source, 'rb') as raw_file:
                                source = io.BytesIO(raw_file.read().replace(
                                    b'&ent2;', entity_file.read()))

                    text = ET.canonicalize(
                        from_file=source,
                        with_comments=keep_comments,
                        strip_text=strip_text,
                        rewrite_prefixes=rewrite_prefixes,
                        qname_aware_tags=qtags, qname_aware_attrs=qattrs)

                    with open(full_path(output_file + ".xml"), 'r', encoding='utf8') as expected_file:
                        expected = expected_file.read()
                    if input_file == 'inC14N3':
                        # FIXME: cET resolves default attributes but ET does not!
                        expected = expected.replace(' attr="default"', '')
                        text = text.replace(' attr="default"', '')
                    self.assertEqual(expected, text)
4144
# --------------------------------------------------------------------
4146
4147
def test_main(module=None):
    # When invoked without a module, runs the Python ET tests by loading pyET.
    # Otherwise, uses the given module as the ET.
    global ET, pyET
    pyET = import_fresh_module('xml.etree.ElementTree',
                               blocked=['_elementtree'])
    ET = pyET if module is None else module

    test_classes = [
        ModuleTest,
        ElementSlicingTest,
        BasicElementTest,
        BadElementTest,
        BadElementPathTest,
        ElementTreeTest,
        IOTest,
        ParseErrorTest,
        XIncludeTest,
        ElementTreeTypeTest,
        ElementFindTest,
        ElementIterTest,
        TreeBuilderTest,
        XMLParserTest,
        XMLPullParserTest,
        BugsTest,
        KeywordArgsTest,
        C14NTest,
    ]

    # NoAcceleratorTest is appended only when the module under test is not
    # pyET itself.  We can't use skipUnless here, because pyET is filled in
    # only after the module is loaded.
    if pyET is not ET:
        test_classes.append(NoAcceleratorTest)

    # Provide default namespace mapping and path cache.
    from xml.etree import ElementPath
    nsmap = ET.register_namespace._namespace_map
    # Snapshot the default namespace mapping so it can be restored.
    saved_nsmap = nsmap.copy()
    # Let the tests work on a copy of the path cache (should be empty).
    saved_cache = ElementPath._cache
    ElementPath._cache = saved_cache.copy()
    # Align the Comment/PI factories.
    old_factories = (ET._set_factories(ET.Comment, ET.PI)
                     if hasattr(ET, '_set_factories') else None)

    try:
        support.run_unittest(*test_classes)
    finally:
        from xml.etree import ElementPath
        # Restore mapping and path cache
        nsmap.clear()
        nsmap.update(saved_nsmap)
        ElementPath._cache = saved_cache
        if old_factories is not None:
            ET._set_factories(*old_factories)
        # don't interfere with subsequent tests
        ET = pyET = None
4215
Florent Xiclunaf15351d2010-03-13 23:24:31 +00004216
# Run the suite against the pure-Python module when executed as a script.
if __name__ == '__main__':
    test_main()