Handle dumps of corrupted documents more gracefully
Since the non-recursive rewrite of the serialization code, the dump
loops walk back up the tree through parent pointers. Check these
pointers for NULL before following them. This avoids segfaults with
corrupted documents, which can apparently be seen with lxml; see
issue #187.
diff --git a/HTMLtree.c b/HTMLtree.c
index cdb7f86..8d0c779 100644
--- a/HTMLtree.c
+++ b/HTMLtree.c
@@ -903,6 +903,12 @@
break;
}
+ /*
+ * The parent should never be NULL here but we want to handle
+ * corrupted documents gracefully.
+ */
+ if (cur->parent == NULL)
+ return;
cur = cur->parent;
if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
diff --git a/xmlsave.c b/xmlsave.c
index 2225628..61a4045 100644
--- a/xmlsave.c
+++ b/xmlsave.c
@@ -1058,6 +1058,12 @@
break;
}
+ /*
+ * The parent should never be NULL here but we want to handle
+ * corrupted documents gracefully.
+ */
+ if (cur->parent == NULL)
+ return;
cur = cur->parent;
if (cur->type == XML_ELEMENT_NODE) {
@@ -1686,6 +1692,12 @@
break;
}
+ /*
+ * The parent should never be NULL here but we want to handle
+ * corrupted documents gracefully.
+ */
+ if (cur->parent == NULL)
+ return;
cur = cur->parent;
if (cur->type == XML_ELEMENT_NODE) {