Merge "Upgrade libxml2 to 7279d236364739a05657a8a614c15990eb08d0c6" am: 7cb4d979c3 am: 80288015e5 am: b7ae737b76 Original change: https://android-review.googlesource.com/c/platform/external/libxml2/+/1699085 Change-Id: I287f2d2f5af7cc8f4bd6b3cd533c8f2ad0874e31

commit: b734db3aebef30b72df60699c56628b954a05e06 [log] [tgz]
author: Elliott Hughes <enh@google.com> Fri May 07 21:15:46 2021 +0000
committer: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> Fri May 07 21:15:46 2021 +0000
tree: 8a23fffd8a1f77ddae2490917e94b6ca6ae0dbbe
parent: caf8e6bf6d187d55a84eec2c07c5e68417f4c6d5 [diff]
parent: b7ae737b76479eb61d88953955bbfb61b158a7ef [diff]
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d6ee9ec..a437717 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt

@@ -496,7 +496,7 @@
 	VERSION ${PROJECT_VERSION}
 )
 
-if(WIN32)
+if(MSVC)
 	if(BUILD_SHARED_LIBS)
 		set_target_properties(
 			LibXml2

diff --git a/HTMLparser.c b/HTMLparser.c
index c9a64c7..b56363a 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c

@@ -1072,102 +1072,266 @@
 }
 };
 
+typedef struct {
+    const char *oldTag;  /* name of the currently open element */
+    const char *newTag;  /* start tag that implies the end of oldTag */
+} htmlStartCloseEntry;  /* one (oldTag, newTag) auto-close pair */
+
 /*
  * start tags that imply the end of current element
  */
-static const char * const htmlStartClose[] = {
-"form",		"form", "p", "hr", "h1", "h2", "h3", "h4", "h5", "h6",
-		"dl", "ul", "ol", "menu", "dir", "address", "pre",
-		"listing", "xmp", "head", NULL,
-"head",		"p", NULL,
-"title",	"p", NULL,
-"body",		"head", "style", "link", "title", "p", NULL,
-"frameset",	"head", "style", "link", "title", "p", NULL,
-"li",		"p", "h1", "h2", "h3", "h4", "h5", "h6", "dl", "address",
-		"pre", "listing", "xmp", "head", "li", NULL,
-"hr",		"p", "head", NULL,
-"h1",		"p", "head", NULL,
-"h2",		"p", "head", NULL,
-"h3",		"p", "head", NULL,
-"h4",		"p", "head", NULL,
-"h5",		"p", "head", NULL,
-"h6",		"p", "head", NULL,
-"dir",		"p", "head", NULL,
-"address",	"p", "head", "ul", NULL,
-"pre",		"p", "head", "ul", NULL,
-"listing",	"p", "head", NULL,
-"xmp",		"p", "head", NULL,
-"blockquote",	"p", "head", NULL,
-"dl",		"p", "dt", "menu", "dir", "address", "pre", "listing",
-		"xmp", "head", NULL,
-"dt",		"p", "menu", "dir", "address", "pre", "listing", "xmp",
-                "head", "dd", NULL,
-"dd",		"p", "menu", "dir", "address", "pre", "listing", "xmp",
-                "head", "dt", NULL,
-"ul",		"p", "head", "ol", "menu", "dir", "address", "pre",
-		"listing", "xmp", NULL,
-"ol",		"p", "head", "ul", NULL,
-"menu",		"p", "head", "ul", NULL,
-"p",		"p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL,
-"div",		"p", "head", NULL,
-"noscript",	"script", NULL,
-"center",	"font", "b", "i", "p", "head", NULL,
-"a",		"a", "head", NULL,
-"caption",	"p", NULL,
-"colgroup",	"caption", "colgroup", "col", "p", NULL,
-"col",		"caption", "col", "p", NULL,
-"table",	"p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre",
-		"listing", "xmp", "a", NULL,
-"th",		"th", "td", "p", "span", "font", "a", "b", "i", "u", NULL,
-"td",		"th", "td", "p", "span", "font", "a", "b", "i", "u", NULL,
-"tr",		"th", "td", "tr", "caption", "col", "colgroup", "p", NULL,
-"thead",	"caption", "col", "colgroup", NULL,
-"tfoot",	"th", "td", "tr", "caption", "col", "colgroup", "thead",
-		"tbody", "p", NULL,
-"tbody",	"th", "td", "tr", "caption", "col", "colgroup", "thead",
-		"tfoot", "tbody", "p", NULL,
-"optgroup",	"option", NULL,
-"option",	"option", NULL,
-"fieldset",	"legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6",
-		"pre", "listing", "xmp", "a", NULL,
-/* most tags in in FONTSTYLE, PHRASE and SPECIAL should close <head> */
-"tt",		"head", NULL,
-"i",		"head", NULL,
-"b",		"head", NULL,
-"u",		"head", NULL,
-"s",		"head", NULL,
-"strike",	"head", NULL,
-"big",		"head", NULL,
-"small",	"head", NULL,
-
-"em",		"head", NULL,
-"strong",	"head", NULL,
-"dfn",		"head", NULL,
-"code",		"head", NULL,
-"samp",		"head", NULL,
-"kbd",		"head", NULL,
-"var",		"head", NULL,
-"cite",		"head", NULL,
-"abbr",		"head", NULL,
-"acronym",	"head", NULL,
-
-/* "a" */
-"img",		"head", NULL,
-/* "applet" */
-/* "embed" */
-/* "object" */
-"font",		"head", NULL,
-/* "basefont" */
-"br",		"head", NULL,
-/* "script" */
-"map",		"head", NULL,
-"q",		"head", NULL,
-"sub",		"head", NULL,
-"sup",		"head", NULL,
-"span",		"head", NULL,
-"bdo",		"head", NULL,
-"iframe",	"head", NULL,
-NULL
+static const htmlStartCloseEntry htmlStartClose[] = {
+    { "a", "a" },
+    { "a", "fieldset" },
+    { "a", "table" },
+    { "a", "td" },
+    { "a", "th" },
+    { "address", "dd" },
+    { "address", "dl" },
+    { "address", "dt" },
+    { "address", "form" },
+    { "address", "li" },
+    { "address", "ul" },
+    { "b", "center" },
+    { "b", "p" },
+    { "b", "td" },
+    { "b", "th" },
+    { "big", "p" },
+    { "caption", "col" },
+    { "caption", "colgroup" },
+    { "caption", "tbody" },
+    { "caption", "tfoot" },
+    { "caption", "thead" },
+    { "caption", "tr" },
+    { "col", "col" },
+    { "col", "colgroup" },
+    { "col", "tbody" },
+    { "col", "tfoot" },
+    { "col", "thead" },
+    { "col", "tr" },
+    { "colgroup", "colgroup" },
+    { "colgroup", "tbody" },
+    { "colgroup", "tfoot" },
+    { "colgroup", "thead" },
+    { "colgroup", "tr" },
+    { "dd", "dt" },
+    { "dir", "dd" },
+    { "dir", "dl" },
+    { "dir", "dt" },
+    { "dir", "form" },
+    { "dir", "ul" },
+    { "dl", "form" },
+    { "dl", "li" },
+    { "dt", "dd" },
+    { "dt", "dl" },
+    { "font", "center" },
+    { "font", "td" },
+    { "font", "th" },
+    { "form", "form" },
+    { "h1", "fieldset" },
+    { "h1", "form" },
+    { "h1", "li" },
+    { "h1", "p" },
+    { "h1", "table" },
+    { "h2", "fieldset" },
+    { "h2", "form" },
+    { "h2", "li" },
+    { "h2", "p" },
+    { "h2", "table" },
+    { "h3", "fieldset" },
+    { "h3", "form" },
+    { "h3", "li" },
+    { "h3", "p" },
+    { "h3", "table" },
+    { "h4", "fieldset" },
+    { "h4", "form" },
+    { "h4", "li" },
+    { "h4", "p" },
+    { "h4", "table" },
+    { "h5", "fieldset" },
+    { "h5", "form" },
+    { "h5", "li" },
+    { "h5", "p" },
+    { "h5", "table" },
+    { "h6", "fieldset" },
+    { "h6", "form" },
+    { "h6", "li" },
+    { "h6", "p" },
+    { "h6", "table" },
+    { "head", "a" },
+    { "head", "abbr" },
+    { "head", "acronym" },
+    { "head", "address" },
+    { "head", "b" },
+    { "head", "bdo" },
+    { "head", "big" },
+    { "head", "blockquote" },
+    { "head", "body" },
+    { "head", "br" },
+    { "head", "center" },
+    { "head", "cite" },
+    { "head", "code" },
+    { "head", "dd" },
+    { "head", "dfn" },
+    { "head", "dir" },
+    { "head", "div" },
+    { "head", "dl" },
+    { "head", "dt" },
+    { "head", "em" },
+    { "head", "fieldset" },
+    { "head", "font" },
+    { "head", "form" },
+    { "head", "frameset" },
+    { "head", "h1" },
+    { "head", "h2" },
+    { "head", "h3" },
+    { "head", "h4" },
+    { "head", "h5" },
+    { "head", "h6" },
+    { "head", "hr" },
+    { "head", "i" },
+    { "head", "iframe" },
+    { "head", "img" },
+    { "head", "kbd" },
+    { "head", "li" },
+    { "head", "listing" },
+    { "head", "map" },
+    { "head", "menu" },
+    { "head", "ol" },
+    { "head", "p" },
+    { "head", "pre" },
+    { "head", "q" },
+    { "head", "s" },
+    { "head", "samp" },
+    { "head", "small" },
+    { "head", "span" },
+    { "head", "strike" },
+    { "head", "strong" },
+    { "head", "sub" },
+    { "head", "sup" },
+    { "head", "table" },
+    { "head", "tt" },
+    { "head", "u" },
+    { "head", "ul" },
+    { "head", "var" },
+    { "head", "xmp" },
+    { "hr", "form" },
+    { "i", "center" },
+    { "i", "p" },
+    { "i", "td" },
+    { "i", "th" },
+    { "legend", "fieldset" },
+    { "li", "li" },
+    { "link", "body" },
+    { "link", "frameset" },
+    { "listing", "dd" },
+    { "listing", "dl" },
+    { "listing", "dt" },
+    { "listing", "fieldset" },
+    { "listing", "form" },
+    { "listing", "li" },
+    { "listing", "table" },
+    { "listing", "ul" },
+    { "menu", "dd" },
+    { "menu", "dl" },
+    { "menu", "dt" },
+    { "menu", "form" },
+    { "menu", "ul" },
+    { "ol", "form" },
+    { "ol", "ul" },
+    { "option", "optgroup" },
+    { "option", "option" },
+    { "p", "address" },
+    { "p", "blockquote" },
+    { "p", "body" },
+    { "p", "caption" },
+    { "p", "center" },
+    { "p", "col" },
+    { "p", "colgroup" },
+    { "p", "dd" },
+    { "p", "dir" },
+    { "p", "div" },
+    { "p", "dl" },
+    { "p", "dt" },
+    { "p", "fieldset" },
+    { "p", "form" },
+    { "p", "frameset" },
+    { "p", "h1" },
+    { "p", "h2" },
+    { "p", "h3" },
+    { "p", "h4" },
+    { "p", "h5" },
+    { "p", "h6" },
+    { "p", "head" },
+    { "p", "hr" },
+    { "p", "li" },
+    { "p", "listing" },
+    { "p", "menu" },
+    { "p", "ol" },
+    { "p", "p" },
+    { "p", "pre" },
+    { "p", "table" },
+    { "p", "tbody" },
+    { "p", "td" },
+    { "p", "tfoot" },
+    { "p", "th" },
+    { "p", "title" },
+    { "p", "tr" },
+    { "p", "ul" },
+    { "p", "xmp" },
+    { "pre", "dd" },
+    { "pre", "dl" },
+    { "pre", "dt" },
+    { "pre", "fieldset" },
+    { "pre", "form" },
+    { "pre", "li" },
+    { "pre", "table" },
+    { "pre", "ul" },
+    { "s", "p" },
+    { "script", "noscript" },
+    { "small", "p" },
+    { "span", "td" },
+    { "span", "th" },
+    { "strike", "p" },
+    { "style", "body" },
+    { "style", "frameset" },
+    { "tbody", "tbody" },
+    { "tbody", "tfoot" },
+    { "td", "tbody" },
+    { "td", "td" },
+    { "td", "tfoot" },
+    { "td", "th" },
+    { "td", "tr" },
+    { "tfoot", "tbody" },
+    { "th", "tbody" },
+    { "th", "td" },
+    { "th", "tfoot" },
+    { "th", "th" },
+    { "th", "tr" },
+    { "thead", "tbody" },
+    { "thead", "tfoot" },
+    { "title", "body" },
+    { "title", "frameset" },
+    { "tr", "tbody" },
+    { "tr", "tfoot" },
+    { "tr", "tr" },
+    { "tt", "p" },
+    { "u", "p" },
+    { "u", "td" },
+    { "u", "th" },
+    { "ul", "address" },
+    { "ul", "form" },
+    { "ul", "menu" },
+    { "ul", "ol" },
+    { "ul", "pre" },
+    { "xmp", "dd" },
+    { "xmp", "dl" },
+    { "xmp", "dt" },
+    { "xmp", "fieldset" },
+    { "xmp", "form" },
+    { "xmp", "li" },
+    { "xmp", "table" },
+    { "xmp", "ul" }
 };
 
 /*
@@ -1237,9 +1401,6 @@
     {NULL,    100} /* Default priority */
 };
 
-static const char** htmlStartCloseIndex[100];
-static int htmlStartCloseIndexinitialized = 0;
-
 /************************************************************************
  *									*
  *	functions to handle HTML specific data			*
@@ -1249,24 +1410,18 @@
 /**
  * htmlInitAutoClose:
  *
- * Initialize the htmlStartCloseIndex for fast lookup of closing tags names.
- * This is not reentrant. Call xmlInitParser() once before processing in
- * case of use in multithreaded programs.
+ * This is a no-op now.
  */
 void
 htmlInitAutoClose(void) {
-    int indx, i = 0;
+}
 
-    if (htmlStartCloseIndexinitialized) return;
+static int
+htmlCompareTags(const void *key, const void *member) {
+    const xmlChar *tag = (const xmlChar *) key;
+    const htmlElemDesc *desc = (const htmlElemDesc *) member;
 
-    for (indx = 0;indx < 100;indx ++) htmlStartCloseIndex[indx] = NULL;
-    indx = 0;
-    while ((htmlStartClose[i] != NULL) && (indx < 100 - 1)) {
-        htmlStartCloseIndex[indx++] = (const char**) &htmlStartClose[i];
-	while (htmlStartClose[i] != NULL) i++;
-	i++;
-    }
-    htmlStartCloseIndexinitialized = 1;
+    return(xmlStrcasecmp(tag, BAD_CAST desc->name));
 }
 
 /**
@@ -1279,14 +1434,12 @@
  */
 const htmlElemDesc *
 htmlTagLookup(const xmlChar *tag) {
-    unsigned int i;
+    if (tag == NULL)
+        return(NULL);
 
-    for (i = 0; i < (sizeof(html40ElementTable) /
-                     sizeof(html40ElementTable[0]));i++) {
-        if (!xmlStrcasecmp(tag, BAD_CAST html40ElementTable[i].name))
-	    return((htmlElemDescPtr) &html40ElementTable[i]);
-    }
-    return(NULL);
+    return((const htmlElemDesc *) bsearch(tag, html40ElementTable,
+                sizeof(html40ElementTable) / sizeof(htmlElemDesc),
+                sizeof(htmlElemDesc), htmlCompareTags));
 }
 
 /**
@@ -1307,6 +1460,19 @@
 }
 
 
+static int
+htmlCompareStartClose(const void *vkey, const void *member) {
+    const htmlStartCloseEntry *key = (const htmlStartCloseEntry *) vkey;
+    const htmlStartCloseEntry *entry = (const htmlStartCloseEntry *) member;
+    int ret;
+    /* bsearch() callback: order by oldTag, then newTag (case-sensitive). */
+    ret = strcmp(key->oldTag, entry->oldTag);
+    if (ret == 0)
+        ret = strcmp(key->newTag, entry->newTag);
+    /* strcmp() is applied directly, so both tag names must be non-NULL. */
+    return(ret);
+}
+
 /**
  * htmlCheckAutoClose:
  * @newtag:  The new tag name
@@ -1314,37 +1480,21 @@
  *
  * Checks whether the new tag is one of the registered valid tags for
  * closing old.
- * Initialize the htmlStartCloseIndex for fast lookup of closing tags names.
  *
  * Returns 0 if no, 1 if yes.
  */
 static int
 htmlCheckAutoClose(const xmlChar * newtag, const xmlChar * oldtag)
 {
-    int i, indx;
-    const char **closed = NULL;
+    htmlStartCloseEntry key;
+    void *res;
 
-    if (htmlStartCloseIndexinitialized == 0)
-        htmlInitAutoClose();
-
-    /* inefficient, but not a big deal */
-    for (indx = 0; indx < 100; indx++) {
-        closed = htmlStartCloseIndex[indx];
-        if (closed == NULL)
-            return (0);
-        if (xmlStrEqual(BAD_CAST * closed, newtag))
-            break;
-    }
-
-    i = closed - htmlStartClose;
-    i++;
-    while (htmlStartClose[i] != NULL) {
-        if (xmlStrEqual(BAD_CAST htmlStartClose[i], oldtag)) {
-            return (1);
-        }
-        i++;
-    }
-    return (0);
+    key.oldTag = (const char *) oldtag;
+    key.newTag = (const char *) newtag;
+    res = bsearch(&key, htmlStartClose,
+            sizeof(htmlStartClose) / sizeof(htmlStartCloseEntry),
+            sizeof(htmlStartCloseEntry), htmlCompareStartClose);
+    return(res != NULL);
 }
 
 /**
@@ -4057,12 +4207,10 @@
      * With the exception that the autoclose may have popped stuff out
      * of the stack.
      */
-    if (!xmlStrEqual(name, ctxt->name)) {
-        if ((ctxt->name != NULL) && (!xmlStrEqual(ctxt->name, name))) {
-            htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
-	                 "Opening and ending tag mismatch: %s and %s\n",
-			 name, ctxt->name);
-        }
+    if ((ctxt->name != NULL) && (!xmlStrEqual(ctxt->name, name))) {
+        htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
+                     "Opening and ending tag mismatch: %s and %s\n",
+                     name, ctxt->name);
     }
 
     /*

diff --git a/INSTALL.libxml2 b/INSTALL.libxml2
index 383fb93..ac9211d 100644
--- a/INSTALL.libxml2
+++ b/INSTALL.libxml2

@@ -1,27 +1,21 @@
-Extracted from the documentation:
-   http://xmlsoft.org/FAQ.html#Compilatio
-
 See also the generic INSTALL file for configure options
 
 Compilation
 
-   1.What is the process to compile libxml ? 
+  1. What is the process to compile libxml?
 
      As most UNIX libraries libxml follows the "standard":
 
-     gunzip -c xxx.tar.gz | tar xvf -
+         gunzip -c xxx.tar.gz | tar xvf -
+         cd libxml-xxxx
 
-     cd libxml-xxxx
-
-     ./configure --help
+         ./configure --help
 
      to see the options, then the compilation/installation proper
 
-     ./configure [possible options]
-
-     make
-
-     make install
+         ./configure [possible options]
+         make
+         make install
 
      At that point you may have to rerun ldconfig or similar utility to
      update your list of installed shared libs.
@@ -29,33 +23,29 @@
      At this point you can check that the library is properly functioning
      by running
 
-     make tests
+         make check
 
-   2.What other libraries are needed to compile/install libxml ? 
+     Please report test failures to the mailing list or bug tracker.
 
-     Libxml does not requires any other library, the normal C ANSI API
-     should be sufficient (please report any violation to this rule you
-     may find).
+  2. What other libraries are needed to compile/install libxml?
 
-     However if found at configuration time libxml will detect and use
+     Libxml does not require any other libraries. A platform with somewhat
+     recent POSIX support should be sufficient (please report any violation
+     of this rule you may find).
+
+     However if found at configuration time, libxml will detect and use
      the following libs:
 
-         libz: a highly portable and available widely compression library 
-             http://www.info-zip.org/pub/infozip/zlib/
+         libz: a highly portable and widely available compression library
+             https://zlib.net/
+         liblzma: another compression library
+             https://tukaani.org/xz/
          iconv: a powerful character encoding conversion library. It's
-	     included by default on recent glibc libraries, so it doesn't
-	     need to be installed specifically on linux. It seems it's
-	     now part of the official UNIX specification. Here is one
-	     implementation of the library which source can be found here.
-             http://clisp.cons.org/~haible/packages-libiconv.html
-             ftp://ftp.ilog.fr/pub/Users/haible/gnu/
-
-   3.make tests may fail on some platforms 
-
-     Sometime the regression tests results don't completely match the
-     value produced by the parser, and the makefile uses diff to print
-     the delta. On some platforms the diff return breaks the compilation
-     process, if the diff is small this is probably not a serious problem
+             part of POSIX.1-2001, so it doesn't need to be installed
+             separately on modern UNIX-like systems such as Linux.
+             https://www.gnu.org/software/libiconv/
+         ICU: Mainly used by Chromium on Windows. Unnecessary on most
+             systems.
 
 Daniel
 veillard@redhat.com

diff --git a/METADATA b/METADATA
index a69aab2..fa133f6 100644
--- a/METADATA
+++ b/METADATA

@@ -10,13 +10,13 @@
   }
   url {
     type: ARCHIVE
-    value: "https://github.com/GNOME/libxml2/archive/ce2fbaa89da627a6c6cd6344d5339a76feba94b5.zip"
+    value: "https://github.com/GNOME/libxml2/archive/7279d236364739a05657a8a614c15990eb08d0c6.zip"
   }
-  version: "ce2fbaa89da627a6c6cd6344d5339a76feba94b5"
+  version: "7279d236364739a05657a8a614c15990eb08d0c6"
   license_type: BY_EXCEPTION_ONLY
   last_upgrade_date {
     year: 2021
-    month: 2
-    day: 23
+    month: 5
+    day: 6
   }
 }

diff --git a/entities.c b/entities.c
index 37b99a5..1a8f86f 100644
--- a/entities.c
+++ b/entities.c

@@ -704,11 +704,25 @@
 	    } else {
 		/*
 		 * We assume we have UTF-8 input.
+		 * It must match either:
+		 *   110xxxxx 10xxxxxx
+		 *   1110xxxx 10xxxxxx 10xxxxxx
+		 *   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+		 * That is:
+		 *   cur[0] is 11xxxxxx
+		 *   cur[1] is 10xxxxxx
+		 *   cur[2] is 10xxxxxx if cur[0] is 111xxxxx
+		 *   cur[3] is 10xxxxxx if cur[0] is 1111xxxx
+		 *   cur[0] is not 11111xxx
 		 */
 		char buf[11], *ptr;
 		int val = 0, l = 1;
 
-		if (*cur < 0xC0) {
+		if (((cur[0] & 0xC0) != 0xC0) ||
+		    ((cur[1] & 0xC0) != 0x80) ||
+		    (((cur[0] & 0xE0) == 0xE0) && ((cur[2] & 0xC0) != 0x80)) ||
+		    (((cur[0] & 0xF0) == 0xF0) && ((cur[3] & 0xC0) != 0x80)) ||
+		    (((cur[0] & 0xF8) == 0xF8))) {
 		    xmlEntitiesErr(XML_CHECK_NOT_UTF8,
 			    "xmlEncodeEntities: input not UTF-8");
 		    if (doc != NULL)

diff --git a/fuzz/Makefile.am b/fuzz/Makefile.am
index 2bbdbb1..7d38347 100644
--- a/fuzz/Makefile.am
+++ b/fuzz/Makefile.am

@@ -74,7 +74,7 @@
 	./html$(EXEEXT) \
 	    -dict=html.dict \
 	    -max_len=1000000 \
-	    -timeout=20 \
+	    -timeout=10 \
 	    corpus/html seed/html
 
 # Regexp fuzzer
@@ -99,7 +99,7 @@
 	@mkdir -p corpus/uri
 	./uri$(EXEEXT) \
 	    -max_len=10000 \
-	    -timeout=5 \
+	    -timeout=2 \
 	    corpus/uri $(srcdir)/seed/uri
 
 # XML Schema fuzzer

diff --git a/fuzz/html.options b/fuzz/html.options
index e5d3bbe..1c63f53 100644
--- a/fuzz/html.options
+++ b/fuzz/html.options

@@ -1,2 +1,2 @@
 [libfuzzer]
-timeout = 20
+timeout = 10

diff --git a/fuzz/uri.options b/fuzz/uri.options
index ea2a7a2..0676c65 100644
--- a/fuzz/uri.options
+++ b/fuzz/uri.options

@@ -1,2 +1,2 @@
 [libfuzzer]
-timeout = 5
+timeout = 2

diff --git a/include/libxml/c14n.h b/include/libxml/c14n.h
index d74847d..af93de6 100644
--- a/include/libxml/c14n.h
+++ b/include/libxml/c14n.h

@@ -16,17 +16,19 @@
  */
 #ifndef __XML_C14N_H__
 #define __XML_C14N_H__
+
+#include <libxml/xmlversion.h>
+
 #ifdef LIBXML_C14N_ENABLED
 #ifdef LIBXML_OUTPUT_ENABLED
 
+#include <libxml/tree.h>
+#include <libxml/xpath.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif /* __cplusplus */
 
-#include <libxml/xmlversion.h>
-#include <libxml/tree.h>
-#include <libxml/xpath.h>
-
 /*
  * XML Canonicalization
  * http://www.w3.org/TR/xml-c14n

diff --git a/parser.c b/parser.c
index efde672..73c27ed 100644
--- a/parser.c
+++ b/parser.c

@@ -2684,8 +2684,10 @@
 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
 			                      0, 0, 0);
 		ctxt->depth--;
-		if (rep == NULL)
+		if (rep == NULL) {
+                    ent->content[0] = 0;
                     goto int_error;
+                }
 
                 current = rep;
                 while (*current != 0) { /* non input consuming loop */
@@ -2740,8 +2742,11 @@
 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
 			                      0, 0, 0);
 		ctxt->depth--;
-		if (rep == NULL)
+		if (rep == NULL) {
+                    if (ent->content != NULL)
+                        ent->content[0] = 0;
                     goto int_error;
+                }
                 current = rep;
                 while (*current != 0) { /* non input consuming loop */
                     buffer[nbchars++] = *current++;
@@ -6203,6 +6208,8 @@
 	SKIP_BLANKS;
         cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
                                                            depth + 1);
+        if (cur == NULL)
+            return(NULL);
 	SKIP_BLANKS;
 	GROW;
     } else {
@@ -6336,6 +6343,11 @@
 	    SKIP_BLANKS;
 	    last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
                                                           depth + 1);
+            if (last == NULL) {
+		if (ret != NULL)
+		    xmlFreeDocElementContent(ctxt->myDoc, ret);
+		return(NULL);
+            }
 	    SKIP_BLANKS;
 	} else {
 	    elem = xmlParseName(ctxt);

diff --git a/tree.c b/tree.c
index d6ea704..c707f59 100644
--- a/tree.c
+++ b/tree.c

@@ -1901,12 +1901,6 @@
     if (value != NULL) {
         xmlNodePtr tmp;
 
-        if(!xmlCheckUTF8(value)) {
-            xmlTreeErr(XML_TREE_NOT_UTF8, (xmlNodePtr) doc,
-                       NULL);
-            if (doc != NULL)
-                doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
-        }
         cur->children = xmlNewDocText(doc, value);
         cur->last = NULL;
         tmp = cur->children;
@@ -2026,6 +2020,11 @@
  * @value:  the value of the attribute
  *
  * Create a new property carried by a document.
+ * NOTE: @value is supposed to be a piece of XML CDATA, so it allows entity
+ *       references, but XML special chars need to be escaped first by using
+ *       xmlEncodeEntitiesReentrant(). Use xmlNewProp() if you don't need
+ *       entities support.
+ *
  * Returns a pointer to the attribute
  */
 xmlAttrPtr
@@ -4894,7 +4893,9 @@
             }
             next = ((xmlAttrPtr) cur)->parent;
         } else {
-            next = cur->parent;
+            xmlFree(buf);
+            xmlFree(buffer);
+            return (NULL);
         }
 
         /*
@@ -6589,6 +6590,16 @@
 		attrDecl = xmlGetDtdQAttrDesc(doc->extSubset,
 		    elemQName, name, NULL);
 	    }
+        } else if (xmlStrEqual(nsName, XML_XML_NAMESPACE)) {
+	    /*
+	    * The XML namespace must be bound to prefix 'xml'.
+	    */
+	    attrDecl = xmlGetDtdQAttrDesc(doc->intSubset,
+		elemQName, name, BAD_CAST "xml");
+	    if ((attrDecl == NULL) && (doc->extSubset != NULL)) {
+		attrDecl = xmlGetDtdQAttrDesc(doc->extSubset,
+		    elemQName, name, BAD_CAST "xml");
+	    }
 	} else {
 	    xmlNsPtr *nsList, *cur;
 
@@ -6935,12 +6946,6 @@
 	if (value != NULL) {
 	    xmlNodePtr tmp;
 
-	    if(!xmlCheckUTF8(value)) {
-	        xmlTreeErr(XML_TREE_NOT_UTF8, (xmlNodePtr) node->doc,
-	                   NULL);
-                if (node->doc != NULL)
-                    node->doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
-	    }
 	    prop->children = xmlNewDocText(node->doc, value);
 	    prop->last = NULL;
 	    tmp = prop->children;

diff --git a/xinclude.c b/xinclude.c
index 1636caf..b2e6ea1 100644
--- a/xinclude.c
+++ b/xinclude.c

@@ -2430,9 +2430,8 @@
             ctxt->incTotal++;
             xmlXIncludePreProcessNode(ctxt, cur);
         } else if ((cur->children != NULL) &&
-                   (cur->children->type != XML_ENTITY_DECL) &&
-                   (cur->children->type != XML_XINCLUDE_START) &&
-                   (cur->children->type != XML_XINCLUDE_END)) {
+                   ((cur->type == XML_DOCUMENT_NODE) ||
+                    (cur->type == XML_ELEMENT_NODE))) {
             cur = cur->children;
             continue;
         }

diff --git a/xmllint.c b/xmllint.c
index 6ca1bf5..a3fe10a 100644
--- a/xmllint.c
+++ b/xmllint.c

@@ -2213,7 +2213,7 @@
             if (res > 0) {
                 ctxt = htmlCreatePushParserCtxt(NULL, NULL,
                             chars, res, filename, XML_CHAR_ENCODING_NONE);
-                xmlCtxtUseOptions(ctxt, options);
+                htmlCtxtUseOptions(ctxt, options);
                 while ((res = fread(chars, 1, pushsize, f)) > 0) {
                     htmlParseChunk(ctxt, chars, res, 0);
                 }
@@ -2426,6 +2426,7 @@
 	dtd = xmlGetIntSubset(doc);
 	if (dtd != NULL) {
 	    xmlUnlinkNode((xmlNodePtr)dtd);
+            doc->intSubset = NULL;
 	    xmlFreeDtd(dtd);
 	}
     }

diff --git a/xmlschemastypes.c b/xmlschemastypes.c
index 07b5fd7..9c2dff0 100644
--- a/xmlschemastypes.c
+++ b/xmlschemastypes.c

@@ -2187,6 +2187,44 @@
     return(ret);
 }
 
+/**
+ * xmlSchemaCheckLanguageType:
+ * @value: the string to check, may be NULL
+ *
+ * Check that @value conforms to the lexical space of the language datatype,
+ * i.e. that it matches [a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*
+ *
+ * Returns 1 if @value matches, 0 otherwise (including for NULL or empty).
+ */
+static int
+xmlSchemaCheckLanguageType(const xmlChar* value) {
+    int first = 1, len = 0;
+    const xmlChar* cur = value;
+    /* first: no '-' seen yet; len: length of the subtag being scanned. */
+    if (value == NULL)
+        return (0);
+    /* Letters and '-' are always accepted, digits only after the first '-'. */
+    while (cur[0] != 0) {
+        if (!( ((cur[0] >= 'a') && (cur[0] <= 'z')) || ((cur[0] >= 'A') && (cur[0] <= 'Z'))
+            || (cur[0] == '-')
+            || ((first == 0) && (xmlIsDigit_ch(cur[0]))) ))
+            return (0);
+        if (cur[0] == '-') {
+            if ((len < 1) || (len > 8))  /* subtag just ended: 1..8 chars */
+                return (0);
+            len = 0;
+            first = 0;
+        }
+        else
+            len++;
+        cur++;
+    }
+    if ((len < 1) || (len > 8))  /* last subtag; also rejects "" and "x-" */
+        return (0);
+
+    return (1);
+}
+
 /**
  * xmlSchemaValAtomicType:
  * @type: the predefined type
@@ -2704,7 +2742,8 @@
 		if (norm != NULL)
 		    value = norm;
 	    }
-            if (xmlCheckLanguageID(value) == 1) {
+
+            if (xmlSchemaCheckLanguageType(value) == 1) {
                 if (val != NULL) {
                     v = xmlSchemaNewValue(XML_SCHEMAS_LANGUAGE);
                     if (v != NULL) {

diff --git a/xpath.c b/xpath.c
index 6ee7e57..7497ba0 100644
--- a/xpath.c
+++ b/xpath.c

@@ -488,14 +488,6 @@
  *									*
  ************************************************************************/
 
-#ifndef INFINITY
-#define INFINITY (DBL_MAX * DBL_MAX)
-#endif
-
-#ifndef NAN
-#define NAN (INFINITY / INFINITY)
-#endif
-
 double xmlXPathNAN;
 double xmlXPathPINF;
 double xmlXPathNINF;
@@ -505,11 +497,14 @@
  *
  * Initialize the XPath environment
  */
+ATTRIBUTE_NO_SANITIZE("float-divide-by-zero")
 void
 xmlXPathInit(void) {
-    xmlXPathNAN = NAN;
-    xmlXPathPINF = INFINITY;
-    xmlXPathNINF = -INFINITY;
+    /* MSVC doesn't allow division by zero in constant expressions. */
+    double zero = 0.0;
+    xmlXPathNAN = 0.0 / zero;
+    xmlXPathPINF = 1.0 / zero;
+    xmlXPathNINF = -xmlXPathPINF;
 }
 
 /**
@@ -538,9 +533,9 @@
 #ifdef isinf
     return isinf(val) ? (val > 0 ? 1 : -1) : 0;
 #else
-    if (val >= INFINITY)
+    if (val >= xmlXPathPINF)
         return 1;
-    if (val <= -INFINITY)
+    if (val <= -xmlXPathPINF)
         return -1;
     return 0;
 #endif
@@ -5873,10 +5868,10 @@
     double ret;
 
     if (node == NULL)
-	return(NAN);
+	return(xmlXPathNAN);
     strval = xmlXPathCastNodeToString(node);
     if (strval == NULL)
-	return(NAN);
+	return(xmlXPathNAN);
     ret = xmlXPathCastStringToNumber(strval);
     xmlFree(strval);
 
@@ -5897,7 +5892,7 @@
     double ret;
 
     if (ns == NULL)
-	return(NAN);
+	return(xmlXPathNAN);
     str = xmlXPathCastNodeSetToString(ns);
     ret = xmlXPathCastStringToNumber(str);
     xmlFree(str);
@@ -5917,13 +5912,13 @@
     double ret = 0.0;
 
     if (val == NULL)
-	return(NAN);
+	return(xmlXPathNAN);
     switch (val->type) {
     case XPATH_UNDEFINED:
 #ifdef DEBUG_EXPR
 	xmlGenericError(xmlGenericErrorContext, "NUMBER: undefined\n");
 #endif
-	ret = NAN;
+	ret = xmlXPathNAN;
 	break;
     case XPATH_NODESET:
     case XPATH_XSLT_TREE:
@@ -5943,7 +5938,7 @@
     case XPATH_RANGE:
     case XPATH_LOCATIONSET:
 	TODO;
-	ret = NAN;
+	ret = xmlXPathNAN;
 	break;
     }
     return(ret);
@@ -7570,7 +7565,7 @@
     CHECK_TYPE(XPATH_NUMBER);
     arg1 = ctxt->value->floatval;
     if (arg2 == 0)
-	ctxt->value->floatval = NAN;
+	ctxt->value->floatval = xmlXPathNAN;
     else {
 	ctxt->value->floatval = fmod(arg1, arg2);
     }
@@ -10000,7 +9995,7 @@
     if (cur == NULL) return(0);
     while (IS_BLANK_CH(*cur)) cur++;
     if ((*cur != '.') && ((*cur < '0') || (*cur > '9')) && (*cur != '-')) {
-        return(NAN);
+        return(xmlXPathNAN);
     }
     if (*cur == '-') {
 	isneg = 1;
@@ -10036,7 +10031,7 @@
 
         cur++;
 	if (((*cur < '0') || (*cur > '9')) && (!ok)) {
-	    return(NAN);
+	    return(xmlXPathNAN);
 	}
         while (*cur == '0') {
 	    frac = frac + 1;
@@ -10069,7 +10064,7 @@
       }
     }
     while (IS_BLANK_CH(*cur)) cur++;
-    if (*cur != 0) return(NAN);
+    if (*cur != 0) return(xmlXPathNAN);
     if (isneg) ret = -ret;
     if (is_exponent_negative) exponent = -exponent;
     ret *= pow(10.0, (double)exponent);
commit	b734db3aebef30b72df60699c56628b954a05e06	[log] [tgz]
author	Elliott Hughes <enh@google.com>	Fri May 07 21:15:46 2021 +0000
committer	Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>	Fri May 07 21:15:46 2021 +0000
tree	8a23fffd8a1f77ddae2490917e94b6ca6ae0dbbe
parent	caf8e6bf6d187d55a84eec2c07c5e68417f4c6d5 [diff]
parent	b7ae737b76479eb61d88953955bbfb61b158a7ef [diff]