Merge changes from topic 'upgrade-expat-2.2.0' am: d61777ec98 Change-Id: Ie17e4d110719ccd42446339fd21aaee3ee233a6a

commit: 470553711f98a9110beccab5c1648186f827404e [log] [tgz]
author: Paul Duffin <paulduffin@google.com> Wed Mar 01 13:43:33 2017 +0000
committer: android-build-merger <android-build-merger@google.com> Wed Mar 01 13:43:33 2017 +0000
tree: f4cc4a019c9a6ddfbb5ef4873df40264cdb13d0b
parent: b05dbe320cc7cecb104c1b7ae38a57d5f3cecc89 [diff]
parent: d61777ec982f05a05e372d911704088e09f6c40f [diff]
diff --git a/Changes b/Changes
index db5f7f3..583c868 100644
--- a/Changes
+++ b/Changes

@@ -1,3 +1,52 @@
+Release 2.2.0 Tue June 21 2016
+        Security fixes:
+            #537  CVE-2016-0718 -- Fix crash on malformed input
+                  CVE-2016-4472 -- Improve insufficient fix to CVE-2015-1283 /
+                                   CVE-2015-2716 introduced with Expat 2.1.1
+            #499  CVE-2016-5300 -- Use more entropy for hash initialization
+                                   than the original fix to CVE-2012-0876
+            #519  CVE-2012-6702 -- Resolve troublesome internal call to srand
+                                   that was introduced with Expat 2.1.0
+                                   when addressing CVE-2012-0876 (issue #496)
+
+        Bug fixes:
+                  Fix uninitialized reads of size 1
+                    (e.g. in little2_updatePosition)
+                  Fix detection of UTF-8 character boundaries
+
+        Other changes:
+            #532  Fix compilation for Visual Studio 2010 (keyword "C99")
+                  Autotools: Resolve use of "$<" to better support bmake
+                  Autotools: Add QA script "qa.sh" (and make target "qa")
+                  Autotools: Respect CXXFLAGS if given
+                  Autotools: Fix "make run-xmltest"
+                  Autotools: Have "make run-xmltest" check for expected output
+             p90  CMake: Fix static build (BUILD_shared=OFF) on Windows
+            #536  CMake: Add soversion, support -DNO_SONAME=yes to bypass
+            #323  CMake: Add suffix "d" to differentiate debug from release
+                  CMake: Define WIN32 with CMake on Windows
+                  Annotate memory allocators for GCC
+                  Address all currently known compile warnings
+                  Make sure that API symbols remain visible despite
+                    -fvisibility=hidden
+                  Remove executable flag from source files
+                  Resolve COMPILED_FROM_DSP in favor of WIN32
+
+        Special thanks to:
+            Björn Lindahl
+            Christian Heimes
+            Cristian Rodríguez
+            Daniel Krügler
+            Gustavo Grieco
+            Karl Waclawek
+            László Böszörményi
+            Marco Grassi
+            Pascal Cuoq
+            Sergei Nikulov
+            Thomas Beutlich
+            Warren Young
+            Yann Droneaud
+
 Release 2.1.1 Sat March 12 2016
         Security fixes:
             #582: CVE-2015-1283 - Multiple integer overflows in XML_GetBuffer
@@ -7,7 +56,7 @@
             #520: Symbol XML_SetHashSalt was not exported
             Output of "xmlwf -h" was incomplete
 
-        Other changes
+        Other changes:
             #503: Document behavior of calling XML_SetHashSalt with salt 0
             Minor improvements to man page xmlwf(1)
             Improvements to the experimental CMake build system

diff --git a/NOTICE b/NOTICE
index dcb4506..092c83b 100644
--- a/NOTICE
+++ b/NOTICE

@@ -1,6 +1,5 @@
-Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
-                               and Clark Cooper
-Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Expat maintainers.
+Copyright (c) 1998-2000 Thai Open Source Software Center Ltd and Clark Cooper
+Copyright (c) 2001-2016 Expat maintainers
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the

diff --git a/README b/README
index 9ec8d0c..a7d2845 100644
--- a/README
+++ b/README

@@ -1,5 +1,5 @@
 
-                        Expat, Release 2.1.1
+                        Expat, Release 2.2.0
 
 This is Expat, a C library for parsing XML, written by James Clark.
 Expat is a stream-oriented XML parser.  This means that you register
@@ -114,7 +114,7 @@
 "/usr/ccs/bin", which is not in the default PATH.  You will need to
 add this to your path for the "make" command, and probably also switch
 to GNU make (the "make" found in /usr/ccs/bin does not seem to work
-properly -- appearantly it does not understand .PHONY directives).  If
+properly -- apparently it does not understand .PHONY directives).  If
 you're using ksh or bash, use this command to build:
 
         PATH=/usr/ccs/bin:$PATH make

diff --git a/README.version b/README.version
index 5514625..7b85695 100644
--- a/README.version
+++ b/README.version

@@ -1,3 +1,7 @@
-URL: http://sourceforge.net/projects/expat/files/expat/2.1.0/expat-2.1.0.tar.gz/download
-Version: 2.1.0
+URL: https://sourceforge.net/projects/expat/files/expat/2.2.0/expat-2.2.0.tar.bz2/download
+Version: 2.2.0
 BugComponent: 24949
+
+Local Changes:
+    Reverted change to switch from COMPILED_FROM_DSP to WIN32. This was needed in order to get it to compile as winconfig.h is not available.
+    Fix cast from pointer to integer of different size

diff --git a/doc/xmlwf.1 b/doc/xmlwf.1
index f45aea9..06bb84c 100644
--- a/doc/xmlwf.1
+++ b/doc/xmlwf.1

@@ -177,12 +177,14 @@
 This is an example of an internal entity:
 
 .nf
+
 <!ENTITY vers '1.0.2'>
 .fi
 
 And here are some examples of external entities:
 
 .nf
+
 <!ENTITY header SYSTEM "header\-&vers;.xml">  (parsed)
 <!ENTITY logo SYSTEM "logo.png" PNG>         (unparsed)
 .fi
@@ -193,6 +195,7 @@
 starts with a hyphen. For example:
 
 .nf
+
 xmlwf \-\- \-myfile.xml
 .fi
 
@@ -228,6 +231,7 @@
 Here are some XML validators on the web:
 
 .nf
+
 http://www.hcrc.ed.ac.uk/~richard/xml\-check.html
 http://www.stg.brown.edu/service/xmlvalid/
 http://www.scripting.com/frontier5/xml/code/xmlValidator.html
@@ -235,6 +239,7 @@
 .fi
 .SH "SEE ALSO"
 .nf
+
 The Expat home page:        http://www.libexpat.org/
 The W3 XML specification:   http://www.w3.org/TR/REC\-xml
 .fi

diff --git a/doc/xmlwf.xml b/doc/xmlwf.xml
new file mode 100644
index 0000000..92ea8b5
--- /dev/null
+++ b/doc/xmlwf.xml

@@ -0,0 +1,440 @@
+<!DOCTYPE refentry [
+  <!-- Fill in your name for FIRSTNAME and SURNAME. -->
+  <!ENTITY dhfirstname "<firstname>Scott</firstname>">
+  <!ENTITY dhsurname   "<surname>Bronson</surname>">
+  <!-- Please adjust the date whenever revising the manpage. -->
+  <!ENTITY dhdate      "<date>March 11, 2016</date>">
+  <!-- SECTION should be 1-8, maybe w/ subsection other parameters are
+       allowed: see man(7), man(1). -->
+  <!ENTITY dhsection   "<manvolnum>1</manvolnum>">
+  <!ENTITY dhemail     "<email>bronson@rinspin.com</email>">
+  <!ENTITY dhusername  "Scott Bronson">
+  <!ENTITY dhucpackage "<refentrytitle>XMLWF</refentrytitle>">
+  <!ENTITY dhpackage   "xmlwf">
+
+  <!ENTITY debian      "<productname>Debian GNU/Linux</productname>">
+  <!ENTITY gnu         "<acronym>GNU</acronym>">
+]>
+
+<refentry>
+  <refentryinfo>
+    <address>
+      &dhemail;
+    </address>
+    <author>
+      &dhfirstname;
+      &dhsurname;
+    </author>
+    <copyright>
+      <year>2001</year>
+      <holder>&dhusername;</holder>
+    </copyright>
+    &dhdate;
+  </refentryinfo>
+  <refmeta>
+    &dhucpackage;
+
+    &dhsection;
+  </refmeta>
+  <refnamediv>
+    <refname>&dhpackage;</refname>
+
+    <refpurpose>Determines if an XML document is well-formed</refpurpose>
+  </refnamediv>
+  <refsynopsisdiv>
+    <cmdsynopsis>
+      <command>&dhpackage;</command>
+	  <arg><option>-s</option></arg>
+	  <arg><option>-n</option></arg>
+	  <arg><option>-p</option></arg>
+	  <arg><option>-x</option></arg>
+
+	  <arg><option>-e <replaceable>encoding</replaceable></option></arg>
+	  <arg><option>-w</option></arg>
+
+	  <arg><option>-d <replaceable>output-dir</replaceable></option></arg>
+	  <arg><option>-c</option></arg>
+	  <arg><option>-m</option></arg>
+
+	  <arg><option>-r</option></arg>
+	  <arg><option>-t</option></arg>
+
+	  <arg><option>-v</option></arg>
+
+	  <arg>file ...</arg>
+    </cmdsynopsis>
+  </refsynopsisdiv>
+ 
+  <refsect1>
+    <title>DESCRIPTION</title>
+
+    <para>
+	<command>&dhpackage;</command> uses the Expat library to
+	determine if an XML document is well-formed.  It is
+	non-validating.
+	</para>
+
+	<para>
+	If you do not specify any files on the command-line, and you
+	have a recent version of <command>&dhpackage;</command>, the
+	input file will be read from standard input.
+	</para>
+
+  </refsect1>
+
+  <refsect1>
+    <title>WELL-FORMED DOCUMENTS</title>
+
+	<para>
+	  A well-formed document must adhere to the
+	  following rules:
+	</para>
+
+	<itemizedlist>
+      <listitem><para>
+	    The file begins with an XML declaration.  For instance,
+		<literal>&lt;?xml version="1.0" standalone="yes"?&gt;</literal>.
+		<emphasis>NOTE:</emphasis>
+		<command>&dhpackage;</command> does not currently
+		check for a valid XML declaration.
+      </para></listitem>
+      <listitem><para>
+		Every start tag is either empty (&lt;tag/&gt;)
+		or has a corresponding end tag.
+      </para></listitem>
+      <listitem><para>
+	    There is exactly one root element.  This element must contain
+		all other elements in the document.  Only comments, white
+		space, and processing instructions may come after the close
+		of the root element.
+      </para></listitem>
+      <listitem><para>
+		All elements nest properly.
+      </para></listitem>
+      <listitem><para>
+		All attribute values are enclosed in quotes (either single
+		or double).
+      </para></listitem>
+    </itemizedlist>
+
+	<para>
+	  If the document has a DTD, and it strictly complies with that
+	  DTD, then the document is also considered <emphasis>valid</emphasis>.
+	  <command>&dhpackage;</command> is a non-validating parser --
+	  it does not check the DTD.  However, it does support
+	  external entities (see the <option>-x</option> option).
+	</para>
+  </refsect1>
+
+  <refsect1>
+    <title>OPTIONS</title>
+
+<para>
+When an option includes an argument, you may specify the argument either
+separately ("<option>-d</option> output") or concatenated with the
+option ("<option>-d</option>output").  <command>&dhpackage;</command>
+supports both.
+</para>
+
+    <variablelist>
+
+      <varlistentry>
+        <term><option>-c</option></term>
+        <listitem>
+		<para>
+  If the input file is well-formed and <command>&dhpackage;</command>
+  doesn't encounter any errors, the input file is simply copied to
+  the output directory unchanged.
+  This implies no namespaces (turns off <option>-n</option>) and
+  requires <option>-d</option> to specify an output file.
+  		</para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-d output-dir</option></term>
+        <listitem>
+		<para>
+  Specifies a directory to contain transformed
+  representations of the input files.
+  By default, <option>-d</option> outputs a canonical representation
+  (described below).
+  You can select different output formats using <option>-c</option>
+  and <option>-m</option>.
+	  </para>
+	  <para>
+  The output filenames will
+  be exactly the same as the input filenames or "STDIN" if the input is
+  coming from standard input.  Therefore, you must be careful that the
+  output file does not go into the same directory as the input
+  file.  Otherwise, <command>&dhpackage;</command> will delete the
+  input file before it generates the output file (just like running
+  <literal>cat &lt; file &gt; file</literal> in most shells).
+	  </para>
+	  <para> 
+  Two structurally equivalent XML documents have a byte-for-byte
+  identical canonical XML representation.
+  Note that ignorable white space is considered significant and
+  is treated equivalently to data.
+  More on canonical XML can be found at
+  http://www.jclark.com/xml/canonxml.html .
+	  </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-e encoding</option></term>
+        <listitem>
+		<para>
+   Specifies the character encoding for the document, overriding
+   any document encoding declaration.  <command>&dhpackage;</command>
+   supports four built-in encodings:
+   	<literal>US-ASCII</literal>,
+	<literal>UTF-8</literal>,
+	<literal>UTF-16</literal>, and
+	<literal>ISO-8859-1</literal>.
+   Also see the <option>-w</option> option.
+	   </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-m</option></term>
+        <listitem>
+		<para>
+  Outputs some strange sort of XML file that completely
+  describes the input file, including character positions.
+  Requires <option>-d</option> to specify an output file.
+	   </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-n</option></term>
+        <listitem>
+		<para>
+  Turns on namespace processing.  (describe namespaces)
+  <option>-c</option> disables namespaces.
+	   </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-p</option></term>
+        <listitem>
+		<para>
+    Tells xmlwf to process external DTDs and parameter
+    entities.
+	 </para>
+	 <para>
+   Normally <command>&dhpackage;</command> never parses parameter
+   entities.  <option>-p</option> tells it to always parse them.
+   <option>-p</option> implies <option>-x</option>.
+	   </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-r</option></term>
+        <listitem>
+		<para>
+   Normally <command>&dhpackage;</command> memory-maps the XML file
+   before parsing; this can result in faster parsing on many
+   platforms.
+   <option>-r</option> turns off memory-mapping and uses normal file
+   IO calls instead.
+   Of course, memory-mapping is automatically turned off
+   when reading from standard input.
+	   </para>
+		<para>
+   Use of memory-mapping can cause some platforms to report
+   substantially higher memory usage for
+   <command>&dhpackage;</command>, but this appears to be a matter of
+   the operating system reporting memory in a strange way; there is
+   not a leak in <command>&dhpackage;</command>.
+           </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-s</option></term>
+        <listitem>
+		<para>
+  Prints an error if the document is not standalone. 
+  A document is standalone if it has no external subset and no
+  references to parameter entities.
+	   </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-t</option></term>
+        <listitem>
+		<para>
+  Turns on timings.  This tells Expat to parse the entire file,
+  but not perform any processing.
+  This gives a fairly accurate idea of the raw speed of Expat itself
+  without client overhead.
+  <option>-t</option> turns off most of the output options
+  (<option>-d</option>, <option>-m</option>, <option>-c</option>, ...).
+	   </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-v</option></term>
+        <listitem>
+		<para>
+  Prints the version of the Expat library being used, including some
+  information on the compile-time configuration of the library, and
+  then exits.
+	   </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-w</option></term>
+        <listitem>
+		<para>
+  Enables support for Windows code pages.
+  Normally, <command>&dhpackage;</command> will throw an error if it
+  runs across an encoding that it is not equipped to handle itself.  With
+  <option>-w</option>, &dhpackage; will try to use a Windows code
+  page.  See also <option>-e</option>.
+	   </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>-x</option></term>
+        <listitem>
+		<para>
+  Turns on parsing external entities.
+  </para>
+<para>
+  Non-validating parsers are not required to resolve external
+  entities, or even expand entities at all.
+  Expat always expands internal entities (?),
+  but external entity parsing must be enabled explicitly.
+  </para>
+  <para>
+  External entities are simply entities that obtain their
+  data from outside the XML file currently being parsed.
+  </para>
+  <para>
+  This is an example of an internal entity:
+<literallayout>
+&lt;!ENTITY vers '1.0.2'&gt;
+</literallayout>
+  </para>
+  <para>
+  And here are some examples of external entities:
+
+<literallayout>
+&lt;!ENTITY header SYSTEM "header-&amp;vers;.xml"&gt;  (parsed)
+&lt;!ENTITY logo SYSTEM "logo.png" PNG&gt;         (unparsed)
+</literallayout>
+
+	   </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><option>--</option></term>
+        <listitem>
+		<para>
+    (Two hyphens.)
+    Terminates the list of options.  This is only needed if a filename
+    starts with a hyphen.  For example:
+	   </para>
+<literallayout>
+&dhpackage; -- -myfile.xml
+</literallayout>
+		<para>
+    will run <command>&dhpackage;</command> on the file
+    <filename>-myfile.xml</filename>.
+	   </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+
+	<para>
+    Older versions of <command>&dhpackage;</command> do not support
+    reading from standard input.
+	</para>
+  </refsect1>
+
+  <refsect1>
+  <title>OUTPUT</title>
+    <para>
+	If an input file is not well-formed,
+	<command>&dhpackage;</command> prints a single line describing
+	the problem to standard output.  If a file is well formed,
+	<command>&dhpackage;</command> outputs nothing.
+	Note that the result code is <emphasis>not</emphasis> set.
+	</para>
+  </refsect1>
+  
+  <refsect1>
+    <title>BUGS</title>
+	<para>
+	<command>&dhpackage;</command> returns a 0 - noerr result,
+	even if the file is not well-formed.  There is no good way for
+	a program to use <command>&dhpackage;</command> to quickly
+	check a file -- it must parse <command>&dhpackage;</command>'s
+	standard output.
+	</para>
+	<para>
+	The errors should go to standard error, not standard output.
+	</para>
+	<para>
+	There should be a way to get <option>-d</option> to send its
+	output to standard output rather than forcing the user to send
+	it to a file.
+	</para>
+	<para>
+	I have no idea why anyone would want to use the
+	<option>-d</option>, <option>-c</option>, and
+	<option>-m</option> options.  If someone could explain it to
+	me, I'd like to add this information to this manpage.
+	</para>
+  </refsect1>
+
+  <refsect1>
+    <title>ALTERNATIVES</title>
+	<para>
+	  Here are some XML validators on the web:
+
+<literallayout>
+http://www.hcrc.ed.ac.uk/~richard/xml-check.html
+http://www.stg.brown.edu/service/xmlvalid/
+http://www.scripting.com/frontier5/xml/code/xmlValidator.html
+http://www.xml.com/pub/a/tools/ruwf/check.html
+</literallayout>
+
+		 </para>
+  </refsect1>
+
+  <refsect1>
+    <title>SEE ALSO</title>
+	<para>
+
+<literallayout>
+The Expat home page:        http://www.libexpat.org/
+The W3 XML specification:   http://www.w3.org/TR/REC-xml
+</literallayout>
+
+	</para>
+  </refsect1>
+
+  <refsect1>
+    <title>AUTHOR</title>
+    <para>
+	  This manual page was written by &dhusername; &dhemail; for
+      the &debian; system (but may be used by others).  Permission is
+      granted to copy, distribute and/or modify this document under
+      the terms of the <acronym>GNU</acronym> Free Documentation
+      License, Version 1.1.
+	</para>
+  </refsect1>
+</refentry>

diff --git a/examples/elements.c b/examples/elements.c
index bc04b6b..0ca1abd 100644
--- a/examples/elements.c
+++ b/examples/elements.c

@@ -27,6 +27,8 @@
 {
   int i;
   int *depthPtr = (int *)userData;
+  (void)atts;
+
   for (i = 0; i < *depthPtr; i++)
     putchar('\t');
   puts(name);
@@ -37,6 +39,8 @@
 endElement(void *userData, const char *name)
 {
   int *depthPtr = (int *)userData;
+  (void)name;
+
   *depthPtr -= 1;
 }
 
@@ -47,6 +51,9 @@
   XML_Parser parser = XML_ParserCreate(NULL);
   int done;
   int depth = 0;
+  (void)argc;
+  (void)argv;
+
   XML_SetUserData(parser, &depth);
   XML_SetElementHandler(parser, startElement, endElement);
   do {

diff --git a/examples/outline.c b/examples/outline.c
index 3a3c838..d9b0917 100644
--- a/examples/outline.c
+++ b/examples/outline.c

@@ -49,6 +49,7 @@
 start(void *data, const char *el, const char **attr)
 {
   int i;
+  (void)data;
 
   for (i = 0; i < Depth; i++)
     printf("  ");
@@ -66,6 +67,9 @@
 static void XMLCALL
 end(void *data, const char *el)
 {
+  (void)data;
+  (void)el;
+
   Depth--;
 }
 
@@ -73,6 +77,9 @@
 main(int argc, char *argv[])
 {
   XML_Parser p = XML_ParserCreate(NULL);
+  (void)argc;
+  (void)argv;
+
   if (! p) {
     fprintf(stderr, "Couldn't allocate memory for parser\n");
     exit(-1);

diff --git a/lib/expat.h b/lib/expat.h
index ec62f14..086e24b 100644
--- a/lib/expat.h
+++ b/lib/expat.h

@@ -342,7 +342,7 @@
                          XML_EntityDeclHandler handler);
 
 /* OBSOLETE -- OBSOLETE -- OBSOLETE
-   This handler has been superceded by the EntityDeclHandler above.
+   This handler has been superseded by the EntityDeclHandler above.
    It is provided here for backward compatibility.
 
    This is called for a declaration of an unparsed (NDATA) entity.
@@ -973,9 +973,12 @@
 
 /* Exposing the memory handling functions used in Expat */
 XMLPARSEAPI(void *)
+XML_ATTR_MALLOC
+XML_ATTR_ALLOC_SIZE(2)
 XML_MemMalloc(XML_Parser parser, size_t size);
 
 XMLPARSEAPI(void *)
+XML_ATTR_ALLOC_SIZE(3)
 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size);
 
 XMLPARSEAPI(void)
@@ -1031,14 +1034,12 @@
 XML_GetFeatureList(void);
 
 
-/* Expat follows the GNU/Linux convention of odd number minor version for
-   beta/development releases and even number minor version for stable
-   releases. Micro is bumped with each release, and set to 0 with each
-   change to major or minor version.
+/* Expat follows the semantic versioning convention.
+   See http://semver.org.
 */
 #define XML_MAJOR_VERSION 2
-#define XML_MINOR_VERSION 1
-#define XML_MICRO_VERSION 1
+#define XML_MINOR_VERSION 2
+#define XML_MICRO_VERSION 0
 
 #ifdef __cplusplus
 }

diff --git a/lib/expat_external.h b/lib/expat_external.h
index 2c03284..aa08a2f 100644
--- a/lib/expat_external.h
+++ b/lib/expat_external.h

@@ -65,12 +65,26 @@
 #endif
 #endif  /* not defined XML_STATIC */
 
+#if !defined(XMLIMPORT) && defined(__GNUC__) && (__GNUC__ >= 4)
+#define XMLIMPORT __attribute__ ((visibility ("default")))
+#endif
 
 /* If we didn't define it above, define it away: */
 #ifndef XMLIMPORT
 #define XMLIMPORT
 #endif
 
+#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96))
+#define XML_ATTR_MALLOC __attribute__((__malloc__))
+#else
+#define XML_ATTR_MALLOC
+#endif
+
+#if defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
+#define XML_ATTR_ALLOC_SIZE(x)  __attribute__((__alloc_size__(x)))
+#else
+#define XML_ATTR_ALLOC_SIZE(x)
+#endif
 
 #define XMLPARSEAPI(type) XMLIMPORT type XMLCALL
 

diff --git a/lib/internal.h b/lib/internal.h
index dd54548..94cb98e 100644
--- a/lib/internal.h
+++ b/lib/internal.h

@@ -71,3 +71,25 @@
 #define inline
 #endif
 #endif
+
+#ifndef UNUSED_P
+# ifdef __GNUC__
+#  define UNUSED_P(p) UNUSED_ ## p __attribute__((__unused__))
+# else
+#  define UNUSED_P(p) UNUSED_ ## p
+# endif
+#endif
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+void
+align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef);
+
+
+#ifdef __cplusplus
+}
+#endif

diff --git a/lib/xmlparse.c b/lib/xmlparse.c
index 0655e08..2f4e725 100644
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c

@@ -729,7 +729,7 @@
   if (sizeof(unsigned long) == 4) {
     return entropy * 2147483647;
   } else {
-    return entropy * 2305843009213693951;
+    return entropy * (unsigned long)2305843009213693951;
   }
 }
 
@@ -1728,14 +1728,17 @@
   }
 
   if (len > bufferLim - bufferEnd) {
-    int neededSize = len + (int)(bufferEnd - bufferPtr);
+#ifdef XML_CONTEXT_BYTES
+    int keep;
+#endif  /* defined XML_CONTEXT_BYTES */
+    /* Do not invoke signed arithmetic overflow: */
+    int neededSize = (int) ((unsigned)len + (unsigned)(bufferEnd - bufferPtr));
     if (neededSize < 0) {
       errorCode = XML_ERROR_NO_MEMORY;
       return NULL;
     }
 #ifdef XML_CONTEXT_BYTES
-    int keep = (int)(bufferPtr - buffer);
-
+    keep = (int)(bufferPtr - buffer);
     if (keep > XML_CONTEXT_BYTES)
       keep = XML_CONTEXT_BYTES;
     neededSize += keep;
@@ -1760,7 +1763,8 @@
       if (bufferSize == 0)
         bufferSize = INIT_BUFFER_SIZE;
       do {
-        bufferSize *= 2;
+        /* Do not invoke signed arithmetic overflow: */
+        bufferSize = (int) (2U * (unsigned) bufferSize);
       } while (bufferSize < neededSize && bufferSize > 0);
       if (bufferSize <= 0) {
         errorCode = XML_ERROR_NO_MEMORY;
@@ -1887,7 +1891,7 @@
 XML_GetCurrentByteIndex(XML_Parser parser)
 {
   if (eventPtr)
-    return parseEndByteIndex - (parseEndPtr - eventPtr);
+    return (XML_Index)(parseEndByteIndex - (parseEndPtr - eventPtr));
   return -1;
 }
 
@@ -4959,9 +4963,9 @@
 
 static enum XML_Error PTRCALL
 errorProcessor(XML_Parser parser,
-               const char *s,
-               const char *end,
-               const char **nextPtr)
+               const char *UNUSED_P(s),
+               const char *UNUSED_P(end),
+               const char **UNUSED_P(nextPtr))
 {
   return errorCode;
 }
@@ -6285,7 +6289,7 @@
   }
   if (pool->blocks && pool->start == pool->blocks->s) {
     BLOCK *temp;
-    int blockSize = (int)(pool->end - pool->start)*2;
+    int blockSize = (int)((unsigned)(pool->end - pool->start)*2U);
 
     if (blockSize < 0)
       return XML_FALSE;

diff --git a/lib/xmlrole.c b/lib/xmlrole.c
index 44772e2..8a68e20 100644
--- a/lib/xmlrole.c
+++ b/lib/xmlrole.c

@@ -195,9 +195,9 @@
 static int PTRCALL
 prolog2(PROLOG_STATE *state,
         int tok,
-        const char *ptr,
-        const char *end,
-        const ENCODING *enc)
+        const char *UNUSED_P(ptr),
+        const char *UNUSED_P(end),
+        const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -216,9 +216,9 @@
 static int PTRCALL
 doctype0(PROLOG_STATE *state,
          int tok,
-         const char *ptr,
-         const char *end,
-         const ENCODING *enc)
+         const char *UNUSED_P(ptr),
+         const char *UNUSED_P(end),
+         const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -264,9 +264,9 @@
 static int PTRCALL
 doctype2(PROLOG_STATE *state,
          int tok,
-         const char *ptr,
-         const char *end,
-         const ENCODING *enc)
+         const char *UNUSED_P(ptr),
+         const char *UNUSED_P(end),
+         const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -281,9 +281,9 @@
 static int PTRCALL
 doctype3(PROLOG_STATE *state,
          int tok,
-         const char *ptr,
-         const char *end,
-         const ENCODING *enc)
+         const char *UNUSED_P(ptr),
+         const char *UNUSED_P(end),
+         const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -298,9 +298,9 @@
 static int PTRCALL
 doctype4(PROLOG_STATE *state,
          int tok,
-         const char *ptr,
-         const char *end,
-         const ENCODING *enc)
+         const char *UNUSED_P(ptr),
+         const char *UNUSED_P(end),
+         const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -318,9 +318,9 @@
 static int PTRCALL
 doctype5(PROLOG_STATE *state,
          int tok,
-         const char *ptr,
-         const char *end,
-         const ENCODING *enc)
+         const char *UNUSED_P(ptr),
+         const char *UNUSED_P(end),
+         const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -437,9 +437,9 @@
 static int PTRCALL
 entity0(PROLOG_STATE *state,
         int tok,
-        const char *ptr,
-        const char *end,
-        const ENCODING *enc)
+        const char *UNUSED_P(ptr),
+        const char *UNUSED_P(end),
+        const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -457,9 +457,9 @@
 static int PTRCALL
 entity1(PROLOG_STATE *state,
         int tok,
-        const char *ptr,
-        const char *end,
-        const ENCODING *enc)
+        const char *UNUSED_P(ptr),
+        const char *UNUSED_P(end),
+        const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -502,9 +502,9 @@
 static int PTRCALL
 entity3(PROLOG_STATE *state,
         int tok,
-        const char *ptr,
-        const char *end,
-        const ENCODING *enc)
+        const char *UNUSED_P(ptr),
+        const char *UNUSED_P(end),
+        const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -519,9 +519,9 @@
 static int PTRCALL
 entity4(PROLOG_STATE *state,
         int tok,
-        const char *ptr,
-        const char *end,
-        const ENCODING *enc)
+        const char *UNUSED_P(ptr),
+        const char *UNUSED_P(end),
+        const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -559,9 +559,9 @@
 static int PTRCALL
 entity6(PROLOG_STATE *state,
         int tok,
-        const char *ptr,
-        const char *end,
-        const ENCODING *enc)
+        const char *UNUSED_P(ptr),
+        const char *UNUSED_P(end),
+        const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -605,9 +605,9 @@
 static int PTRCALL
 entity8(PROLOG_STATE *state,
         int tok,
-        const char *ptr,
-        const char *end,
-        const ENCODING *enc)
+        const char *UNUSED_P(ptr),
+        const char *UNUSED_P(end),
+        const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -622,9 +622,9 @@
 static int PTRCALL
 entity9(PROLOG_STATE *state,
         int tok,
-        const char *ptr,
-        const char *end,
-        const ENCODING *enc)
+        const char *UNUSED_P(ptr),
+        const char *UNUSED_P(end),
+        const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -639,9 +639,9 @@
 static int PTRCALL
 entity10(PROLOG_STATE *state,
          int tok,
-         const char *ptr,
-         const char *end,
-         const ENCODING *enc)
+         const char *UNUSED_P(ptr),
+         const char *UNUSED_P(end),
+         const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -656,9 +656,9 @@
 static int PTRCALL
 notation0(PROLOG_STATE *state,
           int tok,
-          const char *ptr,
-          const char *end,
-          const ENCODING *enc)
+          const char *UNUSED_P(ptr),
+          const char *UNUSED_P(end),
+          const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -697,9 +697,9 @@
 static int PTRCALL
 notation2(PROLOG_STATE *state,
           int tok,
-          const char *ptr,
-          const char *end,
-          const ENCODING *enc)
+          const char *UNUSED_P(ptr),
+          const char *UNUSED_P(end),
+          const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -714,9 +714,9 @@
 static int PTRCALL
 notation3(PROLOG_STATE *state,
           int tok,
-          const char *ptr,
-          const char *end,
-          const ENCODING *enc)
+          const char *UNUSED_P(ptr),
+          const char *UNUSED_P(end),
+          const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -732,9 +732,9 @@
 static int PTRCALL
 notation4(PROLOG_STATE *state,
           int tok,
-          const char *ptr,
-          const char *end,
-          const ENCODING *enc)
+          const char *UNUSED_P(ptr),
+          const char *UNUSED_P(end),
+          const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -753,9 +753,9 @@
 static int PTRCALL
 attlist0(PROLOG_STATE *state,
          int tok,
-         const char *ptr,
-         const char *end,
-         const ENCODING *enc)
+         const char *UNUSED_P(ptr),
+         const char *UNUSED_P(end),
+         const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -771,9 +771,9 @@
 static int PTRCALL
 attlist1(PROLOG_STATE *state,
          int tok,
-         const char *ptr,
-         const char *end,
-         const ENCODING *enc)
+         const char *UNUSED_P(ptr),
+         const char *UNUSED_P(end),
+         const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -833,9 +833,9 @@
 static int PTRCALL
 attlist3(PROLOG_STATE *state,
          int tok,
-         const char *ptr,
-         const char *end,
-         const ENCODING *enc)
+         const char *UNUSED_P(ptr),
+         const char *UNUSED_P(end),
+         const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -852,9 +852,9 @@
 static int PTRCALL
 attlist4(PROLOG_STATE *state,
          int tok,
-         const char *ptr,
-         const char *end,
-         const ENCODING *enc)
+         const char *UNUSED_P(ptr),
+         const char *UNUSED_P(end),
+         const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -872,9 +872,9 @@
 static int PTRCALL
 attlist5(PROLOG_STATE *state,
          int tok,
-         const char *ptr,
-         const char *end,
-         const ENCODING *enc)
+         const char *UNUSED_P(ptr),
+         const char *UNUSED_P(end),
+         const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -889,9 +889,9 @@
 static int PTRCALL
 attlist6(PROLOG_STATE *state,
          int tok,
-         const char *ptr,
-         const char *end,
-         const ENCODING *enc)
+         const char *UNUSED_P(ptr),
+         const char *UNUSED_P(end),
+         const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -906,9 +906,9 @@
 static int PTRCALL
 attlist7(PROLOG_STATE *state,
          int tok,
-         const char *ptr,
-         const char *end,
-         const ENCODING *enc)
+         const char *UNUSED_P(ptr),
+         const char *UNUSED_P(end),
+         const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -967,9 +967,9 @@
 static int PTRCALL
 attlist9(PROLOG_STATE *state,
          int tok,
-         const char *ptr,
-         const char *end,
-         const ENCODING *enc)
+         const char *UNUSED_P(ptr),
+         const char *UNUSED_P(end),
+         const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -984,9 +984,9 @@
 static int PTRCALL
 element0(PROLOG_STATE *state,
          int tok,
-         const char *ptr,
-         const char *end,
-         const ENCODING *enc)
+         const char *UNUSED_P(ptr),
+         const char *UNUSED_P(end),
+         const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -1072,9 +1072,9 @@
 static int PTRCALL
 element3(PROLOG_STATE *state,
          int tok,
-         const char *ptr,
-         const char *end,
-         const ENCODING *enc)
+         const char *UNUSED_P(ptr),
+         const char *UNUSED_P(end),
+         const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -1097,9 +1097,9 @@
 static int PTRCALL
 element4(PROLOG_STATE *state,
          int tok,
-         const char *ptr,
-         const char *end,
-         const ENCODING *enc)
+         const char *UNUSED_P(ptr),
+         const char *UNUSED_P(end),
+         const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -1115,9 +1115,9 @@
 static int PTRCALL
 element5(PROLOG_STATE *state,
          int tok,
-         const char *ptr,
-         const char *end,
-         const ENCODING *enc)
+         const char *UNUSED_P(ptr),
+         const char *UNUSED_P(end),
+         const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -1136,9 +1136,9 @@
 static int PTRCALL
 element6(PROLOG_STATE *state,
          int tok,
-         const char *ptr,
-         const char *end,
-         const ENCODING *enc)
+         const char *UNUSED_P(ptr),
+         const char *UNUSED_P(end),
+         const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -1166,9 +1166,9 @@
 static int PTRCALL
 element7(PROLOG_STATE *state,
          int tok,
-         const char *ptr,
-         const char *end,
-         const ENCODING *enc)
+         const char *UNUSED_P(ptr),
+         const char *UNUSED_P(end),
+         const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -1240,9 +1240,9 @@
 static int PTRCALL
 condSect1(PROLOG_STATE *state,
           int tok,
-          const char *ptr,
-          const char *end,
-          const ENCODING *enc)
+          const char *UNUSED_P(ptr),
+          const char *UNUSED_P(end),
+          const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -1258,9 +1258,9 @@
 static int PTRCALL
 condSect2(PROLOG_STATE *state,
           int tok,
-          const char *ptr,
-          const char *end,
-          const ENCODING *enc)
+          const char *UNUSED_P(ptr),
+          const char *UNUSED_P(end),
+          const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -1277,9 +1277,9 @@
 static int PTRCALL
 declClose(PROLOG_STATE *state,
           int tok,
-          const char *ptr,
-          const char *end,
-          const ENCODING *enc)
+          const char *UNUSED_P(ptr),
+          const char *UNUSED_P(end),
+          const ENCODING *UNUSED_P(enc))
 {
   switch (tok) {
   case XML_TOK_PROLOG_S:
@@ -1292,11 +1292,11 @@
 }
 
 static int PTRCALL
-error(PROLOG_STATE *state,
-      int tok,
-      const char *ptr,
-      const char *end,
-      const ENCODING *enc)
+error(PROLOG_STATE *UNUSED_P(state),
+      int UNUSED_P(tok),
+      const char *UNUSED_P(ptr),
+      const char *UNUSED_P(end),
+      const ENCODING *UNUSED_P(enc))
 {
   return XML_ROLE_NONE;
 }

diff --git a/lib/xmltok.c b/lib/xmltok.c
index cb98ce1..daa3565 100644
--- a/lib/xmltok.c
+++ b/lib/xmltok.c

@@ -46,7 +46,7 @@
 #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
 
 #define UCS2_GET_NAMING(pages, hi, lo) \
-   (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))
+   (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
 
 /* A 2 byte UTF-8 representation splits the characters 11 bits between
    the bottom 5 and 6 bits of the bytes.  We need 8 bits to index into
@@ -56,7 +56,7 @@
     (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
                       + ((((byte)[0]) & 3) << 1) \
                       + ((((byte)[1]) >> 5) & 1)] \
-         & (1 << (((byte)[1]) & 0x1F)))
+         & (1u << (((byte)[1]) & 0x1F)))
 
 /* A 3 byte UTF-8 representation splits the characters 16 bits between
    the bottom 4, 6 and 6 bits of the bytes.  We need 8 bits to index
@@ -69,7 +69,7 @@
                        << 3) \
                       + ((((byte)[1]) & 3) << 1) \
                       + ((((byte)[2]) >> 5) & 1)] \
-         & (1 << (((byte)[2]) & 0x1F)))
+         & (1u << (((byte)[2]) & 0x1F)))
 
 #define UTF8_GET_NAMING(pages, p, n) \
   ((n) == 2 \
@@ -122,19 +122,19 @@
     ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))
 
 static int PTRFASTCALL
-isNever(const ENCODING *enc, const char *p)
+isNever(const ENCODING *UNUSED_P(enc), const char *UNUSED_P(p))
 {
   return 0;
 }
 
 static int PTRFASTCALL
-utf8_isName2(const ENCODING *enc, const char *p)
+utf8_isName2(const ENCODING *UNUSED_P(enc), const char *p)
 {
   return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
 }
 
 static int PTRFASTCALL
-utf8_isName3(const ENCODING *enc, const char *p)
+utf8_isName3(const ENCODING *UNUSED_P(enc), const char *p)
 {
   return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
 }
@@ -142,13 +142,13 @@
 #define utf8_isName4 isNever
 
 static int PTRFASTCALL
-utf8_isNmstrt2(const ENCODING *enc, const char *p)
+utf8_isNmstrt2(const ENCODING *UNUSED_P(enc), const char *p)
 {
   return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
 }
 
 static int PTRFASTCALL
-utf8_isNmstrt3(const ENCODING *enc, const char *p)
+utf8_isNmstrt3(const ENCODING *UNUSED_P(enc), const char *p)
 {
   return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
 }
@@ -156,19 +156,19 @@
 #define utf8_isNmstrt4 isNever
 
 static int PTRFASTCALL
-utf8_isInvalid2(const ENCODING *enc, const char *p)
+utf8_isInvalid2(const ENCODING *UNUSED_P(enc), const char *p)
 {
   return UTF8_INVALID2((const unsigned char *)p);
 }
 
 static int PTRFASTCALL
-utf8_isInvalid3(const ENCODING *enc, const char *p)
+utf8_isInvalid3(const ENCODING *UNUSED_P(enc), const char *p)
 {
   return UTF8_INVALID3((const unsigned char *)p);
 }
 
 static int PTRFASTCALL
-utf8_isInvalid4(const ENCODING *enc, const char *p)
+utf8_isInvalid4(const ENCODING *UNUSED_P(enc), const char *p)
 {
   return UTF8_INVALID4((const unsigned char *)p);
 }
@@ -222,6 +222,17 @@
  E ## isInvalid3, \
  E ## isInvalid4
 
+#define NULL_VTABLE \
+ /* isName2 */ NULL, \
+ /* isName3 */ NULL, \
+ /* isName4 */ NULL, \
+ /* isNmstrt2 */ NULL, \
+ /* isNmstrt3 */ NULL, \
+ /* isNmstrt4 */ NULL, \
+ /* isInvalid2 */ NULL, \
+ /* isInvalid3 */ NULL, \
+ /* isInvalid4 */ NULL
+
 static int FASTCALL checkCharRefNumber(int);
 
 #include "xmltok_impl.h"
@@ -318,8 +329,43 @@
   UTF8_cval4 = 0xf0
 };
 
+void
+align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef)
+{
+  const char * fromLim = *fromLimRef;
+  size_t walked = 0;
+  for (; fromLim > from; fromLim--, walked++) {
+    const unsigned char prev = (unsigned char)fromLim[-1];
+    if ((prev & 0xf8u) == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */
+      if (walked + 1 >= 4) {
+        fromLim += 4 - 1;
+        break;
+      } else {
+        walked = 0;
+      }
+    } else if ((prev & 0xf0u) == 0xe0u) { /* 3-byte character, lead by 0b1110xxxx byte */
+      if (walked + 1 >= 3) {
+        fromLim += 3 - 1;
+        break;
+      } else {
+        walked = 0;
+      }
+    } else if ((prev & 0xe0u) == 0xc0u) { /* 2-byte character, lead by 0b110xxxxx byte */
+      if (walked + 1 >= 2) {
+        fromLim += 2 - 1;
+        break;
+      } else {
+        walked = 0;
+      }
+    } else if ((prev & 0x80u) == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */
+      break;
+    }
+  }
+  *fromLimRef = fromLim;
+}
+
 static enum XML_Convert_Result PTRCALL
-utf8_toUtf8(const ENCODING *enc,
+utf8_toUtf8(const ENCODING *UNUSED_P(enc),
             const char **fromP, const char *fromLim,
             char **toP, const char *toLim)
 {
@@ -329,9 +375,8 @@
   if (fromLim - *fromP > toLim - *toP) {
     /* Avoid copying partial characters. */
     res = XML_CONVERT_OUTPUT_EXHAUSTED;
-    for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--)
-      if (((unsigned char)fromLim[-1] & 0xc0) != 0x80)
-        break;
+    fromLim = *fromP + (toLim - *toP);
+    align_limit_to_full_utf8_characters(*fromP, &fromLim);
   }
   for (to = *toP, from = *fromP; (from < fromLim) && (to < toLim); from++, to++)
     *to = *from;
@@ -449,7 +494,7 @@
 };
 
 static enum XML_Convert_Result PTRCALL
-latin1_toUtf8(const ENCODING *enc,
+latin1_toUtf8(const ENCODING *UNUSED_P(enc),
               const char **fromP, const char *fromLim,
               char **toP, const char *toLim)
 {
@@ -474,7 +519,7 @@
 }
 
 static enum XML_Convert_Result PTRCALL
-latin1_toUtf16(const ENCODING *enc,
+latin1_toUtf16(const ENCODING *UNUSED_P(enc),
                const char **fromP, const char *fromLim,
                unsigned short **toP, const unsigned short *toLim)
 {
@@ -495,7 +540,7 @@
 #include "asciitab.h"
 #include "latin1tab.h"
   },
-  STANDARD_VTABLE(sb_)
+  STANDARD_VTABLE(sb_) NULL_VTABLE
 };
 
 #endif
@@ -508,11 +553,11 @@
 #undef BT_COLON
 #include "latin1tab.h"
   },
-  STANDARD_VTABLE(sb_)
+  STANDARD_VTABLE(sb_) NULL_VTABLE
 };
 
 static enum XML_Convert_Result PTRCALL
-ascii_toUtf8(const ENCODING *enc,
+ascii_toUtf8(const ENCODING *UNUSED_P(enc),
              const char **fromP, const char *fromLim,
              char **toP, const char *toLim)
 {
@@ -533,7 +578,7 @@
 #include "asciitab.h"
 /* BT_NONXML == 0 */
   },
-  STANDARD_VTABLE(sb_)
+  STANDARD_VTABLE(sb_) NULL_VTABLE
 };
 
 #endif
@@ -546,7 +591,7 @@
 #undef BT_COLON
 /* BT_NONXML == 0 */
   },
-  STANDARD_VTABLE(sb_)
+  STANDARD_VTABLE(sb_) NULL_VTABLE
 };
 
 static int PTRFASTCALL
@@ -570,7 +615,7 @@
 
 #define DEFINE_UTF16_TO_UTF8(E) \
 static enum XML_Convert_Result  PTRCALL \
-E ## toUtf8(const ENCODING *enc, \
+E ## toUtf8(const ENCODING *UNUSED_P(enc), \
             const char **fromP, const char *fromLim, \
             char **toP, const char *toLim) \
 { \
@@ -642,7 +687,7 @@
 
 #define DEFINE_UTF16_TO_UTF16(E) \
 static enum XML_Convert_Result  PTRCALL \
-E ## toUtf16(const ENCODING *enc, \
+E ## toUtf16(const ENCODING *UNUSED_P(enc), \
              const char **fromP, const char *fromLim, \
              unsigned short **toP, const unsigned short *toLim) \
 { \
@@ -776,7 +821,7 @@
 #include "asciitab.h"
 #include "latin1tab.h"
   },
-  STANDARD_VTABLE(little2_)
+  STANDARD_VTABLE(little2_) NULL_VTABLE
 };
 
 #endif
@@ -795,7 +840,7 @@
 #undef BT_COLON
 #include "latin1tab.h"
   },
-  STANDARD_VTABLE(little2_)
+  STANDARD_VTABLE(little2_) NULL_VTABLE
 };
 
 #if BYTEORDER != 4321
@@ -808,7 +853,7 @@
 #include "iasciitab.h"
 #include "latin1tab.h"
   },
-  STANDARD_VTABLE(little2_)
+  STANDARD_VTABLE(little2_) NULL_VTABLE
 };
 
 #endif
@@ -821,7 +866,7 @@
 #undef BT_COLON
 #include "latin1tab.h"
   },
-  STANDARD_VTABLE(little2_)
+  STANDARD_VTABLE(little2_) NULL_VTABLE
 };
 
 #endif
@@ -917,7 +962,7 @@
 #include "asciitab.h"
 #include "latin1tab.h"
   },
-  STANDARD_VTABLE(big2_)
+  STANDARD_VTABLE(big2_) NULL_VTABLE
 };
 
 #endif
@@ -936,7 +981,7 @@
 #undef BT_COLON
 #include "latin1tab.h"
   },
-  STANDARD_VTABLE(big2_)
+  STANDARD_VTABLE(big2_) NULL_VTABLE
 };
 
 #if BYTEORDER != 1234
@@ -949,7 +994,7 @@
 #include "iasciitab.h"
 #include "latin1tab.h"
   },
-  STANDARD_VTABLE(big2_)
+  STANDARD_VTABLE(big2_) NULL_VTABLE
 };
 
 #endif
@@ -962,7 +1007,7 @@
 #undef BT_COLON
 #include "latin1tab.h"
   },
-  STANDARD_VTABLE(big2_)
+  STANDARD_VTABLE(big2_) NULL_VTABLE
 };
 
 #endif
@@ -988,7 +1033,7 @@
 }
 
 static void PTRCALL
-initUpdatePosition(const ENCODING *enc, const char *ptr,
+initUpdatePosition(const ENCODING *UNUSED_P(enc), const char *ptr,
                    const char *end, POSITION *pos)
 {
   normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);

diff --git a/lib/xmltok_impl.c b/lib/xmltok_impl.c
index 6c5a3ba..5f779c0 100644
--- a/lib/xmltok_impl.c
+++ b/lib/xmltok_impl.c

@@ -87,27 +87,45 @@
 #define PREFIX(ident) ident
 #endif
 
+
+#define HAS_CHARS(enc, ptr, end, count) \
+    (end - ptr >= count * MINBPC(enc))
+
+#define HAS_CHAR(enc, ptr, end) \
+    HAS_CHARS(enc, ptr, end, 1)
+
+#define REQUIRE_CHARS(enc, ptr, end, count) \
+    { \
+      if (! HAS_CHARS(enc, ptr, end, count)) { \
+        return XML_TOK_PARTIAL; \
+      } \
+    }
+
+#define REQUIRE_CHAR(enc, ptr, end) \
+    REQUIRE_CHARS(enc, ptr, end, 1)
+
+
 /* ptr points to character following "<!-" */
 
 static int PTRCALL
 PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
                     const char *end, const char **nextTokPtr)
 {
-  if (ptr < end) {
+  if (HAS_CHAR(enc, ptr, end)) {
     if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
       *nextTokPtr = ptr;
       return XML_TOK_INVALID;
     }
     ptr += MINBPC(enc);
-    while (ptr < end) {
+    while (HAS_CHAR(enc, ptr, end)) {
       switch (BYTE_TYPE(enc, ptr)) {
       INVALID_CASES(ptr, nextTokPtr)
       case BT_MINUS:
-        if ((ptr += MINBPC(enc)) == end)
-          return XML_TOK_PARTIAL;
+        ptr += MINBPC(enc);
+        REQUIRE_CHAR(enc, ptr, end);
         if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
-          if ((ptr += MINBPC(enc)) == end)
-            return XML_TOK_PARTIAL;
+          ptr += MINBPC(enc);
+          REQUIRE_CHAR(enc, ptr, end);
           if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
             *nextTokPtr = ptr;
             return XML_TOK_INVALID;
@@ -131,8 +149,7 @@
 PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
                  const char *end, const char **nextTokPtr)
 {
-  if (ptr == end)
-    return XML_TOK_PARTIAL;
+  REQUIRE_CHAR(enc, ptr, end);
   switch (BYTE_TYPE(enc, ptr)) {
   case BT_MINUS:
     return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
@@ -147,11 +164,10 @@
     *nextTokPtr = ptr;
     return XML_TOK_INVALID;
   }
-  while (ptr < end) {
+  while (HAS_CHAR(enc, ptr, end)) {
     switch (BYTE_TYPE(enc, ptr)) {
     case BT_PERCNT:
-      if (ptr + MINBPC(enc) == end)
-        return XML_TOK_PARTIAL;
+      REQUIRE_CHARS(enc, ptr, end, 2);
       /* don't allow <!ENTITY% foo "whatever"> */
       switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
       case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
@@ -175,7 +191,7 @@
 }
 
 static int PTRCALL
-PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr,
+PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr,
                       const char *end, int *tokPtr)
 {
   int upper = 0;
@@ -225,15 +241,14 @@
 {
   int tok;
   const char *target = ptr;
-  if (ptr == end)
-    return XML_TOK_PARTIAL;
+  REQUIRE_CHAR(enc, ptr, end);
   switch (BYTE_TYPE(enc, ptr)) {
   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
   default:
     *nextTokPtr = ptr;
     return XML_TOK_INVALID;
   }
-  while (ptr < end) {
+  while (HAS_CHAR(enc, ptr, end)) {
     switch (BYTE_TYPE(enc, ptr)) {
     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
     case BT_S: case BT_CR: case BT_LF:
@@ -242,13 +257,12 @@
         return XML_TOK_INVALID;
       }
       ptr += MINBPC(enc);
-      while (ptr < end) {
+      while (HAS_CHAR(enc, ptr, end)) {
         switch (BYTE_TYPE(enc, ptr)) {
         INVALID_CASES(ptr, nextTokPtr)
         case BT_QUEST:
           ptr += MINBPC(enc);
-          if (ptr == end)
-            return XML_TOK_PARTIAL;
+          REQUIRE_CHAR(enc, ptr, end);
           if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
             *nextTokPtr = ptr + MINBPC(enc);
             return tok;
@@ -266,8 +280,7 @@
         return XML_TOK_INVALID;
       }
       ptr += MINBPC(enc);
-      if (ptr == end)
-        return XML_TOK_PARTIAL;
+      REQUIRE_CHAR(enc, ptr, end);
       if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
         *nextTokPtr = ptr + MINBPC(enc);
         return tok;
@@ -282,15 +295,14 @@
 }
 
 static int PTRCALL
-PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr,
+PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr,
                          const char *end, const char **nextTokPtr)
 {
   static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
                                      ASCII_T, ASCII_A, ASCII_LSQB };
   int i;
   /* CDATA[ */
-  if (end - ptr < 6 * MINBPC(enc))
-    return XML_TOK_PARTIAL;
+  REQUIRE_CHARS(enc, ptr, end, 6);
   for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
     if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
       *nextTokPtr = ptr;
@@ -319,13 +331,11 @@
   switch (BYTE_TYPE(enc, ptr)) {
   case BT_RSQB:
     ptr += MINBPC(enc);
-    if (ptr == end)
-      return XML_TOK_PARTIAL;
+    REQUIRE_CHAR(enc, ptr, end);
     if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
       break;
     ptr += MINBPC(enc);
-    if (ptr == end)
-      return XML_TOK_PARTIAL;
+    REQUIRE_CHAR(enc, ptr, end);
     if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
       ptr -= MINBPC(enc);
       break;
@@ -334,8 +344,7 @@
     return XML_TOK_CDATA_SECT_CLOSE;
   case BT_CR:
     ptr += MINBPC(enc);
-    if (ptr == end)
-      return XML_TOK_PARTIAL;
+    REQUIRE_CHAR(enc, ptr, end);
     if (BYTE_TYPE(enc, ptr) == BT_LF)
       ptr += MINBPC(enc);
     *nextTokPtr = ptr;
@@ -348,7 +357,7 @@
     ptr += MINBPC(enc);
     break;
   }
-  while (ptr < end) {
+  while (HAS_CHAR(enc, ptr, end)) {
     switch (BYTE_TYPE(enc, ptr)) {
 #define LEAD_CASE(n) \
     case BT_LEAD ## n: \
@@ -383,19 +392,18 @@
 PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
                    const char *end, const char **nextTokPtr)
 {
-  if (ptr == end)
-    return XML_TOK_PARTIAL;
+  REQUIRE_CHAR(enc, ptr, end);
   switch (BYTE_TYPE(enc, ptr)) {
   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
   default:
     *nextTokPtr = ptr;
     return XML_TOK_INVALID;
   }
-  while (ptr < end) {
+  while (HAS_CHAR(enc, ptr, end)) {
     switch (BYTE_TYPE(enc, ptr)) {
     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
     case BT_S: case BT_CR: case BT_LF:
-      for (ptr += MINBPC(enc); ptr < end; ptr += MINBPC(enc)) {
+      for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
         switch (BYTE_TYPE(enc, ptr)) {
         case BT_S: case BT_CR: case BT_LF:
           break;
@@ -432,7 +440,7 @@
 PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
                        const char *end, const char **nextTokPtr)
 {
-  if (ptr < end) {
+  if (HAS_CHAR(enc, ptr, end)) {
     switch (BYTE_TYPE(enc, ptr)) {
     case BT_DIGIT:
     case BT_HEX:
@@ -441,7 +449,7 @@
       *nextTokPtr = ptr;
       return XML_TOK_INVALID;
     }
-    for (ptr += MINBPC(enc); ptr < end; ptr += MINBPC(enc)) {
+    for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
       switch (BYTE_TYPE(enc, ptr)) {
       case BT_DIGIT:
       case BT_HEX:
@@ -464,7 +472,7 @@
 PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
                     const char *end, const char **nextTokPtr)
 {
-  if (ptr < end) {
+  if (HAS_CHAR(enc, ptr, end)) {
     if (CHAR_MATCHES(enc, ptr, ASCII_x))
       return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
     switch (BYTE_TYPE(enc, ptr)) {
@@ -474,7 +482,7 @@
       *nextTokPtr = ptr;
       return XML_TOK_INVALID;
     }
-    for (ptr += MINBPC(enc); ptr < end; ptr += MINBPC(enc)) {
+    for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
       switch (BYTE_TYPE(enc, ptr)) {
       case BT_DIGIT:
         break;
@@ -496,8 +504,7 @@
 PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
                 const char **nextTokPtr)
 {
-  if (ptr == end)
-    return XML_TOK_PARTIAL;
+  REQUIRE_CHAR(enc, ptr, end);
   switch (BYTE_TYPE(enc, ptr)) {
   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
   case BT_NUM:
@@ -506,7 +513,7 @@
     *nextTokPtr = ptr;
     return XML_TOK_INVALID;
   }
-  while (ptr < end) {
+  while (HAS_CHAR(enc, ptr, end)) {
     switch (BYTE_TYPE(enc, ptr)) {
     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
     case BT_SEMI:
@@ -529,7 +536,7 @@
 #ifdef XML_NS
   int hadColon = 0;
 #endif
-  while (ptr < end) {
+  while (HAS_CHAR(enc, ptr, end)) {
     switch (BYTE_TYPE(enc, ptr)) {
     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
 #ifdef XML_NS
@@ -540,8 +547,7 @@
       }
       hadColon = 1;
       ptr += MINBPC(enc);
-      if (ptr == end)
-        return XML_TOK_PARTIAL;
+      REQUIRE_CHAR(enc, ptr, end);
       switch (BYTE_TYPE(enc, ptr)) {
       CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
       default:
@@ -555,8 +561,7 @@
         int t;
 
         ptr += MINBPC(enc);
-        if (ptr == end)
-          return XML_TOK_PARTIAL;
+        REQUIRE_CHAR(enc, ptr, end);
         t = BYTE_TYPE(enc, ptr);
         if (t == BT_EQUALS)
           break;
@@ -579,8 +584,7 @@
 #endif
         for (;;) {
           ptr += MINBPC(enc);
-          if (ptr == end)
-            return XML_TOK_PARTIAL;
+          REQUIRE_CHAR(enc, ptr, end);
           open = BYTE_TYPE(enc, ptr);
           if (open == BT_QUOT || open == BT_APOS)
             break;
@@ -598,8 +602,7 @@
         /* in attribute value */
         for (;;) {
           int t;
-          if (ptr == end)
-            return XML_TOK_PARTIAL;
+          REQUIRE_CHAR(enc, ptr, end);
           t = BYTE_TYPE(enc, ptr);
           if (t == open)
             break;
@@ -624,8 +627,7 @@
           }
         }
         ptr += MINBPC(enc);
-        if (ptr == end)
-          return XML_TOK_PARTIAL;
+        REQUIRE_CHAR(enc, ptr, end);
         switch (BYTE_TYPE(enc, ptr)) {
         case BT_S:
         case BT_CR:
@@ -642,8 +644,7 @@
         /* ptr points to closing quote */
         for (;;) {
           ptr += MINBPC(enc);
-          if (ptr == end)
-            return XML_TOK_PARTIAL;
+          REQUIRE_CHAR(enc, ptr, end);
           switch (BYTE_TYPE(enc, ptr)) {
           CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
           case BT_S: case BT_CR: case BT_LF:
@@ -655,8 +656,7 @@
           case BT_SOL:
           sol:
             ptr += MINBPC(enc);
-            if (ptr == end)
-              return XML_TOK_PARTIAL;
+            REQUIRE_CHAR(enc, ptr, end);
             if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
               *nextTokPtr = ptr;
               return XML_TOK_INVALID;
@@ -688,13 +688,12 @@
 #ifdef XML_NS
   int hadColon;
 #endif
-  if (ptr == end)
-    return XML_TOK_PARTIAL;
+  REQUIRE_CHAR(enc, ptr, end);
   switch (BYTE_TYPE(enc, ptr)) {
   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
   case BT_EXCL:
-    if ((ptr += MINBPC(enc)) == end)
-      return XML_TOK_PARTIAL;
+    ptr += MINBPC(enc);
+    REQUIRE_CHAR(enc, ptr, end);
     switch (BYTE_TYPE(enc, ptr)) {
     case BT_MINUS:
       return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
@@ -716,7 +715,7 @@
   hadColon = 0;
 #endif
   /* we have a start-tag */
-  while (ptr < end) {
+  while (HAS_CHAR(enc, ptr, end)) {
     switch (BYTE_TYPE(enc, ptr)) {
     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
 #ifdef XML_NS
@@ -727,8 +726,7 @@
       }
       hadColon = 1;
       ptr += MINBPC(enc);
-      if (ptr == end)
-        return XML_TOK_PARTIAL;
+      REQUIRE_CHAR(enc, ptr, end);
       switch (BYTE_TYPE(enc, ptr)) {
       CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
       default:
@@ -740,7 +738,7 @@
     case BT_S: case BT_CR: case BT_LF:
       {
         ptr += MINBPC(enc);
-        while (ptr < end) {
+        while (HAS_CHAR(enc, ptr, end)) {
           switch (BYTE_TYPE(enc, ptr)) {
           CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
           case BT_GT:
@@ -765,8 +763,7 @@
     case BT_SOL:
     sol:
       ptr += MINBPC(enc);
-      if (ptr == end)
-        return XML_TOK_PARTIAL;
+      REQUIRE_CHAR(enc, ptr, end);
       if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
         *nextTokPtr = ptr;
         return XML_TOK_INVALID;
@@ -803,7 +800,7 @@
     return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
   case BT_CR:
     ptr += MINBPC(enc);
-    if (ptr == end)
+    if (! HAS_CHAR(enc, ptr, end))
       return XML_TOK_TRAILING_CR;
     if (BYTE_TYPE(enc, ptr) == BT_LF)
       ptr += MINBPC(enc);
@@ -814,12 +811,12 @@
     return XML_TOK_DATA_NEWLINE;
   case BT_RSQB:
     ptr += MINBPC(enc);
-    if (ptr == end)
+    if (! HAS_CHAR(enc, ptr, end))
       return XML_TOK_TRAILING_RSQB;
     if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
       break;
     ptr += MINBPC(enc);
-    if (ptr == end)
+    if (! HAS_CHAR(enc, ptr, end))
       return XML_TOK_TRAILING_RSQB;
     if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
       ptr -= MINBPC(enc);
@@ -832,7 +829,7 @@
     ptr += MINBPC(enc);
     break;
   }
-  while (ptr < end) {
+  while (HAS_CHAR(enc, ptr, end)) {
     switch (BYTE_TYPE(enc, ptr)) {
 #define LEAD_CASE(n) \
     case BT_LEAD ## n: \
@@ -845,12 +842,12 @@
     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
 #undef LEAD_CASE
     case BT_RSQB:
-      if (ptr + MINBPC(enc) != end) {
+      if (HAS_CHARS(enc, ptr, end, 2)) {
          if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
            ptr += MINBPC(enc);
            break;
          }
-         if (ptr + 2*MINBPC(enc) != end) {
+         if (HAS_CHARS(enc, ptr, end, 3)) {
            if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
              ptr += MINBPC(enc);
              break;
@@ -884,8 +881,7 @@
 PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
                     const char **nextTokPtr)
 {
-  if (ptr == end)
-    return XML_TOK_PARTIAL;
+  REQUIRE_CHAR(enc, ptr, end);
   switch (BYTE_TYPE(enc, ptr)) {
   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
   case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
@@ -895,7 +891,7 @@
     *nextTokPtr = ptr;
     return XML_TOK_INVALID;
   }
-  while (ptr < end) {
+  while (HAS_CHAR(enc, ptr, end)) {
     switch (BYTE_TYPE(enc, ptr)) {
     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
     case BT_SEMI:
@@ -913,15 +909,14 @@
 PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
                       const char **nextTokPtr)
 {
-  if (ptr == end)
-    return XML_TOK_PARTIAL;
+  REQUIRE_CHAR(enc, ptr, end);
   switch (BYTE_TYPE(enc, ptr)) {
   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
   default:
     *nextTokPtr = ptr;
     return XML_TOK_INVALID;
   }
-  while (ptr < end) {
+  while (HAS_CHAR(enc, ptr, end)) {
     switch (BYTE_TYPE(enc, ptr)) {
     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
     case BT_CR: case BT_LF: case BT_S:
@@ -941,7 +936,7 @@
                 const char *ptr, const char *end,
                 const char **nextTokPtr)
 {
-  while (ptr < end) {
+  while (HAS_CHAR(enc, ptr, end)) {
     int t = BYTE_TYPE(enc, ptr);
     switch (t) {
     INVALID_CASES(ptr, nextTokPtr)
@@ -950,7 +945,7 @@
       ptr += MINBPC(enc);
       if (t != open)
         break;
-      if (ptr == end)
+      if (! HAS_CHAR(enc, ptr, end))
         return -XML_TOK_LITERAL;
       *nextTokPtr = ptr;
       switch (BYTE_TYPE(enc, ptr)) {
@@ -992,8 +987,7 @@
   case BT_LT:
     {
       ptr += MINBPC(enc);
-      if (ptr == end)
-        return XML_TOK_PARTIAL;
+      REQUIRE_CHAR(enc, ptr, end);
       switch (BYTE_TYPE(enc, ptr)) {
       case BT_EXCL:
         return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
@@ -1021,7 +1015,7 @@
   case BT_S: case BT_LF:
     for (;;) {
       ptr += MINBPC(enc);
-      if (ptr == end)
+      if (! HAS_CHAR(enc, ptr, end))
         break;
       switch (BYTE_TYPE(enc, ptr)) {
       case BT_S: case BT_LF:
@@ -1048,11 +1042,10 @@
     return XML_TOK_OPEN_BRACKET;
   case BT_RSQB:
     ptr += MINBPC(enc);
-    if (ptr == end)
+    if (! HAS_CHAR(enc, ptr, end))
       return -XML_TOK_CLOSE_BRACKET;
     if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
-      if (ptr + MINBPC(enc) == end)
-        return XML_TOK_PARTIAL;
+      REQUIRE_CHARS(enc, ptr, end, 2);
       if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
         *nextTokPtr = ptr + 2*MINBPC(enc);
         return XML_TOK_COND_SECT_CLOSE;
@@ -1065,7 +1058,7 @@
     return XML_TOK_OPEN_PAREN;
   case BT_RPAR:
     ptr += MINBPC(enc);
-    if (ptr == end)
+    if (! HAS_CHAR(enc, ptr, end))
       return -XML_TOK_CLOSE_PAREN;
     switch (BYTE_TYPE(enc, ptr)) {
     case BT_AST:
@@ -1141,7 +1134,7 @@
     *nextTokPtr = ptr;
     return XML_TOK_INVALID;
   }
-  while (ptr < end) {
+  while (HAS_CHAR(enc, ptr, end)) {
     switch (BYTE_TYPE(enc, ptr)) {
     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
     case BT_GT: case BT_RPAR: case BT_COMMA:
@@ -1154,8 +1147,7 @@
       ptr += MINBPC(enc);
       switch (tok) {
       case XML_TOK_NAME:
-        if (ptr == end)
-          return XML_TOK_PARTIAL;
+        REQUIRE_CHAR(enc, ptr, end);
         tok = XML_TOK_PREFIXED_NAME;
         switch (BYTE_TYPE(enc, ptr)) {
         CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
@@ -1206,8 +1198,10 @@
   const char *start;
   if (ptr >= end)
     return XML_TOK_NONE;
+  else if (! HAS_CHAR(enc, ptr, end))
+    return XML_TOK_PARTIAL;
   start = ptr;
-  while (ptr < end) {
+  while (HAS_CHAR(enc, ptr, end)) {
     switch (BYTE_TYPE(enc, ptr)) {
 #define LEAD_CASE(n) \
     case BT_LEAD ## n: ptr += n; break;
@@ -1232,7 +1226,7 @@
     case BT_CR:
       if (ptr == start) {
         ptr += MINBPC(enc);
-        if (ptr == end)
+        if (! HAS_CHAR(enc, ptr, end))
           return XML_TOK_TRAILING_CR;
         if (BYTE_TYPE(enc, ptr) == BT_LF)
           ptr += MINBPC(enc);
@@ -1264,8 +1258,10 @@
   const char *start;
   if (ptr >= end)
     return XML_TOK_NONE;
+  else if (! HAS_CHAR(enc, ptr, end))
+    return XML_TOK_PARTIAL;
   start = ptr;
-  while (ptr < end) {
+  while (HAS_CHAR(enc, ptr, end)) {
     switch (BYTE_TYPE(enc, ptr)) {
 #define LEAD_CASE(n) \
     case BT_LEAD ## n: ptr += n; break;
@@ -1294,7 +1290,7 @@
     case BT_CR:
       if (ptr == start) {
         ptr += MINBPC(enc);
-        if (ptr == end)
+        if (! HAS_CHAR(enc, ptr, end))
           return XML_TOK_TRAILING_CR;
         if (BYTE_TYPE(enc, ptr) == BT_LF)
           ptr += MINBPC(enc);
@@ -1326,15 +1322,15 @@
       end = ptr + n;
     }
   }
-  while (ptr < end) {
+  while (HAS_CHAR(enc, ptr, end)) {
     switch (BYTE_TYPE(enc, ptr)) {
     INVALID_CASES(ptr, nextTokPtr)
     case BT_LT:
-      if ((ptr += MINBPC(enc)) == end)
-        return XML_TOK_PARTIAL;
+      ptr += MINBPC(enc);
+      REQUIRE_CHAR(enc, ptr, end);
       if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
-        if ((ptr += MINBPC(enc)) == end)
-          return XML_TOK_PARTIAL;
+        ptr += MINBPC(enc);
+        REQUIRE_CHAR(enc, ptr, end);
         if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
           ++level;
           ptr += MINBPC(enc);
@@ -1342,11 +1338,11 @@
       }
       break;
     case BT_RSQB:
-      if ((ptr += MINBPC(enc)) == end)
-        return XML_TOK_PARTIAL;
+      ptr += MINBPC(enc);
+      REQUIRE_CHAR(enc, ptr, end);
       if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
-        if ((ptr += MINBPC(enc)) == end)
-          return XML_TOK_PARTIAL;
+        ptr += MINBPC(enc);
+        REQUIRE_CHAR(enc, ptr, end);
         if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
           ptr += MINBPC(enc);
           if (level == 0) {
@@ -1373,7 +1369,7 @@
 {
   ptr += MINBPC(enc);
   end -= MINBPC(enc);
-  for (; ptr < end; ptr += MINBPC(enc)) {
+  for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
     switch (BYTE_TYPE(enc, ptr)) {
     case BT_DIGIT:
     case BT_HEX:
@@ -1521,7 +1517,7 @@
 }
 
 static int PTRFASTCALL
-PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
+PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr)
 {
   int result = 0;
   /* skip &# */
@@ -1565,7 +1561,7 @@
 }
 
 static int PTRCALL
-PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
+PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr,
                              const char *end)
 {
   switch ((end - ptr)/MINBPC(enc)) {
@@ -1683,11 +1679,11 @@
 }
 
 static int PTRCALL
-PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
+PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1,
                          const char *end1, const char *ptr2)
 {
   for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
-    if (ptr1 == end1)
+    if (end1 - ptr1 < MINBPC(enc))
       return 0;
     if (!CHAR_MATCHES(enc, ptr1, *ptr2))
       return 0;
@@ -1744,7 +1740,7 @@
                        const char *end,
                        POSITION *pos)
 {
-  while (ptr < end) {
+  while (HAS_CHAR(enc, ptr, end)) {
     switch (BYTE_TYPE(enc, ptr)) {
 #define LEAD_CASE(n) \
     case BT_LEAD ## n: \
@@ -1760,7 +1756,7 @@
     case BT_CR:
       pos->lineNumber++;
       ptr += MINBPC(enc);
-      if (ptr < end && BYTE_TYPE(enc, ptr) == BT_LF)
+      if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
         ptr += MINBPC(enc);
       pos->columnNumber = (XML_Size)-1;
       break;

diff --git a/tests/chardata.c b/tests/chardata.c
index 02243e0..012499b 100644
--- a/tests/chardata.c
+++ b/tests/chardata.c

@@ -47,7 +47,7 @@
     if ((len + storage->count) > maxchars) {
         len = (maxchars - storage->count);
     }
-    if (len + storage->count < sizeof(storage->data)) {
+    if (len + storage->count < (int)sizeof(storage->data)) {
         memcpy(storage->data + storage->count, s, len);
         storage->count += len;
     }
@@ -68,7 +68,7 @@
     if ((len + storage->count) > maxchars) {
         len = (maxchars - storage->count);
     }
-    if (len + storage->count < sizeof(storage->data)) {
+    if (len + storage->count < (int)sizeof(storage->data)) {
         memcpy(storage->data + storage->count, s,
                len * sizeof(storage->data[0]));
         storage->count += len;

diff --git a/tests/minicheck.c b/tests/minicheck.c
index d2f4295..5a1f5ed 100644
--- a/tests/minicheck.c
+++ b/tests/minicheck.c

@@ -10,10 +10,11 @@
 #include <setjmp.h>
 #include <assert.h>
 
+#include "internal.h"  /* for UNUSED_P only */
 #include "minicheck.h"
 
 Suite *
-suite_create(char *name)
+suite_create(const char *name)
 {
     Suite *suite = (Suite *) calloc(1, sizeof(Suite));
     if (suite != NULL) {
@@ -23,7 +24,7 @@
 }
 
 TCase *
-tcase_create(char *name)
+tcase_create(const char *name)
 {
     TCase *tc = (TCase *) calloc(1, sizeof(TCase));
     if (tc != NULL) {
@@ -156,7 +157,7 @@
 }
 
 void
-_fail_unless(int condition, const char *file, int line, char *msg)
+_fail_unless(int UNUSED_P(condition), const char *UNUSED_P(file), int UNUSED_P(line), const char *msg)
 {
     /* Always print the error message so it isn't lost.  In this case,
        we have a failure, so there's no reason to be quiet about what

diff --git a/tests/minicheck.h b/tests/minicheck.h
index c917c02..9b06f51 100644
--- a/tests/minicheck.h
+++ b/tests/minicheck.h

@@ -26,6 +26,11 @@
 #define __func__ __FUNCTION__
 #endif
 
+/* ISO C90 does not support '__func__' predefined identifier */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ < 199901)
+# define __func__ "(unknown)"
+#endif
+
 #define START_TEST(testname) static void testname(void) { \
     _check_set_test_info(__func__, __FILE__, __LINE__);   \
     {
@@ -48,12 +53,12 @@
 };
 
 struct Suite {
-    char *name;
+    const char *name;
     TCase *tests;
 };
 
 struct TCase {
-    char *name;
+    const char *name;
     tcase_setup_function setup;
     tcase_teardown_function teardown;
     tcase_test_function *tests;
@@ -72,9 +77,9 @@
  * Prototypes for the actual implementation.
  */
 
-void _fail_unless(int condition, const char *file, int line, char *msg);
-Suite *suite_create(char *name);
-TCase *tcase_create(char *name);
+void _fail_unless(int condition, const char *file, int line, const char *msg);
+Suite *suite_create(const char *name);
+TCase *tcase_create(const char *name);
 void suite_add_tcase(Suite *suite, TCase *tc);
 void tcase_add_checked_fixture(TCase *,
                                tcase_setup_function,

diff --git a/tests/runtests.c b/tests/runtests.c
index 614d6b2..c0cdea9 100644
--- a/tests/runtests.c
+++ b/tests/runtests.c

@@ -13,9 +13,14 @@
 #include <stdio.h>
 #include <string.h>
 #include <stdint.h>
+#include <stddef.h>  /* ptrdiff_t */
+#ifndef __cplusplus
+# include <stdbool.h>
+#endif
 
 #include "expat.h"
 #include "chardata.h"
+#include "internal.h"  /* for UNUSED_P only */
 #include "minicheck.h"
 
 #if defined(__amigaos__) && defined(__USE_INLINE__)
@@ -66,13 +71,34 @@
     _fail_unless(0, file, line, buffer);
 }
 
+static enum XML_Status
+_XML_Parse_SINGLE_BYTES(XML_Parser parser, const char *s, int len, int isFinal)
+{
+    enum XML_Status res = XML_STATUS_ERROR;
+    int offset = 0;
+
+    if (len == 0) {
+        return XML_Parse(parser, s, len, isFinal);
+    }
+
+    for (; offset < len; offset++) {
+        const int innerIsFinal = (offset == len - 1) && isFinal;
+        const char c = s[offset]; /* to help out-of-bounds detection */
+        res = XML_Parse(parser, &c, sizeof(char), innerIsFinal);
+        if (res != XML_STATUS_OK) {
+            return res;
+        }
+    }
+    return res;
+}
+
 #define xml_failure(parser) _xml_failure((parser), __FILE__, __LINE__)
 
 static void
-_expect_failure(char *text, enum XML_Error errorCode, char *errorMessage,
-                char *file, int lineno)
+_expect_failure(const char *text, enum XML_Error errorCode, const char *errorMessage,
+                const char *file, int lineno)
 {
-    if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_OK)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_OK)
         /* Hackish use of _fail_unless() macro, but let's us report
            the right filename and line number. */
         _fail_unless(0, file, lineno, errorMessage);
@@ -89,63 +115,63 @@
 */
 
 static void XMLCALL
-dummy_start_doctype_handler(void           *userData,
-                            const XML_Char *doctypeName,
-                            const XML_Char *sysid,
-                            const XML_Char *pubid,
-                            int            has_internal_subset)
+dummy_start_doctype_handler(void           *UNUSED_P(userData),
+                            const XML_Char *UNUSED_P(doctypeName),
+                            const XML_Char *UNUSED_P(sysid),
+                            const XML_Char *UNUSED_P(pubid),
+                            int            UNUSED_P(has_internal_subset))
 {}
 
 static void XMLCALL
-dummy_end_doctype_handler(void *userData)
+dummy_end_doctype_handler(void *UNUSED_P(userData))
 {}
 
 static void XMLCALL
-dummy_entity_decl_handler(void           *userData,
-                          const XML_Char *entityName,
-                          int            is_parameter_entity,
-                          const XML_Char *value,
-                          int            value_length,
-                          const XML_Char *base,
-                          const XML_Char *systemId,
-                          const XML_Char *publicId,
-                          const XML_Char *notationName)
+dummy_entity_decl_handler(void           *UNUSED_P(userData),
+                          const XML_Char *UNUSED_P(entityName),
+                          int            UNUSED_P(is_parameter_entity),
+                          const XML_Char *UNUSED_P(value),
+                          int            UNUSED_P(value_length),
+                          const XML_Char *UNUSED_P(base),
+                          const XML_Char *UNUSED_P(systemId),
+                          const XML_Char *UNUSED_P(publicId),
+                          const XML_Char *UNUSED_P(notationName))
 {}
 
 static void XMLCALL
-dummy_notation_decl_handler(void *userData,
-                            const XML_Char *notationName,
-                            const XML_Char *base,
-                            const XML_Char *systemId,
-                            const XML_Char *publicId)
+dummy_notation_decl_handler(void *UNUSED_P(userData),
+                            const XML_Char *UNUSED_P(notationName),
+                            const XML_Char *UNUSED_P(base),
+                            const XML_Char *UNUSED_P(systemId),
+                            const XML_Char *UNUSED_P(publicId))
 {}
 
 static void XMLCALL
-dummy_element_decl_handler(void *userData,
-                           const XML_Char *name,
-                           XML_Content *model)
+dummy_element_decl_handler(void *UNUSED_P(userData),
+                           const XML_Char *UNUSED_P(name),
+                           XML_Content *UNUSED_P(model))
 {}
 
 static void XMLCALL
-dummy_attlist_decl_handler(void           *userData,
-                           const XML_Char *elname,
-                           const XML_Char *attname,
-                           const XML_Char *att_type,
-                           const XML_Char *dflt,
-                           int            isrequired)
+dummy_attlist_decl_handler(void           *UNUSED_P(userData),
+                           const XML_Char *UNUSED_P(elname),
+                           const XML_Char *UNUSED_P(attname),
+                           const XML_Char *UNUSED_P(att_type),
+                           const XML_Char *UNUSED_P(dflt),
+                           int            UNUSED_P(isrequired))
 {}
 
 static void XMLCALL
-dummy_comment_handler(void *userData, const XML_Char *data)
+dummy_comment_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(data))
 {}
 
 static void XMLCALL
-dummy_pi_handler(void *userData, const XML_Char *target, const XML_Char *data)
+dummy_pi_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(target), const XML_Char *UNUSED_P(data))
 {}
 
 static void XMLCALL
-dummy_start_element(void *userData,
-                    const XML_Char *name, const XML_Char **atts)
+dummy_start_element(void *UNUSED_P(userData),
+                    const XML_Char *UNUSED_P(name), const XML_Char **UNUSED_P(atts))
 {}
 
 
@@ -158,7 +184,7 @@
     char text[] = "<doc>\0</doc>";
 
     /* test that a NUL byte (in US-ASCII data) is an error */
-    if (XML_Parse(parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_OK)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_OK)
         fail("Parser did not report error on NUL-byte.");
     if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
         xml_failure(parser);
@@ -178,9 +204,9 @@
 START_TEST(test_bom_utf8)
 {
     /* This test is really just making sure we don't core on a UTF-8 BOM. */
-    char *text = "\357\273\277<e/>";
+    const char *text = "\357\273\277<e/>";
 
-    if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
         xml_failure(parser);
 }
 END_TEST
@@ -189,7 +215,7 @@
 {
     char text[] = "\376\377\0<\0e\0/\0>";
 
-    if (XML_Parse(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
         xml_failure(parser);
 }
 END_TEST
@@ -198,7 +224,7 @@
 {
     char text[] = "\377\376<\0e\0/\0>\0";
 
-    if (XML_Parse(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
         xml_failure(parser);
 }
 END_TEST
@@ -210,7 +236,7 @@
 }
 
 static void XMLCALL
-accumulate_attribute(void *userData, const XML_Char *name,
+accumulate_attribute(void *userData, const XML_Char *UNUSED_P(name),
                      const XML_Char **atts)
 {
     CharData *storage = (CharData *)userData;
@@ -222,7 +248,7 @@
 
 
 static void
-_run_character_check(XML_Char *text, XML_Char *expected,
+_run_character_check(const XML_Char *text, const XML_Char *expected,
                      const char *file, int line)
 {
     CharData storage;
@@ -230,7 +256,7 @@
     CharData_Init(&storage);
     XML_SetUserData(parser, &storage);
     XML_SetCharacterDataHandler(parser, accumulate_characters);
-    if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
         _xml_failure(parser, file, line);
     CharData_CheckXMLChars(&storage, expected);
 }
@@ -239,7 +265,7 @@
         _run_character_check(text, expected, __FILE__, __LINE__)
 
 static void
-_run_attribute_check(XML_Char *text, XML_Char *expected,
+_run_attribute_check(const XML_Char *text, const XML_Char *expected,
                      const char *file, int line)
 {
     CharData storage;
@@ -247,7 +273,7 @@
     CharData_Init(&storage);
     XML_SetUserData(parser, &storage);
     XML_SetStartElementHandler(parser, accumulate_attribute);
-    if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
         _xml_failure(parser, file, line);
     CharData_CheckXMLChars(&storage, expected);
 }
@@ -258,7 +284,7 @@
 /* Regression test for SF bug #491986. */
 START_TEST(test_danish_latin1)
 {
-    char *text =
+    const char *text =
         "<?xml version='1.0' encoding='iso-8859-1'?>\n"
         "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
     run_character_check(text,
@@ -270,7 +296,7 @@
 /* Regression test for SF bug #514281. */
 START_TEST(test_french_charref_hexidecimal)
 {
-    char *text =
+    const char *text =
         "<?xml version='1.0' encoding='iso-8859-1'?>\n"
         "<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
     run_character_check(text,
@@ -280,7 +306,7 @@
 
 START_TEST(test_french_charref_decimal)
 {
-    char *text =
+    const char *text =
         "<?xml version='1.0' encoding='iso-8859-1'?>\n"
         "<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
     run_character_check(text,
@@ -290,7 +316,7 @@
 
 START_TEST(test_french_latin1)
 {
-    char *text =
+    const char *text =
         "<?xml version='1.0' encoding='iso-8859-1'?>\n"
         "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
     run_character_check(text,
@@ -300,7 +326,7 @@
 
 START_TEST(test_french_utf8)
 {
-    char *text =
+    const char *text =
         "<?xml version='1.0' encoding='utf-8'?>\n"
         "<doc>\xC3\xA9</doc>";
     run_character_check(text, "\xC3\xA9");
@@ -314,7 +340,7 @@
 */
 START_TEST(test_utf8_false_rejection)
 {
-    char *text = "<doc>\xEF\xBA\xBF</doc>";
+    const char *text = "<doc>\xEF\xBA\xBF</doc>";
     run_character_check(text, "\xEF\xBA\xBF");
 }
 END_TEST
@@ -331,7 +357,7 @@
 
     for (i = 128; i <= 255; ++i) {
         sprintf(text, "<e>%ccd</e>", i);
-        if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_OK) {
+        if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_OK) {
             sprintf(text,
                     "expected token error for '%c' (ordinal %d) in UTF-8 text",
                     i, i);
@@ -345,6 +371,68 @@
 }
 END_TEST
 
+
+/* Examples, not masks: */
+#define UTF8_LEAD_1  "\x7f"  /* 0b01111111 */
+#define UTF8_LEAD_2  "\xdf"  /* 0b11011111 */
+#define UTF8_LEAD_3  "\xef"  /* 0b11101111 */
+#define UTF8_LEAD_4  "\xf7"  /* 0b11110111 */
+#define UTF8_FOLLOW  "\xbf"  /* 0b10111111 */
+
+START_TEST(test_utf8_auto_align)
+{
+    struct TestCase {
+        ptrdiff_t expectedMovementInChars;
+        const char * input;
+    };
+
+    struct TestCase cases[] = {
+        {00, ""},
+
+        {00, UTF8_LEAD_1},
+
+        {-1, UTF8_LEAD_2},
+        {00, UTF8_LEAD_2 UTF8_FOLLOW},
+
+        {-1, UTF8_LEAD_3},
+        {-2, UTF8_LEAD_3 UTF8_FOLLOW},
+        {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
+
+        {-1, UTF8_LEAD_4},
+        {-2, UTF8_LEAD_4 UTF8_FOLLOW},
+        {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
+        {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
+    };
+
+    size_t i = 0;
+    bool success = true;
+    for (; i < sizeof(cases) / sizeof(*cases); i++) {
+        const char * fromLim = cases[i].input + strlen(cases[i].input);
+        const char * const fromLimInitially = fromLim;
+        ptrdiff_t actualMovementInChars;
+
+        align_limit_to_full_utf8_characters(cases[i].input, &fromLim);
+
+        actualMovementInChars = (fromLim - fromLimInitially);
+        if (actualMovementInChars != cases[i].expectedMovementInChars) {
+            size_t j = 0;
+            success = false;
+            printf("[-] UTF-8 case %2lu: Expected movement by %2ld chars"
+                    ", actually moved by %2ld chars: \"",
+                    i + 1, cases[i].expectedMovementInChars, actualMovementInChars);
+            for (; j < strlen(cases[i].input); j++) {
+                printf("\\x%02x", (unsigned char)cases[i].input[j]);
+            }
+            printf("\"\n");
+        }
+    }
+
+    if (! success) {
+        fail("UTF-8 auto-alignment is not bullet-proof\n");
+    }
+}
+END_TEST
+
 START_TEST(test_utf16)
 {
     /* <?xml version="1.0" encoding="UTF-16"?>
@@ -358,7 +446,7 @@
         "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'"
         "\000>\000s\000o\000m\000e\000 \000t\000e\000x\000t\000<\000/"
         "\000d\000o\000c\000>";
-    if (XML_Parse(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
         xml_failure(parser);
 }
 END_TEST
@@ -373,12 +461,12 @@
 
     if (first_chunk_bytes >= sizeof(text) - 1)
         fail("bad value of first_chunk_bytes");
-    if (  XML_Parse(parser, text, first_chunk_bytes, XML_FALSE)
+    if (  _XML_Parse_SINGLE_BYTES(parser, text, first_chunk_bytes, XML_FALSE)
           == XML_STATUS_ERROR)
         xml_failure(parser);
     else {
         enum XML_Status rc;
-        rc = XML_Parse(parser, text + first_chunk_bytes,
+        rc = _XML_Parse_SINGLE_BYTES(parser, text + first_chunk_bytes,
                        sizeof(text) - first_chunk_bytes - 1, XML_TRUE);
         if (rc == XML_STATUS_ERROR)
             xml_failure(parser);
@@ -389,11 +477,11 @@
 /* Regression test for SF bug #481609, #774028. */
 START_TEST(test_latin1_umlauts)
 {
-    char *text =
+    const char *text =
         "<?xml version='1.0' encoding='iso-8859-1'?>\n"
         "<e a='\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; >'\n"
         "  >\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; ></e>";
-    char *utf8 =
+    const char *utf8 =
         "\xC3\xA4 \xC3\xB6 \xC3\xBC "
         "\xC3\xA4 \xC3\xB6 \xC3\xBC "
         "\xC3\xA4 \xC3\xB6 \xC3\xBC >";
@@ -406,13 +494,13 @@
 /* Regression test #1 for SF bug #653180. */
 START_TEST(test_line_number_after_parse)
 {  
-    char *text =
+    const char *text =
         "<tag>\n"
         "\n"
         "\n</tag>";
     XML_Size lineno;
 
-    if (XML_Parse(parser, text, strlen(text), XML_FALSE) == XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) == XML_STATUS_ERROR)
         xml_failure(parser);
     lineno = XML_GetCurrentLineNumber(parser);
     if (lineno != 4) {
@@ -427,10 +515,10 @@
 /* Regression test #2 for SF bug #653180. */
 START_TEST(test_column_number_after_parse)
 {
-    char *text = "<tag></tag>";
+    const char *text = "<tag></tag>";
     XML_Size colno;
 
-    if (XML_Parse(parser, text, strlen(text), XML_FALSE) == XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) == XML_STATUS_ERROR)
         xml_failure(parser);
     colno = XML_GetCurrentColumnNumber(parser);
     if (colno != 11) {
@@ -444,7 +532,7 @@
 
 static void XMLCALL
 start_element_event_handler2(void *userData, const XML_Char *name,
-			     const XML_Char **attr)
+			     const XML_Char **UNUSED_P(attr))
 {
     CharData *storage = (CharData *) userData;
     char buffer[100];
@@ -474,7 +562,7 @@
 /* Regression test #3 for SF bug #653180. */
 START_TEST(test_line_and_column_numbers_inside_handlers)
 {
-    char *text =
+    const char *text =
         "<a>\n"        /* Unix end-of-line */
         "  <b>\r\n"    /* Windows end-of-line */
         "    <c/>\r"   /* Mac OS end-of-line */
@@ -483,7 +571,7 @@
         "    <f/>\n"
         "  </d>\n"
         "</a>";
-    char *expected =
+    const char *expected =
         "<a> at col:0 line:1\n"
         "<b> at col:2 line:2\n"
         "<c> at col:4 line:3\n"
@@ -500,7 +588,7 @@
     XML_SetUserData(parser, &storage);
     XML_SetStartElementHandler(parser, start_element_event_handler2);
     XML_SetEndElementHandler(parser, end_element_event_handler2);
-    if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
         xml_failure(parser);
 
     CharData_CheckString(&storage, expected); 
@@ -510,12 +598,12 @@
 /* Regression test #4 for SF bug #653180. */
 START_TEST(test_line_number_after_error)
 {
-    char *text =
+    const char *text =
         "<a>\n"
         "  <b>\n"
         "  </a>";  /* missing </b> */
     XML_Size lineno;
-    if (XML_Parse(parser, text, strlen(text), XML_FALSE) != XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) != XML_STATUS_ERROR)
         fail("Expected a parse error");
 
     lineno = XML_GetCurrentLineNumber(parser);
@@ -530,12 +618,12 @@
 /* Regression test #5 for SF bug #653180. */
 START_TEST(test_column_number_after_error)
 {
-    char *text =
+    const char *text =
         "<a>\n"
         "  <b>\n"
         "  </a>";  /* missing </b> */
     XML_Size colno;
-    if (XML_Parse(parser, text, strlen(text), XML_FALSE) != XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) != XML_STATUS_ERROR)
         fail("Expected a parse error");
 
     colno = XML_GetCurrentColumnNumber(parser);
@@ -556,7 +644,7 @@
        really cheesy approach to building the input buffer, because
        this avoids writing bugs in buffer-filling code.
     */
-    char *text =
+    const char *text =
         "<e>"
         /* 64 chars */
         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
@@ -578,7 +666,7 @@
         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
         "</e>";
-    if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
         xml_failure(parser);
 }
 END_TEST
@@ -598,14 +686,14 @@
 
 START_TEST(test_end_element_events)
 {
-    char *text = "<a><b><c/></b><d><f/></d></a>";
-    char *expected = "/c/b/f/d/a";
+    const char *text = "<a><b><c/></b><d><f/></d></a>";
+    const char *expected = "/c/b/f/d/a";
     CharData storage;
 
     CharData_Init(&storage);
     XML_SetUserData(parser, &storage);
     XML_SetEndElementHandler(parser, end_element_event_handler);
-    if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
         xml_failure(parser);
     CharData_CheckString(&storage, expected);
 }
@@ -679,8 +767,8 @@
 }
 
 static void XMLCALL
-check_attr_contains_normalized_whitespace(void *userData,
-                                          const XML_Char *name,
+check_attr_contains_normalized_whitespace(void *UNUSED_P(userData),
+                                          const XML_Char *UNUSED_P(name),
                                           const XML_Char **atts)
 {
     int i;
@@ -702,7 +790,7 @@
 
 START_TEST(test_attr_whitespace_normalization)
 {
-    char *text =
+    const char *text =
         "<!DOCTYPE doc [\n"
         "  <!ATTLIST doc\n"
         "            attr NMTOKENS #REQUIRED\n"
@@ -718,7 +806,7 @@
 
     XML_SetStartElementHandler(parser,
                                check_attr_contains_normalized_whitespace);
-    if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
         xml_failure(parser);
 }
 END_TEST
@@ -740,7 +828,7 @@
 
 /* Regression test for SF bug #584832. */
 static int XMLCALL
-UnknownEncodingHandler(void *data,const XML_Char *encoding,XML_Encoding *info)
+UnknownEncodingHandler(void *UNUSED_P(data),const XML_Char *encoding,XML_Encoding *info)
 {
     if (strcmp(encoding,"unsupported-encoding") == 0) {
         int i;
@@ -756,13 +844,13 @@
 
 START_TEST(test_unknown_encoding_internal_entity)
 {
-    char *text =
+    const char *text =
         "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
         "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
         "<test a='&foo;'/>";
 
     XML_SetUnknownEncodingHandler(parser, UnknownEncodingHandler, NULL);
-    if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
         xml_failure(parser);
 }
 END_TEST
@@ -771,14 +859,14 @@
 static int XMLCALL
 external_entity_loader_set_encoding(XML_Parser parser,
                                     const XML_Char *context,
-                                    const XML_Char *base,
-                                    const XML_Char *systemId,
-                                    const XML_Char *publicId)
+                                    const XML_Char *UNUSED_P(base),
+                                    const XML_Char *UNUSED_P(systemId),
+                                    const XML_Char *UNUSED_P(publicId))
 {
     /* This text says it's an unsupported encoding, but it's really
        UTF-8, which we tell Expat using XML_SetEncoding().
     */
-    char *text =
+    const char *text =
         "<?xml encoding='iso-8859-3'?>"
         "\xC3\xA9";
     XML_Parser extparser;
@@ -788,7 +876,7 @@
         fail("Could not create external entity parser.");
     if (!XML_SetEncoding(extparser, "utf-8"))
         fail("XML_SetEncoding() ignored for external entity");
-    if (  XML_Parse(extparser, text, strlen(text), XML_TRUE)
+    if (  _XML_Parse_SINGLE_BYTES(extparser, text, strlen(text), XML_TRUE)
           == XML_STATUS_ERROR) {
         xml_failure(parser);
         return 0;
@@ -798,7 +886,7 @@
 
 START_TEST(test_ext_entity_set_encoding)
 {
-    char *text =
+    const char *text =
         "<!DOCTYPE doc [\n"
         "  <!ENTITY en SYSTEM 'http://xml.libexpat.org/dummy.ent'>\n"
         "]>\n"
@@ -814,11 +902,11 @@
    read an external subset.  This was fixed in Expat 1.95.5.
 */
 START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
-    char *text =
+    const char *text =
         "<!DOCTYPE doc SYSTEM 'foo'>\n"
         "<doc>&entity;</doc>";
 
-    if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
         xml_failure(parser);
 }
 END_TEST
@@ -837,7 +925,7 @@
    read an external subset, but have been declared standalone.
 */
 START_TEST(test_wfc_undeclared_entity_standalone) {
-    char *text =
+    const char *text =
         "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
         "<!DOCTYPE doc SYSTEM 'foo'>\n"
         "<doc>&entity;</doc>";
@@ -851,9 +939,9 @@
 static int XMLCALL
 external_entity_loader(XML_Parser parser,
                        const XML_Char *context,
-                       const XML_Char *base,
-                       const XML_Char *systemId,
-                       const XML_Char *publicId)
+                       const XML_Char *UNUSED_P(base),
+                       const XML_Char *UNUSED_P(systemId),
+                       const XML_Char *UNUSED_P(publicId))
 {
     char *text = (char *)XML_GetUserData(parser);
     XML_Parser extparser;
@@ -861,7 +949,7 @@
     extparser = XML_ExternalEntityParserCreate(parser, context, NULL);
     if (extparser == NULL)
         fail("Could not create external entity parser.");
-    if (  XML_Parse(extparser, text, strlen(text), XML_TRUE)
+    if (  _XML_Parse_SINGLE_BYTES(extparser, text, strlen(text), XML_TRUE)
           == XML_STATUS_ERROR) {
         xml_failure(parser);
         return XML_STATUS_ERROR;
@@ -873,11 +961,11 @@
    an external subset, and standalone is true.
 */
 START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
-    char *text =
+    const char *text =
         "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
         "<!DOCTYPE doc SYSTEM 'foo'>\n"
         "<doc>&entity;</doc>";
-    char *foo_text =
+    char foo_text[] =
         "<!ELEMENT doc (#PCDATA)*>";
 
     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
@@ -893,24 +981,24 @@
    an external subset, and standalone is false.
 */
 START_TEST(test_wfc_undeclared_entity_with_external_subset) {
-    char *text =
+    const char *text =
         "<?xml version='1.0' encoding='us-ascii'?>\n"
         "<!DOCTYPE doc SYSTEM 'foo'>\n"
         "<doc>&entity;</doc>";
-    char *foo_text =
+    char foo_text[] =
         "<!ELEMENT doc (#PCDATA)*>";
 
     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
     XML_SetUserData(parser, foo_text);
     XML_SetExternalEntityRefHandler(parser, external_entity_loader);
-    if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
         xml_failure(parser);
 }
 END_TEST
 
 START_TEST(test_wfc_no_recursive_entity_refs)
 {
-    char *text =
+    const char *text =
         "<!DOCTYPE doc [\n"
         "  <!ENTITY entity '&#38;entity;'>\n"
         "]>\n"
@@ -925,7 +1013,7 @@
 /* Regression test for SF bug #483514. */
 START_TEST(test_dtd_default_handling)
 {
-    char *text =
+    const char *text =
         "<!DOCTYPE doc [\n"
         "<!ENTITY e SYSTEM 'http://xml.libexpat.org/e'>\n"
         "<!NOTATION n SYSTEM 'http://xml.libexpat.org/n'>\n"
@@ -957,12 +1045,12 @@
 */
 START_TEST(test_empty_ns_without_namespaces)
 {
-    char *text =
+    const char *text =
         "<doc xmlns:prefix='http://www.example.com/'>\n"
         "  <e xmlns:prefix=''/>\n"
         "</doc>";
 
-    if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
         xml_failure(parser);
 }
 END_TEST
@@ -973,19 +1061,19 @@
 */
 START_TEST(test_ns_in_attribute_default_without_namespaces)
 {
-    char *text =
+    const char *text =
         "<!DOCTYPE e:element [\n"
         "  <!ATTLIST e:element\n"
         "    xmlns:e CDATA 'http://example.com/'>\n"
         "      ]>\n"
         "<e:element/>";
 
-    if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
         xml_failure(parser);
 }
 END_TEST
 
-static char *long_character_data_text =
+static const char *long_character_data_text =
     "<?xml version='1.0' encoding='iso-8859-1'?><s>"
     "012345678901234567890123456789012345678901234567890123456789"
     "012345678901234567890123456789012345678901234567890123456789"
@@ -1012,8 +1100,8 @@
 static XML_Bool resumable = XML_FALSE;
 
 static void
-clearing_aborting_character_handler(void *userData,
-                                    const XML_Char *s, int len)
+clearing_aborting_character_handler(void *UNUSED_P(userData),
+                                    const XML_Char *UNUSED_P(s), int UNUSED_P(len))
 {
     XML_StopParser(parser, resumable);
     XML_SetCharacterDataHandler(parser, NULL);
@@ -1029,11 +1117,11 @@
        handler must stop the parser and clear the character data
        handler.
     */
-    char *text = long_character_data_text;
+    const char *text = long_character_data_text;
 
     XML_SetCharacterDataHandler(parser, clearing_aborting_character_handler);
     resumable = XML_FALSE;
-    if (XML_Parse(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR)
         xml_failure(parser);
     if (XML_GetErrorCode(parser) != XML_ERROR_ABORTED)
         xml_failure(parser);
@@ -1050,17 +1138,112 @@
        handler must stop the parser and clear the character data
        handler.
     */
-    char *text = long_character_data_text;
+    const char *text = long_character_data_text;
 
     XML_SetCharacterDataHandler(parser, clearing_aborting_character_handler);
     resumable = XML_TRUE;
-    if (XML_Parse(parser, text, strlen(text), XML_TRUE) != XML_STATUS_SUSPENDED)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_SUSPENDED)
         xml_failure(parser);
     if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
         xml_failure(parser);
 }
 END_TEST
 
+START_TEST(test_good_cdata_ascii)
+{
+    const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
+    const char *expected = "<greeting>Hello, world!</greeting>";
+
+    CharData storage;
+    CharData_Init(&storage);
+    XML_SetUserData(parser, &storage);
+    XML_SetCharacterDataHandler(parser, accumulate_characters);
+
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+        xml_failure(parser);
+    CharData_CheckXMLChars(&storage, expected);
+}
+END_TEST
+
+START_TEST(test_good_cdata_utf16)
+{
+    /* Test data is:
+     *   <?xml version='1.0' encoding='utf-16'?>
+     *   <a><![CDATA[hello]]></a>
+     */
+    const char text[] =
+            "\0<\0?\0x\0m\0l\0"
+                " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
+                " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0""1\0""6\0'"
+                "\0?\0>\0\n"
+            "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
+    const char *expected = "hello";
+
+    CharData storage;
+    CharData_Init(&storage);
+    XML_SetUserData(parser, &storage);
+    XML_SetCharacterDataHandler(parser, accumulate_characters);
+
+    if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
+        xml_failure(parser);
+    CharData_CheckXMLChars(&storage, expected);
+}
+END_TEST
+
+START_TEST(test_bad_cdata)
+{
+    struct CaseData {
+        const char *text;
+        enum XML_Error expectedError;
+    };
+
+    struct CaseData cases[] = {
+        {"<a><", XML_ERROR_UNCLOSED_TOKEN},
+        {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
+        {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
+        {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
+        {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
+        {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
+        {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
+        {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
+
+        {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
+        {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
+        {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
+
+        {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
+        {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
+        {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
+        {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
+        {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
+        {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
+        {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
+
+        {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
+        {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
+        {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}
+    };
+
+    size_t i = 0;
+    for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
+        const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
+                parser, cases[i].text, strlen(cases[i].text), XML_TRUE);
+        const enum XML_Error actualError = XML_GetErrorCode(parser);
+
+        assert(actualStatus == XML_STATUS_ERROR);
+
+        if (actualError != cases[i].expectedError) {
+            char message[100];
+            sprintf(message, "Expected error %d but got error %d for case %u: \"%s\"\n",
+                    cases[i].expectedError, actualError, (unsigned int)i + 1, cases[i].text);
+            fail(message);
+        }
+
+        XML_ParserReset(parser, NULL);
+    }
+}
+END_TEST
+
 
 /*
  * Namespaces tests.
@@ -1118,17 +1301,17 @@
 
 START_TEST(test_return_ns_triplet)
 {
-    char *text =
+    const char *text =
         "<foo:e xmlns:foo='http://expat.sf.net/' bar:a='12'\n"
         "       xmlns:bar='http://expat.sf.net/'></foo:e>";
-    char *elemstr[] = {
+    const char *elemstr[] = {
         "http://expat.sf.net/ e foo",
         "http://expat.sf.net/ a bar"
     };
     XML_SetReturnNSTriplet(parser, XML_TRUE);
     XML_SetUserData(parser, elemstr);
     XML_SetElementHandler(parser, triplet_start_checker, triplet_end_checker);
-    if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
         xml_failure(parser);
 }
 END_TEST
@@ -1158,14 +1341,14 @@
 }
 
 static void
-run_ns_tagname_overwrite_test(char *text, char *result)
+run_ns_tagname_overwrite_test(const char *text, const char *result)
 {
     CharData storage;
     CharData_Init(&storage);
     XML_SetUserData(parser, &storage);
     XML_SetElementHandler(parser,
                           overwrite_start_checker, overwrite_end_checker);
-    if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
         xml_failure(parser);
     CharData_CheckString(&storage, result);
 }
@@ -1173,12 +1356,12 @@
 /* Regression test for SF bug #566334. */
 START_TEST(test_ns_tagname_overwrite)
 {
-    char *text =
+    const char *text =
         "<n:e xmlns:n='http://xml.libexpat.org/'>\n"
         "  <n:f n:attr='foo'/>\n"
         "  <n:g n:attr2='bar'/>\n"
         "</n:e>";
-    char *result =
+    const char *result =
         "start http://xml.libexpat.org/ e\n"
         "start http://xml.libexpat.org/ f\n"
         "attribute http://xml.libexpat.org/ attr\n"
@@ -1194,12 +1377,12 @@
 /* Regression test for SF bug #566334. */
 START_TEST(test_ns_tagname_overwrite_triplet)
 {
-    char *text =
+    const char *text =
         "<n:e xmlns:n='http://xml.libexpat.org/'>\n"
         "  <n:f n:attr='foo'/>\n"
         "  <n:g n:attr2='bar'/>\n"
         "</n:e>";
-    char *result =
+    const char *result =
         "start http://xml.libexpat.org/ e n\n"
         "start http://xml.libexpat.org/ f n\n"
         "attribute http://xml.libexpat.org/ attr n\n"
@@ -1216,8 +1399,8 @@
 
 /* Regression test for SF bug #620343. */
 static void XMLCALL
-start_element_fail(void *userData,
-                   const XML_Char *name, const XML_Char **atts)
+start_element_fail(void *UNUSED_P(userData),
+                   const XML_Char *UNUSED_P(name), const XML_Char **UNUSED_P(atts))
 {
     /* We should never get here. */
     fail("should never reach start_element_fail()");
@@ -1225,8 +1408,8 @@
 
 static void XMLCALL
 start_ns_clearing_start_element(void *userData,
-                                const XML_Char *prefix,
-                                const XML_Char *uri)
+                                const XML_Char *UNUSED_P(prefix),
+                                const XML_Char *UNUSED_P(uri))
 {
     XML_SetStartElementHandler((XML_Parser) userData, NULL);
 }
@@ -1237,12 +1420,12 @@
        syntax doesn't cause the problematic path through Expat to be
        taken.
     */
-    char *text = "<e xmlns='http://xml.libexpat.org/'></e>";
+    const char *text = "<e xmlns='http://xml.libexpat.org/'></e>";
 
     XML_SetStartElementHandler(parser, start_element_fail);
     XML_SetStartNamespaceDeclHandler(parser, start_ns_clearing_start_element);
     XML_UseParserAsHandlerArg(parser);
-    if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
         xml_failure(parser);
 }
 END_TEST
@@ -1251,12 +1434,12 @@
 static int XMLCALL
 external_entity_handler(XML_Parser parser,
                         const XML_Char *context,
-                        const XML_Char *base,
-                        const XML_Char *systemId,
-                        const XML_Char *publicId) 
+                        const XML_Char *UNUSED_P(base),
+                        const XML_Char *UNUSED_P(systemId),
+                        const XML_Char *UNUSED_P(publicId))
 {
     intptr_t callno = 1 + (intptr_t)XML_GetUserData(parser);
-    char *text;
+    const char *text;
     XML_Parser p2;
 
     if (callno == 1)
@@ -1269,7 +1452,7 @@
 
     XML_SetUserData(parser, (void *) callno);
     p2 = XML_ExternalEntityParserCreate(parser, context, NULL);
-    if (XML_Parse(p2, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) {
+    if (_XML_Parse_SINGLE_BYTES(p2, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) {
         xml_failure(p2);
         return 0;
     }
@@ -1279,7 +1462,7 @@
 
 START_TEST(test_default_ns_from_ext_subset_and_ext_ge)
 {
-    char *text =
+    const char *text =
         "<?xml version='1.0'?>\n"
         "<!DOCTYPE doc SYSTEM 'http://xml.libexpat.org/doc.dtd' [\n"
         "  <!ENTITY en SYSTEM 'http://xml.libexpat.org/entity.ent'>\n"
@@ -1293,7 +1476,7 @@
     /* We actually need to set this handler to tickle this bug. */
     XML_SetStartElementHandler(parser, dummy_start_element);
     XML_SetUserData(parser, NULL);
-    if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
         xml_failure(parser);
 }
 END_TEST
@@ -1301,7 +1484,7 @@
 /* Regression test #1 for SF bug #673791. */
 START_TEST(test_ns_prefix_with_empty_uri_1)
 {
-    char *text =
+    const char *text =
         "<doc xmlns:prefix='http://xml.libexpat.org/'>\n"
         "  <e xmlns:prefix=''/>\n"
         "</doc>";
@@ -1316,7 +1499,7 @@
 /* Regression test #2 for SF bug #673791. */
 START_TEST(test_ns_prefix_with_empty_uri_2)
 {
-    char *text =
+    const char *text =
         "<?xml version='1.0'?>\n"
         "<docelem xmlns:pre=''/>";
 
@@ -1329,7 +1512,7 @@
 /* Regression test #3 for SF bug #673791. */
 START_TEST(test_ns_prefix_with_empty_uri_3)
 {
-    char *text =
+    const char *text =
         "<!DOCTYPE doc [\n"
         "  <!ELEMENT doc EMPTY>\n"
         "  <!ATTLIST doc\n"
@@ -1346,7 +1529,7 @@
 /* Regression test #4 for SF bug #673791. */
 START_TEST(test_ns_prefix_with_empty_uri_4)
 {
-    char *text =
+    const char *text =
         "<!DOCTYPE doc [\n"
         "  <!ELEMENT prefix:doc EMPTY>\n"
         "  <!ATTLIST prefix:doc\n"
@@ -1356,24 +1539,24 @@
     /* Packaged info expected by the end element handler;
        the weird structuring lets us re-use the triplet_end_checker()
        function also used for another test. */
-    char *elemstr[] = {
+    const char *elemstr[] = {
         "http://xml.libexpat.org/ doc prefix"
     };
     XML_SetReturnNSTriplet(parser, XML_TRUE);
     XML_SetUserData(parser, elemstr);
     XML_SetEndElementHandler(parser, triplet_end_checker);
-    if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
         xml_failure(parser);
 }
 END_TEST
 
 START_TEST(test_ns_default_with_empty_uri)
 {
-    char *text =
+    const char *text =
         "<doc xmlns='http://xml.libexpat.org/'>\n"
         "  <e xmlns=''/>\n"
         "</doc>";
-    if (XML_Parse(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
+    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
         xml_failure(parser);
 }
 END_TEST
@@ -1381,7 +1564,7 @@
 /* Regression test for SF bug #692964: two prefixes for one namespace. */
 START_TEST(test_ns_duplicate_attrs_diff_prefixes)
 {
-    char *text =
+    const char *text =
         "<doc xmlns:a='http://xml.libexpat.org/a'\n"
         "     xmlns:b='http://xml.libexpat.org/a'\n"
         "     a:a='v' b:a='v' />";
@@ -1394,7 +1577,7 @@
 /* Regression test for SF bug #695401: unbound prefix. */
 START_TEST(test_ns_unbound_prefix_on_attribute)
 {
-    char *text = "<doc a:attr=''/>";
+    const char *text = "<doc a:attr=''/>";
     expect_failure(text,
                    XML_ERROR_UNBOUND_PREFIX,
                    "did not report unbound prefix on attribute");
@@ -1404,7 +1587,7 @@
 /* Regression test for SF bug #695401: unbound prefix. */
 START_TEST(test_ns_unbound_prefix_on_element)
 {
-    char *text = "<a:doc/>";
+    const char *text = "<a:doc/>";
     expect_failure(text,
                    XML_ERROR_UNBOUND_PREFIX,
                    "did not report unbound prefix on element");
@@ -1426,6 +1609,7 @@
     tcase_add_test(tc_basic, test_bom_utf16_be);
     tcase_add_test(tc_basic, test_bom_utf16_le);
     tcase_add_test(tc_basic, test_illegal_utf8);
+    tcase_add_test(tc_basic, test_utf8_auto_align);
     tcase_add_test(tc_basic, test_utf16);
     tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
     tcase_add_test(tc_basic, test_latin1_umlauts);
@@ -1461,6 +1645,9 @@
     tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
     tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
     tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
+    tcase_add_test(tc_basic, test_good_cdata_ascii);
+    tcase_add_test(tc_basic, test_good_cdata_utf16);
+    tcase_add_test(tc_basic, test_bad_cdata);
 
     suite_add_tcase(s, tc_namespace);
     tcase_add_checked_fixture(tc_namespace,

diff --git a/tests/xmltest.sh b/tests/xmltest.sh
index 793a5cc..64a17ee 100755
--- a/tests/xmltest.sh
+++ b/tests/xmltest.sh

@@ -1,4 +1,4 @@
-#! /bin/sh
+#! /usr/bin/env bash
 
 #   EXPAT TEST SCRIPT FOR W3C XML TEST SUITE
 
@@ -20,12 +20,14 @@
 # produced by xmlwf conforms to an older definition of canonical XML
 # and does not generate notation declarations.
 
+shopt -s nullglob
+
 MYDIR="`dirname \"$0\"`"
 cd "$MYDIR"
 MYDIR="`pwd`"
 XMLWF="`dirname \"$MYDIR\"`/xmlwf/xmlwf"
 # XMLWF=/usr/local/bin/xmlwf
-TS="$MYDIR/XML-Test-Suite"
+TS="$MYDIR"
 # OUTPUT must terminate with the directory separator.
 OUTPUT="$TS/out/"
 # OUTPUT=/home/tmp/xml-testsuite-out/
@@ -100,7 +102,7 @@
       RunXmlwfWF "$xmlfile" "$xmldir/"
       UpdateStatus $?
   done
-  rm outfile
+  rm -f outfile
 done
 
 cd "$TS/xmlconf/oasis"
commit	470553711f98a9110beccab5c1648186f827404e	[log] [tgz]
author	Paul Duffin <paulduffin@google.com>	Wed Mar 01 13:43:33 2017 +0000
committer	android-build-merger <android-build-merger@google.com>	Wed Mar 01 13:43:33 2017 +0000
tree	f4cc4a019c9a6ddfbb5ef4873df40264cdb13d0b
parent	b05dbe320cc7cecb104c1b7ae38a57d5f3cecc89 [diff]
parent	d61777ec982f05a05e372d911704088e09f6c40f [diff]