/* gnulib/lib/striconveh.c - toolchain/make - Git at Google */

 /* Character set conversion with error handling.
    Copyright (C) 2001-2020 Free Software Foundation, Inc.
    Written by Bruno Haible and Simon Josefsson.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

 #include <config.h>

 /* Specification.  */
 #include "striconveh.h"

 #include <errno.h>
 #include <stdbool.h>
 #include <stdlib.h>
 #include <string.h>

 #if HAVE_ICONV
 # include <iconv.h>
 # include "unistr.h"
 #endif

 #include "c-strcase.h"
 #include "c-strcaseeq.h"

 #ifndef SIZE_MAX
 # define SIZE_MAX ((size_t) -1)
 #endif


 #if HAVE_ICONV

 /* Open the conversion descriptors for converting from FROM_CODESET to
    TO_CODESET and store them in *CDP.
    The caller must provide an iconveh_t, not just an iconv_t, because when a
    conversion error occurs, we may have to determine the Unicode representation
    of the inconvertible character.  Therefore, besides the direct descriptor
    (CDP->cd), a descriptor from FROM_CODESET to UTF-8 (CDP->cd1) and one from
    UTF-8 to TO_CODESET (CDP->cd2) are opened, unless the respective codeset is
    UTF-8 itself, in which case (iconv_t)(-1) is stored instead.
    A failure to open the direct descriptor is not an error: CDP->cd is then
    (iconv_t)(-1).
    Return 0 if successful, otherwise return -1 with errno set; in the latter
    case no descriptor is left open and *CDP is not modified.  */

 int
 iconveh_open (const char *to_codeset, const char *from_codeset, iconveh_t *cdp)
 {
   const iconv_t no_cd = (iconv_t)(-1);
   iconv_t direct_cd;
   iconv_t to_utf8_cd = no_cd;
   iconv_t from_utf8_cd = no_cd;
   bool need_to_utf8_cd;
   bool need_from_utf8_cd;
   int failure_errno;

   /* Avoid glibc-2.1 bug with EUC-KR.  */
 # if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
      && !defined _LIBICONV_VERSION
   if (c_strcasecmp (from_codeset, "EUC-KR") == 0
       || c_strcasecmp (to_codeset, "EUC-KR") == 0)
     {
       errno = EINVAL;
       return -1;
     }
 # endif

   /* The direct descriptor is optional; its failure is reported to the caller
      only through the value stored in CDP->cd.  */
   direct_cd = iconv_open (to_codeset, from_codeset);

   /* First half of the indirect route: FROM_CODESET -> UTF-8.  */
   need_to_utf8_cd =
     !STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0);
   if (need_to_utf8_cd)
     {
       to_utf8_cd = iconv_open ("UTF-8", from_codeset);
       if (to_utf8_cd == no_cd)
         goto fail;
     }

   /* Second half of the indirect route: UTF-8 -> TO_CODESET.  */
   need_from_utf8_cd =
     !STRCASEEQ (to_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0);
 # if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2) \
       && !defined __UCLIBC__) \
      || _LIBICONV_VERSION >= 0x0105
   if (need_from_utf8_cd
       && c_strcasecmp (to_codeset, "UTF-8//TRANSLIT") == 0)
     need_from_utf8_cd = false;
 # endif
   if (need_from_utf8_cd)
     {
       from_utf8_cd = iconv_open (to_codeset, "UTF-8");
       if (from_utf8_cd == no_cd)
         goto fail;
     }

   cdp->cd = direct_cd;
   cdp->cd1 = to_utf8_cd;
   cdp->cd2 = from_utf8_cd;
   return 0;

  fail:
   /* Release whatever was opened so far, but report the errno of the
      iconv_open call that failed, not that of an iconv_close call.  */
   failure_errno = errno;
   if (to_utf8_cd != no_cd)
     iconv_close (to_utf8_cd);
   if (direct_cd != no_cd)
     iconv_close (direct_cd);
   errno = failure_errno;
   return -1;
 }

 /* Close all conversion descriptors contained in *CD that are open, i.e. that
    are not (iconv_t)(-1), in the order cd2, cd1, cd.
    Every open descriptor is closed even if closing an earlier one failed.
    Return 0 if all iconv_close calls succeeded, otherwise return -1 with errno
    set by the first iconv_close call that failed.  */
 int
 iconveh_close (const iconveh_t *cd)
 {
   iconv_t handles[3];
   bool failed = false;
   int first_errno = 0;
   size_t i;

   handles[0] = cd->cd2;
   handles[1] = cd->cd1;
   handles[2] = cd->cd;

   for (i = 0; i < 3; i++)
     {
       if (handles[i] == (iconv_t)(-1))
         continue;
       if (iconv_close (handles[i]) < 0 && !failed)
         {
           /* Remember only the first failure; later iconv_close calls must
              not clobber its errno.  */
           failed = true;
           first_errno = errno;
         }
     }

   if (failed)
     {
       errno = first_errno;
       return -1;
     }
   return 0;
 }

 /* iconv_carefully is like iconv, except that it stops as soon as it encounters
    a conversion error, and it returns in *INCREMENTED a boolean telling whether
    it has incremented the input pointers past the error location.  */
 # if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
 /* Irix iconv() inserts a NUL byte if it cannot convert.
    NetBSD iconv() inserts a question mark if it cannot convert.
    Only GNU libiconv and GNU libc are known to prefer to fail rather
    than doing a lossy conversion.  */
 static size_t
 iconv_carefully (iconv_t cd,
                  const char **inbuf, size_t *inbytesleft,
                  char **outbuf, size_t *outbytesleft,
                  bool *incremented)
 {
   /* The input is fed to iconv() piece by piece, each piece being as short as
      iconv() accepts, so that the return value of every single call can be
      examined.
      Return value: 0 if everything was converted, or (size_t)(-1) with errno
      set.  A positive return value from iconv(), i.e. a lossy conversion, is
      turned into (size_t)(-1) with errno = EILSEQ.  */
   const char *const src_end = *inbuf + *inbytesleft;
   const char *src = *inbuf;
   char *dst = *outbuf;
   size_t dst_left = *outbytesleft;
   const char *piece_start;
   size_t rc;

   for (;;)
     {
       size_t piece_len = 1;

       piece_start = src;
       rc = (size_t)(-1);

       /* Lengthen the piece byte by byte as long as iconv() reports it as
          incomplete (EINVAL).  */
       while (src + piece_len <= src_end)
         {
           rc = iconv (cd,
                       (ICONV_CONST char **) &src, &piece_len,
                       &dst, &dst_left);
           if (rc != (size_t)(-1) || errno != EINVAL)
             break;
           /* iconv can eat up a shift sequence but give EINVAL while attempting
              to convert the first character.  E.g. libiconv does this.
              Count that as a successfully converted piece.  */
           if (src > piece_start)
             {
               rc = 0;
               break;
             }
           piece_len++;
         }

       if (rc != 0)
         break;

       /* Publish the output position only for pieces that were converted
          without any irreversible conversion.  */
       *outbuf = dst;
       *outbytesleft = dst_left;

       if (src >= src_end)
         break;
     }

   *inbuf = src;
   *inbytesleft = src_end - src;

   if (rc == (size_t)(-1) || rc == 0)
     {
       *incremented = false;
       return rc;
     }

   /* iconv() has already incremented the input pointer.  We cannot go back to
      a previous position, otherwise the state inside CD would become invalid,
      if FROM_CODESET is a stateful encoding.  So, tell the caller that *INBUF
      has already been incremented.  */
   *incremented = (src > piece_start);
   errno = EILSEQ;
   return (size_t)(-1);
 }
 # else
 /* With GNU libiconv and GNU libc, which are known to prefer to fail rather
    than doing a lossy conversion, iconv_carefully is a plain iconv() call that
    always sets *INCREMENTED to false.  */
 #  define iconv_carefully(cd, inbuf, inbytesleft, outbuf, outbytesleft, incremented) \
      (*(incremented) = false, \
       iconv (cd, (ICONV_CONST char **) (inbuf), inbytesleft, outbuf, outbytesleft))
 # endif

 /* iconv_carefully_1 is like iconv_carefully, except that it stops after
    converting one character or one shift sequence.
    It retries iconv() from the original input position with 1, 2, 3, ... input
    bytes until iconv() no longer reports an incomplete input (EINVAL).
    *OUTBUF and *OUTBYTESLEFT are updated only if the return value is not
    (size_t)(-1).  */
 static size_t
 iconv_carefully_1 (iconv_t cd,
                    const char **inbuf, size_t *inbytesleft,
                    char **outbuf, size_t *outbytesleft,
                    bool *incremented)
 {
   const char *const src_start = *inbuf;
   const char *const src_end = src_start + *inbytesleft;
   const char *src = src_start;
   char *dst = *outbuf;
   size_t dst_left = *outbytesleft;
   size_t rc = (size_t)(-1);
   size_t try_len = 1;

   while (src_start + try_len <= src_end)
     {
       src = src_start;
       rc = iconv (cd,
                   (ICONV_CONST char **) &src, &try_len,
                   &dst, &dst_left);
       if (rc != (size_t)(-1) || errno != EINVAL)
         break;
       /* iconv can eat up a shift sequence but give EINVAL while attempting
          to convert the first character.  E.g. libiconv does this.
          Treat the consumed shift sequence as one converted unit.  */
       if (src > src_start)
         {
           rc = 0;
           break;
         }
       try_len++;
     }

   *inbuf = src;
   *inbytesleft = src_end - src;

 # if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
   /* Irix iconv() inserts a NUL byte if it cannot convert.
      NetBSD iconv() inserts a question mark if it cannot convert.
      Only GNU libiconv and GNU libc are known to prefer to fail rather
      than doing a lossy conversion.  */
   if (rc != (size_t)(-1) && rc != 0)
     {
       /* iconv() has already incremented the input pointer.  We cannot go back
          to a previous position, otherwise the state inside CD would become
          invalid, if FROM_CODESET is a stateful encoding.  So, tell the caller
          that *INBUF has already been incremented.  */
       *incremented = (src > src_start);
       errno = EILSEQ;
       return (size_t)(-1);
     }
 # endif

   *incremented = false;
   if (rc == (size_t)(-1))
     return rc;

   /* Success: publish the new output position.  */
   *outbuf = dst;
   *outbytesleft = dst_left;
   return rc;
 }

 /* utf8conv_carefully is like iconv, except that
      - it converts from UTF-8 to UTF-8, i.e. it copies the input character by
        character while validating it,
      - it stops as soon as it encounters a conversion error, and it returns
        in *INCREMENTED a boolean telling whether it has incremented the input
        pointers past the error location,
      - if one_character_only is true, it stops after converting one
        character.
    Return 0 if successful, or (size_t)(-1) with errno set to EINVAL
    (incomplete input), EILSEQ (invalid input) or E2BIG (output buffer too
    small).  *INCREMENTED is assigned only in the error case.  */
 static size_t
 utf8conv_carefully (bool one_character_only,
                     const char **inbuf, size_t *inbytesleft,
                     char **outbuf, size_t *outbytesleft,
                     bool *incremented)
 {
   const uint8_t *src = (const uint8_t *) *inbuf;
   size_t src_left = *inbytesleft;
   uint8_t *dst = (uint8_t *) *outbuf;
   size_t dst_left = *outbytesleft;
   size_t status = 0;

   for (;;)
     {
       ucs4_t ch;
       int consumed;
       int produced;

       consumed = u8_mbtoucr (&ch, src, src_left);
       if (consumed < 0)
         {
           errno = (consumed == -2 ? EINVAL : EILSEQ);
           /* Call u8_mbtouc to find out how many bytes to skip.  */
           consumed = u8_mbtouc (&ch, src, src_left);
           src += consumed;
           src_left -= consumed;
           *incremented = true;
           status = (size_t)(-1);
           break;
         }

       /* An exhausted output buffer is handled like a too small one.  */
       produced = (dst_left == 0 ? -2 : u8_uctomb (dst, ch, dst_left));
       if (produced == -2)
         {
           errno = E2BIG;
           *incremented = false;
           status = (size_t)(-1);
           break;
         }

       src += consumed;
       src_left -= consumed;

       if (produced == -1)
         {
           /* The character was decoded but cannot be encoded; the input
              position is already past it.  */
           errno = EILSEQ;
           *incremented = true;
           status = (size_t)(-1);
           break;
         }

       dst += produced;
       dst_left -= produced;

       if (one_character_only || src_left == 0)
         break;
     }

   *inbuf = (const char *) src;
   *inbytesleft = src_left;
   *outbuf = (char *) dst;
   *outbytesleft = dst_left;
   return status;
 }

 static int
 mem_cd_iconveh_internal (const char *src, size_t srclen,
                          iconv_t cd, iconv_t cd1, iconv_t cd2,
                          enum iconv_ilseq_handler handler,
                          size_t extra_alloc,
                          size_t *offsets,
                          char **resultp, size_t *lengthp)
 {
   /* When a conversion error occurs, we cannot start using CD1 and CD2 at
      this point: FROM_CODESET may be a stateful encoding like ISO-2022-KR.
      Instead, we have to start afresh from the beginning of SRC.  */
   /* Use a temporary buffer, so that for small strings, a single malloc()
      call will be sufficient.  */
 # define tmpbufsize 4096
   /* The alignment is needed when converting e.g. to glibc's WCHAR_T or
      libiconv's UCS-4-INTERNAL encoding.  */
   union { unsigned int align; char buf[tmpbufsize]; } tmp;
 # define tmpbuf tmp.buf

   char *initial_result;
   char *result;
   size_t allocated;
   size_t length;
   size_t last_length = (size_t)(-1); /* only needed if offsets != NULL */

   if (*resultp != NULL && *lengthp >= sizeof (tmpbuf))
     {
       initial_result = *resultp;
       allocated = *lengthp;
     }
   else
     {
       initial_result = tmpbuf;
       allocated = sizeof (tmpbuf);
     }
   result = initial_result;

   /* Test whether a direct conversion is possible at all.  */
   if (cd == (iconv_t)(-1))
     goto indirectly;

   if (offsets != NULL)
     {
       size_t i;

       for (i = 0; i < srclen; i++)
         offsets[i] = (size_t)(-1);

       last_length = (size_t)(-1);
     }
   length = 0;

   /* First, try a direct conversion, and see whether a conversion error
      occurs at all.  */
   {
     const char *inptr = src;
     size_t insize = srclen;

     /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
 # if defined _LIBICONV_VERSION \
      || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
           || defined __sun)
     /* Set to the initial state.  */
     iconv (cd, NULL, NULL, NULL, NULL);
 # endif

     while (insize > 0)
       {
         char *outptr = result + length;
         size_t outsize = allocated - extra_alloc - length;
         bool incremented;
         size_t res;
         bool grow;

         if (offsets != NULL)
           {
             if (length != last_length) /* ensure that offset[] be increasing */
               {
                 offsets[inptr - src] = length;
                 last_length = length;
               }
             res = iconv_carefully_1 (cd,
                                      &inptr, &insize,
                                      &outptr, &outsize,
                                      &incremented);
           }
         else
           /* Use iconv_carefully instead of iconv here, because:
              - If TO_CODESET is UTF-8, we can do the error handling in this
                loop, no need for a second loop,
              - With iconv() implementations other than GNU libiconv and GNU
                libc, if we use iconv() in a big swoop, checking for an E2BIG
                return, we lose the number of irreversible conversions.  */
           res = iconv_carefully (cd,
                                  &inptr, &insize,
                                  &outptr, &outsize,
                                  &incremented);

         length = outptr - result;
         grow = (length + extra_alloc > allocated / 2);
         if (res == (size_t)(-1))
           {
             if (errno == E2BIG)
               grow = true;
             else if (errno == EINVAL)
               break;
             else if (errno == EILSEQ && handler != iconveh_error)
               {
                 if (cd2 == (iconv_t)(-1))
                   {
                     /* TO_CODESET is UTF-8.  */
                     /* Error handling can produce up to 1 byte of output.  */
                     if (length + 1 + extra_alloc > allocated)
                       {
                         char *memory;

                         allocated = 2 * allocated;
                         if (length + 1 + extra_alloc > allocated)
                           abort ();
                         if (result == initial_result)
                           memory = (char *) malloc (allocated);
                         else
                           memory = (char *) realloc (result, allocated);
                         if (memory == NULL)
                           {
                             if (result != initial_result)
                               free (result);
                             errno = ENOMEM;
                             return -1;
                           }
                         if (result == initial_result)
                           memcpy (memory, initial_result, length);
                         result = memory;
                         grow = false;
                       }
                     /* The input is invalid in FROM_CODESET.  Eat up one byte
                        and emit a question mark.  */
                     if (!incremented)
                       {
                         if (insize == 0)
                           abort ();
                         inptr++;
                         insize--;
                       }
                     result[length] = '?';
                     length++;
                   }
                 else
                   goto indirectly;
               }
             else
               {
                 if (result != initial_result)
                   {
                     int saved_errno = errno;
                     free (result);
                     errno = saved_errno;
                   }
                 return -1;
               }
           }
         if (insize == 0)
           break;
         if (grow)
           {
             char *memory;

             allocated = 2 * allocated;
             if (result == initial_result)
               memory = (char *) malloc (allocated);
             else
               memory = (char *) realloc (result, allocated);
             if (memory == NULL)
               {
                 if (result != initial_result)
                   free (result);
                 errno = ENOMEM;
                 return -1;
               }
             if (result == initial_result)
               memcpy (memory, initial_result, length);
             result = memory;
           }
       }
   }

   /* Now get the conversion state back to the initial state.
      But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
 #if defined _LIBICONV_VERSION \
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
          || defined __sun)
   for (;;)
     {
       char *outptr = result + length;
       size_t outsize = allocated - extra_alloc - length;
       size_t res;

       res = iconv (cd, NULL, NULL, &outptr, &outsize);
       length = outptr - result;
       if (res == (size_t)(-1))
         {
           if (errno == E2BIG)
             {
               char *memory;

               allocated = 2 * allocated;
               if (result == initial_result)
                 memory = (char *) malloc (allocated);
               else
                 memory = (char *) realloc (result, allocated);
               if (memory == NULL)
                 {
                   if (result != initial_result)
                     free (result);
                   errno = ENOMEM;
                   return -1;
                 }
               if (result == initial_result)
                 memcpy (memory, initial_result, length);
               result = memory;
             }
           else
             {
               if (result != initial_result)
                 {
                   int saved_errno = errno;
                   free (result);
                   errno = saved_errno;
                 }
               return -1;
             }
         }
       else
         break;
     }
 #endif

   /* The direct conversion succeeded.  */
   goto done;

  indirectly:
   /* The direct conversion failed.
      Use a conversion through UTF-8.  */
   if (offsets != NULL)
     {
       size_t i;

       for (i = 0; i < srclen; i++)
         offsets[i] = (size_t)(-1);

       last_length = (size_t)(-1);
     }
   length = 0;
   {
     const bool slowly = (offsets != NULL || handler == iconveh_error);
 # define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */
     char utf8buf[utf8bufsize + 1];
     size_t utf8len = 0;
     const char *in1ptr = src;
     size_t in1size = srclen;
     bool do_final_flush1 = true;
     bool do_final_flush2 = true;

     /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
 # if defined _LIBICONV_VERSION \
      || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
           || defined __sun)
     /* Set to the initial state.  */
     if (cd1 != (iconv_t)(-1))
       iconv (cd1, NULL, NULL, NULL, NULL);
     if (cd2 != (iconv_t)(-1))
       iconv (cd2, NULL, NULL, NULL, NULL);
 # endif

     while (in1size > 0 || do_final_flush1 || utf8len > 0 || do_final_flush2)
       {
         char *out1ptr = utf8buf + utf8len;
         size_t out1size = utf8bufsize - utf8len;
         bool incremented1;
         size_t res1;
         int errno1;

         /* Conversion step 1: from FROM_CODESET to UTF-8.  */
         if (in1size > 0)
           {
             if (offsets != NULL
                 && length != last_length) /* ensure that offset[] be increasing */
               {
                 offsets[in1ptr - src] = length;
                 last_length = length;
               }
             if (cd1 != (iconv_t)(-1))
               {
                 if (slowly)
                   res1 = iconv_carefully_1 (cd1,
                                             &in1ptr, &in1size,
                                             &out1ptr, &out1size,
                                             &incremented1);
                 else
                   res1 = iconv_carefully (cd1,
                                           &in1ptr, &in1size,
                                           &out1ptr, &out1size,
                                           &incremented1);
               }
             else
               {
                 /* FROM_CODESET is UTF-8.  */
                 res1 = utf8conv_carefully (slowly,
                                            &in1ptr, &in1size,
                                            &out1ptr, &out1size,
                                            &incremented1);
               }
           }
         else if (do_final_flush1)
           {
             /* Now get the conversion state of CD1 back to the initial state.
                But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
 # if defined _LIBICONV_VERSION \
      || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
           || defined __sun)
             if (cd1 != (iconv_t)(-1))
               res1 = iconv (cd1, NULL, NULL, &out1ptr, &out1size);
             else
 # endif
               res1 = 0;
             do_final_flush1 = false;
             incremented1 = true;
           }
         else
           {
             res1 = 0;
             incremented1 = true;
           }
         if (res1 == (size_t)(-1)
             && !(errno == E2BIG || errno == EINVAL || errno == EILSEQ))
           {
             if (result != initial_result)
               {
                 int saved_errno = errno;
                 free (result);
                 errno = saved_errno;
               }
             return -1;
           }
         if (res1 == (size_t)(-1)
             && errno == EILSEQ && handler != iconveh_error)
           {
             /* The input is invalid in FROM_CODESET.  Eat up one byte and
                emit a question mark.  Room for the question mark was allocated
                at the end of utf8buf.  */
             if (!incremented1)
               {
                 if (in1size == 0)
                   abort ();
                 in1ptr++;
                 in1size--;
               }
             *out1ptr++ = '?';
             res1 = 0;
           }
         errno1 = errno;
         utf8len = out1ptr - utf8buf;

         if (offsets != NULL
             || in1size == 0
             || utf8len > utf8bufsize / 2
             || (res1 == (size_t)(-1) && errno1 == E2BIG))
           {
             /* Conversion step 2: from UTF-8 to TO_CODESET.  */
             const char *in2ptr = utf8buf;
             size_t in2size = utf8len;

             while (in2size > 0
                    || (in1size == 0 && !do_final_flush1 && do_final_flush2))
               {
                 char *out2ptr = result + length;
                 size_t out2size = allocated - extra_alloc - length;
                 bool incremented2;
                 size_t res2;
                 bool grow;

                 if (in2size > 0)
                   {
                     if (cd2 != (iconv_t)(-1))
                       res2 = iconv_carefully (cd2,
                                               &in2ptr, &in2size,
                                               &out2ptr, &out2size,
                                               &incremented2);
                     else
                       /* TO_CODESET is UTF-8.  */
                       res2 = utf8conv_carefully (false,
                                                  &in2ptr, &in2size,
                                                  &out2ptr, &out2size,
                                                  &incremented2);
                   }
                 else /* in1size == 0 && !do_final_flush1
                         && in2size == 0 && do_final_flush2 */
                   {
                     /* Now get the conversion state of CD2 back to the initial
                        state.  But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
 # if defined _LIBICONV_VERSION \
      || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
           || defined __sun)
                     if (cd2 != (iconv_t)(-1))
                       res2 = iconv (cd2, NULL, NULL, &out2ptr, &out2size);
                     else
 # endif
                       res2 = 0;
                     do_final_flush2 = false;
                     incremented2 = true;
                   }

                 length = out2ptr - result;
                 grow = (length + extra_alloc > allocated / 2);
                 if (res2 == (size_t)(-1))
                   {
                     if (errno == E2BIG)
                       grow = true;
                     else if (errno == EINVAL)
                       break;
                     else if (errno == EILSEQ && handler != iconveh_error)
                       {
                         /* Error handling can produce up to 10 bytes of ASCII
                            output.  But TO_CODESET may be UCS-2, UTF-16 or
                            UCS-4, so use CD2 here as well.  */
                         char scratchbuf[10];
                         size_t scratchlen;
                         ucs4_t uc;
                         const char *inptr;
                         size_t insize;
                         size_t res;

                         if (incremented2)
                           {
                             if (u8_prev (&uc, (const uint8_t *) in2ptr,
                                          (const uint8_t *) utf8buf)
                                 == NULL)
                               abort ();
                           }
                         else
                           {
                             int n;
                             if (in2size == 0)
                               abort ();
                             n = u8_mbtouc_unsafe (&uc, (const uint8_t *) in2ptr,
                                                   in2size);
                             in2ptr += n;
                             in2size -= n;
                           }

                         if (handler == iconveh_escape_sequence)
                           {
                             static char hex[16] = "0123456789ABCDEF";
                             scratchlen = 0;
                             scratchbuf[scratchlen++] = '\\';
                             if (uc < 0x10000)
                               scratchbuf[scratchlen++] = 'u';
                             else
                               {
                                 scratchbuf[scratchlen++] = 'U';
                                 scratchbuf[scratchlen++] = hex[(uc>>28) & 15];
                                 scratchbuf[scratchlen++] = hex[(uc>>24) & 15];
                                 scratchbuf[scratchlen++] = hex[(uc>>20) & 15];
                                 scratchbuf[scratchlen++] = hex[(uc>>16) & 15];
                               }
                             scratchbuf[scratchlen++] = hex[(uc>>12) & 15];
                             scratchbuf[scratchlen++] = hex[(uc>>8) & 15];
                             scratchbuf[scratchlen++] = hex[(uc>>4) & 15];
                             scratchbuf[scratchlen++] = hex[uc & 15];
                           }
                         else
                           {
                             scratchbuf[0] = '?';
                             scratchlen = 1;
                           }

                         inptr = scratchbuf;
                         insize = scratchlen;
                         if (cd2 != (iconv_t)(-1))
                           res = iconv (cd2,
                                        (ICONV_CONST char **) &inptr, &insize,
                                        &out2ptr, &out2size);
                         else
                           {
                             /* TO_CODESET is UTF-8.  */
                             if (out2size >= insize)
                               {
                                 memcpy (out2ptr, inptr, insize);
                                 out2ptr += insize;
                                 out2size -= insize;
                                 inptr += insize;
                                 insize = 0;
                                 res = 0;
                               }
                             else
                               {
                                 errno = E2BIG;
                                 res = (size_t)(-1);
                               }
                           }
                         length = out2ptr - result;
                         if (res == (size_t)(-1) && errno == E2BIG)
                           {
                             char *memory;

                             allocated = 2 * allocated;
                             if (length + 1 + extra_alloc > allocated)
                               abort ();
                             if (result == initial_result)
                               memory = (char *) malloc (allocated);
                             else
                               memory = (char *) realloc (result, allocated);
                             if (memory == NULL)
                               {
                                 if (result != initial_result)
                                   free (result);
                                 errno = ENOMEM;
                                 return -1;
                               }
                             if (result == initial_result)
                               memcpy (memory, initial_result, length);
                             result = memory;
                             grow = false;

                             out2ptr = result + length;
                             out2size = allocated - extra_alloc - length;
                             if (cd2 != (iconv_t)(-1))
                               res = iconv (cd2,
                                            (ICONV_CONST char **) &inptr,
                                            &insize,
                                            &out2ptr, &out2size);
                             else
                               {
                                 /* TO_CODESET is UTF-8.  */
                                 if (!(out2size >= insize))
                                   abort ();
                                 memcpy (out2ptr, inptr, insize);
                                 out2ptr += insize;
                                 out2size -= insize;
                                 inptr += insize;
                                 insize = 0;
                                 res = 0;
                               }
                             length = out2ptr - result;
                           }
 # if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
                         /* Irix iconv() inserts a NUL byte if it cannot convert.
                            NetBSD iconv() inserts a question mark if it cannot
                            convert.
                            Only GNU libiconv and GNU libc are known to prefer
                            to fail rather than doing a lossy conversion.  */
                         if (res != (size_t)(-1) && res > 0)
                           {
                             errno = EILSEQ;
                             res = (size_t)(-1);
                           }
 # endif
                         if (res == (size_t)(-1))
                           {
                             /* Failure converting the ASCII replacement.  */
                             if (result != initial_result)
                               {
                                 int saved_errno = errno;
                                 free (result);
                                 errno = saved_errno;
                               }
                             return -1;
                           }
                       }
                     else
                       {
                         if (result != initial_result)
                           {
                             int saved_errno = errno;
                             free (result);
                             errno = saved_errno;
                           }
                         return -1;
                       }
                   }
                 if (!(in2size > 0
                       || (in1size == 0 && !do_final_flush1 && do_final_flush2)))
                   break;
                 if (grow)
                   {
                     char *memory;

                     allocated = 2 * allocated;
                     if (result == initial_result)
                       memory = (char *) malloc (allocated);
                     else
                       memory = (char *) realloc (result, allocated);
                     if (memory == NULL)
                       {
                         if (result != initial_result)
                           free (result);
                         errno = ENOMEM;
                         return -1;
                       }
                     if (result == initial_result)
                       memcpy (memory, initial_result, length);
                     result = memory;
                   }
               }

             /* Move the remaining bytes to the beginning of utf8buf.  */
             if (in2size > 0)
               memmove (utf8buf, in2ptr, in2size);
             utf8len = in2size;
           }

         if (res1 == (size_t)(-1))
           {
             if (errno1 == EINVAL)
               in1size = 0;
             else if (errno1 == EILSEQ)
               {
                 if (result != initial_result)
                   free (result);
                 errno = errno1;
                 return -1;
               }
           }
       }
 # undef utf8bufsize
   }

  done:
   /* Now the final memory allocation.  */
   if (result == tmpbuf)
     {
       size_t memsize = length + extra_alloc;

       if (*resultp != NULL && *lengthp >= memsize)
         result = *resultp;
       else
         {
           char *memory;

           memory = (char *) malloc (memsize > 0 ? memsize : 1);
           if (memory != NULL)
             result = memory;
           else
             {
               errno = ENOMEM;
               return -1;
             }
         }
       memcpy (result, tmpbuf, length);
     }
   else if (result != *resultp && length + extra_alloc < allocated)
     {
       /* Shrink the allocated memory if possible.  */
       size_t memsize = length + extra_alloc;
       char *memory;

       memory = (char *) realloc (result, memsize > 0 ? memsize : 1);
       if (memory != NULL)
         result = memory;
     }
   *resultp = result;
   *lengthp = length;
   return 0;
 # undef tmpbuf
 # undef tmpbufsize
 }

 /* Convert the SRCLEN bytes starting at SRC using the conversion descriptors
    stored in *CD, applying HANDLER, and hand the outcome back through
    *RESULTP and *LENGTHP.  OFFSETS is passed on unchanged.
    This is a plain forwarder to mem_cd_iconveh_internal; its return value
    (and errno on failure) are exactly those of the internal routine.  */
 int
 mem_cd_iconveh (const char *src, size_t srclen,
                 const iconveh_t *cd,
                 enum iconv_ilseq_handler handler,
                 size_t *offsets,
                 char **resultp, size_t *lengthp)
 {
   /* Seventh argument of the internal routine.  str_cd_iconveh passes 1 in
      this position and then stores a NUL at result[length]; this entry point
      appends nothing, so it passes 0.  */
   const size_t trailing_room = 0;
   int status;

   status = mem_cd_iconveh_internal (src, srclen,
                                     cd->cd, cd->cd1, cd->cd2,
                                     handler, trailing_room, offsets,
                                     resultp, lengthp);
   return status;
 }

 /* Convert the NUL-terminated string SRC using the conversion descriptors
    stored in *CD, applying HANDLER.
    Returns a freshly allocated NUL-terminated string on success.  On failure
    returns NULL with errno as left by the conversion routine.  */
 char *
 str_cd_iconveh (const char *src,
                 const iconveh_t *cd,
                 enum iconv_ilseq_handler handler)
 {
   char *converted = NULL;
   size_t converted_len = 0;
   /* The terminating NUL is deliberately kept out of the conversion: most
      encodings map a trailing NUL byte to a trailing NUL byte, but UTF-7
      does not.  The terminator is therefore appended by hand below; the
      argument 1 asks for one byte of room for it.  */
   size_t src_len = strlen (src);

   if (mem_cd_iconveh_internal (src, src_len,
                                cd->cd, cd->cd1, cd->cd2, handler, 1,
                                NULL, &converted, &converted_len)
       >= 0)
     {
       /* Success: terminate the output and hand it to the caller.  */
       converted[converted_len] = '\0';
       return converted;
     }

   /* Failure: release whatever was handed back, keeping errno intact across
      the call to free.  */
   if (converted != NULL)
     {
       int conversion_errno = errno;
       free (converted);
       errno = conversion_errno;
     }
   return NULL;
 }

 #endif

 /* Convert the SRCLEN bytes starting at SRC from FROM_CODESET to TO_CODESET,
    applying HANDLER, and store the outcome in *RESULTP and *LENGTHP.
    On entry *RESULTP / *LENGTHP may describe a buffer that is reused when it
    is large enough.  OFFSETS is passed on to mem_cd_iconveh.
    Returns 0 on success.  On failure returns -1 with errno set; errno is
    ENOSYS when this file was compiled without HAVE_ICONV.  */
 int
 mem_iconveh (const char *src, size_t srclen,
              const char *from_codeset, const char *to_codeset,
              enum iconv_ilseq_handler handler,
              size_t *offsets,
              char **resultp, size_t *lengthp)
 {
   /* Empty input: nothing to convert.  Only the length is reported;
      *RESULTP is left as the caller provided it.  */
   if (srclen == 0)
     {
       *lengthp = 0;
       return 0;
     }

   /* Identical codeset names (ignoring case) and no offsets requested:
      a plain byte copy suffices.  */
   if (offsets == NULL && c_strcasecmp (from_codeset, to_codeset) == 0)
     {
       char *copy = *resultp;

       /* Allocate unless the caller's buffer exists and can hold SRCLEN
          bytes.  */
       if (copy == NULL || *lengthp < srclen)
         {
           copy = (char *) malloc (srclen);
           if (copy == NULL)
             {
               errno = ENOMEM;
               return -1;
             }
         }
       memcpy (copy, src, srclen);
       *resultp = copy;
       *lengthp = srclen;
       return 0;
     }

 #if HAVE_ICONV
   {
     iconveh_t cd;
     char *result;
     size_t length;
     int retval;
     int saved_errno;

     if (iconveh_open (to_codeset, from_codeset, &cd) < 0)
       return -1;

     /* Work on local copies so that the caller's variables are only
        updated once everything, including the close, has succeeded.  */
     result = *resultp;
     length = *lengthp;
     retval = mem_cd_iconveh (src, srclen, &cd, handler, offsets,
                              &result, &length);

     if (retval < 0)
       {
         /* The conversion failed: close cd, but report the errno of the
            failed conversion rather than anything iconveh_close sets.  */
         saved_errno = errno;
         iconveh_close (&cd);
         errno = saved_errno;
         return retval;
       }

     if (iconveh_close (&cd) < 0)
       {
         /* Closing failed: return -1 with the errno from iconveh_close,
            freeing the result only if it is not the caller's own
            buffer.  */
         saved_errno = errno;
         if (result != *resultp && result != NULL)
           free (result);
         errno = saved_errno;
         return -1;
       }

     *resultp = result;
     *lengthp = length;
     return retval;
   }
 #else
   /* This is a different error code than if iconv_open existed but didn't
      support from_codeset and to_codeset, so that the caller can emit
      an error message such as
        "iconv() is not supported. Installing GNU libiconv and
         then reinstalling this package would fix this."  */
   errno = ENOSYS;
   return -1;
 #endif
 }

 /* Convert the NUL-terminated string SRC from FROM_CODESET to TO_CODESET,
    applying HANDLER.
    Returns a freshly allocated NUL-terminated string on success.  On failure
    returns NULL with errno set; errno is ENOSYS when this file was compiled
    without HAVE_ICONV.  */
 char *
 str_iconveh (const char *src,
              const char *from_codeset, const char *to_codeset,
              enum iconv_ilseq_handler handler)
 {
   /* An empty string, or identical codeset names (ignoring case), needs no
      conversion at all: a duplicate of SRC is the answer.  */
   if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
     {
       char *copy = strdup (src);

       if (copy == NULL)
         errno = ENOMEM;
       return copy;
     }

 #if HAVE_ICONV
   {
     iconveh_t cd;
     char *result;
     int saved_errno;

     if (iconveh_open (to_codeset, from_codeset, &cd) < 0)
       return NULL;

     result = str_cd_iconveh (src, &cd, handler);

     if (result == NULL)
       {
         /* The conversion failed: close cd, but report the errno of the
            failed conversion rather than anything iconveh_close sets.  */
         saved_errno = errno;
         iconveh_close (&cd);
         errno = saved_errno;
         return NULL;
       }

     if (iconveh_close (&cd) < 0)
       {
         /* Closing failed: discard the converted string and return NULL
            with the errno from iconveh_close.  */
         saved_errno = errno;
         free (result);
         errno = saved_errno;
         return NULL;
       }

     return result;
   }
 #else
   /* This is a different error code than if iconv_open existed but didn't
      support from_codeset and to_codeset, so that the caller can emit
      an error message such as
        "iconv() is not supported. Installing GNU libiconv and
         then reinstalling this package would fix this."  */
   errno = ENOSYS;
   return NULL;
 #endif
 }