/* src/share/utf8/charset.c - platform/external/flac - Git at Google */

 /*
  * Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License along
  * with this program; if not, write to the Free Software Foundation, Inc.,
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */

 /*
  * See the corresponding header file for a description of the functions
  * that this file provides.
  *
  * This was first written for Ogg Vorbis but could be of general use.
  *
  * The only deliberate assumption about data sizes is that a short has
  * at least 16 bits, but this code has only been tested on systems with
  * 8-bit char, 16-bit short and 32-bit int.
  */

 #ifdef HAVE_CONFIG_H
 #  include <config.h>
 #endif

 #if !defined _WIN32 && !defined HAVE_ICONV /* should be && defined USE_CHARSET_CONVERT */

 #include <stdlib.h>

 #include "share/alloc.h"
 #include "charset.h"

 #include "charmaps.h"

 /*
  * Locale-independent, ASCII-only case-insensitive string comparison.
  *
  * Only the letters 'A'..'Z' are folded; every other byte is compared
  * as-is, so the result never depends on the current locale.
  * Locale-dependent functions can be dangerous: we once had a bug
  * involving strcasecmp("iso", "ISO") in a Turkish locale!
  *
  * Returns zero when the strings are equal ignoring ASCII case.
  * Otherwise the sign of the result is the sign of the difference
  * between the first pair of differing bytes, taken as unsigned values
  * after folding to lower case, so "a" sorts before "B".
  */
 static int ascii_strcasecmp(const char *s1, const char *s2)
 {
   for (;; s1++, s2++) {
     unsigned char c1 = (unsigned char)*s1;
     unsigned char c2 = (unsigned char)*s2;

     if ('A' <= c1 && c1 <= 'Z')
       c1 += 'a' - 'A';
     if ('A' <= c2 && c2 <= 'Z')
       c2 += 'a' - 'A';

     /* Stop at the first difference, or when both strings end. */
     if (c1 != c2 || !c1)
       return (int)c1 - (int)c2;
   }
 }

 /*
  * UTF-8 equivalents of the C library's wctomb() and mbtowc().
  */

 /*
  * Decode one UTF-8 sequence from the first N bytes of S.
  *
  * On success the decoded value is stored in *PWC (when PWC is non-null)
  * and the number of bytes consumed (1 to 6) is returned.  Returns 0 when
  * S is null, N is zero or the first byte is NUL, and -1 when the bytes
  * do not form a complete, shortest-form sequence.
  */
 int utf8_mbtowc(int *pwc, const char *s, size_t n)
 {
   const unsigned char *p = (const unsigned char *)s;
   int len, pos, value;

   if (!s || !n)
     return 0;

   if (p[0] < 0x80) {
     /* Single-byte case; a NUL byte reports a length of zero. */
     if (pwc)
       *pwc = p[0];
     return p[0] != 0;
   }

   /*
    * 0x80..0xc1 are continuation bytes or overlong two-byte leads,
    * and 0xfe/0xff never start a sequence.
    */
   if (p[0] < 0xc2 || p[0] > 0xfd)
     return -1;

   if (p[0] < 0xe0)
     len = 2;
   else if (p[0] < 0xf0)
     len = 3;
   else if (p[0] < 0xf8)
     len = 4;
   else if (p[0] < 0xfc)
     len = 5;
   else
     len = 6;

   if (n < (size_t)len)
     return -1;

   /* The lead byte contributes its low (7 - len) bits. */
   value = p[0] & ((1 << (7 - len)) - 1);
   for (pos = 1; pos < len; pos++) {
     if ((p[pos] & 0xc0) != 0x80)
       return -1;
     value = (value << 6) | (p[pos] & 0x3f);
   }

   /*
    * Reject values that would have fitted in a shorter sequence.
    * Two-byte sequences cannot be overlong here because their lead
    * byte is at least 0xc2.
    */
   if (len > 2 && value < (1 << (5 * len - 4)))
     return -1;

   if (pwc)
     *pwc = value;
   return len;
 }

 /*
  * Encode the value WC1 as UTF-8 into the buffer S.
  *
  * Returns the number of bytes written (1 to 6), 0 when S is null, and
  * -1 when WC1 is negative (i.e. does not fit in 31 bits).  S must have
  * room for the bytes written; no terminator is added.
  */
 int utf8_wctomb(char *s, int wc1)
 {
   /* Lead-byte marker for a sequence of 1, 2, ... 6 bytes. */
   static const unsigned char lead_mark[6] = {
     0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
   };
   uint32_t wc = wc1;
   int len, pos;

   if (!s)
     return 0;

   if (wc < (1u << 7))
     len = 1;
   else if (wc < (1u << 11))
     len = 2;
   else if (wc < (1u << 16))
     len = 3;
   else if (wc < (1u << 21))
     len = 4;
   else if (wc < (1u << 26))
     len = 5;
   else if (wc < (1u << 31))
     len = 6;
   else
     return -1;

   /* Emit the continuation bytes last-to-first, six bits at a time. */
   for (pos = len - 1; pos > 0; pos--) {
     s[pos] = (char)(0x80 | (wc & 0x3f));
     wc >>= 6;
   }
   /* Whatever is left belongs in the lead byte. */
   s[0] = (char)(lead_mark[len - 1] | wc);

   return len;
 }

 /*
  * The charset "object" and methods.
  *
  * A charset bundles a decoder and an encoder with the private data
  * they need.  Instances in this file are initialised positionally, so
  * the order of the fields matters.
  */

 struct charset {
   /* Largest number of bytes one character can occupy; charset_convert()
    * uses it to size its output buffer. */
   int max;
   /* Decoder: reads one character from at most N bytes of S into *PWC. */
   int (*mbtowc)(void *table, int *pwc, const char *s, size_t n);
   /* Encoder: writes the encoding of WC to S. */
   int (*wctomb)(void *table, char *s, int wc);
   /* Passed as TABLE to both callbacks; null for the table-free charsets. */
   void *map;
 };

 /*
  * Decode one character from at most N bytes of S using CHARSET's
  * decoder, and return whatever that decoder returns.
  */
 int charset_mbtowc(struct charset *charset, int *pwc, const char *s, size_t n)
 {
   void *table = charset->map;

   return charset->mbtowc(table, pwc, s, n);
 }

 /*
  * Encode the character WC into S using CHARSET's encoder, and return
  * whatever that encoder returns.
  */
 int charset_wctomb(struct charset *charset, char *s, int wc)
 {
   void *table = charset->map;

   return charset->wctomb(table, s, wc);
 }

 /* Return the `max` field of CHARSET (largest encoded character size). */
 int charset_max(struct charset *charset)
 {
   int widest = charset->max;

   return widest;
 }

 /*
  * Implementation of UTF-8.
  */

 /* Decoder callback for UTF-8; MAP is unused. */
 static int mbtowc_utf8(void *map, int *pwc, const char *s, size_t n)
 {
   int consumed = utf8_mbtowc(pwc, s, n);

   (void)map;
   return consumed;
 }

 /* Encoder callback for UTF-8; MAP is unused. */
 static int wctomb_utf8(void *map, char *s, int wc)
 {
   int written = utf8_wctomb(s, wc);

   (void)map;
   return written;
 }

 /*
  * Implementation of US-ASCII.
  * Probably on most architectures this compiles to less than 256 bytes
  * of code, so we can save space by not having a table for this one.
  */

 /*
  * Decoder callback for US-ASCII; MAP is unused.
  * Returns 1 for a byte in 0x01..0x7f, 0 for NUL (or when S is null or
  * N is zero) and -1 for a byte with the top bit set.
  */
 static int mbtowc_ascii(void *map, int *pwc, const char *s, size_t n)
 {
   unsigned char byte;

   (void)map;
   if (s == NULL || n == 0)
     return 0;

   byte = (unsigned char)*s;
   if (byte > 0x7f)
     return -1;
   if (pwc != NULL)
     *pwc = byte;
   return byte != 0;
 }

 /*
  * Encoder callback for US-ASCII; MAP is unused.
  * Returns 1 after storing WC in *S, 0 when S is null, and -1 when WC
  * is outside 0..0x7f.
  */
 static int wctomb_ascii(void *map, char *s, int wc)
 {
   (void)map;
   if (s == NULL)
     return 0;
   if (wc < 0 || wc > 0x7f)
     return -1;
   s[0] = (char)wc;
   return 1;
 }

 /*
  * Implementation of ISO-8859-1.
  * Probably on most architectures this compiles to less than 256 bytes
  * of code, so we can save space by not having a table for this one.
  */

 /*
  * Decoder callback for ISO-8859-1; MAP is unused.
  * Every byte value decodes to the character with the same number, so
  * this never fails: it returns 1 for a non-NUL byte and 0 for NUL (or
  * when S is null or N is zero).
  */
 static int mbtowc_iso1(void *map, int *pwc, const char *s, size_t n)
 {
   unsigned char byte;

   (void)map;
   if (s == NULL || n == 0)
     return 0;

   /* An 8-bit value can never exceed 0xff, so no range check is needed. */
   byte = (unsigned char)*s;
   if (pwc != NULL)
     *pwc = byte;
   return byte != 0;
 }

 /*
  * Encoder callback for ISO-8859-1; MAP is unused.
  * Returns 1 after storing WC in *S, 0 when S is null, and -1 when WC
  * is outside 0..0xff.
  */
 static int wctomb_iso1(void *map, char *s, int wc)
 {
   (void)map;
   if (s == NULL)
     return 0;
   if (wc < 0 || wc > 0xff)
     return -1;
   s[0] = (char)wc;
   return 1;
 }

 /*
  * Implementation of any 8-bit charset.
  *
  * A struct map is the private data of a table-driven charset.
  */

 struct map {
   /* Forward table indexed by byte value (0..255); an entry of 0xffff
    * is treated as "no character for this byte". */
   const uint16_t *from;
   /* Inverse lookup table; null until wctomb_8bit() first builds it,
    * and left null if that allocation fails. */
   struct inverse_map *to;
 };

 static int mbtowc_8bit(void *map1, int *pwc, const char *s, size_t n)
 {
   struct map *map = map1;
   uint16_t wc;

   if (!n || !s)
     return 0;
   wc = map->from[(uint8_t)*s];
   if (wc == 0xffff)
     return -1;
   if (pwc)
     *pwc = (int)wc;
   return wc ? 1 : 0;
 }

 /*
  * For the inverse map we use a hash table, which has the advantages
  * of small constant memory requirement and simple memory allocation,
  * but the disadvantage of slow conversion in the worst case.
  * If you need real-time performance while letting a potentially
  * malicious user define their own map, then the method used in
  * linux/drivers/char/consolemap.c would be more appropriate.
  */

 struct inverse_map {
   /* first[h]: byte value at the head of the chain for hash bucket h. */
   uint8_t first[256];
   /* next[b]: byte value following b in its chain; 0 ends the chain. */
   uint8_t next[256];
 };

 /*
  * HASH selects one of the 256 inverse-map buckets for a character
  * value by keeping its low eight bits.
  * The simple hash is good enough for this application.
  * Use the alternative trivial hashes for testing.
  */
 #define HASH(i) ((i) & 0xff)
 /* #define HASH(i) 0 */
 /* #define HASH(i) 99 */

 /*
  * Build the hash-chained inverse of the 256-entry forward table FROM.
  *
  * Entries equal to 0xffff are skipped.  Bytes are inserted from 255
  * down to 0, each at the head of its bucket, so byte 0 can only ever
  * be the head of a chain; that is what lets a `next` value of 0 mark
  * the end of a chain.
  *
  * Returns a malloc()ed table, or null if the allocation fails.
  */
 static struct inverse_map *make_inverse_map(const uint16_t *from)
 {
   struct inverse_map *inv = malloc(sizeof(struct inverse_map));
   char occupied[256];
   int byte, bucket, tail;

   if (!inv)
     return 0;

   for (byte = 0; byte < 256; byte++) {
     inv->first[byte] = 0;
     inv->next[byte] = 0;
     occupied[byte] = 0;
   }

   byte = 256;
   while (byte-- > 0) {
     if (from[byte] == 0xffff)
       continue;
     bucket = HASH(from[byte]);
     inv->next[byte] = inv->first[bucket];
     inv->first[bucket] = byte;
     occupied[bucket] = 1;
   }

   /*
    * Point the empty buckets at an empty list: pick the first byte
    * that has no successor and use it as the head of every bucket
    * that received no entry.
    */
   for (tail = 0; tail < 256 && inv->next[tail]; tail++)
     ;
   if (tail < 256) {
     for (bucket = 0; bucket < 256; bucket++) {
       if (!occupied[bucket])
         inv->first[bucket] = tail;
     }
   }

   return inv;
 }

 /*
  * Encoder callback for table-driven 8-bit charsets; MAP1 is a
  * struct map.
  *
  * Looks WC1 up in the inverse map, building that map on first use.
  * If the inverse map cannot be allocated, falls back to a linear
  * search of the forward table.
  *
  * Returns 1 after storing the byte in *S, 0 when S is null, and -1
  * when WC1 has no encoding in this charset.
  */
 static int wctomb_8bit(void *map1, char *s, int wc1)
 {
   struct map *map = map1;
   uint16_t wc;
   int i;

   if (!s)
     return 0;

   /*
    * Only 16-bit values can be in the table.  0xffff is the "unmapped"
    * marker in the forward table, so it must be rejected here;
    * otherwise the searches below would match an unmapped byte.
    */
   if ((wc1 & ~0xffff) || wc1 == 0xffff)
     return -1;
   wc = (uint16_t)wc1;

   /* Skip this allocation to test the case where malloc fails. */
   if (!map->to)
     map->to = make_inverse_map(map->from);

   if (map->to) {
     /* Use the inverse map: walk the chain for this hash bucket. */
     i = map->to->first[HASH(wc)];
     for (;;) {
       if (map->from[i] == wc) {
         *s = i;
         return 1;
       }
       i = map->to->next[i];
       if (!i)
         break;
     }
   }
   else {
     /* We don't have an inverse map, so do a linear search. */
     for (i = 0; i < 256; i++)
       if (map->from[i] == wc) {
         *s = i;
         return 1;
       }
   }

   return -1;
 }

 /*
  * The "constructor" charset_find().
  */

 struct charset charset_utf8 = {
   6,
   &mbtowc_utf8,
   &wctomb_utf8,
   0
 };

 struct charset charset_iso1 = {
   1,
   &mbtowc_iso1,
   &wctomb_iso1,
   0
 };

 struct charset charset_ascii = {
   1,
   &mbtowc_ascii,
   &wctomb_ascii,
   0
 };

 struct charset *charset_find(const char *code)
 {
   int i;

   /* Find good (MIME) name. */
   for (i = 0; names[i].bad; i++)
     if (!ascii_strcasecmp(code, names[i].bad)) {
       code = names[i].good;
       break;
     }

   /* Recognise some charsets for which we avoid using a table. */
   if (!ascii_strcasecmp(code, "UTF-8"))
     return &charset_utf8;
   if (!ascii_strcasecmp(code, "US-ASCII"))
     return &charset_ascii;
   if (!ascii_strcasecmp(code, "ISO-8859-1"))
     return &charset_iso1;

   /* Look for a mapping for a simple 8-bit encoding. */
   for (i = 0; maps[i].name; i++)
     if (!ascii_strcasecmp(code, maps[i].name)) {
       if (!maps[i].charset) {
 	maps[i].charset = malloc(sizeof(struct charset));
 	if (maps[i].charset) {
 	  struct map *map = malloc(sizeof(struct map));
 	  if (!map) {
 	    free(maps[i].charset);
 	    maps[i].charset = 0;
 	  }
 	  else {
 	    maps[i].charset->max = 1;
 	    maps[i].charset->mbtowc = &mbtowc_8bit;
 	    maps[i].charset->wctomb = &wctomb_8bit;
 	    maps[i].charset->map = map;
 	    map->from = maps[i].map;
 	    map->to = 0; /* inverse mapping is created when required */
 	  }
 	}
       }
       return maps[i].charset;
     }

   return 0;
 }

 /*
  * Function to convert a buffer from one encoding to another.
  * Invalid bytes are replaced by '#', and characters that are
  * not available in the target encoding are replaced by '?'.
  * Each of TO and TOLEN may be zero, if the result is not needed.
  * The output buffer is null-terminated, so it is all right to
  * use charset_convert(fromcode, tocode, s, strlen(s), &t, 0).
  *
  * Returns 0 on a clean conversion, 1 if some character had to be
  * replaced by '?', 2 if some invalid input byte was replaced by '#',
  * -1 if either charset name is not recognised and -2 if the output
  * buffer cannot be allocated.  When TO is given, *TO receives a
  * heap buffer that the caller must free.
  */

 int charset_convert(const char *fromcode, const char *tocode,
                     const char *from, size_t fromlen,
                     char **to, size_t *tolen)
 {
   struct charset *src = charset_find(fromcode);
   struct charset *dst = charset_find(tocode);
   char *out, *cursor, *shrunk;
   int status = 0;
   int consumed, produced, wc;

   if (!src || !dst)
     return -1;

   /* Worst case: every input byte becomes a character of maximal size. */
   out = safe_malloc_mul2add_(fromlen, /*times*/dst->max, /*+*/1);
   if (!out)
     return -2;

   cursor = out;
   while (fromlen) {
     consumed = charset_mbtowc(src, &wc, from, fromlen);
     if (consumed == -1) {
       /* Invalid input: substitute '#' and skip one byte. */
       wc = '#';
       status = 2;
       consumed = 1;
     }
     else if (consumed == 0) {
       /* A NUL character still occupies one input byte. */
       consumed = 1;
     }

     produced = charset_wctomb(dst, cursor, wc);
     if (produced == -1) {
       /* Not representable in the target: substitute '?'. */
       if (status == 0)
         status = 1;
       produced = charset_wctomb(dst, cursor, '?');
       if (produced == -1)
         produced = 0;
     }

     from += consumed;
     fromlen -= consumed;
     cursor += produced;
   }

   if (tolen)
     *tolen = cursor - out;
   *cursor++ = '\0';

   if (!to) {
     free(out);
     return status;
   }

   /* Trim the buffer; keep the untrimmed one if realloc fails. */
   shrunk = realloc(out, cursor - out);
   *to = shrunk ? shrunk : out;

   return status;
 }

 #endif /* !defined _WIN32 && !defined HAVE_ICONV */
	/*
	* Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
	*
	* This program is free software; you can redistribute it and/or modify
	* it under the terms of the GNU General Public License as published by
	* the Free Software Foundation; either version 2 of the License, or
	* (at your option) any later version.
	*
	* This program is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	* GNU General Public License for more details.
	*
	* You should have received a copy of the GNU General Public License along
	* with this program; if not, write to the Free Software Foundation, Inc.,
	* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
	*/

	/*
	* See the corresponding header file for a description of the functions
	* that this file provides.
	*
	* This was first written for Ogg Vorbis but could be of general use.
	*
	* The only deliberate assumption about data sizes is that a short has
	* at least 16 bits, but this code has only been tested on systems with
	* 8-bit char, 16-bit short and 32-bit int.
	*/

	#ifdef HAVE_CONFIG_H
	# include <config.h>
	#endif

	#if !defined _WIN32 && !defined HAVE_ICONV /* should be && defined USE_CHARSET_CONVERT */

	#include <stdlib.h>

	#include "share/alloc.h"
	#include "charset.h"

	#include "charmaps.h"

	/*
	 * Locale-independent, ASCII-only case-insensitive string comparison.
	 *
	 * Only the letters 'A'..'Z' are folded; every other byte is compared
	 * as-is, so the result never depends on the current locale.
	 * Locale-dependent functions can be dangerous: we once had a bug
	 * involving strcasecmp("iso", "ISO") in a Turkish locale!
	 *
	 * Returns zero when the strings are equal ignoring ASCII case.
	 * Otherwise the sign of the result is the sign of the difference
	 * between the first pair of differing bytes, taken as unsigned values
	 * after folding to lower case, so "a" sorts before "B".
	 */
	static int ascii_strcasecmp(const char *s1, const char *s2)
	{
	  for (;; s1++, s2++) {
	    unsigned char c1 = (unsigned char)*s1;
	    unsigned char c2 = (unsigned char)*s2;

	    if ('A' <= c1 && c1 <= 'Z')
	      c1 += 'a' - 'A';
	    if ('A' <= c2 && c2 <= 'Z')
	      c2 += 'a' - 'A';

	    /* Stop at the first difference, or when both strings end. */
	    if (c1 != c2 || !c1)
	      return (int)c1 - (int)c2;
	  }
	}

	/*
	 * UTF-8 equivalents of the C library's wctomb() and mbtowc().
	 */

	/*
	 * Decode one UTF-8 sequence from the first N bytes of S.
	 *
	 * On success the decoded value is stored in *PWC (when PWC is non-null)
	 * and the number of bytes consumed (1 to 6) is returned.  Returns 0 when
	 * S is null, N is zero or the first byte is NUL, and -1 when the bytes
	 * do not form a complete, shortest-form sequence.
	 */
	int utf8_mbtowc(int *pwc, const char *s, size_t n)
	{
	  const unsigned char *p = (const unsigned char *)s;
	  int len, pos, value;

	  if (!s || !n)
	    return 0;

	  if (p[0] < 0x80) {
	    /* Single-byte case; a NUL byte reports a length of zero. */
	    if (pwc)
	      *pwc = p[0];
	    return p[0] != 0;
	  }

	  /*
	   * 0x80..0xc1 are continuation bytes or overlong two-byte leads,
	   * and 0xfe/0xff never start a sequence.
	   */
	  if (p[0] < 0xc2 || p[0] > 0xfd)
	    return -1;

	  if (p[0] < 0xe0)
	    len = 2;
	  else if (p[0] < 0xf0)
	    len = 3;
	  else if (p[0] < 0xf8)
	    len = 4;
	  else if (p[0] < 0xfc)
	    len = 5;
	  else
	    len = 6;

	  if (n < (size_t)len)
	    return -1;

	  /* The lead byte contributes its low (7 - len) bits. */
	  value = p[0] & ((1 << (7 - len)) - 1);
	  for (pos = 1; pos < len; pos++) {
	    if ((p[pos] & 0xc0) != 0x80)
	      return -1;
	    value = (value << 6) | (p[pos] & 0x3f);
	  }

	  /*
	   * Reject values that would have fitted in a shorter sequence.
	   * Two-byte sequences cannot be overlong here because their lead
	   * byte is at least 0xc2.
	   */
	  if (len > 2 && value < (1 << (5 * len - 4)))
	    return -1;

	  if (pwc)
	    *pwc = value;
	  return len;
	}

	/*
	 * Encode the value WC1 as UTF-8 into the buffer S.
	 *
	 * Returns the number of bytes written (1 to 6), 0 when S is null, and
	 * -1 when WC1 is negative (i.e. does not fit in 31 bits).  S must have
	 * room for the bytes written; no terminator is added.
	 */
	int utf8_wctomb(char *s, int wc1)
	{
	  /* Lead-byte marker for a sequence of 1, 2, ... 6 bytes. */
	  static const unsigned char lead_mark[6] = {
	    0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
	  };
	  uint32_t wc = wc1;
	  int len, pos;

	  if (!s)
	    return 0;

	  if (wc < (1u << 7))
	    len = 1;
	  else if (wc < (1u << 11))
	    len = 2;
	  else if (wc < (1u << 16))
	    len = 3;
	  else if (wc < (1u << 21))
	    len = 4;
	  else if (wc < (1u << 26))
	    len = 5;
	  else if (wc < (1u << 31))
	    len = 6;
	  else
	    return -1;

	  /* Emit the continuation bytes last-to-first, six bits at a time. */
	  for (pos = len - 1; pos > 0; pos--) {
	    s[pos] = (char)(0x80 | (wc & 0x3f));
	    wc >>= 6;
	  }
	  /* Whatever is left belongs in the lead byte. */
	  s[0] = (char)(lead_mark[len - 1] | wc);

	  return len;
	}

	/*
	 * The charset "object" and methods.
	 *
	 * A charset bundles a decoder and an encoder with the private data
	 * they need.  Instances in this file are initialised positionally, so
	 * the order of the fields matters.
	 */

	struct charset {
	  /* Largest number of bytes one character can occupy; charset_convert()
	   * uses it to size its output buffer. */
	  int max;
	  /* Decoder: reads one character from at most N bytes of S into *PWC. */
	  int (*mbtowc)(void *table, int *pwc, const char *s, size_t n);
	  /* Encoder: writes the encoding of WC to S. */
	  int (*wctomb)(void *table, char *s, int wc);
	  /* Passed as TABLE to both callbacks; null for the table-free charsets. */
	  void *map;
	};

	int charset_mbtowc(struct charset charset, int pwc, const char *s, size_t n)
	{
	return (*charset->mbtowc)(charset->map, pwc, s, n);
	}

	int charset_wctomb(struct charset charset, char s, int wc)
	{
	return (*charset->wctomb)(charset->map, s, wc);
	}

	/* Return the `max` field of CHARSET (largest encoded character size). */
	int charset_max(struct charset *charset)
	{
	  int widest = charset->max;

	  return widest;
	}

	/*
	 * Implementation of UTF-8.
	 */

	/* Decoder callback for UTF-8; MAP is unused. */
	static int mbtowc_utf8(void *map, int *pwc, const char *s, size_t n)
	{
	  int consumed = utf8_mbtowc(pwc, s, n);

	  (void)map;
	  return consumed;
	}

	/* Encoder callback for UTF-8; MAP is unused. */
	static int wctomb_utf8(void *map, char *s, int wc)
	{
	  int written = utf8_wctomb(s, wc);

	  (void)map;
	  return written;
	}

	/*
	 * Implementation of US-ASCII.
	 * Probably on most architectures this compiles to less than 256 bytes
	 * of code, so we can save space by not having a table for this one.
	 */

	/*
	 * Decoder callback for US-ASCII; MAP is unused.
	 * Returns 1 for a byte in 0x01..0x7f, 0 for NUL (or when S is null or
	 * N is zero) and -1 for a byte with the top bit set.
	 */
	static int mbtowc_ascii(void *map, int *pwc, const char *s, size_t n)
	{
	  unsigned char byte;

	  (void)map;
	  if (s == NULL || n == 0)
	    return 0;

	  byte = (unsigned char)*s;
	  if (byte > 0x7f)
	    return -1;
	  if (pwc != NULL)
	    *pwc = byte;
	  return byte != 0;
	}

	/*
	 * Encoder callback for US-ASCII; MAP is unused.
	 * Returns 1 after storing WC in *S, 0 when S is null, and -1 when WC
	 * is outside 0..0x7f.
	 */
	static int wctomb_ascii(void *map, char *s, int wc)
	{
	  (void)map;
	  if (s == NULL)
	    return 0;
	  if (wc < 0 || wc > 0x7f)
	    return -1;
	  s[0] = (char)wc;
	  return 1;
	}

	/*
	 * Implementation of ISO-8859-1.
	 * Probably on most architectures this compiles to less than 256 bytes
	 * of code, so we can save space by not having a table for this one.
	 */

	/*
	 * Decoder callback for ISO-8859-1; MAP is unused.
	 * Every byte value decodes to the character with the same number, so
	 * this never fails: it returns 1 for a non-NUL byte and 0 for NUL (or
	 * when S is null or N is zero).
	 */
	static int mbtowc_iso1(void *map, int *pwc, const char *s, size_t n)
	{
	  unsigned char byte;

	  (void)map;
	  if (s == NULL || n == 0)
	    return 0;

	  /* An 8-bit value can never exceed 0xff, so no range check is needed. */
	  byte = (unsigned char)*s;
	  if (pwc != NULL)
	    *pwc = byte;
	  return byte != 0;
	}

	/*
	 * Encoder callback for ISO-8859-1; MAP is unused.
	 * Returns 1 after storing WC in *S, 0 when S is null, and -1 when WC
	 * is outside 0..0xff.
	 */
	static int wctomb_iso1(void *map, char *s, int wc)
	{
	  (void)map;
	  if (s == NULL)
	    return 0;
	  if (wc < 0 || wc > 0xff)
	    return -1;
	  s[0] = (char)wc;
	  return 1;
	}

	/*
	* Implementation of any 8-bit charset.
	*
	* A struct map is the private data of a table-driven charset.
	*/

	struct map {
	/* Forward table indexed by byte value (0..255); an entry of 0xffff
	 * is treated as "no character for this byte". */
	const uint16_t *from;
	/* Inverse lookup table; null until wctomb_8bit() first builds it,
	 * and left null if that allocation fails. */
	struct inverse_map *to;
	};

	static int mbtowc_8bit(void map1, int pwc, const char *s, size_t n)
	{
	struct map *map = map1;
	uint16_t wc;

	if (!n \|\| !s)
	return 0;
	wc = map->from[(uint8_t)*s];
	if (wc == 0xffff)
	return -1;
	if (pwc)
	*pwc = (int)wc;
	return wc ? 1 : 0;
	}

	/*
	* For the inverse map we use a hash table, which has the advantages
	* of small constant memory requirement and simple memory allocation,
	* but the disadvantage of slow conversion in the worst case.
	* If you need real-time performance while letting a potentially
	* malicious user define their own map, then the method used in
	* linux/drivers/char/consolemap.c would be more appropriate.
	*/

	struct inverse_map {
	/* first[h]: byte value at the head of the chain for hash bucket h. */
	uint8_t first[256];
	/* next[b]: byte value following b in its chain; 0 ends the chain. */
	uint8_t next[256];
	};

	/*
	* HASH selects one of the 256 inverse-map buckets for a character
	* value by keeping its low eight bits.
	* The simple hash is good enough for this application.
	* Use the alternative trivial hashes for testing.
	*/
	#define HASH(i) ((i) & 0xff)
	/* #define HASH(i) 0 */
	/* #define HASH(i) 99 */

	/*
	 * Build the hash-chained inverse of the 256-entry forward table FROM.
	 *
	 * Entries equal to 0xffff are skipped.  Bytes are inserted from 255
	 * down to 0, each at the head of its bucket, so byte 0 can only ever
	 * be the head of a chain; that is what lets a `next` value of 0 mark
	 * the end of a chain.
	 *
	 * Returns a malloc()ed table, or null if the allocation fails.
	 */
	static struct inverse_map *make_inverse_map(const uint16_t *from)
	{
	  struct inverse_map *inv = malloc(sizeof(struct inverse_map));
	  char occupied[256];
	  int byte, bucket, tail;

	  if (!inv)
	    return 0;

	  for (byte = 0; byte < 256; byte++) {
	    inv->first[byte] = 0;
	    inv->next[byte] = 0;
	    occupied[byte] = 0;
	  }

	  byte = 256;
	  while (byte-- > 0) {
	    if (from[byte] == 0xffff)
	      continue;
	    bucket = HASH(from[byte]);
	    inv->next[byte] = inv->first[bucket];
	    inv->first[bucket] = byte;
	    occupied[bucket] = 1;
	  }

	  /*
	   * Point the empty buckets at an empty list: pick the first byte
	   * that has no successor and use it as the head of every bucket
	   * that received no entry.
	   */
	  for (tail = 0; tail < 256 && inv->next[tail]; tail++)
	    ;
	  if (tail < 256) {
	    for (bucket = 0; bucket < 256; bucket++) {
	      if (!occupied[bucket])
	        inv->first[bucket] = tail;
	    }
	  }

	  return inv;
	}

	/*
	 * Encoder callback for table-driven 8-bit charsets; MAP1 is a
	 * struct map.
	 *
	 * Looks WC1 up in the inverse map, building that map on first use.
	 * If the inverse map cannot be allocated, falls back to a linear
	 * search of the forward table.
	 *
	 * Returns 1 after storing the byte in *S, 0 when S is null, and -1
	 * when WC1 has no encoding in this charset.
	 */
	static int wctomb_8bit(void *map1, char *s, int wc1)
	{
	  struct map *map = map1;
	  uint16_t wc;
	  int i;

	  if (!s)
	    return 0;

	  /*
	   * Only 16-bit values can be in the table.  0xffff is the "unmapped"
	   * marker in the forward table, so it must be rejected here;
	   * otherwise the searches below would match an unmapped byte.
	   */
	  if ((wc1 & ~0xffff) || wc1 == 0xffff)
	    return -1;
	  wc = (uint16_t)wc1;

	  /* Skip this allocation to test the case where malloc fails. */
	  if (!map->to)
	    map->to = make_inverse_map(map->from);

	  if (map->to) {
	    /* Use the inverse map: walk the chain for this hash bucket. */
	    i = map->to->first[HASH(wc)];
	    for (;;) {
	      if (map->from[i] == wc) {
	        *s = i;
	        return 1;
	      }
	      i = map->to->next[i];
	      if (!i)
	        break;
	    }
	  }
	  else {
	    /* We don't have an inverse map, so do a linear search. */
	    for (i = 0; i < 256; i++)
	      if (map->from[i] == wc) {
	        *s = i;
	        return 1;
	      }
	  }

	  return -1;
	}

	/*
	* The "constructor" charset_find().
	*/

	struct charset charset_utf8 = {
	6,
	&mbtowc_utf8,
	&wctomb_utf8,
	0
	};

	struct charset charset_iso1 = {
	1,
	&mbtowc_iso1,
	&wctomb_iso1,
	0
	};

	struct charset charset_ascii = {
	1,
	&mbtowc_ascii,
	&wctomb_ascii,
	0
	};

	struct charset charset_find(const char code)
	{
	int i;

	/* Find good (MIME) name. */
	for (i = 0; names[i].bad; i++)
	if (!ascii_strcasecmp(code, names[i].bad)) {
	code = names[i].good;
	break;
	}

	/* Recognise some charsets for which we avoid using a table. */
	if (!ascii_strcasecmp(code, "UTF-8"))
	return &charset_utf8;
	if (!ascii_strcasecmp(code, "US-ASCII"))
	return &charset_ascii;
	if (!ascii_strcasecmp(code, "ISO-8859-1"))
	return &charset_iso1;

	/* Look for a mapping for a simple 8-bit encoding. */
	for (i = 0; maps[i].name; i++)
	if (!ascii_strcasecmp(code, maps[i].name)) {
	if (!maps[i].charset) {
	maps[i].charset = malloc(sizeof(struct charset));
	if (maps[i].charset) {
	struct map *map = malloc(sizeof(struct map));
	if (!map) {
	free(maps[i].charset);
	maps[i].charset = 0;
	}
	else {
	maps[i].charset->max = 1;
	maps[i].charset->mbtowc = &mbtowc_8bit;
	maps[i].charset->wctomb = &wctomb_8bit;
	maps[i].charset->map = map;
	map->from = maps[i].map;
	map->to = 0; /* inverse mapping is created when required */
	}
	}
	}
	return maps[i].charset;
	}

	return 0;
	}

	/*
	* Function to convert a buffer from one encoding to another.
	* Invalid bytes are replaced by '#', and characters that are
	* not available in the target encoding are replaced by '?'.
	* Each of TO and TOLEN may be zero, if the result is not needed.
	* The output buffer is null-terminated, so it is all right to
	* use charset_convert(fromcode, tocode, s, strlen(s), &t, 0).
	*/

	int charset_convert(const char fromcode, const char tocode,
	const char *from, size_t fromlen,
	char *to, size_t tolen)
	{
	int ret = 0;
	struct charset charset1, charset2;
	char tobuf, p;
	int i, j, wc;

	charset1 = charset_find(fromcode);
	charset2 = charset_find(tocode);
	if (!charset1 \|\| !charset2 )
	return -1;

	tobuf = safe_malloc_mul2add_(fromlen, /times/charset2->max, /+/1);
	if (!tobuf)
	return -2;

	for (p = tobuf; fromlen; from += i, fromlen -= i, p += j) {
	i = charset_mbtowc(charset1, &wc, from, fromlen);
	if (!i)
	i = 1;
	else if (i == -1) {
	i = 1;
	wc = '#';
	ret = 2;
	}
	j = charset_wctomb(charset2, p, wc);
	if (j == -1) {
	if (!ret)
	ret = 1;
	j = charset_wctomb(charset2, p, '?');
	if (j == -1)
	j = 0;
	}
	}

	if (tolen)
	*tolen = p - tobuf;
	*p++ = '\0';
	if (to) {
	char *tobuf_saved = tobuf;
	*to = realloc(tobuf, p - tobuf);
	if (*to == NULL)
	*to = tobuf_saved;
	}
	else
	free(tobuf);

	return ret;
	}

	#endif /* !defined _WIN32 && !defined HAVE_ICONV */