/* gnulib/tests/test-wcwidth.c - toolchain/make - Git at Google */

 /* Test of wcwidth() function.
    Copyright (C) 2007-2020 Free Software Foundation, Inc.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

 /* Written by Bruno Haible <bruno@clisp.org>, 2007.  */

 #include <config.h>

 #include <wchar.h>

 #include "signature.h"
 /* Hand the expected prototype of wcwidth -- result type int, one wchar_t
    parameter -- to the SIGNATURE_CHECK macro from "signature.h".
    Presumably this makes compilation fail if <wchar.h> declares wcwidth
    differently; confirm against signature.h.  */
 SIGNATURE_CHECK (wcwidth, int, (wchar_t));

 #include <locale.h>
 #include <string.h>

 #include "c-ctype.h"
 #include "localcharset.h"
 #include "macros.h"

 /* Test driver for wcwidth().
    Checks the ASCII range before any locale is selected (when compiled in),
    then, if a UTF-8 locale is available, checks representative characters
    from several width classes.  Returns 0; a failing ASSERT does not reach
    the return statement's normal path.  */
 int
 main ()
 {
   wchar_t wc;

 #if !GNULIB_WCHAR_SINGLE
 # ifdef C_CTYPE_ASCII
   /* No setlocale call has happened yet: each character in the range
      0x20..0x7E must be reported as one column wide.  */
   wc = 0x20;
   while (wc < 0x7F)
     {
       ASSERT (wcwidth (wc) == 1);
       wc++;
     }
 # endif
 #endif

   /* Everything below needs a UTF-8 locale.  When none can be had, there is
      nothing left to check and the test ends with status 0.  */
   if (setlocale (LC_ALL, "fr_FR.UTF-8") == NULL)
     return 0;

   /* A non-NULL result from setlocale is not proof enough.
      On OpenBSD 4.0, the call succeeds only for the LC_CTYPE category and
      therefore returns "C/fr_FR.UTF-8/C/C/C/C", yet LC_CTYPE is effectively
      an ASCII category; in particular, locale_charset() returns "ASCII".
      So ask locale_charset() what encoding is really in effect.  */
   if (strcmp (locale_charset (), "UTF-8") != 0)
     return 0;

   /* ASCII characters: one column each, now under the UTF-8 locale.  */
   wc = 0x20;
   while (wc < 0x7F)
     {
       ASSERT (wcwidth (wc) == 1);
       wc++;
     }

   /* Non-spacing characters: exactly zero columns.  */
   ASSERT (wcwidth (0x0301) == 0);
   ASSERT (wcwidth (0x05B0) == 0);

   /* Format control characters: zero columns, or a negative result.  */
   ASSERT (wcwidth (0x200E) <= 0);
   ASSERT (wcwidth (0x2060) <= 0);
 #if 0  /* Disabled: wchar_t may be only 16 bits wide.  */
   ASSERT (wcwidth (0xE0001) <= 0);
   ASSERT (wcwidth (0xE0044) <= 0);
 #endif

   /* Zero width characters.
      Width 0 is the desirable answer for U+200B, U+200C, U+200D, since it
      makes wcswidth work better on strings containing them; an
      implementation that treats them like control characters is acceptable
      as well, hence "<= 0" rather than "== 0".  */
   ASSERT (wcwidth (0x200B) <= 0);
   ASSERT (wcwidth (0xFEFF) <= 0);

   /* Math symbols.
      EastAsianWidth.txt
      (<https://www.unicode.org/Public/12.0.0/ucd/EastAsianWidth.txt>)
      marks U+2202 as having ambiguous width (A).  Unicode Standard Annex 11
      <https://www.unicode.org/reports/tr11/tr11-36.html> says:
        "Ambiguous characters behave like wide or narrow characters
         depending on the context (language tag, script identification,
         associated font, source of data, or explicit markup; all can
         provide the context). If the context cannot be established
         reliably, they should be treated as narrow characters by default."
      The locale is the only context wcwidth() has, and "fr_FR.UTF-8" is a
      Western locale rather than an East Asian one, so U+2202 should be
      treated like a narrow character.  */
   ASSERT (wcwidth (0x2202) == 1);

   /* CJK characters: two columns each.  */
   ASSERT (wcwidth (0x3000) == 2);
   ASSERT (wcwidth (0xB250) == 2);
   ASSERT (wcwidth (0xFF1A) == 2);
 #if 0  /* Disabled: wchar_t may be only 16 bits wide.  */
   ASSERT (wcwidth (0x20369) == 2);
   ASSERT (wcwidth (0x2F876) == 2);
 #endif

   return 0;
 }
	/* Test of wcwidth() function.
	Copyright (C) 2007-2020 Free Software Foundation, Inc.

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation; either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program. If not, see <https://www.gnu.org/licenses/>. */

	/* Written by Bruno Haible <bruno@clisp.org>, 2007. */

	#include <config.h>

	#include <wchar.h>

	#include "signature.h"
	/* Hand the expected prototype of wcwidth -- result type int, one wchar_t
	   parameter -- to the SIGNATURE_CHECK macro from "signature.h".
	   Presumably this makes compilation fail if <wchar.h> declares wcwidth
	   differently; confirm against signature.h.  */
	SIGNATURE_CHECK (wcwidth, int, (wchar_t));

	#include <locale.h>
	#include <string.h>

	#include "c-ctype.h"
	#include "localcharset.h"
	#include "macros.h"

	/* Exercise wcwidth(): first on the ASCII range before any locale has been
	   selected (when that part is compiled in), then on a set of sample
	   characters once a UTF-8 locale is confirmed to be active.  Returns 0.  */
	int
	main ()
	{
	  wchar_t wc;
	  int utf8_locale_active;

	#if !GNULIB_WCHAR_SINGLE
	# ifdef C_CTYPE_ASCII
	  /* Characters 0x20 through 0x7E must each be one column wide.  */
	  for (wc = 0x20; wc <= 0x7E; wc++)
	    {
	      ASSERT (wcwidth (wc) == 1);
	    }
	# endif
	#endif

	  /* Try to enter a UTF-8 locale, and verify that the encoding really is
	     UTF-8.  The second half matters on OpenBSD 4.0, where setlocale
	     succeeds only for the LC_CTYPE category and therefore returns
	     "C/fr_FR.UTF-8/C/C/C/C", while LC_CTYPE is effectively an ASCII
	     category; in particular, locale_charset() returns "ASCII".
	     locale_charset() is consulted only when setlocale succeeded.  */
	  utf8_locale_active = (setlocale (LC_ALL, "fr_FR.UTF-8") != NULL
	                        && strcmp (locale_charset (), "UTF-8") == 0);

	  if (utf8_locale_active)
	    {
	      /* Same ASCII range again, this time under the UTF-8 locale.  */
	      for (wc = 0x20; wc <= 0x7E; wc++)
	        {
	          ASSERT (wcwidth (wc) == 1);
	        }

	      /* Non-spacing characters must have width 0.  */
	      ASSERT (wcwidth (0x0301) == 0);
	      ASSERT (wcwidth (0x05B0) == 0);

	      /* Format control characters must not have a positive width.  */
	      ASSERT (wcwidth (0x200E) <= 0);
	      ASSERT (wcwidth (0x2060) <= 0);
	#if 0 /* Left out because wchar_t may be only 16 bits.  */
	      ASSERT (wcwidth (0xE0001) <= 0);
	      ASSERT (wcwidth (0xE0044) <= 0);
	#endif

	      /* Zero width characters.  Width 0 is desirable for U+200B, U+200C,
	         U+200D because wcswidth then works better on strings that contain
	         them, but treating them like control characters is acceptable
	         too, so only a positive width is rejected.  */
	      ASSERT (wcwidth (0x200B) <= 0);
	      ASSERT (wcwidth (0xFEFF) <= 0);

	      /* Math symbols.  U+2202 has ambiguous width (A) according to
	         EastAsianWidth.txt
	         (<https://www.unicode.org/Public/12.0.0/ucd/EastAsianWidth.txt>).
	         Unicode Standard Annex 11
	         <https://www.unicode.org/reports/tr11/tr11-36.html> states:
	           "Ambiguous characters behave like wide or narrow characters
	            depending on the context (language tag, script identification,
	            associated font, source of data, or explicit markup; all can
	            provide the context). If the context cannot be established
	            reliably, they should be treated as narrow characters by
	            default."
	         wcwidth() has no context other than the locale, and "fr_FR.UTF-8"
	         is a Western locale, not an East Asian one; therefore U+2202
	         should be treated like a narrow character.  */
	      ASSERT (wcwidth (0x2202) == 1);

	      /* CJK characters must have width 2.  */
	      ASSERT (wcwidth (0x3000) == 2);
	      ASSERT (wcwidth (0xB250) == 2);
	      ASSERT (wcwidth (0xFF1A) == 2);
	#if 0 /* Left out because wchar_t may be only 16 bits.  */
	      ASSERT (wcwidth (0x20369) == 2);
	      ASSERT (wcwidth (0x2F876) == 2);
	#endif
	    }

	  return 0;
	}