Lib/test/test_capi/test_unicode.py - platform/external/python/cpython3 - Git at Google

 import unittest
 import sys
 from test import support
 from test.support import import_helper

 try:
     import _testcapi
 except ImportError:
     _testcapi = None


 # Passed to the _testcapi wrappers in the argument positions where the C
 # function under test would receive a NULL pointer.
 # NOTE(review): presumably the wrappers translate None into NULL -- confirm
 # against the _testcapi implementation.
 NULL = None

 class Str(str):
     """Plain str subclass, used as a non-exact-str argument in the tests."""


 class CAPITest(unittest.TestCase):

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_fromobject(self):
         """Test PyUnicode_FromObject()"""
         from _testcapi import unicode_fromobject as fromobject

         samples = (
             'abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600',
             'a\ud800b\udfffc', '\ud834\udd1e',
         )
         for text in samples:
             # An exact str comes back equal to the input.
             self.assertEqual(fromobject(text), text)
             # A str subclass instance yields a distinct, exact str.
             converted = fromobject(Str(text))
             self.assertEqual(converted, text)
             self.assertIs(type(converted), str)
             self.assertIsNot(converted, text)

         # Non-str objects are rejected.
         for bad in (b'abc', []):
             with self.assertRaises(TypeError):
                 fromobject(bad)
         # CRASHES fromobject(NULL)

     def test_from_format(self):
         """Test PyUnicode_FromFormat()

         The C function is looked up on ctypes.pythonapi and called directly,
         so the test is skipped when ctypes cannot be imported.
         """
         import_helper.import_module('ctypes')
         from ctypes import (
             c_char_p,
             pythonapi, py_object, sizeof,
             c_int, c_long, c_longlong, c_ssize_t,
             c_uint, c_ulong, c_ulonglong, c_size_t, c_void_p)
         # Only the format argument gets a declared ctypes type; the remaining
         # arguments are passed through as given by each call.
         name = "PyUnicode_FromFormat"
         _PyUnicode_FromFormat = getattr(pythonapi, name)
         _PyUnicode_FromFormat.argtypes = (c_char_p,)
         _PyUnicode_FromFormat.restype = py_object

         # Thin wrapper: str arguments are wrapped in py_object, every other
         # argument is forwarded unchanged.
         def PyUnicode_FromFormat(format, *args):
             cargs = tuple(
                 py_object(arg) if isinstance(arg, str) else arg
                 for arg in args)
             return _PyUnicode_FromFormat(format, *cargs)

         # Format the arguments and compare the result with the expected str.
         def check_format(expected, format, *args):
             text = PyUnicode_FromFormat(format, *args)
             self.assertEqual(expected, text)

         # ascii format, non-ascii argument
         check_format('ascii\x7f=unicode\xe9',
                      b'ascii\x7f=%U', 'unicode\xe9')

         # non-ascii format, ascii argument: ensure that PyUnicode_FromFormatV()
         # raises an error
         self.assertRaisesRegex(ValueError,
             r'^PyUnicode_FromFormatV\(\) expects an ASCII-encoded format '
             'string, got a non-ASCII byte: 0xe9$',
             PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii')

         # test "%c"
         check_format('\uabcd',
                      b'%c', c_int(0xabcd))
         check_format('\U0010ffff',
                      b'%c', c_int(0x10ffff))
         # 0x110000 is one past the largest code point accepted above
         with self.assertRaises(OverflowError):
             PyUnicode_FromFormat(b'%c', c_int(0x110000))
         # Issue #18183
         check_format('\U00010000\U00100000',
                      b'%c%c', c_int(0x10000), c_int(0x100000))

         # test "%"
         check_format('%',
                      b'%%')
         check_format('%s',
                      b'%%s')
         check_format('[%]',
                      b'[%%]')
         check_format('%abc',
                      b'%%%s', b'abc')

         # truncated string
         check_format('abc',
                      b'%.3s', b'abcdef')
         check_format('abc[\ufffd',
                      b'%.5s', 'abc[\u20ac]'.encode('utf8'))
         check_format("'\\u20acABC'",
                      b'%A', '\u20acABC')
         check_format("'\\u20",
                      b'%.5A', '\u20acABCDEF')
         check_format("'\u20acABC'",
                      b'%R', '\u20acABC')
         check_format("'\u20acA",
                      b'%.3R', '\u20acABCDEF')
         check_format('\u20acAB',
                      b'%.3S', '\u20acABCDEF')
         check_format('\u20acAB',
                      b'%.3U', '\u20acABCDEF')
         check_format('\u20acAB',
                      b'%.3V', '\u20acABCDEF', None)
         check_format('abc[\ufffd',
                      b'%.5V', None, 'abc[\u20ac]'.encode('utf8'))

         # the following tests come from #7330
         # test width modifier and precision modifier with %S
         check_format("repr=  abc",
                      b'repr=%5S', 'abc')
         check_format("repr=ab",
                      b'repr=%.2S', 'abc')
         check_format("repr=   ab",
                      b'repr=%5.2S', 'abc')

         # test width modifier and precision modifier with %R
         check_format("repr=   'abc'",
                      b'repr=%8R', 'abc')
         check_format("repr='ab",
                      b'repr=%.3R', 'abc')
         check_format("repr=  'ab",
                      b'repr=%5.3R', 'abc')

         # test width modifier and precision modifier with %A
         check_format("repr=   'abc'",
                      b'repr=%8A', 'abc')
         check_format("repr='ab",
                      b'repr=%.3A', 'abc')
         check_format("repr=  'ab",
                      b'repr=%5.3A', 'abc')

         # test width modifier and precision modifier with %s
         check_format("repr=  abc",
                      b'repr=%5s', b'abc')
         check_format("repr=ab",
                      b'repr=%.2s', b'abc')
         check_format("repr=   ab",
                      b'repr=%5.2s', b'abc')

         # test width modifier and precision modifier with %U
         check_format("repr=  abc",
                      b'repr=%5U', 'abc')
         check_format("repr=ab",
                      b'repr=%.2U', 'abc')
         check_format("repr=   ab",
                      b'repr=%5.2U', 'abc')

         # test width modifier and precision modifier with %V
         check_format("repr=  abc",
                      b'repr=%5V', 'abc', b'123')
         check_format("repr=ab",
                      b'repr=%.2V', 'abc', b'123')
         check_format("repr=   ab",
                      b'repr=%5.2V', 'abc', b'123')
         check_format("repr=  123",
                      b'repr=%5V', None, b'123')
         check_format("repr=12",
                      b'repr=%.2V', None, b'123')
         check_format("repr=   12",
                      b'repr=%5.2V', None, b'123')

         # test integer formats (%i, %d, %u)
         check_format('010',
                      b'%03i', c_int(10))
         check_format('0010',
                      b'%0.4i', c_int(10))
         check_format('-123',
                      b'%i', c_int(-123))
         check_format('-123',
                      b'%li', c_long(-123))
         check_format('-123',
                      b'%lli', c_longlong(-123))
         check_format('-123',
                      b'%zi', c_ssize_t(-123))

         check_format('-123',
                      b'%d', c_int(-123))
         check_format('-123',
                      b'%ld', c_long(-123))
         check_format('-123',
                      b'%lld', c_longlong(-123))
         check_format('-123',
                      b'%zd', c_ssize_t(-123))

         check_format('123',
                      b'%u', c_uint(123))
         check_format('123',
                      b'%lu', c_ulong(123))
         check_format('123',
                      b'%llu', c_ulonglong(123))
         check_format('123',
                      b'%zu', c_size_t(123))

         # test long output
         # Extreme values are derived from the platform's sizeof() results.
         min_longlong = -(2 ** (8 * sizeof(c_longlong) - 1))
         max_longlong = -min_longlong - 1
         check_format(str(min_longlong),
                      b'%lld', c_longlong(min_longlong))
         check_format(str(max_longlong),
                      b'%lld', c_longlong(max_longlong))
         max_ulonglong = 2 ** (8 * sizeof(c_ulonglong)) - 1
         check_format(str(max_ulonglong),
                      b'%llu', c_ulonglong(max_ulonglong))
         # The result is not inspected: this only checks that the call
         # completes.
         PyUnicode_FromFormat(b'%p', c_void_p(-1))

         # test padding (width and/or precision)
         check_format('123'.rjust(10, '0'),
                      b'%010i', c_int(123))
         check_format('123'.rjust(100),
                      b'%100i', c_int(123))
         check_format('123'.rjust(100, '0'),
                      b'%.100i', c_int(123))
         check_format('123'.rjust(80, '0').rjust(100),
                      b'%100.80i', c_int(123))

         check_format('123'.rjust(10, '0'),
                      b'%010u', c_uint(123))
         check_format('123'.rjust(100),
                      b'%100u', c_uint(123))
         check_format('123'.rjust(100, '0'),
                      b'%.100u', c_uint(123))
         check_format('123'.rjust(80, '0').rjust(100),
                      b'%100.80u', c_uint(123))

         check_format('123'.rjust(10, '0'),
                      b'%010x', c_int(0x123))
         check_format('123'.rjust(100),
                      b'%100x', c_int(0x123))
         check_format('123'.rjust(100, '0'),
                      b'%.100x', c_int(0x123))
         check_format('123'.rjust(80, '0').rjust(100),
                      b'%100.80x', c_int(0x123))

         # test %A
         check_format(r"%A:'abc\xe9\uabcd\U0010ffff'",
                      b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')

         # test %V
         check_format('repr=abc',
                      b'repr=%V', 'abc', b'xyz')

         # test %p
         # We cannot test the exact result,
         # because it returns a hex representation of a C pointer,
         # which is going to be different each time. But, we can test the format.
         p_format_regex = r'^0x[a-zA-Z0-9]{3,}$'
         p_format1 = PyUnicode_FromFormat(b'%p', 'abc')
         self.assertIsInstance(p_format1, str)
         self.assertRegex(p_format1, p_format_regex)

         # NOTE(review): unlike p_format_regex, this pattern is not anchored
         # with ^ and $.
         p_format2 = PyUnicode_FromFormat(b'%p %p', '123456', b'xyz')
         self.assertIsInstance(p_format2, str)
         self.assertRegex(p_format2,
                          r'0x[a-zA-Z0-9]{3,} 0x[a-zA-Z0-9]{3,}')

         # Extra args are ignored:
         p_format3 = PyUnicode_FromFormat(b'%p', '123456', None, b'xyz')
         self.assertIsInstance(p_format3, str)
         self.assertRegex(p_format3, p_format_regex)

         # Test string decode from parameter of %s using utf-8.
         # b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of
         # '\u4eba\u6c11'
         check_format('repr=\u4eba\u6c11',
                      b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')

         # Test replace error handler.
         check_format('repr=abc\ufffd',
                      b'repr=%V', None, b'abc\xff')

         # Issue #33817: empty strings
         check_format('',
                      b'')
         check_format('',
                      b'%s', b'')

         # check for crashes
         # Each of these formats is expected to be rejected with SystemError.
         for fmt in (b'%', b'%0', b'%01', b'%.', b'%.1',
                     b'%0%s', b'%1%s', b'%.%s', b'%.1%s', b'%1abc',
                     b'%l', b'%ll', b'%z', b'%ls', b'%lls', b'%zs'):
             with self.subTest(fmt=fmt):
                 self.assertRaisesRegex(SystemError, 'invalid format string',
                     PyUnicode_FromFormat, fmt, b'abc')
         self.assertRaisesRegex(SystemError, 'invalid format string',
             PyUnicode_FromFormat, b'%+i', c_int(10))

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_aswidechar(self):
         """Test PyUnicode_AsWideChar()"""
         from _testcapi import unicode_aswidechar
         import_helper.import_module('ctypes')
         from ctypes import c_wchar, sizeof

         # (string, buffer length, expected size, expected buffer contents)
         cases = (
             ('abcdef', 2, 2, 'ab'),
             ('abc', 3, 3, 'abc'),
             ('abc', 4, 3, 'abc\0'),
             ('abc', 10, 3, 'abc\0'),
             ('abc\0def', 20, 7, 'abc\0def\0'),
         )
         for text, buflen, expected_size, expected_wchar in cases:
             wchar, size = unicode_aswidechar(text, buflen)
             self.assertEqual(size, expected_size)
             self.assertEqual(wchar, expected_wchar)

         # The non-BMP character is expected to take two units when wchar_t
         # is 2 bytes wide and one unit otherwise; the buffer is one unit
         # longer than that.
         nonbmp = chr(0x10ffff)
         nchar = 2 if sizeof(c_wchar) == 2 else 1
         wchar, size = unicode_aswidechar(nonbmp, nchar + 1)
         self.assertEqual(size, nchar)
         self.assertEqual(wchar, nonbmp + '\0')

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_aswidecharstring(self):
         """Test PyUnicode_AsWideCharString()"""
         from _testcapi import unicode_aswidecharstring
         import_helper.import_module('ctypes')
         from ctypes import c_wchar, sizeof

         nonbmp = chr(0x10ffff)
         # The non-BMP character is expected to count as two units when
         # wchar_t is 2 bytes wide and as one unit otherwise.
         nonbmp_units = 2 if sizeof(c_wchar) == 2 else 1

         # (string, expected size); the returned string is the input followed
         # by a NUL.
         cases = (
             ('abc', 3),
             ('abc\0def', 7),
             (nonbmp, nonbmp_units),
         )
         for text, expected_size in cases:
             wchar, size = unicode_aswidecharstring(text)
             self.assertEqual(size, expected_size)
             self.assertEqual(wchar, text + '\0')

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_asucs4(self):
         """Test PyUnicode_AsUCS4()"""
         from _testcapi import unicode_asucs4

         samples = ('abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600',
                    'a\ud800b\udfffc', '\ud834\udd1e')
         for text in samples:
             size = len(text)
             # Buffer exactly as long as the string.
             self.assertEqual(unicode_asucs4(text, size, True),
                              text + '\0')
             self.assertEqual(unicode_asucs4(text, size, False),
                              text + '\uffff')
             # Buffer one item longer than the string.
             self.assertEqual(unicode_asucs4(text, size + 1, True),
                              text + '\0\uffff')
             self.assertEqual(unicode_asucs4(text, size + 1, False),
                              text + '\0\uffff')
             # Buffer too short.
             with self.assertRaises(SystemError):
                 unicode_asucs4(text, size - 1, True)
             with self.assertRaises(SystemError):
                 unicode_asucs4(text, size - 2, False)
             # Two copies of the string joined by an embedded NUL.
             doubled = '\0'.join([text, text])
             self.assertEqual(unicode_asucs4(doubled, len(doubled), True),
                              doubled + '\0')
             self.assertEqual(unicode_asucs4(doubled, len(doubled), False),
                              doubled + '\uffff')

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_asutf8(self):
         """Test PyUnicode_AsUTF8()"""
         from _testcapi import unicode_asutf8

         # (string, expected UTF-8 bytes)
         expected_encodings = (
             ('\u0100', b'\xc4\x80'),
             ('\uffff', b'\xef\xbf\xbf'),
             (chr(0x10ffff), b'\xf4\x8f\xbf\xbf'),
         )
         for text, encoded in expected_encodings:
             self.assertEqual(unicode_asutf8(text), encoded)

         # A string containing lone surrogates is rejected.
         with self.assertRaises(UnicodeEncodeError):
             unicode_asutf8('a\ud800b\udfffc')

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_asutf8andsize(self):
         """Test PyUnicode_AsUTF8AndSize()"""
         from _testcapi import unicode_asutf8andsize

         # (string, expected (UTF-8 bytes, size) result)
         expected_results = (
             ('\u0100', (b'\xc4\x80', 2)),
             ('\uffff', (b'\xef\xbf\xbf', 3)),
             (chr(0x10ffff), (b'\xf4\x8f\xbf\xbf', 4)),
         )
         for text, result in expected_results:
             self.assertEqual(unicode_asutf8andsize(text), result)

         # A string containing lone surrogates is rejected.
         with self.assertRaises(UnicodeEncodeError):
             unicode_asutf8andsize('a\ud800b\udfffc')

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_concat(self):
         """Test PyUnicode_Concat()"""
         from _testcapi import unicode_concat as concat

         # (left, right, expected concatenation)
         cases = (
             ('abc', 'def', 'abcdef'),
             ('abc', 'где', 'abcгде'),
             ('абв', 'def', 'абвdef'),
             ('абв', 'где', 'абвгде'),
             ('a\0b', 'c\0d', 'a\0bc\0d'),
         )
         for left, right, expected in cases:
             self.assertEqual(concat(left, right), expected)

         # A non-str operand on either side is rejected.
         bad_pairs = (
             (b'abc', 'def'),
             ('abc', b'def'),
             (b'abc', b'def'),
             ([], 'def'),
             ('abc', []),
             ([], []),
         )
         for left, right in bad_pairs:
             with self.assertRaises(TypeError):
                 concat(left, right)
         # CRASHES concat(NULL, 'def')
         # CRASHES concat('abc', NULL)

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_split(self):
         """Test PyUnicode_Split()"""
         from _testcapi import unicode_split as split

         whitespace_parts = ['a', 'b', 'c', 'd', 'e']
         # (arguments, expected list)
         cases = (
             (('a|b|c|d', '|'), ['a', 'b', 'c', 'd']),
             (('a|b|c|d', '|', 2), ['a', 'b', 'c|d']),
             (('a|b|c|d', '\u20ac'), ['a|b|c|d']),
             (('a||b|c||d', '||'), ['a', 'b|c', 'd']),
             (('а|б|в|г', '|'), ['а', 'б', 'в', 'г']),
             (('абабагаламага', 'а'),
              ['', 'б', 'б', 'г', 'л', 'м', 'г', '']),
             # NULL separator: the expected results split on whitespace
             ((' a\tb\nc\rd\ve\f', NULL), whitespace_parts),
             (('a\x85b\xa0c\u1680d\u2000e', NULL), whitespace_parts),
         )
         for args, expected in cases:
             self.assertEqual(split(*args), expected)

         with self.assertRaises(ValueError):
             split('a|b|c|d', '')
         with self.assertRaises(TypeError):
             split('a|b|c|d', ord('|'))
         with self.assertRaises(TypeError):
             split([], '|')
         # CRASHES split(NULL, '|')

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_rsplit(self):
         """Test PyUnicode_RSplit()"""
         from _testcapi import unicode_rsplit as rsplit

         whitespace_parts = ['a', 'b', 'c', 'd', 'e']
         # (arguments, expected list)
         cases = (
             (('a|b|c|d', '|'), ['a', 'b', 'c', 'd']),
             (('a|b|c|d', '|', 2), ['a|b', 'c', 'd']),
             (('a|b|c|d', '\u20ac'), ['a|b|c|d']),
             (('a||b|c||d', '||'), ['a', 'b|c', 'd']),
             (('а|б|в|г', '|'), ['а', 'б', 'в', 'г']),
             (('абабагаламага', 'а'),
              ['', 'б', 'б', 'г', 'л', 'м', 'г', '']),
             (('aжbжcжd', 'ж'), ['a', 'b', 'c', 'd']),
             # NULL separator: the expected results split on whitespace
             ((' a\tb\nc\rd\ve\f', NULL), whitespace_parts),
             (('a\x85b\xa0c\u1680d\u2000e', NULL), whitespace_parts),
         )
         for args, expected in cases:
             self.assertEqual(rsplit(*args), expected)

         with self.assertRaises(ValueError):
             rsplit('a|b|c|d', '')
         with self.assertRaises(TypeError):
             rsplit('a|b|c|d', ord('|'))
         with self.assertRaises(TypeError):
             rsplit([], '|')
         # CRASHES rsplit(NULL, '|')

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_partition(self):
         """Test PyUnicode_Partition()"""
         from _testcapi import unicode_partition as partition

         # (string, separator, expected 3-tuple)
         cases = (
             ('a|b|c', '|', ('a', '|', 'b|c')),
             ('a||b||c', '||', ('a', '||', 'b||c')),
             ('а|б|в', '|', ('а', '|', 'б|в')),
             ('кабан', 'а', ('к', 'а', 'бан')),
             ('aжbжc', 'ж', ('a', 'ж', 'bжc')),
         )
         for text, sep, expected in cases:
             self.assertEqual(partition(text, sep), expected)

         # An empty separator is rejected.
         with self.assertRaises(ValueError):
             partition('a|b|c', '')
         # Non-str arguments are rejected.
         bad_pairs = (
             (b'a|b|c', '|'),
             ('a|b|c', b'|'),
             ('a|b|c', ord('|')),
             ([], '|'),
         )
         for text, sep in bad_pairs:
             with self.assertRaises(TypeError):
                 partition(text, sep)
         # CRASHES partition(NULL, '|')
         # CRASHES partition('a|b|c', NULL)

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_rpartition(self):
         """Test PyUnicode_RPartition()"""
         from _testcapi import unicode_rpartition as rpartition

         # (string, separator, expected 3-tuple)
         cases = (
             ('a|b|c', '|', ('a|b', '|', 'c')),
             ('a||b||c', '||', ('a||b', '||', 'c')),
             ('а|б|в', '|', ('а|б', '|', 'в')),
             ('кабан', 'а', ('каб', 'а', 'н')),
             ('aжbжc', 'ж', ('aжb', 'ж', 'c')),
         )
         for text, sep, expected in cases:
             self.assertEqual(rpartition(text, sep), expected)

         # An empty separator is rejected.
         with self.assertRaises(ValueError):
             rpartition('a|b|c', '')
         # Non-str arguments are rejected.
         bad_pairs = (
             (b'a|b|c', '|'),
             ('a|b|c', b'|'),
             ('a|b|c', ord('|')),
             ([], '|'),
         )
         for text, sep in bad_pairs:
             with self.assertRaises(TypeError):
                 rpartition(text, sep)
         # CRASHES rpartition(NULL, '|')
         # CRASHES rpartition('a|b|c', NULL)

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_splitlines(self):
         """Test PyUnicode_SplitLines()"""
         from _testcapi import unicode_splitlines as splitlines

         ascii_breaks = 'a\nb\rc\r\nd'
         other_breaks = 'a\x85b\u2028c\u2029d'
         letters = ['a', 'b', 'c', 'd']

         # Without the second argument the line ends are dropped; with True
         # they are kept.
         self.assertEqual(splitlines(ascii_breaks), letters)
         self.assertEqual(splitlines(ascii_breaks, True),
                          ['a\n', 'b\r', 'c\r\n', 'd'])
         self.assertEqual(splitlines(other_breaks), letters)
         self.assertEqual(splitlines(other_breaks, True),
                          ['a\x85', 'b\u2028', 'c\u2029', 'd'])
         self.assertEqual(splitlines('а\nб\rв\r\nг'), ['а', 'б', 'в', 'г'])

         with self.assertRaises(TypeError):
             splitlines(b'a\nb\rc\r\nd')
         # CRASHES splitlines(NULL)

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_translate(self):
         """Test PyUnicode_Translate()"""
         from _testcapi import unicode_translate as translate

         # Mapping values used here: a str, a code point, a two-character str.
         self.assertEqual(
             translate('abcd',
                       {ord('a'): 'A', ord('b'): ord('B'), ord('c'): '<>'}),
             'AB<>d')
         self.assertEqual(
             translate('абвг',
                       {ord('а'): 'А', ord('б'): ord('Б'), ord('в'): '<>'}),
             'АБ<>г')
         self.assertEqual(translate('abc', {}), 'abc')
         self.assertEqual(translate('abc', []), 'abc')

         # The assertions below show a character mapped to None being handed
         # to the error handler named by the optional third argument.
         drop_b = {ord('b'): None}
         self.assertRaises(UnicodeTranslateError, translate, 'abc', drop_b)
         self.assertRaises(UnicodeTranslateError,
                           translate, 'abc', drop_b, 'strict')
         # Unknown error handler name (this check used to appear twice).
         self.assertRaises(LookupError, translate, 'abc', drop_b, 'foo')
         self.assertEqual(translate('abc', drop_b, 'ignore'), 'ac')
         self.assertEqual(translate('abc', drop_b, 'replace'), 'a\ufffdc')
         self.assertEqual(translate('abc', drop_b, 'backslashreplace'),
                          r'a\x62c')
         # XXX Other error handlers do not support UnicodeTranslateError

         # Bad argument types.
         self.assertRaises(TypeError, translate, b'abc', [])
         self.assertRaises(TypeError, translate, 123, [])
         self.assertRaises(TypeError, translate, 'abc', {ord('a'): b'A'})
         self.assertRaises(TypeError, translate, 'abc', 123)
         self.assertRaises(TypeError, translate, 'abc', NULL)
         # CRASHES translate(NULL, [])

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_join(self):
         """Test PyUnicode_Join()"""
         from _testcapi import unicode_join as join

         # (separator, sequence, expected result)
         cases = (
             ('|', ['a', 'b', 'c'], 'a|b|c'),
             ('|', ['a', '', 'c'], 'a||c'),
             ('', ['a', 'b', 'c'], 'abc'),
             # NULL separator: the expected result is joined with spaces
             (NULL, ['a', 'b', 'c'], 'a b c'),
             ('|', ['а', 'б', 'в'], 'а|б|в'),
             ('ж', ['а', 'б', 'в'], 'ажбжв'),
         )
         for sep, items, expected in cases:
             self.assertEqual(join(sep, items), expected)

         # Arguments of the wrong type.
         bad_pairs = (
             (b'|', ['a', 'b', 'c']),
             ('|', [b'a', b'b', b'c']),
             (NULL, [b'a', b'b', b'c']),
             ('|', b'123'),
             ('|', 123),
         )
         for sep, items in bad_pairs:
             with self.assertRaises(TypeError):
                 join(sep, items)
         with self.assertRaises(SystemError):
             join('|', NULL)

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_count(self):
         """Test PyUnicode_Count()"""
         from _testcapi import unicode_count

         # Every character of these strings occurs exactly once.
         for text in ("\xa1", "\u8000\u8080", "\ud800\udc02",
                      "\U0001f100\U0001f1f1"):
             for ch in text:
                 self.assertEqual(unicode_count(text, ch, 0, len(text)), 1)

         text = "!>_<!"
         size = len(text)
         self.assertEqual(unicode_count(text, 'z', 0, size), 0)
         self.assertEqual(unicode_count(text, '', 0, size), size + 1)
         # start < end
         self.assertEqual(unicode_count(text, '!', 1, size + 1), 1)
         # start >= end
         self.assertEqual(unicode_count(text, '!', 0, 0), 0)
         self.assertEqual(unicode_count(text, '!', size, 0), 0)
         # negative
         self.assertEqual(unicode_count(text, '!', -size, -1), 1)
         # bad arguments
         self.assertRaises(TypeError, unicode_count, text, b'!', 0, size)
         self.assertRaises(TypeError, unicode_count, b"!>_<!", '!', 0, size)
         self.assertRaises(TypeError, unicode_count, text, ord('!'), 0, size)
         # Pass four arguments like every other call here.  The previous
         # version passed a stray fifth one, so the TypeError could come from
         # the argument count instead of from the non-str first argument.
         self.assertRaises(TypeError, unicode_count, [], '!', 0, size)
         # CRASHES unicode_count(NULL, '!', 0, len(text))
         # CRASHES unicode_count(text, NULL, 0, len(text))

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_tailmatch(self):
         """Test PyUnicode_Tailmatch()"""
         from _testcapi import unicode_tailmatch as tailmatch

         text = 'ababahalamaha'
         size = len(text)
         lowest = -sys.maxsize - 1

         # As used below, direction -1 checks the start of the slice and
         # direction 1 checks its end.
         self.assertEqual(tailmatch(text, 'aba', 0, size, -1), 1)
         self.assertEqual(tailmatch(text, 'aha', 0, size, 1), 1)

         # Extreme start/end values.
         self.assertEqual(tailmatch(text, 'aba', 0, sys.maxsize, -1), 1)
         self.assertEqual(tailmatch(text, 'aba', -size, sys.maxsize, -1), 1)
         self.assertEqual(tailmatch(text, 'aba', lowest, size, -1), 1)
         self.assertEqual(tailmatch(text, 'aha', 0, sys.maxsize, 1), 1)
         self.assertEqual(tailmatch(text, 'aha', lowest, size, 1), 1)

         # Absent and empty substrings.
         self.assertEqual(tailmatch(text, 'z', 0, size, 1), 0)
         self.assertEqual(tailmatch(text, 'z', 0, size, -1), 0)
         self.assertEqual(tailmatch(text, '', 0, size, 1), 1)
         self.assertEqual(tailmatch(text, '', 0, size, -1), 1)

         # Sub-ranges of the string.
         self.assertEqual(tailmatch(text, 'ba', 0, size - 1, -1), 0)
         self.assertEqual(tailmatch(text, 'ba', 1, size - 1, -1), 1)
         self.assertEqual(tailmatch(text, 'aba', 1, size - 1, -1), 0)
         self.assertEqual(tailmatch(text, 'ba', -size + 1, -1, -1), 1)
         self.assertEqual(tailmatch(text, 'ah', 0, size, 1), 0)
         self.assertEqual(tailmatch(text, 'ah', 0, size - 1, 1), 1)
         self.assertEqual(tailmatch(text, 'ah', -size, -1, 1), 1)

         # bad arguments
         for direction in (-1, 1):
             with self.assertRaises(TypeError):
                 tailmatch(text, ('aba', 'aha'), 0, size, direction)
         # CRASHES tailmatch(NULL, 'aba', 0, len(text), -1)
         # CRASHES tailmatch(text, NULL, 0, len(text), -1)

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_find(self):
         """Test PyUnicode_Find()"""
         from _testcapi import unicode_find as find

         # Each character occurs once, so both directions give its index.
         for text in ("\xa1", "\u8000\u8080", "\ud800\udc02",
                      "\U0001f100\U0001f1f1"):
             size = len(text)
             for index, ch in enumerate(text):
                 self.assertEqual(find(text, ch, 0, size, 1), index)
                 self.assertEqual(find(text, ch, 0, size, -1), index)

         text = "!>_<!"
         size = len(text)
         self.assertEqual(find(text, 'z', 0, size, 1), -1)
         self.assertEqual(find(text, 'z', 0, size, -1), -1)
         self.assertEqual(find(text, '', 0, size, 1), 0)
         self.assertEqual(find(text, '', 0, size, -1), size)
         # start < end
         self.assertEqual(find(text, '!', 1, size + 1, 1), 4)
         self.assertEqual(find(text, '!', 1, size + 1, -1), 4)
         # start >= end
         self.assertEqual(find(text, '!', 0, 0, 1), -1)
         self.assertEqual(find(text, '!', size, 0, 1), -1)
         # negative
         self.assertEqual(find(text, '!', -size, -1, 1), 0)
         self.assertEqual(find(text, '!', -size, -1, -1), 0)
         # bad arguments
         bad_pairs = (
             (text, b'!'),
             (b"!>_<!", '!'),
             (text, ord('!')),
             ([], '!'),
         )
         for haystack, needle in bad_pairs:
             with self.assertRaises(TypeError):
                 find(haystack, needle, 0, size, 1)
         # CRASHES find(NULL, '!', 0, len(text), 1)
         # CRASHES find(text, NULL, 0, len(text), 1)

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_findchar(self):
         """Test PyUnicode_FindChar()"""
         from _testcapi import unicode_findchar

         # Each character occurs once, so both directions give its index.
         for text in ("\xa1", "\u8000\u8080", "\ud800\udc02",
                      "\U0001f100\U0001f1f1"):
             size = len(text)
             for index, ch in enumerate(text):
                 code = ord(ch)
                 self.assertEqual(unicode_findchar(text, code, 0, size, 1),
                                  index)
                 self.assertEqual(unicode_findchar(text, code, 0, size, -1),
                                  index)

         text = "!>_<!"
         size = len(text)
         bang = ord('!')
         self.assertEqual(unicode_findchar(text, 0x110000, 0, size, 1), -1)
         self.assertEqual(unicode_findchar(text, 0x110000, 0, size, -1), -1)
         # start < end
         self.assertEqual(unicode_findchar(text, bang, 1, size + 1, 1), 4)
         self.assertEqual(unicode_findchar(text, bang, 1, size + 1, -1), 4)
         # start >= end
         self.assertEqual(unicode_findchar(text, bang, 0, 0, 1), -1)
         self.assertEqual(unicode_findchar(text, bang, size, 0, 1), -1)
         # negative
         self.assertEqual(unicode_findchar(text, bang, -size, -1, 1), 0)
         self.assertEqual(unicode_findchar(text, bang, -size, -1, -1), 0)
         # bad arguments
         # CRASHES unicode_findchar(b"!>_<!", ord('!'), 0, len(text), 1)
         # CRASHES unicode_findchar([], ord('!'), 0, len(text), 1)
         # CRASHES unicode_findchar(NULL, ord('!'), 0, len(text), 1)

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_replace(self):
         """Test PyUnicode_Replace()"""
         from _testcapi import unicode_replace as replace

         text = 'abracadabra'
         # (arguments after the string, expected result)
         cases = (
             (('a', '='), '=br=c=d=br='),
             (('a', '<>'), '<>br<>c<>d<>br<>'),
             (('abra', '='), '=cad='),
             (('a', '=', 2), '=br=cadabra'),
             (('a', '=', 0), text),
             (('a', '=', sys.maxsize), '=br=c=d=br='),
             (('z', '='), text),
             (('', '='), '=a=b=r=a=c=a=d=a=b=r=a='),
             (('a', 'ж'), 'жbrжcжdжbrж'),
         )
         for args, expected in cases:
             self.assertEqual(replace(text, *args), expected)
         self.assertEqual(replace('абабагаламага', 'а', '='),
                          '=б=б=г=л=м=г=')
         self.assertEqual(replace('Баден-Баден', 'Баден', 'Baden'),
                          'Baden-Baden')
         # bad arguments
         bad_triples = (
             ('a', 'a', b'='),
             ('a', b'a', '='),
             (b'a', 'a', '='),
             ('a', 'a', ord('=')),
             ('a', ord('a'), '='),
             ([], 'a', '='),
         )
         for triple in bad_triples:
             with self.assertRaises(TypeError):
                 replace(*triple)
         # CRASHES replace('a', 'a', NULL)
         # CRASHES replace('a', NULL, '=')
         # CRASHES replace(NULL, 'a', '=')

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_compare(self):
         """Test PyUnicode_Compare()"""
         from _testcapi import unicode_compare as compare

         # (left, right, expected result); embedded NULs take part in
         # the comparison like any other character.
         ordered = (
             ('abc', 'abc', 0),
             ('abc', 'def', -1),
             ('def', 'abc', 1),
             ('abc', 'abc\0def', -1),
             ('abc\0def', 'abc\0def', 0),
             ('абв', 'abc', 1),
         )
         for left, right, expected in ordered:
             self.assertEqual(compare(left, right), expected)

         # A bytes or list operand on either side is a TypeError.
         rejected = (
             (b'abc', 'abc'),
             ('abc', b'abc'),
             (b'abc', b'abc'),
             ([], 'abc'),
             ('abc', []),
             ([], []),
         )
         for left, right in rejected:
             self.assertRaises(TypeError, compare, left, right)
         # CRASHES compare(NULL, 'abc')
         # CRASHES compare('abc', NULL)

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_comparewithasciistring(self):
         """Test PyUnicode_CompareWithASCIIString()"""
         from _testcapi import unicode_comparewithasciistring as compare_ascii

         # (str operand, bytes operand, expected result).  The expected
         # values for the b'abc\0def' rows show that only the bytes before
         # the first NUL take part in the comparison.
         cases = (
             ('abc', b'abc', 0),
             ('abc', b'def', -1),
             ('def', b'abc', 1),
             ('abc', b'abc\0def', 0),
             ('abc\0def', b'abc\0def', 1),
             ('абв', b'abc', 1),
         )
         for text, raw, expected in cases:
             self.assertEqual(compare_ascii(text, raw), expected)

         # CRASHES comparewithasciistring(b'abc', b'abc')
         # CRASHES comparewithasciistring([], b'abc')
         # CRASHES comparewithasciistring(NULL, b'abc')

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_richcompare(self):
         """Test PyUnicode_RichCompare()"""
         from _testcapi import unicode_richcompare as richcompare

         # Comparison opcodes, numbered 0..5 in this order.
         opcodes = LT, LE, EQ, NE, GT, GE = 0, 1, 2, 3, 4, 5
         # Each opcode paired with the Python operator it must agree with.
         reference = (
             (LT, lambda a, b: a < b),
             (LE, lambda a, b: a <= b),
             (EQ, lambda a, b: a == b),
             (NE, lambda a, b: a != b),
             (GT, lambda a, b: a > b),
             (GE, lambda a, b: a >= b),
         )
         samples = ('abc', 'абв', '\U0001f600', 'abc\0')
         for left in samples:
             for right in samples:
                 for opcode, py_op in reference:
                     # assertIs: the result must be the very same bool
                     # object that the Python-level operator produces.
                     self.assertIs(richcompare(left, right, opcode),
                                   py_op(left, right))

         # A bytes or list operand yields NotImplemented for every opcode.
         mismatched = (
             (b'abc', 'abc'),
             ('abc', b'abc'),
             (b'abc', b'abc'),
             ([], 'abc'),
             ('abc', []),
             ([], []),
         )
         for opcode in opcodes:
             for left, right in mismatched:
                 self.assertIs(richcompare(left, right, opcode), NotImplemented)

             # CRASHES richcompare(NULL, 'abc', op)
             # CRASHES richcompare('abc', NULL, op)

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_format(self):
         """Test PyUnicode_Format()"""
         from _testcapi import unicode_format

         # (format string, arguments, expected result); the arguments may
         # be a bare value or a tuple of values.
         cases = (
             ('x=%d!', 42, 'x=42!'),
             ('x=%d!', (42,), 'x=42!'),
             ('x=%d y=%s!', (42, []), 'x=42 y=[]!'),
         )
         for template, values, expected in cases:
             self.assertEqual(unicode_format(template, values), expected)

         # NULL in either position is reported as SystemError.
         for template, values in (('x=%d!', NULL), (NULL, 42)):
             self.assertRaises(SystemError, unicode_format, template, values)

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_contains(self):
         """Test PyUnicode_Contains()"""
         from _testcapi import unicode_contains as contains

         # (container, candidate substring, expected result: 1 or 0)
         cases = (
             ('abcd', '', 1),
             ('abcd', 'b', 1),
             ('abcd', 'x', 0),
             ('abcd', 'ж', 0),
             ('abcd', '\0', 0),
             ('abc\0def', '\0', 1),
             ('abcd', 'bc', 1),
         )
         for container, candidate, expected in cases:
             self.assertEqual(contains(container, candidate), expected)

         # Operands that are not str are rejected with TypeError.
         rejected = (
             (b'abcd', 'b'),
             ('abcd', b'b'),
             (b'abcd', b'b'),
             ([], 'b'),
             ('abcd', ord('b')),
         )
         for container, candidate in rejected:
             self.assertRaises(TypeError, contains, container, candidate)
         # CRASHES contains(NULL, 'b')
         # CRASHES contains('abcd', NULL)

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_isidentifier(self):
         """Test PyUnicode_IsIdentifier()"""
         from _testcapi import unicode_isidentifier as isidentifier

         # Strings that must be accepted (result 1).
         for name in ("a", "b0", "µ", "𝔘𝔫𝔦𝔠𝔬𝔡𝔢"):
             self.assertEqual(isidentifier(name), 1)

         # Strings that must be refused (result 0), including the
         # empty string and names starting with a digit.
         for name in ("", " ", "[", "©", "0", "32M"):
             self.assertEqual(isidentifier(name), 0)

         # CRASHES isidentifier(b"a")
         # CRASHES isidentifier([])
         # CRASHES isidentifier(NULL)

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_copycharacters(self):
         """Test PyUnicode_CopyCharacters()"""
         from _testcapi import unicode_copycharacters as copychars

         # Every sample is exactly this many characters long.
         size = 5
         # Ordered so that each sample holds larger code points than the
         # ones before it (ASCII, Latin-1, BMP, astral).
         samples = [
             'abcde', '\xa1\xa2\xa3\xa4\xa5',
             '\u4f60\u597d\u4e16\u754c\uff01',
             '\U0001f600\U0001f601\U0001f602\U0001f603\U0001f604'
         ]

         for rank, src in enumerate(samples):
             # wide -> narrow: exceed maxchar limitation
             for narrower in samples[:rank]:
                 self.assertRaises(
                     SystemError, copychars, narrower, 0, src, 0, size)

             # same kind, varying the read offset: the copied tail lands at
             # the front of the result and the remainder stays NUL
             for offset in range(size):
                 tail = src[offset:offset + size]
                 self.assertEqual(
                     copychars(src, 0, src, offset, size),
                     (tail.ljust(size, '\0'), size - offset))

             # same kind, read and write offsets equal: the copied tail
             # lands at the back of the result, preceded by NULs
             for offset in range(size):
                 tail = src[offset:offset + size]
                 self.assertEqual(
                     copychars(src, offset, src, offset, size),
                     (tail.rjust(size, '\0'), size - offset))

             # narrow -> wide
             # Tests omitted since this creates invalid strings.

         s = samples[0]
         # (expected exception, arguments) for out-of-range offsets, a
         # negative count, and a non-str source
         failures = (
             (IndexError, (s, 6, s, 0, 5)),
             (IndexError, (s, -1, s, 0, 5)),
             (IndexError, (s, 0, s, 6, 5)),
             (IndexError, (s, 0, s, -1, 5)),
             (SystemError, (s, 1, s, 0, 5)),
             (SystemError, (s, 0, s, 0, -1)),
             (SystemError, (s, 0, b'', 0, 0)),
         )
         for exc_type, call_args in failures:
             self.assertRaises(exc_type, copychars, *call_args)

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_pep393_utf8_caching_bug(self):
         # Issue #25709: Problem with string concatenation and utf-8 cache
         from _testcapi import getargs_s_hash

         for codepoint in (0x24, 0xa4, 0x20ac, 0x1f40d):
             s = ''
             for length in range(1, 6):
                 # Keep this augmented assignment exactly as written: due
                 # to a CPython specific optimization the 's' string can be
                 # resized in-place, which is the scenario under test.
                 s += chr(codepoint)
                 expected = chr(codepoint).encode() * length
                 # Parsing with the "s#" format code indirectly calls
                 # PyUnicode_AsUTF8AndSize(), which creates the UTF-8
                 # encoded string cached in the Unicode object.
                 self.assertEqual(getargs_s_hash(s), expected)
                 # A second call must return the same result.
                 self.assertEqual(getargs_s_hash(s), expected)


 # Run the tests in this module when the file is executed directly as a script.
 if __name__ == "__main__":
     unittest.main()