Lib/test/test_capi/test_unicode.py - platform/external/python/cpython3 - Git at Google

 import unittest
 import sys
 from test import support
 from test.support import import_helper

 try:
     import _testcapi
 except ImportError:
     _testcapi = None


 class CAPITest(unittest.TestCase):

     # Test PyUnicode_FromFormat()
     def test_from_format(self):
         # Exercise PyUnicode_FromFormat() through ctypes: literal text, '%'
         # escapes, string/object conversions, integer conversions, width and
         # precision handling, '%p' and unsupported format specifiers.
         import_helper.import_module('ctypes')
         from ctypes import (
             c_char_p,
             pythonapi, py_object, sizeof,
             c_int, c_long, c_longlong, c_ssize_t,
             c_uint, c_ulong, c_ulonglong, c_size_t, c_void_p)
         name = "PyUnicode_FromFormat"
         _PyUnicode_FromFormat = getattr(pythonapi, name)
         # Only the format argument is declared; the remaining (variadic)
         # arguments are converted one by one by ctypes at call time.
         _PyUnicode_FromFormat.argtypes = (c_char_p,)
         _PyUnicode_FromFormat.restype = py_object

         def PyUnicode_FromFormat(format, *args):
             # Wrap str arguments in py_object so that the str object itself
             # is handed to the C function; every other argument (bytes, None,
             # ctypes integers, ...) is passed through unchanged.
             cargs = tuple(
                 py_object(arg) if isinstance(arg, str) else arg
                 for arg in args)
             return _PyUnicode_FromFormat(format, *cargs)

         def check_format(expected, format, *args):
             # Format `args` with `format` and compare against `expected`.
             text = PyUnicode_FromFormat(format, *args)
             self.assertEqual(expected, text)

         # ascii format, non-ascii argument
         check_format('ascii\x7f=unicode\xe9',
                      b'ascii\x7f=%U', 'unicode\xe9')

         # non-ascii format, ascii argument: ensure that PyUnicode_FromFormatV()
         # raises an error
         self.assertRaisesRegex(ValueError,
             r'^PyUnicode_FromFormatV\(\) expects an ASCII-encoded format '
             'string, got a non-ASCII byte: 0xe9$',
             PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii')

         # test "%c"
         check_format('\uabcd',
                      b'%c', c_int(0xabcd))
         check_format('\U0010ffff',
                      b'%c', c_int(0x10ffff))
         # 0x110000 is one past the last valid code point
         with self.assertRaises(OverflowError):
             PyUnicode_FromFormat(b'%c', c_int(0x110000))
         # Issue #18183
         check_format('\U00010000\U00100000',
                      b'%c%c', c_int(0x10000), c_int(0x100000))

         # test "%"
         check_format('%',
                      b'%')
         check_format('%',
                      b'%%')
         check_format('%s',
                      b'%%s')
         check_format('[%]',
                      b'[%%]')
         check_format('%abc',
                      b'%%%s', b'abc')

         # truncated string
         check_format('abc',
                      b'%.3s', b'abcdef')
         # the 5-byte cut falls inside the UTF-8 sequence of the euro sign,
         # which is expected to decode to U+FFFD
         check_format('abc[\ufffd',
                      b'%.5s', 'abc[\u20ac]'.encode('utf8'))
         check_format("'\\u20acABC'",
                      b'%A', '\u20acABC')
         check_format("'\\u20",
                      b'%.5A', '\u20acABCDEF')
         check_format("'\u20acABC'",
                      b'%R', '\u20acABC')
         check_format("'\u20acA",
                      b'%.3R', '\u20acABCDEF')
         check_format('\u20acAB',
                      b'%.3S', '\u20acABCDEF')
         check_format('\u20acAB',
                      b'%.3U', '\u20acABCDEF')
         check_format('\u20acAB',
                      b'%.3V', '\u20acABCDEF', None)
         check_format('abc[\ufffd',
                      b'%.5V', None, 'abc[\u20ac]'.encode('utf8'))

         # the following tests come from issue #7330
         # test width modifier and precision modifier with %S
         check_format("repr=  abc",
                      b'repr=%5S', 'abc')
         check_format("repr=ab",
                      b'repr=%.2S', 'abc')
         check_format("repr=   ab",
                      b'repr=%5.2S', 'abc')

         # test width modifier and precision modifier with %R
         check_format("repr=   'abc'",
                      b'repr=%8R', 'abc')
         check_format("repr='ab",
                      b'repr=%.3R', 'abc')
         check_format("repr=  'ab",
                      b'repr=%5.3R', 'abc')

         # test width modifier and precision modifier with %A
         check_format("repr=   'abc'",
                      b'repr=%8A', 'abc')
         check_format("repr='ab",
                      b'repr=%.3A', 'abc')
         check_format("repr=  'ab",
                      b'repr=%5.3A', 'abc')

         # test width modifier and precision modifier with %s
         check_format("repr=  abc",
                      b'repr=%5s', b'abc')
         check_format("repr=ab",
                      b'repr=%.2s', b'abc')
         check_format("repr=   ab",
                      b'repr=%5.2s', b'abc')

         # test width modifier and precision modifier with %U
         check_format("repr=  abc",
                      b'repr=%5U', 'abc')
         check_format("repr=ab",
                      b'repr=%.2U', 'abc')
         check_format("repr=   ab",
                      b'repr=%5.2U', 'abc')

         # test width modifier and precision modifier with %V
         # (the bytes argument is only used when the object argument is None)
         check_format("repr=  abc",
                      b'repr=%5V', 'abc', b'123')
         check_format("repr=ab",
                      b'repr=%.2V', 'abc', b'123')
         check_format("repr=   ab",
                      b'repr=%5.2V', 'abc', b'123')
         check_format("repr=  123",
                      b'repr=%5V', None, b'123')
         check_format("repr=12",
                      b'repr=%.2V', None, b'123')
         check_format("repr=   12",
                      b'repr=%5.2V', None, b'123')

         # test integer formats (%i, %d, %u)
         check_format('010',
                      b'%03i', c_int(10))
         check_format('0010',
                      b'%0.4i', c_int(10))
         check_format('-123',
                      b'%i', c_int(-123))
         check_format('-123',
                      b'%li', c_long(-123))
         check_format('-123',
                      b'%lli', c_longlong(-123))
         check_format('-123',
                      b'%zi', c_ssize_t(-123))

         check_format('-123',
                      b'%d', c_int(-123))
         check_format('-123',
                      b'%ld', c_long(-123))
         check_format('-123',
                      b'%lld', c_longlong(-123))
         check_format('-123',
                      b'%zd', c_ssize_t(-123))

         check_format('123',
                      b'%u', c_uint(123))
         check_format('123',
                      b'%lu', c_ulong(123))
         check_format('123',
                      b'%llu', c_ulonglong(123))
         check_format('123',
                      b'%zu', c_size_t(123))

         # test long output: extreme values of the widest integer types
         min_longlong = -(2 ** (8 * sizeof(c_longlong) - 1))
         max_longlong = -min_longlong - 1
         check_format(str(min_longlong),
                      b'%lld', c_longlong(min_longlong))
         check_format(str(max_longlong),
                      b'%lld', c_longlong(max_longlong))
         max_ulonglong = 2 ** (8 * sizeof(c_ulonglong)) - 1
         check_format(str(max_ulonglong),
                      b'%llu', c_ulonglong(max_ulonglong))
         # formatting c_void_p(-1) must not fail; the result is not inspected
         PyUnicode_FromFormat(b'%p', c_void_p(-1))

         # test padding (width and/or precision)
         check_format('123'.rjust(10, '0'),
                      b'%010i', c_int(123))
         check_format('123'.rjust(100),
                      b'%100i', c_int(123))
         check_format('123'.rjust(100, '0'),
                      b'%.100i', c_int(123))
         check_format('123'.rjust(80, '0').rjust(100),
                      b'%100.80i', c_int(123))

         check_format('123'.rjust(10, '0'),
                      b'%010u', c_uint(123))
         check_format('123'.rjust(100),
                      b'%100u', c_uint(123))
         check_format('123'.rjust(100, '0'),
                      b'%.100u', c_uint(123))
         check_format('123'.rjust(80, '0').rjust(100),
                      b'%100.80u', c_uint(123))

         check_format('123'.rjust(10, '0'),
                      b'%010x', c_int(0x123))
         check_format('123'.rjust(100),
                      b'%100x', c_int(0x123))
         check_format('123'.rjust(100, '0'),
                      b'%.100x', c_int(0x123))
         check_format('123'.rjust(80, '0').rjust(100),
                      b'%100.80x', c_int(0x123))

         # test %A
         check_format(r"%A:'abc\xe9\uabcd\U0010ffff'",
                      b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')

         # test %V
         check_format('repr=abc',
                      b'repr=%V', 'abc', b'xyz')

         # test %p
         # We cannot test the exact result,
         # because it returns a hex representation of a C pointer,
         # which is going to be different each time. But, we can test the format.
         p_format_regex = r'^0x[a-zA-Z0-9]{3,}$'
         p_format1 = PyUnicode_FromFormat(b'%p', 'abc')
         self.assertIsInstance(p_format1, str)
         self.assertRegex(p_format1, p_format_regex)

         p_format2 = PyUnicode_FromFormat(b'%p %p', '123456', b'xyz')
         self.assertIsInstance(p_format2, str)
         self.assertRegex(p_format2,
                          r'0x[a-zA-Z0-9]{3,} 0x[a-zA-Z0-9]{3,}')

         # Extra args are ignored:
         p_format3 = PyUnicode_FromFormat(b'%p', '123456', None, b'xyz')
         self.assertIsInstance(p_format3, str)
         self.assertRegex(p_format3, p_format_regex)

         # Test string decode from parameter of %V using utf-8.
         # b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of
         # '\u4eba\u6c11'
         check_format('repr=\u4eba\u6c11',
                      b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')

         # Test the replace error handler: the invalid byte 0xff is expected
         # to come out as U+FFFD.
         check_format('repr=abc\ufffd',
                      b'repr=%V', None, b'abc\xff')

         # not supported: copy the raw format string. these tests are just here
         # to check for crashes and should not be considered as specifications
         check_format('%s',
                      b'%1%s', b'abc')
         check_format('%1abc',
                      b'%1abc')
         check_format('%+i',
                      b'%+i', c_int(10))
         check_format('%.%s',
                      b'%.%s', b'abc')

         # Issue #33817: empty strings
         check_format('',
                      b'')
         check_format('',
                      b'%s', b'')

     # Test PyUnicode_AsWideChar()
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_aswidechar(self):
         # Copy strings into buffers of various lengths and check both the
         # reported size and the text read back from the buffer.
         from _testcapi import unicode_aswidechar
         import_helper.import_module('ctypes')
         from ctypes import c_wchar, sizeof

         # (text, buffer length, expected size, expected buffer text)
         cases = (
             ('abcdef', 2, 2, 'ab'),
             ('abc', 3, 3, 'abc'),
             ('abc', 4, 3, 'abc\0'),
             ('abc', 10, 3, 'abc\0'),
             ('abc\0def', 20, 7, 'abc\0def\0'),
         )
         for text, buflen, expected_size, expected_text in cases:
             buf, copied = unicode_aswidechar(text, buflen)
             self.assertEqual(copied, expected_size)
             self.assertEqual(buf, expected_text)

         # A non-BMP character is expected to occupy two units when wchar_t
         # is 2 bytes wide and a single unit otherwise; the buffer has one
         # extra slot for the trailing NUL.
         astral = chr(0x10ffff)
         buflen, units = (3, 2) if sizeof(c_wchar) == 2 else (2, 1)
         buf, copied = unicode_aswidechar(astral, buflen)
         self.assertEqual(copied, units)
         self.assertEqual(buf, astral + '\0')

     # Test PyUnicode_AsWideCharString()
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_aswidecharstring(self):
         # Convert strings to newly allocated wide-character strings and
         # check the reported size and the text including the trailing NUL.
         from _testcapi import unicode_aswidecharstring
         import_helper.import_module('ctypes')
         from ctypes import c_wchar, sizeof

         # (text, expected size); the result always carries a trailing NUL
         for text, expected_size in (('abc', 3), ('abc\0def', 7)):
             buf, copied = unicode_aswidecharstring(text)
             self.assertEqual(copied, expected_size)
             self.assertEqual(buf, text + '\0')

         # A non-BMP character is expected to take two units with a 2-byte
         # wchar_t and one unit otherwise.
         astral = chr(0x10ffff)
         units = 2 if sizeof(c_wchar) == 2 else 1
         buf, copied = unicode_aswidecharstring(astral)
         self.assertEqual(copied, units)
         self.assertEqual(buf, astral + '\0')

     # Test PyUnicode_AsUCS4()
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_asucs4(self):
         # NOTE(review): the third argument presumably selects whether a
         # trailing NUL is copied, and '\uffff' presumably marks buffer slots
         # left unwritten by the helper -- confirm against _testcapi.
         from _testcapi import unicode_asucs4
         samples = ('abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600',
                    'a\ud800b\udfffc', '\ud834\udd1e')
         for text in samples:
             size = len(text)
             self.assertEqual(unicode_asucs4(text, size, True), text + '\0')
             self.assertEqual(unicode_asucs4(text, size, False),
                              text + '\uffff')
             self.assertEqual(unicode_asucs4(text, size + 1, True),
                              text + '\0\uffff')
             self.assertEqual(unicode_asucs4(text, size + 1, False),
                              text + '\0\uffff')
             # a buffer that is too short must be rejected
             self.assertRaises(SystemError, unicode_asucs4, text, size - 1, True)
             self.assertRaises(SystemError, unicode_asucs4, text, size - 2, False)
             # same checks on a string with an embedded NUL
             doubled = text + '\0' + text
             self.assertEqual(unicode_asucs4(doubled, len(doubled), True),
                              doubled + '\0')
             self.assertEqual(unicode_asucs4(doubled, len(doubled), False),
                              doubled + '\uffff')

     # Test PyUnicode_AsUTF8()
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_asutf8(self):
         from _testcapi import unicode_asutf8

         # (text, expected UTF-8 bytes): 2-, 3- and 4-byte encodings
         encodings = (
             ('\u0100', b'\xc4\x80'),
             ('\uffff', b'\xef\xbf\xbf'),
             (chr(0x10ffff), b'\xf4\x8f\xbf\xbf'),
         )
         for text, encoded in encodings:
             self.assertEqual(unicode_asutf8(text), encoded)

         # lone surrogates must be rejected
         with self.assertRaises(UnicodeEncodeError):
             unicode_asutf8('a\ud800b\udfffc')

     # Test PyUnicode_AsUTF8AndSize()
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_asutf8andsize(self):
         from _testcapi import unicode_asutf8andsize

         # (text, expected UTF-8 bytes); the reported size must equal the
         # number of encoded bytes (2, 3 and 4 respectively)
         encodings = (
             ('\u0100', b'\xc4\x80'),
             ('\uffff', b'\xef\xbf\xbf'),
             (chr(0x10ffff), b'\xf4\x8f\xbf\xbf'),
         )
         for text, encoded in encodings:
             self.assertEqual(unicode_asutf8andsize(text),
                              (encoded, len(encoded)))

         # lone surrogates must be rejected
         with self.assertRaises(UnicodeEncodeError):
             unicode_asutf8andsize('a\ud800b\udfffc')

     # Test PyUnicode_FindChar()
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_findchar(self):
         # The subject string is called `text` rather than `str` so that the
         # builtin is not shadowed inside this test.
         # NOTE(review): the last argument of unicode_findchar is presumably
         # the search direction (1 forward, -1 backward) -- confirm against
         # _testcapi.
         from _testcapi import unicode_findchar

         # Every character is unique within its string, so both directions
         # must report the same index.
         for text in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
             for i, ch in enumerate(text):
                 self.assertEqual(unicode_findchar(text, ord(ch), 0, len(text), 1), i)
                 self.assertEqual(unicode_findchar(text, ord(ch), 0, len(text), -1), i)

         text = "!>_<!"
         # 0x110000 is beyond the last code point and can never be found
         self.assertEqual(unicode_findchar(text, 0x110000, 0, len(text), 1), -1)
         self.assertEqual(unicode_findchar(text, 0x110000, 0, len(text), -1), -1)
         # start < end
         self.assertEqual(unicode_findchar(text, ord('!'), 1, len(text)+1, 1), 4)
         self.assertEqual(unicode_findchar(text, ord('!'), 1, len(text)+1, -1), 4)
         # start >= end
         self.assertEqual(unicode_findchar(text, ord('!'), 0, 0, 1), -1)
         self.assertEqual(unicode_findchar(text, ord('!'), len(text), 0, 1), -1)
         # negative
         self.assertEqual(unicode_findchar(text, ord('!'), -len(text), -1, 1), 0)
         self.assertEqual(unicode_findchar(text, ord('!'), -len(text), -1, -1), 0)

     # Test PyUnicode_CopyCharacters()
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_copycharacters(self):
         from _testcapi import unicode_copycharacters

         # Five-character samples ordered from the narrowest character range
         # (ASCII) to the widest (non-BMP).
         samples = [
             'abcde', '\xa1\xa2\xa3\xa4\xa5',
             '\u4f60\u597d\u4e16\u754c\uff01',
             '\U0001f600\U0001f601\U0001f602\U0001f603\U0001f604'
         ]

         for pos, source in enumerate(samples):
             # wide -> narrow: exceed maxchar limitation
             for narrower in samples[:pos]:
                 with self.assertRaises(SystemError):
                     unicode_copycharacters(narrower, 0, source, 0, 5)
             # same kind, varying the source offset
             for offset in range(5):
                 tail = source[offset:offset + 5]
                 self.assertEqual(
                     unicode_copycharacters(source, 0, source, offset, 5),
                     (tail.ljust(5, '\0'), 5 - offset))
             # same kind, varying source and destination offsets together
             for offset in range(5):
                 tail = source[offset:offset + 5]
                 self.assertEqual(
                     unicode_copycharacters(source, offset, source, offset, 5),
                     (tail.rjust(5, '\0'), 5 - offset))
             # narrow -> wide
             # Tests omitted since this creates invalid strings.

         ascii_text = samples[0]
         # (to_start, from_start) pairs that lie outside the string
         for to_start, from_start in ((6, 0), (-1, 0), (0, 6), (0, -1)):
             self.assertRaises(IndexError, unicode_copycharacters,
                               ascii_text, to_start, ascii_text, from_start, 5)
         self.assertRaises(SystemError, unicode_copycharacters,
                           ascii_text, 1, ascii_text, 0, 5)
         self.assertRaises(SystemError, unicode_copycharacters,
                           ascii_text, 0, ascii_text, 0, -1)
         self.assertRaises(SystemError, unicode_copycharacters,
                           ascii_text, 0, b'', 0, 0)

     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_pep393_utf8_caching_bug(self):
         # Issue #25709: Problem with string concatenation and utf-8 cache
         from _testcapi import getargs_s_hash
         for codepoint in (0x24, 0xa4, 0x20ac, 0x1f40d):
             unit = chr(codepoint).encode()
             text = ''
             for repeat in range(1, 6):
                 # Due to CPython specific optimization the 'text' string can
                 # be resized in-place; a fresh chr() result is appended each
                 # time so that 'text' never aliases another variable.
                 text += chr(codepoint)
                 expected = unit * repeat
                 # Parsing with the "s#" format code calls indirectly
                 # PyUnicode_AsUTF8AndSize() which creates the UTF-8
                 # encoded string cached in the Unicode object.
                 self.assertEqual(getargs_s_hash(text), expected)
                 # Check that the second call returns the same result
                 self.assertEqual(getargs_s_hash(text), expected)


 # Run the tests in this module when the file is executed as a script.
 if __name__ == "__main__":
     unittest.main()
	import unittest
	import sys
	from test import support
	from test.support import import_helper

	# _testcapi is an optional extension module; tests that need it are
	# skipped when it is bound to None.
	try:
	    import _testcapi
	except ImportError:
	    _testcapi = None


	class CAPITest(unittest.TestCase):
	    # Tests for the Unicode C API, driven through ctypes and the
	    # _testcapi helper module.  Block nesting and the width-padded
	    # expected strings (e.g. "repr=  abc" for '%5S') are restored here.

	    # Test PyUnicode_FromFormat()
	    def test_from_format(self):
	        import_helper.import_module('ctypes')
	        from ctypes import (
	            c_char_p,
	            pythonapi, py_object, sizeof,
	            c_int, c_long, c_longlong, c_ssize_t,
	            c_uint, c_ulong, c_ulonglong, c_size_t, c_void_p)
	        name = "PyUnicode_FromFormat"
	        _PyUnicode_FromFormat = getattr(pythonapi, name)
	        # Only the format argument is declared; the remaining (variadic)
	        # arguments are converted one by one by ctypes at call time.
	        _PyUnicode_FromFormat.argtypes = (c_char_p,)
	        _PyUnicode_FromFormat.restype = py_object

	        def PyUnicode_FromFormat(format, *args):
	            # Wrap str arguments in py_object so that the str object
	            # itself is handed to the C function.
	            cargs = tuple(
	                py_object(arg) if isinstance(arg, str) else arg
	                for arg in args)
	            return _PyUnicode_FromFormat(format, *cargs)

	        def check_format(expected, format, *args):
	            text = PyUnicode_FromFormat(format, *args)
	            self.assertEqual(expected, text)

	        # ascii format, non-ascii argument
	        check_format('ascii\x7f=unicode\xe9',
	                     b'ascii\x7f=%U', 'unicode\xe9')

	        # non-ascii format, ascii argument: ensure that PyUnicode_FromFormatV()
	        # raises an error
	        self.assertRaisesRegex(ValueError,
	            r'^PyUnicode_FromFormatV\(\) expects an ASCII-encoded format '
	            'string, got a non-ASCII byte: 0xe9$',
	            PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii')

	        # test "%c"
	        check_format('\uabcd',
	                     b'%c', c_int(0xabcd))
	        check_format('\U0010ffff',
	                     b'%c', c_int(0x10ffff))
	        with self.assertRaises(OverflowError):
	            PyUnicode_FromFormat(b'%c', c_int(0x110000))
	        # Issue #18183
	        check_format('\U00010000\U00100000',
	                     b'%c%c', c_int(0x10000), c_int(0x100000))

	        # test "%"
	        check_format('%',
	                     b'%')
	        check_format('%',
	                     b'%%')
	        check_format('%s',
	                     b'%%s')
	        check_format('[%]',
	                     b'[%%]')
	        check_format('%abc',
	                     b'%%%s', b'abc')

	        # truncated string
	        check_format('abc',
	                     b'%.3s', b'abcdef')
	        check_format('abc[\ufffd',
	                     b'%.5s', 'abc[\u20ac]'.encode('utf8'))
	        check_format("'\\u20acABC'",
	                     b'%A', '\u20acABC')
	        check_format("'\\u20",
	                     b'%.5A', '\u20acABCDEF')
	        check_format("'\u20acABC'",
	                     b'%R', '\u20acABC')
	        check_format("'\u20acA",
	                     b'%.3R', '\u20acABCDEF')
	        check_format('\u20acAB',
	                     b'%.3S', '\u20acABCDEF')
	        check_format('\u20acAB',
	                     b'%.3U', '\u20acABCDEF')
	        check_format('\u20acAB',
	                     b'%.3V', '\u20acABCDEF', None)
	        check_format('abc[\ufffd',
	                     b'%.5V', None, 'abc[\u20ac]'.encode('utf8'))

	        # the following tests come from issue #7330
	        # test width modifier and precision modifier with %S
	        check_format("repr=  abc",
	                     b'repr=%5S', 'abc')
	        check_format("repr=ab",
	                     b'repr=%.2S', 'abc')
	        check_format("repr=   ab",
	                     b'repr=%5.2S', 'abc')

	        # test width modifier and precision modifier with %R
	        check_format("repr=   'abc'",
	                     b'repr=%8R', 'abc')
	        check_format("repr='ab",
	                     b'repr=%.3R', 'abc')
	        check_format("repr=  'ab",
	                     b'repr=%5.3R', 'abc')

	        # test width modifier and precision modifier with %A
	        check_format("repr=   'abc'",
	                     b'repr=%8A', 'abc')
	        check_format("repr='ab",
	                     b'repr=%.3A', 'abc')
	        check_format("repr=  'ab",
	                     b'repr=%5.3A', 'abc')

	        # test width modifier and precision modifier with %s
	        check_format("repr=  abc",
	                     b'repr=%5s', b'abc')
	        check_format("repr=ab",
	                     b'repr=%.2s', b'abc')
	        check_format("repr=   ab",
	                     b'repr=%5.2s', b'abc')

	        # test width modifier and precision modifier with %U
	        check_format("repr=  abc",
	                     b'repr=%5U', 'abc')
	        check_format("repr=ab",
	                     b'repr=%.2U', 'abc')
	        check_format("repr=   ab",
	                     b'repr=%5.2U', 'abc')

	        # test width modifier and precision modifier with %V
	        check_format("repr=  abc",
	                     b'repr=%5V', 'abc', b'123')
	        check_format("repr=ab",
	                     b'repr=%.2V', 'abc', b'123')
	        check_format("repr=   ab",
	                     b'repr=%5.2V', 'abc', b'123')
	        check_format("repr=  123",
	                     b'repr=%5V', None, b'123')
	        check_format("repr=12",
	                     b'repr=%.2V', None, b'123')
	        check_format("repr=   12",
	                     b'repr=%5.2V', None, b'123')

	        # test integer formats (%i, %d, %u)
	        check_format('010',
	                     b'%03i', c_int(10))
	        check_format('0010',
	                     b'%0.4i', c_int(10))
	        check_format('-123',
	                     b'%i', c_int(-123))
	        check_format('-123',
	                     b'%li', c_long(-123))
	        check_format('-123',
	                     b'%lli', c_longlong(-123))
	        check_format('-123',
	                     b'%zi', c_ssize_t(-123))

	        check_format('-123',
	                     b'%d', c_int(-123))
	        check_format('-123',
	                     b'%ld', c_long(-123))
	        check_format('-123',
	                     b'%lld', c_longlong(-123))
	        check_format('-123',
	                     b'%zd', c_ssize_t(-123))

	        check_format('123',
	                     b'%u', c_uint(123))
	        check_format('123',
	                     b'%lu', c_ulong(123))
	        check_format('123',
	                     b'%llu', c_ulonglong(123))
	        check_format('123',
	                     b'%zu', c_size_t(123))

	        # test long output
	        min_longlong = -(2 ** (8 * sizeof(c_longlong) - 1))
	        max_longlong = -min_longlong - 1
	        check_format(str(min_longlong),
	                     b'%lld', c_longlong(min_longlong))
	        check_format(str(max_longlong),
	                     b'%lld', c_longlong(max_longlong))
	        max_ulonglong = 2 ** (8 * sizeof(c_ulonglong)) - 1
	        check_format(str(max_ulonglong),
	                     b'%llu', c_ulonglong(max_ulonglong))
	        # formatting c_void_p(-1) must not fail; the result is not inspected
	        PyUnicode_FromFormat(b'%p', c_void_p(-1))

	        # test padding (width and/or precision)
	        check_format('123'.rjust(10, '0'),
	                     b'%010i', c_int(123))
	        check_format('123'.rjust(100),
	                     b'%100i', c_int(123))
	        check_format('123'.rjust(100, '0'),
	                     b'%.100i', c_int(123))
	        check_format('123'.rjust(80, '0').rjust(100),
	                     b'%100.80i', c_int(123))

	        check_format('123'.rjust(10, '0'),
	                     b'%010u', c_uint(123))
	        check_format('123'.rjust(100),
	                     b'%100u', c_uint(123))
	        check_format('123'.rjust(100, '0'),
	                     b'%.100u', c_uint(123))
	        check_format('123'.rjust(80, '0').rjust(100),
	                     b'%100.80u', c_uint(123))

	        check_format('123'.rjust(10, '0'),
	                     b'%010x', c_int(0x123))
	        check_format('123'.rjust(100),
	                     b'%100x', c_int(0x123))
	        check_format('123'.rjust(100, '0'),
	                     b'%.100x', c_int(0x123))
	        check_format('123'.rjust(80, '0').rjust(100),
	                     b'%100.80x', c_int(0x123))

	        # test %A
	        check_format(r"%A:'abc\xe9\uabcd\U0010ffff'",
	                     b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')

	        # test %V
	        check_format('repr=abc',
	                     b'repr=%V', 'abc', b'xyz')

	        # test %p
	        # We cannot test the exact result,
	        # because it returns a hex representation of a C pointer,
	        # which is going to be different each time. But, we can test the format.
	        p_format_regex = r'^0x[a-zA-Z0-9]{3,}$'
	        p_format1 = PyUnicode_FromFormat(b'%p', 'abc')
	        self.assertIsInstance(p_format1, str)
	        self.assertRegex(p_format1, p_format_regex)

	        p_format2 = PyUnicode_FromFormat(b'%p %p', '123456', b'xyz')
	        self.assertIsInstance(p_format2, str)
	        self.assertRegex(p_format2,
	                         r'0x[a-zA-Z0-9]{3,} 0x[a-zA-Z0-9]{3,}')

	        # Extra args are ignored:
	        p_format3 = PyUnicode_FromFormat(b'%p', '123456', None, b'xyz')
	        self.assertIsInstance(p_format3, str)
	        self.assertRegex(p_format3, p_format_regex)

	        # Test string decode from parameter of %V using utf-8.
	        # b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of
	        # '\u4eba\u6c11'
	        check_format('repr=\u4eba\u6c11',
	                     b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')

	        # Test replace error handler.
	        check_format('repr=abc\ufffd',
	                     b'repr=%V', None, b'abc\xff')

	        # not supported: copy the raw format string. these tests are just here
	        # to check for crashes and should not be considered as specifications
	        check_format('%s',
	                     b'%1%s', b'abc')
	        check_format('%1abc',
	                     b'%1abc')
	        check_format('%+i',
	                     b'%+i', c_int(10))
	        check_format('%.%s',
	                     b'%.%s', b'abc')

	        # Issue #33817: empty strings
	        check_format('',
	                     b'')
	        check_format('',
	                     b'%s', b'')

	    # Test PyUnicode_AsWideChar()
	    @support.cpython_only
	    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
	    def test_aswidechar(self):
	        from _testcapi import unicode_aswidechar
	        import_helper.import_module('ctypes')
	        from ctypes import c_wchar, sizeof

	        wchar, size = unicode_aswidechar('abcdef', 2)
	        self.assertEqual(size, 2)
	        self.assertEqual(wchar, 'ab')

	        wchar, size = unicode_aswidechar('abc', 3)
	        self.assertEqual(size, 3)
	        self.assertEqual(wchar, 'abc')

	        wchar, size = unicode_aswidechar('abc', 4)
	        self.assertEqual(size, 3)
	        self.assertEqual(wchar, 'abc\0')

	        wchar, size = unicode_aswidechar('abc', 10)
	        self.assertEqual(size, 3)
	        self.assertEqual(wchar, 'abc\0')

	        wchar, size = unicode_aswidechar('abc\0def', 20)
	        self.assertEqual(size, 7)
	        self.assertEqual(wchar, 'abc\0def\0')

	        nonbmp = chr(0x10ffff)
	        if sizeof(c_wchar) == 2:
	            buflen = 3
	            nchar = 2
	        else: # sizeof(c_wchar) == 4
	            buflen = 2
	            nchar = 1
	        wchar, size = unicode_aswidechar(nonbmp, buflen)
	        self.assertEqual(size, nchar)
	        self.assertEqual(wchar, nonbmp + '\0')

	    # Test PyUnicode_AsWideCharString()
	    @support.cpython_only
	    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
	    def test_aswidecharstring(self):
	        from _testcapi import unicode_aswidecharstring
	        import_helper.import_module('ctypes')
	        from ctypes import c_wchar, sizeof

	        wchar, size = unicode_aswidecharstring('abc')
	        self.assertEqual(size, 3)
	        self.assertEqual(wchar, 'abc\0')

	        wchar, size = unicode_aswidecharstring('abc\0def')
	        self.assertEqual(size, 7)
	        self.assertEqual(wchar, 'abc\0def\0')

	        nonbmp = chr(0x10ffff)
	        if sizeof(c_wchar) == 2:
	            nchar = 2
	        else: # sizeof(c_wchar) == 4
	            nchar = 1
	        wchar, size = unicode_aswidecharstring(nonbmp)
	        self.assertEqual(size, nchar)
	        self.assertEqual(wchar, nonbmp + '\0')

	    # Test PyUnicode_AsUCS4()
	    @support.cpython_only
	    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
	    def test_asucs4(self):
	        from _testcapi import unicode_asucs4
	        for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600',
	                  'a\ud800b\udfffc', '\ud834\udd1e']:
	            l = len(s)
	            self.assertEqual(unicode_asucs4(s, l, True), s+'\0')
	            self.assertEqual(unicode_asucs4(s, l, False), s+'\uffff')
	            self.assertEqual(unicode_asucs4(s, l+1, True), s+'\0\uffff')
	            self.assertEqual(unicode_asucs4(s, l+1, False), s+'\0\uffff')
	            self.assertRaises(SystemError, unicode_asucs4, s, l-1, True)
	            self.assertRaises(SystemError, unicode_asucs4, s, l-2, False)
	            s = '\0'.join([s, s])
	            self.assertEqual(unicode_asucs4(s, len(s), True), s+'\0')
	            self.assertEqual(unicode_asucs4(s, len(s), False), s+'\uffff')

	    # Test PyUnicode_AsUTF8()
	    @support.cpython_only
	    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
	    def test_asutf8(self):
	        from _testcapi import unicode_asutf8

	        bmp = '\u0100'
	        bmp2 = '\uffff'
	        nonbmp = chr(0x10ffff)

	        self.assertEqual(unicode_asutf8(bmp), b'\xc4\x80')
	        self.assertEqual(unicode_asutf8(bmp2), b'\xef\xbf\xbf')
	        self.assertEqual(unicode_asutf8(nonbmp), b'\xf4\x8f\xbf\xbf')
	        self.assertRaises(UnicodeEncodeError, unicode_asutf8, 'a\ud800b\udfffc')

	    # Test PyUnicode_AsUTF8AndSize()
	    @support.cpython_only
	    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
	    def test_asutf8andsize(self):
	        from _testcapi import unicode_asutf8andsize

	        bmp = '\u0100'
	        bmp2 = '\uffff'
	        nonbmp = chr(0x10ffff)

	        self.assertEqual(unicode_asutf8andsize(bmp), (b'\xc4\x80', 2))
	        self.assertEqual(unicode_asutf8andsize(bmp2), (b'\xef\xbf\xbf', 3))
	        self.assertEqual(unicode_asutf8andsize(nonbmp), (b'\xf4\x8f\xbf\xbf', 4))
	        self.assertRaises(UnicodeEncodeError, unicode_asutf8andsize, 'a\ud800b\udfffc')

	    # Test PyUnicode_FindChar()
	    @support.cpython_only
	    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
	    def test_findchar(self):
	        # The subject string is called `text` so that the builtin `str`
	        # is not shadowed.
	        from _testcapi import unicode_findchar

	        for text in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
	            for i, ch in enumerate(text):
	                self.assertEqual(unicode_findchar(text, ord(ch), 0, len(text), 1), i)
	                self.assertEqual(unicode_findchar(text, ord(ch), 0, len(text), -1), i)

	        text = "!>_<!"
	        self.assertEqual(unicode_findchar(text, 0x110000, 0, len(text), 1), -1)
	        self.assertEqual(unicode_findchar(text, 0x110000, 0, len(text), -1), -1)
	        # start < end
	        self.assertEqual(unicode_findchar(text, ord('!'), 1, len(text)+1, 1), 4)
	        self.assertEqual(unicode_findchar(text, ord('!'), 1, len(text)+1, -1), 4)
	        # start >= end
	        self.assertEqual(unicode_findchar(text, ord('!'), 0, 0, 1), -1)
	        self.assertEqual(unicode_findchar(text, ord('!'), len(text), 0, 1), -1)
	        # negative
	        self.assertEqual(unicode_findchar(text, ord('!'), -len(text), -1, 1), 0)
	        self.assertEqual(unicode_findchar(text, ord('!'), -len(text), -1, -1), 0)

	    # Test PyUnicode_CopyCharacters()
	    @support.cpython_only
	    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
	    def test_copycharacters(self):
	        from _testcapi import unicode_copycharacters

	        strings = [
	            'abcde', '\xa1\xa2\xa3\xa4\xa5',
	            '\u4f60\u597d\u4e16\u754c\uff01',
	            '\U0001f600\U0001f601\U0001f602\U0001f603\U0001f604'
	        ]

	        for idx, from_ in enumerate(strings):
	            # wide -> narrow: exceed maxchar limitation
	            for to in strings[:idx]:
	                self.assertRaises(
	                    SystemError,
	                    unicode_copycharacters, to, 0, from_, 0, 5
	                )
	            # same kind
	            for from_start in range(5):
	                self.assertEqual(
	                    unicode_copycharacters(from_, 0, from_, from_start, 5),
	                    (from_[from_start:from_start+5].ljust(5, '\0'),
	                     5-from_start)
	                )
	            for to_start in range(5):
	                self.assertEqual(
	                    unicode_copycharacters(from_, to_start, from_, to_start, 5),
	                    (from_[to_start:to_start+5].rjust(5, '\0'),
	                     5-to_start)
	                )
	            # narrow -> wide
	            # Tests omitted since this creates invalid strings.

	        s = strings[0]
	        self.assertRaises(IndexError, unicode_copycharacters, s, 6, s, 0, 5)
	        self.assertRaises(IndexError, unicode_copycharacters, s, -1, s, 0, 5)
	        self.assertRaises(IndexError, unicode_copycharacters, s, 0, s, 6, 5)
	        self.assertRaises(IndexError, unicode_copycharacters, s, 0, s, -1, 5)
	        self.assertRaises(SystemError, unicode_copycharacters, s, 1, s, 0, 5)
	        self.assertRaises(SystemError, unicode_copycharacters, s, 0, s, 0, -1)
	        self.assertRaises(SystemError, unicode_copycharacters, s, 0, b'', 0, 0)

	    @support.cpython_only
	    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
	    def test_pep393_utf8_caching_bug(self):
	        # Issue #25709: Problem with string concatenation and utf-8 cache
	        from _testcapi import getargs_s_hash
	        for k in 0x24, 0xa4, 0x20ac, 0x1f40d:
	            s = ''
	            for i in range(5):
	                # Due to CPython specific optimization the 's' string can be
	                # resized in-place.
	                s += chr(k)
	                # Parsing with the "s#" format code calls indirectly
	                # PyUnicode_AsUTF8AndSize() which creates the UTF-8
	                # encoded string cached in the Unicode object.
	                self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
	                # Check that the second call returns the same result
	                self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))


	# Run the tests in this module when the file is executed as a script.
	if __name__ == "__main__":
	    unittest.main()