scripts/generate_identifier_pattern.py - platform/external/python/jinja - Git at Google

 #!/usr/bin/env python3
 import itertools
 import os
 import re
 import sys

 # Refuse to run on any interpreter older than Python 3.
 if sys.version_info < (3,):
     raise RuntimeError('This needs to run on Python 3.')


 def get_characters():
     r"""Find every Unicode character that is valid in a Python `identifier`_
     but is not matched by the regex ``\w`` group.

     ``\w`` matches some characters that aren't valid in identifiers, but
     :meth:`str.isidentifier` will catch that later in lexing.

     All start characters are valid continue characters, so we only test for
     continue characters.

     This docstring is a raw string so that ``\w`` is not parsed as an
     (invalid) string escape sequence.

     :return: Generator yielding one-character strings in ascending code
         point order.

     .. _identifier: https://docs.python.org/3/reference/lexical_analysis.html#identifiers
     """
     # Look the compiled pattern up once rather than on every iteration.
     match_word = re.compile(r'\w').match

     for cp in range(sys.maxunicode + 1):
         s = chr(cp)

         # Prefix with 'a' so ``s`` is tested as a continue character rather
         # than as the first character of an identifier.
         if ('a' + s).isidentifier() and not match_word(s):
             yield s


 def collapse_ranges(data):
     """Collapse a sorted sequence of unique characters into runs of
     consecutive code points.

     Each run is yielded as a ``(first, last)`` tuple of characters; a run
     of length one yields the same character twice.

     Source: https://stackoverflow.com/a/4629241/400617
     """
     def offset(pair):
         # Code point minus position is constant across a run of
         # consecutive code points, so it serves as the grouping key.
         position, char = pair
         return ord(char) - position

     for _, run in itertools.groupby(enumerate(data), offset):
         members = [char for _, char in run]
         yield members[0], members[-1]


 def build_pattern(ranges):
     """Render ``(first, last)`` character ranges as regex character-class
     content.

     A range of one character is written as that character, a range of two
     adjacent characters as both characters, and anything longer as
     ``first-last``.

     :return: The concatenated pattern string.
     """
     def pieces():
         for first, last in ranges:
             if first == last:
                 # Single character.
                 yield first
                 continue

             if ord(last) - ord(first) == 1:
                 # Two adjacent characters; a dash would save nothing.
                 yield first
                 yield last
             else:
                 yield f'{first}-{last}'

     return ''.join(pieces())


 def main():
     """Generate the identifier pattern and save it as a Python module.

     The pattern is produced by chaining :func:`get_characters`,
     :func:`collapse_ranges` and :func:`build_pattern`, then written to
     :file:`jinja2/_identifier.py`, resolved relative to the parent of this
     script's directory.
     """
     characters = get_characters()
     ranges = collapse_ranges(characters)
     pattern = build_pattern(ranges)

     script_dir = os.path.dirname(__file__)
     target = os.path.abspath(
         os.path.join(script_dir, '..', 'jinja2', '_identifier.py')
     )

     lines = (
         '# generated by scripts/generate_identifier_pattern.py\n',
         f'pattern = \'{pattern}\'\n',
     )

     with open(target, 'w', encoding='utf8') as out:
         for line in lines:
             out.write(line)


 # Only generate the pattern file when executed as a script, not on import.
 if __name__ == '__main__':
     main()
	#!/usr/bin/env python3
	import itertools
	import os
	import re
	import sys

	# Refuse to run on any interpreter older than Python 3.
	if sys.version_info[0] < 3:
	    raise RuntimeError('This needs to run on Python 3.')


	def get_characters():
	    r"""Find every Unicode character that is valid in a Python `identifier`_
	    but is not matched by the regex ``\w`` group.

	    ``\w`` matches some characters that aren't valid in identifiers, but
	    :meth:`str.isidentifier` will catch that later in lexing.

	    All start characters are valid continue characters, so we only test for
	    continue characters.

	    This docstring is a raw string so that ``\w`` is not parsed as an
	    (invalid) string escape sequence.

	    :return: Generator yielding one-character strings in ascending code
	        point order.

	    .. _identifier: https://docs.python.org/3/reference/lexical_analysis.html#identifiers
	    """
	    for cp in range(sys.maxunicode + 1):
	        s = chr(cp)

	        # Prefix with 'a' so ``s`` is tested as a continue character rather
	        # than as the first character of an identifier.
	        if ('a' + s).isidentifier() and not re.match(r'\w', s):
	            yield s


	def collapse_ranges(data):
	    """Given a sorted list of unique characters, generate ranges representing
	    sequential code points.

	    Each range is yielded as a ``(first, last)`` tuple of characters; a run
	    of length one yields the same character twice.

	    Source: https://stackoverflow.com/a/4629241/400617
	    """
	    # Code point minus position is constant across a run of consecutive
	    # code points, so it serves as the grouping key.
	    for _, group in itertools.groupby(
	        enumerate(data),
	        lambda x: ord(x[1]) - x[0]
	    ):
	        group = list(group)
	        yield group[0][1], group[-1][1]


	def build_pattern(ranges):
	    """Output the regex pattern for ranges of characters.

	    One and two character ranges output the individual characters; longer
	    ranges are written as ``first-last``.

	    :param ranges: Iterable of ``(first, last)`` character pairs.
	    :return: The concatenated pattern string.
	    """
	    out = []

	    for a, b in ranges:
	        if a == b:  # single char
	            out.append(a)
	        elif ord(b) - ord(a) == 1:  # two chars, range is redundant
	            out.append(a)
	            out.append(b)
	        else:
	            out.append(f'{a}-{b}')

	    return ''.join(out)


	def main():
	    """Build the regex pattern and write it to the file
	    :file:`jinja2/_identifier.py`.

	    The target path is resolved relative to the parent of this script's
	    directory. The file is written as UTF-8 and contains a header comment
	    followed by a single ``pattern`` assignment.
	    """
	    pattern = build_pattern(collapse_ranges(get_characters()))
	    filename = os.path.abspath(os.path.join(
	        os.path.dirname(__file__), '..', 'jinja2', '_identifier.py'
	    ))

	    with open(filename, 'w', encoding='utf8') as f:
	        f.write('# generated by scripts/generate_identifier_pattern.py\n')
	        f.write(f'pattern = \'{pattern}\'\n')


	# Only generate the pattern file when executed as a script, not on import.
	if __name__ == '__main__':
	    main()