import itertools
import pathlib
import sys
import sysconfig
import tempfile
import tokenize
from typing import IO, Dict, List, Optional, Set, Tuple

from pegen.c_generator import CParserGenerator
from pegen.grammar import Grammar
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.parser import Parser
from pegen.parser_generator import ParserGenerator
from pegen.python_generator import PythonParserGenerator
from pegen.tokenizer import Tokenizer

MOD_DIR = pathlib.Path(__file__).resolve().parent

# Definitions read from the Tokens file: a mapping of token index to token name,
# a mapping of exact token string to token index, and the set of names of
# non-exact tokens.
TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]]


def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
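    """Combine the values of two sysconfig variables into a list of flags.

    Return an empty list if either variable is unset.
    """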
    flags = sysconfig.get_config_var(compiler_flags)
    py_flags_nodist = sysconfig.get_config_var(compiler_py_flags_nodist)
    if flags is None or py_flags_nodist is None:
        return []
    return f"{flags} {py_flags_nodist}".split()


def compile_c_extension(
    generated_source_path: str,
    build_dir: Optional[str] = None,
    verbose: bool = False,
    keep_asserts: bool = True,
    disable_optimization: bool = False,
    library_dir: Optional[str] = None,
) -> pathlib.Path:
    """Compile the generated source for a parser generator into an extension module.

    The extension module is generated in the same directory as the provided
    generated source, with the same basename plus the platform-specific extension
    module suffix. For example, for the source mydir/parser.c on a Darwin system
    with Python 3.8, the generated extension will be mydir/parser.cpython-38-darwin.so.

    If *build_dir* is provided, that path is used as the temporary build directory
    for distutils (useful if you want the intermediate build files kept in a
    temporary directory).

    If *library_dir* is provided, that path is used as the directory for a static
    library of the common parser sources (useful when creating multiple extensions,
    since those sources then only need to be compiled once).

    Returns the path of the compiled extension module.
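
    A minimal usage sketch (the paths here are purely illustrative):

        extension_path = compile_c_extension(
            "mydir/parser.c",
            build_dir="mydir/build",
        )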
    """
    import distutils.log
    from distutils.core import Distribution, Extension
    from distutils.tests.support import fixup_build_ext  # type: ignore

    from distutils.ccompiler import new_compiler
    from distutils.dep_util import newer_group
    from distutils.sysconfig import customize_compiler

    if verbose:
        distutils.log.set_threshold(distutils.log.DEBUG)

    source_file_path = pathlib.Path(generated_source_path)
    extension_name = source_file_path.stem
    extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
    extra_compile_args.append("-DPy_BUILD_CORE_MODULE")
    # Define _Py_TEST_PEGEN to not call PyAST_Validate() in Parser/pegen.c
    extra_compile_args.append("-D_Py_TEST_PEGEN")
    extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST")
    if keep_asserts:
        extra_compile_args.append("-UNDEBUG")
    if disable_optimization:
        if sys.platform == 'win32':
            extra_compile_args.append("/Od")
            extra_link_args.append("/LTCG:OFF")
        else:
            extra_compile_args.append("-O0")
            if sysconfig.get_config_var("GNULD") == "yes":
                extra_link_args.append("-fno-lto")

    common_sources = [
        str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
        str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "pegen_errors.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "action_helpers.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "string_parser.c"),
        str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
    ]
    include_dirs = [
        str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
        str(MOD_DIR.parent.parent.parent / "Parser"),
    ]
    extension = Extension(
        extension_name,
        sources=[generated_source_path],
        extra_compile_args=extra_compile_args,
        extra_link_args=extra_link_args,
    )
    dist = Distribution({"name": extension_name, "ext_modules": [extension]})
    cmd = dist.get_command_obj("build_ext")
    fixup_build_ext(cmd)
    cmd.build_lib = str(source_file_path.parent)
    cmd.include_dirs = include_dirs
    if build_dir:
        cmd.build_temp = build_dir
    cmd.ensure_finalized()

    compiler = new_compiler()
    customize_compiler(compiler)
    compiler.set_include_dirs(cmd.include_dirs)
    compiler.set_library_dirs(cmd.library_dirs)
    # Build (or reuse) a static library bundling the common parser sources, so
    # that they only need to be compiled once when building multiple extensions.
    if library_dir:
        library_filename = compiler.library_filename(extension_name,
                                                      output_dir=library_dir)
        if newer_group(common_sources, library_filename, 'newer'):
            if sys.platform == 'win32':
                pdb = compiler.static_lib_format % (extension_name, '.pdb')
                compile_opts = [f"/Fd{library_dir}\\{pdb}"]
                compile_opts.extend(extra_compile_args)
            else:
                compile_opts = extra_compile_args
            objects = compiler.compile(common_sources,
                                       output_dir=library_dir,
                                       debug=cmd.debug,
                                       extra_postargs=compile_opts)
            compiler.create_static_lib(objects, extension_name,
                                       output_dir=library_dir,
                                       debug=cmd.debug)
        if sys.platform == 'win32':
            compiler.add_library_dir(library_dir)
            extension.libraries = [extension_name]
        elif sys.platform == 'darwin':
            # -force_load (and --whole-archive below) make the linker keep every
            # object from the static library in the extension.
            compiler.set_link_objects([
                '-Wl,-force_load', library_filename,
            ])
        else:
            compiler.set_link_objects([
                '-Wl,--whole-archive', library_filename, '-Wl,--no-whole-archive',
            ])
    else:
        # No library directory: compile the common sources directly into the extension.
        extension.sources[0:0] = common_sources

    # Compile the source code to object files.
    ext_path = cmd.get_ext_fullpath(extension_name)
    if newer_group(extension.sources, ext_path, 'newer'):
        objects = compiler.compile(extension.sources,
                                   output_dir=cmd.build_temp,
                                   debug=cmd.debug,
                                   extra_postargs=extra_compile_args)
    else:
        objects = compiler.object_filenames(extension.sources,
                                            output_dir=cmd.build_temp)
    # Now link the object files together into a "shared object"
    compiler.link_shared_object(
        objects, ext_path,
        libraries=cmd.get_libraries(extension),
        extra_postargs=extra_link_args,
        export_symbols=cmd.get_export_symbols(extension),
        debug=cmd.debug,
        build_temp=cmd.build_temp)

    return pathlib.Path(ext_path)


def build_parser(
    grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False
) -> Tuple[Grammar, Parser, Tokenizer]:
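    """Parse *grammar_file* and return the Grammar along with the parser and tokenizer used."""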
    with open(grammar_file) as file:
        tokenizer = Tokenizer(tokenize.generate_tokens(file.readline), verbose=verbose_tokenizer)
        parser = GrammarParser(tokenizer, verbose=verbose_parser)
        grammar = parser.start()

        if not grammar:
            raise parser.make_syntax_error(grammar_file)

    return grammar, parser, tokenizer


def generate_token_definitions(tokens: IO[str]) -> TokenDefinitions:
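    """Parse an open Tokens file into the three token definition maps.

    Lines with a single field name a non-exact token (e.g. NAME); lines with two
    fields name an exact token together with its quoted operator string
    (e.g. LPAR '(').
    """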
    all_tokens: Dict[int, str] = {}
    exact_tokens: Dict[str, int] = {}
    non_exact_tokens: Set[str] = set()
    numbers = itertools.count(0)

    for line in tokens:
        line = line.strip()

        if not line or line.startswith("#"):
            continue

        pieces = line.split()
        index = next(numbers)

        if len(pieces) == 1:
            (token,) = pieces
            non_exact_tokens.add(token)
            all_tokens[index] = token
        elif len(pieces) == 2:
            token, op = pieces
            exact_tokens[op.strip("'")] = index
            all_tokens[index] = token
        else:
            raise ValueError(f"Unexpected line found in Tokens file: {line}")

    return all_tokens, exact_tokens, non_exact_tokens


def build_c_generator(
    grammar: Grammar,
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> ParserGenerator:
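    """Write the C parser for *grammar* to *output_file* and return the generator.

    If *compile_extension* is True, also compile the generated C code into an
    extension module, using a temporary build directory.
    """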
    with open(tokens_file, "r") as tok_file:
        all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
    with open(output_file, "w") as file:
        gen: ParserGenerator = CParserGenerator(
            grammar, all_tokens, exact_tok, non_exact_tok, file, skip_actions=skip_actions
        )
        gen.generate(grammar_file)

    if compile_extension:
        with tempfile.TemporaryDirectory() as build_dir:
            compile_c_extension(
                output_file,
                build_dir=build_dir,
                verbose=verbose_c_extension,
                keep_asserts=keep_asserts_in_extension,
            )
    return gen


def build_python_generator(
    grammar: Grammar,
    grammar_file: str,
    output_file: str,
    skip_actions: bool = False,
) -> ParserGenerator:
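    """Write the Python parser for *grammar* to *output_file* and return the generator."""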
    with open(output_file, "w") as file:
        gen: ParserGenerator = PythonParserGenerator(grammar, file)  # TODO: skip_actions
        gen.generate(grammar_file)
    return gen


def build_c_parser_and_generator(
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, C parser, tokenizer, and parser generator for a given grammar.

    Args:
        grammar_file (string): Path for the grammar file
        tokens_file (string): Path for the tokens file
        output_file (string): Path for the output file
        compile_extension (bool, optional): Whether to compile the C extension.
            Defaults to False.
        verbose_tokenizer (bool, optional): Whether to display additional output
            when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
            when generating the parser. Defaults to False.
        verbose_c_extension (bool, optional): Whether to display additional
            output when compiling the C extension. Defaults to False.
        keep_asserts_in_extension (bool, optional): Whether to keep the assert statements
            when compiling the extension module. Defaults to True.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
            Defaults to False.
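
    A minimal usage sketch (paths are illustrative):

        grammar, parser, tokenizer, gen = build_c_parser_and_generator(
            "Grammar/python.gram",
            "Grammar/Tokens",
            "parser.c",
            compile_extension=True,
        )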
    """
    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
    gen = build_c_generator(
        grammar,
        grammar_file,
        tokens_file,
        output_file,
        compile_extension,
        verbose_c_extension,
        keep_asserts_in_extension,
        skip_actions=skip_actions,
    )

    return grammar, parser, tokenizer, gen


def build_python_parser_and_generator(
    grammar_file: str,
    output_file: str,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, Python parser, tokenizer, and parser generator for a given grammar.

    Args:
        grammar_file (string): Path for the grammar file
        output_file (string): Path for the output file
        verbose_tokenizer (bool, optional): Whether to display additional output
            when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
            when generating the parser. Defaults to False.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
            Defaults to False.
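
    A minimal usage sketch (paths are illustrative):

        grammar, parser, tokenizer, gen = build_python_parser_and_generator(
            "Grammar/python.gram",
            "parser.py",
        )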
    """
    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
    gen = build_python_generator(
        grammar,
        grammar_file,
        output_file,
        skip_actions=skip_actions,
    )
    return grammar, parser, tokenizer, gen