# blob: 920d24657c5edf65d0a0198bfa24421977b69dc0 [file] [log] [blame]
import importlib.util
import io
import os
import pathlib
import sys
import textwrap
import tokenize
import token
from typing import Any, cast, Dict, IO, Type, Final
from pegen.build import compile_c_extension
from pegen.c_generator import CParserGenerator
from pegen.grammar import Grammar
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.parser import Parser
from pegen.python_generator import PythonParserGenerator
from pegen.tokenizer import Tokenizer
# Token tables handed to CParserGenerator further down in this module.
# ALL_TOKENS maps a numeric token type to its name.
ALL_TOKENS = token.tok_name
# EXACT_TOKENS maps a literal token string to its numeric token type.
EXACT_TOKENS = token.EXACT_TOKEN_TYPES  # type: ignore
# Names of every token type that is not the target of an EXACT_TOKENS entry.
NON_EXACT_TOKENS = {
    ALL_TOKENS[index] for index in ALL_TOKENS.keys() - set(EXACT_TOKENS.values())
}
def generate_parser(grammar: Grammar) -> Type[Parser]:
    """Build a Python parser class for *grammar*.

    PythonParserGenerator writes the parser source into an in-memory buffer;
    that source is then executed in a fresh namespace and the object bound to
    ``GeneratedParser`` in that namespace is returned.
    """
    buffer = io.StringIO()
    PythonParserGenerator(grammar, buffer).generate("<string>")

    # Execute the generated source and pull the parser class out of it.
    namespace: Dict[str, Any] = {}
    exec(buffer.getvalue(), namespace)
    return namespace["GeneratedParser"]
def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = False) -> Any:
    """Tokenize *file* (a stream) and run *parser_class* over the tokens.

    Returns whatever the parser's ``start()`` method produces. If that is
    None, the exception built by ``parser.make_syntax_error()`` is raised
    instead.
    """
    token_stream = tokenize.generate_tokens(file.readline)  # type: ignore # typeshed issue #3515
    parser = parser_class(Tokenizer(token_stream), verbose=verbose)
    tree = parser.start()
    if tree is not None:
        return tree
    raise parser.make_syntax_error()
def parse_string(
    source: str, parser_class: Type[Parser], *, dedent: bool = True, verbose: bool = False
) -> Any:
    """Parse the string *source* with *parser_class* via run_parser().

    When *dedent* is true (the default) the text is first passed through
    textwrap.dedent(), so indented triple-quoted literals can be used as-is.
    """
    text = textwrap.dedent(source) if dedent else source
    stream = io.StringIO(text)
    return run_parser(stream, parser_class, verbose=verbose)  # type: ignore # typeshed issue #3515
def make_parser(source: str) -> Type[Parser]:
    """Turn grammar text into a parser class.

    The text is parsed with GrammarParser through parse_string(), and the
    result is passed to generate_parser().
    """
    return generate_parser(parse_string(source, GrammarParser))
def import_file(full_name: str, path: str) -> Any:
    """Import a Python module from a file path and return the module object.

    Args:
        full_name: Name given to the module spec (becomes ``__name__``).
        path: Location of the file to load.

    Returns:
        The freshly executed module. It is not inserted into ``sys.modules``
        by this function.

    Raises:
        ImportError: If no import spec or loader can be determined for
            *path* (for example, the file suffix is not a recognised one).
    """
    spec = importlib.util.spec_from_file_location(full_name, path)
    # spec_from_file_location() returns None when it cannot pick a loader,
    # and a spec's loader may itself be None; fail with a clear error rather
    # than an AttributeError on None further down.
    if spec is None or spec.loader is None:
        raise ImportError(
            f"cannot create an import spec for {full_name!r} from {path!r}",
            name=full_name,
            path=path,
        )
    mod = importlib.util.module_from_spec(spec)
    # See https://docs.python.org/3/reference/import.html?highlight=exec_module#loading
    spec.loader.exec_module(mod)
    return mod
def generate_c_parser_source(grammar: Grammar) -> str:
    """Return, as a string, the C parser source CParserGenerator emits for *grammar*."""
    buffer = io.StringIO()
    generator = CParserGenerator(grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, buffer)
    generator.generate("<string>")
    return buffer.getvalue()
def generate_parser_c_extension(
    grammar: Grammar, path: pathlib.PurePath, debug: bool = False
) -> Any:
    """Generate and compile a parser C extension for *grammar* inside *path*.

    The C source is written to ``parse.c`` in *path*, then handed to
    compile_c_extension() with *path* as the build directory.

    NOTE: as written, this function has no return statement, so it returns
    None despite the ``Any`` annotation.
    TODO: if a module object is returned in the future, express its
    interface using a Protocol.
    """
    # The working directory must be empty: reusing non-empty temporary
    # directories when generating extensions can lead to segmentation faults.
    # See issue #95 (https://github.com/gvanrossum/pegen/issues/95) for more
    # context.
    existing_entries = os.listdir(path)
    assert not existing_entries

    c_source = path / "parse.c"
    with open(c_source, "w", encoding="utf-8") as out:
        generator = CParserGenerator(
            grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, out, debug=debug
        )
        generator.generate("parse.c")
    compile_c_extension(str(c_source), build_dir=str(path))
def print_memstats() -> bool:
    """Print the current process's memory usage in MiB.

    Reports ``rss``, ``vms`` and ``maxrss``. Returns False without printing
    anything when psutil cannot be imported, and True after printing.
    """
    try:
        import psutil  # type: ignore
    except ImportError:
        return False

    MiB: Final = 2 ** 20
    print("Memory stats:")
    info = psutil.Process().memory_info()
    stats = {
        "rss": info.rss / MiB,
        "vms": info.vms / MiB,
    }
    if sys.platform == "win32":
        stats["maxrss"] = info.peak_wset / MiB
    else:
        # See https://stackoverflow.com/questions/938733/total-memory-used-by-python-process
        import resource  # Imported here since it doesn't exist on Windows.

        # ru_maxrss is used as-is on macOS and scaled by 1024 elsewhere (Linux).
        scale = 1 if sys.platform == "darwin" else 1024
        peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        stats["maxrss"] = peak * scale / MiB
    for label, amount in stats.items():
        print(f" {label:12.12s}: {amount:10.0f} MiB")
    return True