|  | #------------------------------------------------------------------------------- | 
|  | # Parser for ASDL [1] definition files. Reads in an ASDL description and parses | 
|  | # it into an AST that describes it. | 
|  | # | 
|  | # The EBNF we're parsing here: Figure 1 of the paper [1]. Extended to support | 
|  | # modules and attributes after a product. Words starting with Capital letters | 
|  | # are terminals. Literal tokens are in "double quotes". Others are | 
|  | # non-terminals. Id is either TokenId or ConstructorId. | 
|  | # | 
|  | # module        ::= "module" Id "{" [definitions] "}" | 
|  | # definitions   ::= { TypeId "=" type } | 
|  | # type          ::= product | sum | 
|  | # product       ::= fields ["attributes" fields] | 
|  | # fields        ::= "(" { field, "," } field ")" | 
|  | # field         ::= TypeId ["?" | "*"] [Id] | 
|  | # sum           ::= constructor { "|" constructor } ["attributes" fields] | 
|  | # constructor   ::= ConstructorId [fields] | 
|  | # | 
|  | # [1] "The Zephyr Abstract Syntax Description Language" by Wang, et. al. See | 
|  | #     http://asdl.sourceforge.net/ | 
|  | #------------------------------------------------------------------------------- | 
|  | from collections import namedtuple | 
|  | import re | 
|  |  | 
|  | __all__ = [ | 
|  | 'builtin_types', 'parse', 'AST', 'Module', 'Type', 'Constructor', | 
|  | 'Field', 'Sum', 'Product', 'VisitorBase', 'Check', 'check'] | 
|  |  | 
|  | # The following classes define nodes into which the ASDL description is parsed. | 
|  | # Note: this is a "meta-AST". ASDL files (such as Python.asdl) describe the AST | 
|  | # structure used by a programming language. But ASDL files themselves need to be | 
|  | # parsed. This module parses ASDL files and uses a simple AST to represent them. | 
|  | # See the EBNF at the top of the file to understand the logical connection | 
|  | # between the various node types. | 
|  |  | 
|  | builtin_types = {'identifier', 'string', 'int', 'constant'} | 
|  |  | 
|  | class AST: | 
|  | def __repr__(self): | 
|  | raise NotImplementedError | 
|  |  | 
|  | class Module(AST): | 
|  | def __init__(self, name, dfns): | 
|  | self.name = name | 
|  | self.dfns = dfns | 
|  | self.types = {type.name: type.value for type in dfns} | 
|  |  | 
|  | def __repr__(self): | 
|  | return 'Module({0.name}, {0.dfns})'.format(self) | 
|  |  | 
|  | class Type(AST): | 
|  | def __init__(self, name, value): | 
|  | self.name = name | 
|  | self.value = value | 
|  |  | 
|  | def __repr__(self): | 
|  | return 'Type({0.name}, {0.value})'.format(self) | 
|  |  | 
|  | class Constructor(AST): | 
|  | def __init__(self, name, fields=None): | 
|  | self.name = name | 
|  | self.fields = fields or [] | 
|  |  | 
|  | def __repr__(self): | 
|  | return 'Constructor({0.name}, {0.fields})'.format(self) | 
|  |  | 
|  | class Field(AST): | 
|  | def __init__(self, type, name=None, seq=False, opt=False): | 
|  | self.type = type | 
|  | self.name = name | 
|  | self.seq = seq | 
|  | self.opt = opt | 
|  |  | 
|  | def __str__(self): | 
|  | if self.seq: | 
|  | extra = "*" | 
|  | elif self.opt: | 
|  | extra = "?" | 
|  | else: | 
|  | extra = "" | 
|  |  | 
|  | return "{}{} {}".format(self.type, extra, self.name) | 
|  |  | 
|  | def __repr__(self): | 
|  | if self.seq: | 
|  | extra = ", seq=True" | 
|  | elif self.opt: | 
|  | extra = ", opt=True" | 
|  | else: | 
|  | extra = "" | 
|  | if self.name is None: | 
|  | return 'Field({0.type}{1})'.format(self, extra) | 
|  | else: | 
|  | return 'Field({0.type}, {0.name}{1})'.format(self, extra) | 
|  |  | 
|  | class Sum(AST): | 
|  | def __init__(self, types, attributes=None): | 
|  | self.types = types | 
|  | self.attributes = attributes or [] | 
|  |  | 
|  | def __repr__(self): | 
|  | if self.attributes: | 
|  | return 'Sum({0.types}, {0.attributes})'.format(self) | 
|  | else: | 
|  | return 'Sum({0.types})'.format(self) | 
|  |  | 
|  | class Product(AST): | 
|  | def __init__(self, fields, attributes=None): | 
|  | self.fields = fields | 
|  | self.attributes = attributes or [] | 
|  |  | 
|  | def __repr__(self): | 
|  | if self.attributes: | 
|  | return 'Product({0.fields}, {0.attributes})'.format(self) | 
|  | else: | 
|  | return 'Product({0.fields})'.format(self) | 
|  |  | 
|  | # A generic visitor for the meta-AST that describes ASDL. This can be used by | 
|  | # emitters. Note that this visitor does not provide a generic visit method, so a | 
|  | # subclass needs to define visit methods from visitModule to as deep as the | 
|  | # interesting node. | 
|  | # We also define a Check visitor that makes sure the parsed ASDL is well-formed. | 
|  |  | 
|  | class VisitorBase(object): | 
|  | """Generic tree visitor for ASTs.""" | 
|  | def __init__(self): | 
|  | self.cache = {} | 
|  |  | 
|  | def visit(self, obj, *args): | 
|  | klass = obj.__class__ | 
|  | meth = self.cache.get(klass) | 
|  | if meth is None: | 
|  | methname = "visit" + klass.__name__ | 
|  | meth = getattr(self, methname, None) | 
|  | self.cache[klass] = meth | 
|  | if meth: | 
|  | try: | 
|  | meth(obj, *args) | 
|  | except Exception as e: | 
|  | print("Error visiting %r: %s" % (obj, e)) | 
|  | raise | 
|  |  | 
|  | class Check(VisitorBase): | 
|  | """A visitor that checks a parsed ASDL tree for correctness. | 
|  |  | 
|  | Errors are printed and accumulated. | 
|  | """ | 
|  | def __init__(self): | 
|  | super(Check, self).__init__() | 
|  | self.cons = {} | 
|  | self.errors = 0 | 
|  | self.types = {} | 
|  |  | 
|  | def visitModule(self, mod): | 
|  | for dfn in mod.dfns: | 
|  | self.visit(dfn) | 
|  |  | 
|  | def visitType(self, type): | 
|  | self.visit(type.value, str(type.name)) | 
|  |  | 
|  | def visitSum(self, sum, name): | 
|  | for t in sum.types: | 
|  | self.visit(t, name) | 
|  |  | 
|  | def visitConstructor(self, cons, name): | 
|  | key = str(cons.name) | 
|  | conflict = self.cons.get(key) | 
|  | if conflict is None: | 
|  | self.cons[key] = name | 
|  | else: | 
|  | print('Redefinition of constructor {}'.format(key)) | 
|  | print('Defined in {} and {}'.format(conflict, name)) | 
|  | self.errors += 1 | 
|  | for f in cons.fields: | 
|  | self.visit(f, key) | 
|  |  | 
|  | def visitField(self, field, name): | 
|  | key = str(field.type) | 
|  | l = self.types.setdefault(key, []) | 
|  | l.append(name) | 
|  |  | 
|  | def visitProduct(self, prod, name): | 
|  | for f in prod.fields: | 
|  | self.visit(f, name) | 
|  |  | 
|  | def check(mod): | 
|  | """Check the parsed ASDL tree for correctness. | 
|  |  | 
|  | Return True if success. For failure, the errors are printed out and False | 
|  | is returned. | 
|  | """ | 
|  | v = Check() | 
|  | v.visit(mod) | 
|  |  | 
|  | for t in v.types: | 
|  | if t not in mod.types and not t in builtin_types: | 
|  | v.errors += 1 | 
|  | uses = ", ".join(v.types[t]) | 
|  | print('Undefined type {}, used in {}'.format(t, uses)) | 
|  | return not v.errors | 
|  |  | 
|  | # The ASDL parser itself comes next. The only interesting external interface | 
|  | # here is the top-level parse function. | 
|  |  | 
|  | def parse(filename): | 
|  | """Parse ASDL from the given file and return a Module node describing it.""" | 
|  | with open(filename) as f: | 
|  | parser = ASDLParser() | 
|  | return parser.parse(f.read()) | 
|  |  | 
|  | # Types for describing tokens in an ASDL specification. | 
|  | class TokenKind: | 
|  | """TokenKind is provides a scope for enumerated token kinds.""" | 
|  | (ConstructorId, TypeId, Equals, Comma, Question, Pipe, Asterisk, | 
|  | LParen, RParen, LBrace, RBrace) = range(11) | 
|  |  | 
|  | operator_table = { | 
|  | '=': Equals, ',': Comma,    '?': Question, '|': Pipe,    '(': LParen, | 
|  | ')': RParen, '*': Asterisk, '{': LBrace,   '}': RBrace} | 
|  |  | 
|  | Token = namedtuple('Token', 'kind value lineno') | 
|  |  | 
|  | class ASDLSyntaxError(Exception): | 
|  | def __init__(self, msg, lineno=None): | 
|  | self.msg = msg | 
|  | self.lineno = lineno or '<unknown>' | 
|  |  | 
|  | def __str__(self): | 
|  | return 'Syntax error on line {0.lineno}: {0.msg}'.format(self) | 
|  |  | 
|  | def tokenize_asdl(buf): | 
|  | """Tokenize the given buffer. Yield Token objects.""" | 
|  | for lineno, line in enumerate(buf.splitlines(), 1): | 
|  | for m in re.finditer(r'\s*(\w+|--.*|.)', line.strip()): | 
|  | c = m.group(1) | 
|  | if c[0].isalpha(): | 
|  | # Some kind of identifier | 
|  | if c[0].isupper(): | 
|  | yield Token(TokenKind.ConstructorId, c, lineno) | 
|  | else: | 
|  | yield Token(TokenKind.TypeId, c, lineno) | 
|  | elif c[:2] == '--': | 
|  | # Comment | 
|  | break | 
|  | else: | 
|  | # Operators | 
|  | try: | 
|  | op_kind = TokenKind.operator_table[c] | 
|  | except KeyError: | 
|  | raise ASDLSyntaxError('Invalid operator %s' % c, lineno) | 
|  | yield Token(op_kind, c, lineno) | 
|  |  | 
|  | class ASDLParser: | 
|  | """Parser for ASDL files. | 
|  |  | 
|  | Create, then call the parse method on a buffer containing ASDL. | 
|  | This is a simple recursive descent parser that uses tokenize_asdl for the | 
|  | lexing. | 
|  | """ | 
|  | def __init__(self): | 
|  | self._tokenizer = None | 
|  | self.cur_token = None | 
|  |  | 
|  | def parse(self, buf): | 
|  | """Parse the ASDL in the buffer and return an AST with a Module root. | 
|  | """ | 
|  | self._tokenizer = tokenize_asdl(buf) | 
|  | self._advance() | 
|  | return self._parse_module() | 
|  |  | 
|  | def _parse_module(self): | 
|  | if self._at_keyword('module'): | 
|  | self._advance() | 
|  | else: | 
|  | raise ASDLSyntaxError( | 
|  | 'Expected "module" (found {})'.format(self.cur_token.value), | 
|  | self.cur_token.lineno) | 
|  | name = self._match(self._id_kinds) | 
|  | self._match(TokenKind.LBrace) | 
|  | defs = self._parse_definitions() | 
|  | self._match(TokenKind.RBrace) | 
|  | return Module(name, defs) | 
|  |  | 
|  | def _parse_definitions(self): | 
|  | defs = [] | 
|  | while self.cur_token.kind == TokenKind.TypeId: | 
|  | typename = self._advance() | 
|  | self._match(TokenKind.Equals) | 
|  | type = self._parse_type() | 
|  | defs.append(Type(typename, type)) | 
|  | return defs | 
|  |  | 
|  | def _parse_type(self): | 
|  | if self.cur_token.kind == TokenKind.LParen: | 
|  | # If we see a (, it's a product | 
|  | return self._parse_product() | 
|  | else: | 
|  | # Otherwise it's a sum. Look for ConstructorId | 
|  | sumlist = [Constructor(self._match(TokenKind.ConstructorId), | 
|  | self._parse_optional_fields())] | 
|  | while self.cur_token.kind  == TokenKind.Pipe: | 
|  | # More constructors | 
|  | self._advance() | 
|  | sumlist.append(Constructor( | 
|  | self._match(TokenKind.ConstructorId), | 
|  | self._parse_optional_fields())) | 
|  | return Sum(sumlist, self._parse_optional_attributes()) | 
|  |  | 
|  | def _parse_product(self): | 
|  | return Product(self._parse_fields(), self._parse_optional_attributes()) | 
|  |  | 
|  | def _parse_fields(self): | 
|  | fields = [] | 
|  | self._match(TokenKind.LParen) | 
|  | while self.cur_token.kind == TokenKind.TypeId: | 
|  | typename = self._advance() | 
|  | is_seq, is_opt = self._parse_optional_field_quantifier() | 
|  | id = (self._advance() if self.cur_token.kind in self._id_kinds | 
|  | else None) | 
|  | fields.append(Field(typename, id, seq=is_seq, opt=is_opt)) | 
|  | if self.cur_token.kind == TokenKind.RParen: | 
|  | break | 
|  | elif self.cur_token.kind == TokenKind.Comma: | 
|  | self._advance() | 
|  | self._match(TokenKind.RParen) | 
|  | return fields | 
|  |  | 
|  | def _parse_optional_fields(self): | 
|  | if self.cur_token.kind == TokenKind.LParen: | 
|  | return self._parse_fields() | 
|  | else: | 
|  | return None | 
|  |  | 
|  | def _parse_optional_attributes(self): | 
|  | if self._at_keyword('attributes'): | 
|  | self._advance() | 
|  | return self._parse_fields() | 
|  | else: | 
|  | return None | 
|  |  | 
|  | def _parse_optional_field_quantifier(self): | 
|  | is_seq, is_opt = False, False | 
|  | if self.cur_token.kind == TokenKind.Asterisk: | 
|  | is_seq = True | 
|  | self._advance() | 
|  | elif self.cur_token.kind == TokenKind.Question: | 
|  | is_opt = True | 
|  | self._advance() | 
|  | return is_seq, is_opt | 
|  |  | 
|  | def _advance(self): | 
|  | """ Return the value of the current token and read the next one into | 
|  | self.cur_token. | 
|  | """ | 
|  | cur_val = None if self.cur_token is None else self.cur_token.value | 
|  | try: | 
|  | self.cur_token = next(self._tokenizer) | 
|  | except StopIteration: | 
|  | self.cur_token = None | 
|  | return cur_val | 
|  |  | 
|  | _id_kinds = (TokenKind.ConstructorId, TokenKind.TypeId) | 
|  |  | 
|  | def _match(self, kind): | 
|  | """The 'match' primitive of RD parsers. | 
|  |  | 
|  | * Verifies that the current token is of the given kind (kind can | 
|  | be a tuple, in which the kind must match one of its members). | 
|  | * Returns the value of the current token | 
|  | * Reads in the next token | 
|  | """ | 
|  | if (isinstance(kind, tuple) and self.cur_token.kind in kind or | 
|  | self.cur_token.kind == kind | 
|  | ): | 
|  | value = self.cur_token.value | 
|  | self._advance() | 
|  | return value | 
|  | else: | 
|  | raise ASDLSyntaxError( | 
|  | 'Unmatched {} (found {})'.format(kind, self.cur_token.kind), | 
|  | self.cur_token.lineno) | 
|  |  | 
|  | def _at_keyword(self, keyword): | 
|  | return (self.cur_token.kind == TokenKind.TypeId and | 
|  | self.cur_token.value == keyword) |