| #!/usr/bin/env python |
| # |
| # Copyright 2010 The Closure Linter Authors. All Rights Reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS-IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| """Metadata pass for annotating tokens in EcmaScript files.""" |
| |
| __author__ = ('robbyw@google.com (Robert Walker)') |
| |
| from closure_linter import javascripttokens |
| from closure_linter import tokenutil |
| |
| |
| TokenType = javascripttokens.JavaScriptTokenType |
| |
| |
class ParseError(Exception):
    """Raised when the token stream cannot be parsed at a particular token.

    Attributes:
      token: The token at which the parse failure was detected.
    """

    def __init__(self, token, message=None):
        """Creates a parse error anchored to the given token.

        Args:
          token: The token at which the parse failure was detected.
          message: Optional description of the failure. It is handed to the
              Exception base class unchanged (including when it is None).
        """
        super(ParseError, self).__init__(message)
        self.token = token
| |
| |
class EcmaContext(object):
    """One node in the chain of nested contexts of an EcmaScript file.

    Contexts are linked to their enclosing context through ``parent``, so a
    single context object also describes the whole stack above it.

    Attributes:
      type: The context type; one of the string constants defined below.
      start_token: The token where this context starts.
      end_token: The token where this context ends. None until it is set.
      parent: The enclosing context, or None for the outermost one.
    """

    # The outermost context of a file.
    ROOT = 'root'

    # A block of code.
    BLOCK = 'block'

    # Pseudo-block holding the code of a single case or default section.
    CASE_BLOCK = 'case_block'

    # The statements found inside the parentheses of a for loop.
    FOR_GROUP_BLOCK = 'for_block'

    # Implied block for one-line if, while, and for statements.
    IMPLIED_BLOCK = 'implied_block'

    # An index into an array or object.
    INDEX = 'index'

    # An array literal written with [].
    ARRAY_LITERAL = 'array_literal'

    # An object literal written with {}.
    OBJECT_LITERAL = 'object_literal'

    # A single element of an array or object literal.
    LITERAL_ELEMENT = 'literal_element'

    # The part of a ternary statement between ? and :
    TERNARY_TRUE = 'ternary_true'

    # The part of a ternary statement after :
    TERNARY_FALSE = 'ternary_false'

    # The entire switch statement. It contains a GROUP with the variable and
    # a BLOCK with the code. That BLOCK is not a normal block: it can not
    # contain statements other than case and default.
    SWITCH = 'switch'

    # A normal comment.
    COMMENT = 'comment'

    # A JsDoc comment.
    DOC = 'doc'

    # A single statement.
    STATEMENT = 'statement'

    # Code inside parentheses.
    GROUP = 'group'

    # The parameter names of a function declaration.
    PARAMETERS = 'parameters'

    # The variable declarations that follow the 'var' keyword.
    VAR = 'var'

    # The context types that count as blocks.
    BLOCK_TYPES = frozenset(
        (ROOT, BLOCK, CASE_BLOCK, FOR_GROUP_BLOCK, IMPLIED_BLOCK))

    def __init__(self, type, start_token, parent):
        """Initializes the context object.

        Args:
          type: The context type.
          start_token: The token where this context starts.
          parent: The parent context.
        """
        self.parent = parent
        self.type = type
        self.start_token = start_token
        # Unknown at creation time; assigned later by whoever closes the
        # context.
        self.end_token = None

    def __repr__(self):
        """Returns the context types from this context outward to the root."""

        def _Ancestry(node):
            # Yields the type of the node and of each successive parent.
            while node:
                yield node.type
                node = node.parent

        return 'Context(%s)' % ' > '.join(_Ancestry(self))
| |
| |
class EcmaMetaData(object):
    """Per-token metadata for EcmaScript languages.

    Attributes:
      last_code: The last code token to appear before this one.
      context: The context this token appears in.
      operator_type: The operator type; one of the *_OPERATOR constants
          defined below, or None.
      is_implied_semicolon: True if a semicolon is implied after this token.
      is_implied_block: True if this token opens an implied block.
      is_implied_block_close: True if this token closes an implied block.
    """

    UNARY_OPERATOR = 'unary'

    UNARY_POST_OPERATOR = 'unary_post'

    BINARY_OPERATOR = 'binary'

    TERNARY_OPERATOR = 'ternary'

    def __init__(self):
        """Initializes a token metadata object with every field unset."""
        self.last_code = None
        self.context = None
        self.operator_type = None
        self.is_implied_semicolon = False
        self.is_implied_block = False
        self.is_implied_block_close = False

    def __repr__(self):
        """Returns a string representation of the metadata object.

        The context is always shown; the operator type and the implied
        semicolon marker are only shown when they are set.
        """
        pieces = ['%r' % self.context]
        if self.operator_type:
            pieces += ['optype: %r' % self.operator_type]
        if self.is_implied_semicolon:
            pieces += ['implied;']
        return 'MetaData(%s)' % ', '.join(pieces)

    def IsUnaryOperator(self):
        """Returns whether the operator is unary (prefix or postfix)."""
        unary_kinds = (EcmaMetaData.UNARY_OPERATOR,
                       EcmaMetaData.UNARY_POST_OPERATOR)
        return self.operator_type in unary_kinds

    def IsUnaryPostOperator(self):
        """Returns whether the operator is a postfix unary operator."""
        return self.operator_type == EcmaMetaData.UNARY_POST_OPERATOR
| |
| |
class EcmaMetaDataPass(object):
    """A pass that iterates over all tokens and builds metadata about them.

    While walking the token stream the pass maintains a stack of contexts
    (linked through their ``parent`` attribute) and attaches a freshly
    created metadata object to every token it visits.
    """

    def __init__(self):
        """Initialize the meta data pass object."""
        self.Reset()

    def Reset(self):
        """Resets the metadata pass to prepare for the next file."""
        self._token = None
        self._context = None
        # The root context is created while there is no current token and no
        # current context, so both are passed to _CreateContext as None.
        self._AddContext(EcmaContext.ROOT)
        self._last_code = None

    def _CreateContext(self, type):
        """Overridable by subclasses to create the appropriate context type.

        Args:
          type: The type of context to create.

        Returns:
          A new context that starts at the current token and whose parent is
          the current context.
        """
        return EcmaContext(type, self._token, self._context)

    def _CreateMetaData(self):
        """Overridable by subclasses to create the appropriate metadata type.

        Returns:
          A new, empty metadata object.
        """
        return EcmaMetaData()

    def _AddContext(self, type):
        """Adds a context of the given type to the context stack.

        Args:
          type: The type of context to create
        """
        self._context = self._CreateContext(type)

    def _PopContext(self):
        """Moves up one level in the context stack.

        The popped context gets the current token as its end token.

        Returns:
          The former context.

        Raises:
          ParseError: If the root context is popped.
        """
        top_context = self._context
        top_context.end_token = self._token
        self._context = top_context.parent
        if self._context:
            return top_context
        else:
            # The popped context had no parent, i.e. it was the root.
            raise ParseError(self._token)

    def _PopContextType(self, *stop_types):
        """Pops the context stack until a context of the given type is popped.

        Args:
          stop_types: The types of context to pop to - stops at the first
              match.

        Returns:
          The context object of the given type that was popped.

        Raises:
          ParseError: If the root context is popped before a match is found.
        """
        last = None
        while not last or last.type not in stop_types:
            last = self._PopContext()
        return last

    def _EndStatement(self):
        """Process the end of a statement.

        Pops the enclosing statement context. If that statement was the body
        of an implied block, the implied block is closed as well and the
        current token is flagged as closing it.
        """
        self._PopContextType(EcmaContext.STATEMENT)
        if self._context.type == EcmaContext.IMPLIED_BLOCK:
            self._token.metadata.is_implied_block_close = True
            self._PopContext()

    def _ProcessContext(self):
        """Process the context at the current token.

        Returns:
          The context that should be assigned to the current token, or None
          if the current context after this method should be used.

        Raises:
          ParseError: When the token appears in an invalid context.
        """
        token = self._token
        token_type = token.type

        if self._context.type in EcmaContext.BLOCK_TYPES:
            # Whenever we're in a block, we add a statement context. We make
            # an exception for switch statements since they can only contain
            # case: and default: and therefore don't directly contain
            # statements. The context we add here may be immediately removed
            # in some cases, but that causes no harm.
            parent = self._context.parent
            if not parent or parent.type != EcmaContext.SWITCH:
                self._AddContext(EcmaContext.STATEMENT)

        elif self._context.type == EcmaContext.ARRAY_LITERAL:
            self._AddContext(EcmaContext.LITERAL_ELEMENT)

        if token_type == TokenType.START_PAREN:
            if self._last_code and self._last_code.IsKeyword('for'):
                # for loops contain multiple statements in the group unlike
                # while, switch, if, etc.
                self._AddContext(EcmaContext.FOR_GROUP_BLOCK)
            else:
                self._AddContext(EcmaContext.GROUP)

        elif token_type == TokenType.END_PAREN:
            result = self._PopContextType(EcmaContext.GROUP,
                                          EcmaContext.FOR_GROUP_BLOCK)
            keyword_token = result.start_token.metadata.last_code
            # keyword_token will not exist if the open paren is the first
            # line of the file, for example if all code is wrapped in an
            # immediately executed anonymous function.
            if (keyword_token and
                    keyword_token.string in ('if', 'for', 'while')):
                next_code = tokenutil.SearchExcept(
                    token, TokenType.NON_CODE_TYPES)
                # The search can come back empty when ")" is the last code
                # token (e.g. "do {...} while (x)" ending the file without a
                # semicolon), so it must not be dereferenced blindly.
                if not next_code or next_code.type != TokenType.START_BLOCK:
                    # Check for do-while.
                    is_do_while = False
                    pre_keyword_token = keyword_token.metadata.last_code
                    if (pre_keyword_token and
                            pre_keyword_token.type == TokenType.END_BLOCK):
                        block_context = pre_keyword_token.metadata.context
                        # Nothing precedes the block when "{" was the first
                        # code token of the stream.
                        before_block = (
                            block_context.start_token.metadata.last_code)
                        is_do_while = (before_block is not None and
                                       before_block.string == 'do')

                    # If it's not do-while, it's an implied block.
                    if not is_do_while:
                        self._AddContext(EcmaContext.IMPLIED_BLOCK)
                        token.metadata.is_implied_block = True

            return result

        # else (not else if) with no open brace after it should be considered
        # the start of an implied block, similar to the case with if, for,
        # and while above.
        elif (token_type == TokenType.KEYWORD and
              token.string == 'else'):
            next_code = tokenutil.SearchExcept(
                token, TokenType.NON_CODE_TYPES)
            # A missing next code token is treated like "no open brace".
            if (not next_code or
                    (next_code.type != TokenType.START_BLOCK and
                     (next_code.type != TokenType.KEYWORD or
                      next_code.string != 'if'))):
                self._AddContext(EcmaContext.IMPLIED_BLOCK)
                token.metadata.is_implied_block = True

        elif token_type == TokenType.START_PARAMETERS:
            self._AddContext(EcmaContext.PARAMETERS)

        elif token_type == TokenType.END_PARAMETERS:
            return self._PopContextType(EcmaContext.PARAMETERS)

        elif token_type == TokenType.START_BRACKET:
            if (self._last_code and
                    self._last_code.type in TokenType.EXPRESSION_ENDER_TYPES):
                self._AddContext(EcmaContext.INDEX)
            else:
                self._AddContext(EcmaContext.ARRAY_LITERAL)

        elif token_type == TokenType.END_BRACKET:
            return self._PopContextType(EcmaContext.INDEX,
                                        EcmaContext.ARRAY_LITERAL)

        elif token_type == TokenType.START_BLOCK:
            last_code = self._last_code
            # When "{" is the very first code token there is no previous
            # token to inspect; it then falls through to the same branch as
            # every other "{" that is not introduced by one of the tokens
            # listed below.
            if last_code and (
                    last_code.type in (TokenType.END_PAREN,
                                       TokenType.END_PARAMETERS) or
                    last_code.IsKeyword('else') or
                    last_code.IsKeyword('do') or
                    last_code.IsKeyword('try') or
                    last_code.IsKeyword('finally') or
                    (last_code.IsOperator(':') and
                     last_code.metadata.context.type ==
                     EcmaContext.CASE_BLOCK)):
                # else, do, try, and finally all might have no () before {.
                # Also, handle the bizarre syntax case 10: {...}.
                self._AddContext(EcmaContext.BLOCK)
            else:
                self._AddContext(EcmaContext.OBJECT_LITERAL)

        elif token_type == TokenType.END_BLOCK:
            context = self._PopContextType(EcmaContext.BLOCK,
                                           EcmaContext.OBJECT_LITERAL)
            if self._context.type == EcmaContext.SWITCH:
                # The end of the block also means the end of the switch
                # statement it applies to.
                return self._PopContext()
            return context

        elif token.IsKeyword('switch'):
            self._AddContext(EcmaContext.SWITCH)

        elif (token_type == TokenType.KEYWORD and
              token.string in ('case', 'default')):
            # Pop up to but not including the switch block.
            while self._context.parent.type != EcmaContext.SWITCH:
                self._PopContext()
                if self._context.parent is None:
                    # We reached the root without meeting a switch; report
                    # it as a parse error instead of walking off the stack.
                    raise ParseError(
                        token,
                        'Encountered case/default statement without switch '
                        'statement')

        elif token.IsOperator('?'):
            self._AddContext(EcmaContext.TERNARY_TRUE)

        elif token.IsOperator(':'):
            if self._context.type == EcmaContext.OBJECT_LITERAL:
                self._AddContext(EcmaContext.LITERAL_ELEMENT)

            elif self._context.type == EcmaContext.TERNARY_TRUE:
                self._PopContext()
                self._AddContext(EcmaContext.TERNARY_FALSE)

            # Handle nested ternary statements like:
            # foo = bar ? baz ? 1 : 2 : 3
            # When we encounter the second ":" the context is
            # ternary_false > ternary_true > statement > root
            elif (self._context.type == EcmaContext.TERNARY_FALSE and
                  self._context.parent.type == EcmaContext.TERNARY_TRUE):
                self._PopContext()  # Leave current ternary false context.
                self._PopContext()  # Leave current parent ternary true
                self._AddContext(EcmaContext.TERNARY_FALSE)

            elif self._context.parent.type == EcmaContext.SWITCH:
                self._AddContext(EcmaContext.CASE_BLOCK)

        elif token.IsKeyword('var'):
            self._AddContext(EcmaContext.VAR)

        elif token.IsOperator(','):
            # A comma closes everything nested inside the nearest context
            # that can hold a comma separated list.
            while self._context.type not in (EcmaContext.VAR,
                                             EcmaContext.ARRAY_LITERAL,
                                             EcmaContext.OBJECT_LITERAL,
                                             EcmaContext.STATEMENT,
                                             EcmaContext.PARAMETERS,
                                             EcmaContext.GROUP):
                self._PopContext()

        elif token_type == TokenType.SEMICOLON:
            self._EndStatement()

    def Process(self, first_token):
        """Processes the token stream starting with the given token.

        Args:
          first_token: The first token of the stream; following tokens are
              reached through each token's ``next`` attribute.

        Raises:
          ParseError: When a token appears in an invalid context.
        """
        self._token = first_token
        while self._token:
            self._ProcessToken()

            if self._token.IsCode():
                self._last_code = self._token

            self._token = self._token.next

        # Close every context that is still open. Popping the root always
        # raises ParseError, which is the expected way for this to finish.
        try:
            self._PopContextType(EcmaContext.ROOT)
        except ParseError:
            # Ignore the "popped to root" error.
            pass

    def _ProcessToken(self):
        """Process the given token.

        Creates the metadata of the current token, records its context, the
        preceding code token and (for operators) the operator type, and
        decides whether a semicolon is implied after it.
        """
        token = self._token
        token.metadata = self._CreateMetaData()
        context = (self._ProcessContext() or self._context)
        token.metadata.context = context
        token.metadata.last_code = self._last_code

        # Determine the operator type of the token, if applicable.
        if token.type == TokenType.OPERATOR:
            token.metadata.operator_type = self._GetOperatorType(token)

        # Determine if there is an implied semicolon after the token.
        if token.type != TokenType.SEMICOLON:
            next_code = tokenutil.SearchExcept(
                token, TokenType.NON_CODE_TYPES)
            # A statement like if (x) does not need a semicolon after it.
            # The context *type* must be compared here: comparing the
            # context object itself to the type string is never equal.
            is_implied_block = (
                self._context.type == EcmaContext.IMPLIED_BLOCK)
            is_last_code_in_line = token.IsCode() and (
                not next_code or next_code.line_number != token.line_number)
            is_continued_identifier = (token.type == TokenType.IDENTIFIER and
                                       token.string.endswith('.'))
            is_continued_operator = (token.type == TokenType.OPERATOR and
                                     not token.metadata.IsUnaryPostOperator())
            is_continued_dot = token.string == '.'
            next_code_is_operator = (
                next_code and next_code.type == TokenType.OPERATOR)
            next_code_is_dot = next_code and next_code.string == '.'
            is_end_of_block = (
                token.type == TokenType.END_BLOCK and
                token.metadata.context.type != EcmaContext.OBJECT_LITERAL)
            is_multiline_string = token.type == TokenType.STRING_TEXT
            next_code_is_block = (
                next_code and next_code.type == TokenType.START_BLOCK)
            if (is_last_code_in_line and
                    self._StatementCouldEndInContext() and
                    not is_multiline_string and
                    not is_end_of_block and
                    not is_continued_identifier and
                    not is_continued_operator and
                    not is_continued_dot and
                    not next_code_is_dot and
                    not next_code_is_operator and
                    not is_implied_block and
                    not next_code_is_block):
                token.metadata.is_implied_semicolon = True
                self._EndStatement()

    def _StatementCouldEndInContext(self):
        """Returns whether the current statement may end in this context."""
        # In the basic statement or variable declaration context, statement
        # can always end in this context.
        if self._context.type in (EcmaContext.STATEMENT, EcmaContext.VAR):
            return True

        # End of a ternary false branch inside a statement can also be the
        # end of the statement, for example:
        #   var x = foo ? foo.bar() : null
        # In this case the statement ends after the null, when the context
        # stack looks like ternary_false > var > statement > root.
        if (self._context.type == EcmaContext.TERNARY_FALSE and
                self._context.parent.type in (EcmaContext.STATEMENT,
                                              EcmaContext.VAR)):
            return True

        # In all other contexts like object and array literals, ternary
        # true, etc. the statement can't yet end.
        return False

    def _GetOperatorType(self, token):
        """Returns the operator type of the given operator token.

        Args:
          token: The token to get arity for. Its metadata must already carry
              last_code.

        Returns:
          The type of the operator.  One of the *_OPERATOR constants defined
          in EcmaMetaData.
        """
        if token.string == '?':
            return EcmaMetaData.TERNARY_OPERATOR

        if token.string in TokenType.UNARY_OPERATORS:
            return EcmaMetaData.UNARY_OPERATOR

        # With nothing before it, or right after the end of a block, an
        # operator has no left operand and is therefore unary.
        last_code = token.metadata.last_code
        if not last_code or last_code.type == TokenType.END_BLOCK:
            return EcmaMetaData.UNARY_OPERATOR

        if (token.string in TokenType.UNARY_POST_OPERATORS and
                last_code.type in TokenType.EXPRESSION_ENDER_TYPES):
            return EcmaMetaData.UNARY_POST_OPERATOR

        if (token.string in TokenType.UNARY_OK_OPERATORS and
                last_code.type not in TokenType.EXPRESSION_ENDER_TYPES and
                last_code.string not in TokenType.UNARY_POST_OPERATORS):
            return EcmaMetaData.UNARY_OPERATOR

        return EcmaMetaData.BINARY_OPERATOR