| #!/usr/bin/env python |
| # |
| # Copyright 2007 The Closure Linter Authors. All Rights Reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS-IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| """Token utility functions.""" |
| |
| __author__ = ('robbyw@google.com (Robert Walker)', |
| 'ajp@google.com (Andy Perelson)') |
| |
| import copy |
| import StringIO |
| |
| from closure_linter.common import tokens |
| from closure_linter.javascripttokens import JavaScriptToken |
| from closure_linter.javascripttokens import JavaScriptTokenType |
| |
# Shorthand for tokens.TokenType; used below as Type.WHITESPACE and
# Type.BLANK_LINE when building new tokens.
Type = tokens.TokenType
| |
| |
def GetFirstTokenInSameLine(token):
  """Walks backwards from token to the token that starts its line.

  Args:
    token: Any token in the line.

  Returns:
    The token on the same line for which IsFirstInLine() is true. This is
    token itself when it already starts the line.
  """
  current = token
  while True:
    if current.IsFirstInLine():
      return current
    current = current.previous
| |
| |
def GetFirstTokenInPreviousLine(token):
  """Finds the token that starts the line preceding token's line.

  Args:
    token: Any token in the line.

  Returns:
    The first token of the previous line, or None if token is on the first
    line.
  """
  line_start = GetFirstTokenInSameLine(token)
  # The token just before the start of this line belongs to the previous line.
  end_of_previous_line = line_start.previous
  if not end_of_previous_line:
    return None
  return GetFirstTokenInSameLine(end_of_previous_line)
| |
| |
def GetLastTokenInSameLine(token):
  """Walks forwards from token to the token that ends its line.

  Args:
    token: Any token in the line.

  Returns:
    The token on the same line for which IsLastInLine() is true. This is
    token itself when it already ends the line.
  """
  current = token
  while True:
    if current.IsLastInLine():
      return current
    current = current.next
| |
| |
def GetAllTokensInSameLine(token):
  """Collects every token that shares a line with the given token.

  Args:
    token: Any token in the line.

  Returns:
    A list of the tokens on that line, ordered from the first token in the
    line to the last one.
  """
  cursor = GetFirstTokenInSameLine(token)
  line_end = GetLastTokenInSameLine(token)

  collected = []
  # Gather everything ahead of the line's final token, then add that final
  # token itself.
  while cursor != line_end:
    collected.append(cursor)
    cursor = cursor.next
  collected.append(line_end)
  return collected
| |
| |
def CustomSearch(start_token, func, end_func=None, distance=None,
                 reverse=False):
  """Returns the first token where func is True within distance of this token.

  The start token itself is never tested; the search begins with its
  neighbour in the chosen direction.

  Args:
    start_token: The token to start searching from
    func: The function to call to test a token for applicability
    end_func: The function to call to test a token to determine whether to abort
        the search.
    distance: The number of tokens to look through before failing search.  Must
        be positive.  If unspecified, will search until the end of the token
        chain
    reverse: When true, search the tokens before this one instead of the tokens
        after it

  Returns:
    The first token matching func within distance of this token, or None if no
    such token is found.
  """
  # Both directions share one loop; only the link that is followed differs.
  link_name = 'previous' if reverse else 'next'
  remaining = distance
  current = start_token

  while current and (remaining is None or remaining > 0):
    candidate = getattr(current, link_name)
    if candidate:
      if func(candidate):
        return candidate
      if end_func and end_func(candidate):
        return None

    current = candidate
    if remaining is not None:
      remaining -= 1

  return None
| |
| |
def Search(start_token, token_types, distance=None, reverse=False):
  """Returns the first token of type in token_types within distance.

  Args:
    start_token: The token to start searching from
    token_types: The allowable types of the token being searched for
    distance: The number of tokens to look through before failing search.  Must
        be positive.  If unspecified, will search until the end of the token
        chain
    reverse: When true, search the tokens before this one instead of the tokens
        after it

  Returns:
    The first token of any type in token_types within distance of this token, or
    None if no such token is found.
  """
  def _HasWantedType(candidate):
    return candidate.IsAnyType(token_types)

  return CustomSearch(start_token, _HasWantedType, end_func=None,
                      distance=distance, reverse=reverse)
| |
| |
def SearchExcept(start_token, token_types, distance=None, reverse=False):
  """Returns the first token not of any type in token_types within distance.

  Args:
    start_token: The token to start searching from
    token_types: The unallowable types of the token being searched for
    distance: The number of tokens to look through before failing search.  Must
        be positive.  If unspecified, will search until the end of the token
        chain
    reverse: When true, search the tokens before this one instead of the tokens
        after it

  Returns:
    The first token whose type is not in token_types within distance of this
    token, or None if no such token is found.
  """
  def _HasOtherType(candidate):
    return not candidate.IsAnyType(token_types)

  return CustomSearch(start_token, _HasOtherType, end_func=None,
                      distance=distance, reverse=reverse)
| |
| |
def SearchUntil(start_token, token_types, end_types, distance=None,
                reverse=False):
  """Returns the first token of type in token_types before a token of end_type.

  Args:
    start_token: The token to start searching from.
    token_types: The allowable types of the token being searched for.
    end_types: Types of tokens to abort search if we find.
    distance: The number of tokens to look through before failing search.  Must
        be positive.  If unspecified, will search until the end of the token
        chain
    reverse: When true, search the tokens before this one instead of the tokens
        after it

  Returns:
    The first token of any type in token_types within distance of this token
    before any tokens of type in end_type, or None if no such token is found.
  """
  def _HasWantedType(candidate):
    return candidate.IsAnyType(token_types)

  def _HasEndType(candidate):
    return candidate.IsAnyType(end_types)

  return CustomSearch(start_token, _HasWantedType, end_func=_HasEndType,
                      distance=distance, reverse=reverse)
| |
| |
def DeleteToken(token):
  """Unlinks the given token from the linked list and marks it deleted.

  Args:
    token: The token to delete
  """
  # The deleted token keeps its own previous/next pointers so that they still
  # lead to tokens that were live when it was removed. Tracking every
  # previously deleted token to repair stale pointers would be very hard, so
  # the token is flagged instead: list iteration logic can check the flag and
  # skip past a deleted node safely.
  token.is_deleted = True

  before = token.previous
  after = token.next
  if before:
    before.next = after
  if after:
    after.previous = before

  # Tokens that named the deleted token as their last code token inherit the
  # deleted token's own last_code.
  cursor = after
  while cursor and cursor.metadata.last_code == token:
    cursor.metadata.last_code = token.metadata.last_code
    cursor = cursor.next
| |
| |
def DeleteTokens(token, token_count):
  """Deletes the given number of tokens starting with the given token.

  Stops early if the token chain ends before token_count tokens were removed.

  Args:
    token: The token to start deleting at.
    token_count: The total number of tokens to delete. Nothing is deleted when
        this is less than one.
  """
  if token_count < 1:
    return

  # Delete the followers first. DeleteToken relinks token.next past each
  # removed token, so token.next always names the next token still to go.
  for unused_i in range(token_count - 1):
    if not token.next:
      break
    DeleteToken(token.next)
  DeleteToken(token)
| |
| |
def InsertTokenBefore(new_token, token):
  """Insert new_token before token.

  Links new_token into the doubly linked token list directly ahead of token,
  gives it a shallow copy of token's metadata, and updates the last_code
  metadata and start_index values affected by the insertion.

  Args:
    new_token: A token to be added to the stream
    token: A token already in the stream
  """
  new_token.next = token
  new_token.previous = token.previous

  # Shallow copy: new_token gets its own metadata object, so the last_code
  # rewrites below (which start at token) do not touch new_token's copy.
  new_token.metadata = copy.copy(token.metadata)

  # A code new_token becomes the last code token for token and for the run of
  # following tokens that shared token's previous last_code.
  if new_token.IsCode():
    old_last_code = token.metadata.last_code
    following_token = token
    while (following_token and
           following_token.metadata.last_code == old_last_code):
      following_token.metadata.last_code = new_token
      following_token = following_token.next

  token.previous = new_token
  if new_token.previous:
    new_token.previous.next = new_token

  # Only a token without a position is assigned one here.
  if new_token.start_index is None:
    if new_token.line_number == token.line_number:
      # Same line: new_token takes over the position token occupied.
      new_token.start_index = token.start_index
    else:
      # Different line: place new_token right behind its predecessor, or at
      # column 0 when it has none.
      previous_token = new_token.previous
      if previous_token:
        new_token.start_index = (previous_token.start_index +
                                 len(previous_token.string))
      else:
        new_token.start_index = 0

    # Shift the rest of new_token's line right by the inserted length.
    # NOTE(review): this loop is assumed to sit inside the
    # `start_index is None` branch -- confirm.
    iterator = new_token.next
    while iterator and iterator.line_number == new_token.line_number:
      iterator.start_index += len(new_token.string)
      iterator = iterator.next
| |
| |
def InsertTokenAfter(new_token, token):
  """Insert new_token after token.

  Links new_token into the doubly linked token list directly behind token,
  gives it a shallow copy of token's metadata, and updates the last_code
  metadata and start_index values affected by the insertion.

  Args:
    new_token: A token to be added to the stream
    token: A token already in the stream
  """
  new_token.previous = token
  new_token.next = token.next

  # Shallow copy: new_token gets its own metadata object to modify.
  new_token.metadata = copy.copy(token.metadata)

  # When token is code, it is the last code token seen before new_token.
  if token.IsCode():
    new_token.metadata.last_code = token

  # A code new_token takes over from token as last_code for the run of
  # following tokens that pointed at token.
  if new_token.IsCode():
    following_token = token.next
    while following_token and following_token.metadata.last_code == token:
      following_token.metadata.last_code = new_token
      following_token = following_token.next

  token.next = new_token
  if new_token.next:
    new_token.next.previous = new_token

  # Only a token without a position is assigned one here.
  if new_token.start_index is None:
    if new_token.line_number == token.line_number:
      # Same line: new_token starts where token's text ends.
      new_token.start_index = token.start_index + len(token.string)
    else:
      # Different line: new_token starts that line at column 0.
      new_token.start_index = 0

    # Shift the rest of new_token's line right by the inserted length.
    # NOTE(review): this loop is assumed to sit inside the
    # `start_index is None` branch -- confirm.
    iterator = new_token.next
    while iterator and iterator.line_number == new_token.line_number:
      iterator.start_index += len(new_token.string)
      iterator = iterator.next
| |
| |
def InsertTokensAfter(new_tokens, token):
  """Inserts several tokens, in order, directly after token.

  Args:
    new_tokens: An array of tokens to be added to the stream
    token: A token already in the stream
  """
  # TODO(user): It would be nicer to have InsertTokenAfter defer to here
  # instead of vice-versa.
  anchor = token
  for addition in new_tokens:
    # Each token goes in behind the one inserted just before it.
    InsertTokenAfter(addition, anchor)
    anchor = addition
| |
| |
def InsertSpaceTokenAfter(token):
  """Inserts a space token after the given token.

  The new token is a single ' ' of type Type.WHITESPACE on the same line as
  token.

  Args:
    token: The token to insert a space token after

  Returns:
    The newly inserted single space token.
  """
  space_token = JavaScriptToken(' ', Type.WHITESPACE, token.line,
                                token.line_number)
  InsertTokenAfter(space_token, token)
  # The docstring always promised the token; previously nothing was returned.
  return space_token
| |
| |
def InsertBlankLineAfter(token):
  """Inserts a blank line after the given token.

  The blank line is a single empty token of type Type.BLANK_LINE whose line
  number is one past token's; InsertLineAfter renumbers the tokens behind it.

  Args:
    token: The token to insert a blank line after

  Returns:
    The newly inserted blank line token.
  """
  blank_token = JavaScriptToken('', Type.BLANK_LINE, '',
                                token.line_number + 1)
  InsertLineAfter(token, [blank_token])
  # The docstring always promised a token; previously nothing was returned.
  return blank_token
| |
| |
def InsertLineAfter(token, new_tokens):
  """Inserts a new line consisting of new_tokens after the given token.

  Every token that follows the inserted line has its line_number increased by
  one.

  Args:
    token: The token to insert after.
    new_tokens: The tokens that will make up the new line. If empty, the
        stream is left untouched.
  """
  # Without tokens there is no line to add, and new_tokens[-1] below would
  # raise IndexError.
  if not new_tokens:
    return

  InsertTokensAfter(new_tokens, token)

  # Update all subsequent line numbers.
  following = new_tokens[-1].next
  while following:
    following.line_number += 1
    following = following.next
| |
| |
def SplitToken(token, position):
  """Splits the token into two tokens at position.

  token keeps the text before position; a new token of the same type, line and
  line number receives the rest and is inserted directly after token.

  Args:
    token: The token to split
    position: The position to split at. Will be the beginning of second token.

  Returns:
    The new second token.
  """
  new_string = token.string[position:]
  token.string = token.string[:position]

  new_token = JavaScriptToken(new_string, token.type, token.line,
                              token.line_number)

  # Splitting adds no characters to the line, so later tokens must keep their
  # positions. Giving the new token its position up front stops
  # InsertTokenAfter from treating it as unpositioned text being added to the
  # line, which would shift the tokens behind it by the new token's length.
  if token.start_index is not None:
    new_token.start_index = token.start_index + len(token.string)

  InsertTokenAfter(new_token, token)

  return new_token
| |
| |
def Compare(token1, token2):
  """Orders two tokens by line number, then by start index within the line.

  Args:
    token1: The first token to compare.
    token2: The second token to compare.

  Returns:
    A negative integer, zero, or a positive integer as the first token is
    before, equal, or after the second in the token stream.
  """
  # On a shared line the column decides; otherwise the line number does.
  if token1.line_number == token2.line_number:
    return token1.start_index - token2.start_index
  return token1.line_number - token2.line_number
| |
| |
def GoogScopeOrNoneFromStartBlock(token):
  """Determines if the given START_BLOCK is part of a goog.scope statement.

  Args:
    token: A token of type START_BLOCK.

  Returns:
    The goog.scope function call token, or None if such call doesn't exist
    (including when token is not a START_BLOCK or fewer than five tokens
    precede it).
  """
  if token.type != JavaScriptTokenType.START_BLOCK:
    return None

  # Search for a goog.scope statement, which will be 5 tokens before the
  # block. Illustration of the tokens found prior to the start block:
  # goog.scope(function() {
  #      5    4    3   21 ^

  candidate = token
  for unused_i in range(5):
    candidate = candidate.previous
    if not candidate:
      # The chain ended before five tokens back were reached.
      return None

  if candidate.string == 'goog.scope':
    return candidate
  return None
| |
| |
def GetTokenRange(start_token, end_token):
  """Returns a list of tokens between the two given, inclusive.

  Args:
    start_token: Start token in the range.
    end_token: End token in the range.

  Returns:
    A list of tokens, in order, from start_token to end_token (including start
    and end).  Returns None if the tokens do not describe a valid range, i.e.
    end_token is not reached by following next links from start_token.
  """
  collected = []
  cursor = start_token

  while cursor:
    collected.append(cursor)
    if cursor == end_token:
      return collected
    cursor = cursor.next

  # Ran off the end of the chain without meeting end_token.
  return None
| |
| |
def TokensToString(token_iterable):
  """Convert a number of tokens into a string.

  Newlines will be inserted whenever the line_number of two neighboring
  strings differ: one newline per line advanced when the line number grows,
  and a single newline when it drops.

  Args:
    token_iterable: The tokens to turn to a string.

  Returns:
    A string representation of the given tokens, or '' when there are none.
  """
  token_list = list(token_iterable)
  if not token_list:
    return ''

  # Collect the pieces and join once; this needs no StringIO buffer, whose
  # module only exists on Python 2.
  pieces = []
  line_number = token_list[0].line_number

  for token in token_list:
    if token.line_number > line_number:
      # One newline for every line skipped on the way to this token.
      pieces.append('\n' * (token.line_number - line_number))
    elif token.line_number < line_number:
      # Line numbers went backwards; still separate the two lines.
      pieces.append('\n')
    line_number = token.line_number

    pieces.append(token.string)

  return ''.join(pieces)
| |
| |
def GetPreviousCodeToken(token):
  """Finds the closest code token that precedes the specified token.

  A code token is one whose type is not in
  JavaScriptTokenType.NON_CODE_TYPES.

  Args:
    token: A token.

  Returns:
    The code token before the specified token or None if no such token
    exists.
  """
  def _IsCodeToken(candidate):
    return (candidate and
            candidate.type not in JavaScriptTokenType.NON_CODE_TYPES)

  return CustomSearch(token, _IsCodeToken, reverse=True)
| |
| |
def GetNextCodeToken(token):
  """Finds the closest code token that follows the specified token.

  A code token is one whose type is not in
  JavaScriptTokenType.NON_CODE_TYPES.

  Args:
    token: A token.

  Returns:
    The next code token after the specified token or None if no such token
    exists.
  """
  def _IsCodeToken(candidate):
    return (candidate and
            candidate.type not in JavaScriptTokenType.NON_CODE_TYPES)

  return CustomSearch(token, _IsCodeToken, reverse=False)
| |
| |
def GetIdentifierStart(token):
  """Returns the first token in an identifier.

  Given a token which is part of an identifier, walks back over the preceding
  code tokens for as long as they are identifiers or dots and returns the
  earliest one reached.

  Args:
    token: A token which is part of an identifier.

  Returns:
    The token at the start of the identifier or None if the identifier was not
    of the form 'a.b.c' (e.g. "['a']['b'].c").
  """
  earliest = token
  candidate = GetPreviousCodeToken(token)

  while candidate:
    is_identifier_part = (
        candidate.IsType(JavaScriptTokenType.IDENTIFIER) or IsDot(candidate))
    if not is_identifier_part:
      break
    earliest = candidate
    candidate = GetPreviousCodeToken(candidate)

  # A leading dot means the chain did not begin with a plain identifier.
  if IsDot(earliest):
    return None
  return earliest
| |
| |
def GetIdentifierForToken(token):
  """Get the symbol specified by a token.

  Given a token, this function additionally concatenates any parts of an
  identifying symbol being identified that are split by whitespace or a
  newline.

  The function will return None if the token is not the first token of an
  identifier.

  Args:
    token: The first token of a symbol.

  Returns:
    The whole symbol, as a string, or None if an identifier or dot precedes
    token (separated from it only by whitespace or comments).

  Raises:
    AssertionError: If token is neither a function name nor preceded by 'var'
      and its type is not IDENTIFIER or SIMPLE_LVALUE.
  """

  # Search backward to determine if this token is the first token of the
  # identifier. If it is not the first token, return None to signal that this
  # token should be ignored.
  prev_token = token.previous
  while prev_token:
    if (prev_token.IsType(JavaScriptTokenType.IDENTIFIER) or
        IsDot(prev_token)):
      return None

    # Whitespace and comments are skipped over; any other token ends the
    # backward scan.
    if (prev_token.IsType(tokens.TokenType.WHITESPACE) or
        prev_token.IsAnyType(JavaScriptTokenType.COMMENT_TYPES)):
      prev_token = prev_token.previous
    else:
      break

  # A "function foo()" declaration.
  if token.type is JavaScriptTokenType.FUNCTION_NAME:
    return token.string

  # A "var foo" declaration (if the previous token is 'var')
  previous_code_token = GetPreviousCodeToken(token)

  if previous_code_token and previous_code_token.IsKeyword('var'):
    return token.string

  # Otherwise, this is potentially a namespaced (goog.foo.bar) identifier that
  # could span multiple lines or be broken up by whitespace.  We need
  # to concatenate.
  identifier_types = set([
      JavaScriptTokenType.IDENTIFIER,
      JavaScriptTokenType.SIMPLE_LVALUE
      ])

  assert token.type in identifier_types

  # Start with the first token
  symbol_tokens = [token]

  if token.next:
    # NOTE(review): iterating a token presumably yields it and then the tokens
    # that follow it -- confirm against the token class's __iter__.
    for t in token.next:
      last_symbol_token = symbol_tokens[-1]

      # A dot is part of the previous symbol.
      if IsDot(t):
        symbol_tokens.append(t)
        continue

      # An identifier is part of the previous symbol if the previous one was a
      # dot.
      if t.type in identifier_types:
        if IsDot(last_symbol_token):
          symbol_tokens.append(t)
          continue
        else:
          break

      # Skip any whitespace
      if t.type in JavaScriptTokenType.NON_CODE_TYPES:
        continue

      # This is the end of the identifier. Stop iterating.
      break

  # symbol_tokens always holds at least the starting token at this point.
  if symbol_tokens:
    return ''.join([t.string for t in symbol_tokens])
| |
| |
def GetStringAfterToken(token):
  """Get string after token.

  The search stops at the first string-end token. An empty string has no
  Type.STRING_TEXT token, so an unbounded search would wrongly return the text
  of a later string. E.g.
    a = '';
    b = 'test';
  Searching after 'a' without the bound would yield 'test', even though the
  string that actually follows is empty.

  Args:
    token: Search will be done after this token.

  Returns:
    String if found after token else None (empty string will also
    return None): in both cases there is no Type.STRING_TEXT to find.
  """
  string_end_types = [JavaScriptTokenType.SINGLE_QUOTE_STRING_END,
                      JavaScriptTokenType.DOUBLE_QUOTE_STRING_END]
  text_token = SearchUntil(token, JavaScriptTokenType.STRING_TEXT,
                           string_end_types)
  return text_token.string if text_token else None
| |
| |
def IsDot(token):
  """Whether the token represents a "dot" operator (foo.bar).

  Args:
    token: The token to test.

  Returns:
    True if token is an OPERATOR token whose string is '.', False otherwise.
  """
  if token.type is not JavaScriptTokenType.OPERATOR:
    return False
  return token.string == '.'
| |
| |
def IsIdentifierOrDot(token):
  """Whether the token is either an identifier or a '.'.

  Args:
    token: The token to test.

  Returns:
    True if token's type is IDENTIFIER or SIMPLE_LVALUE; otherwise the result
    of IsDot(token).
  """
  if token.type in (JavaScriptTokenType.IDENTIFIER,
                    JavaScriptTokenType.SIMPLE_LVALUE):
    return True
  return IsDot(token)