blob: 74d086175fb84f2b929353107b1676198d52de79 [file] [log] [blame]
# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implements a format decision state object that manages whitespace decisions.
Each token is processed one at a time, at which point its whitespace formatting
decisions are made. A graph of potential whitespace formattings is created,
where each node in the graph is a format decision state object. The heuristic
tries formatting the token with and without a newline before it to determine
which one has the least penalty. Therefore, the format decision state object for
each decision needs to be its own unique copy.
Once the heuristic determines the best formatting, it makes a non-dry run pass
through the code to commit the whitespace formatting.
FormatDecisionState: main class exported by this module.
"""
from yapf.yapflib import format_token
from yapf.yapflib import logical_line
from yapf.yapflib import object_state
from yapf.yapflib import split_penalty
from yapf.yapflib import style
from yapf.yapflib import subtypes
class FormatDecisionState(object):
"""The current state when indenting a logical line.
The FormatDecisionState object is meant to be copied instead of referenced.
Attributes:
first_indent: The indent of the first token.
column: The number of used columns in the current line.
line: The logical line we're currently processing.
next_token: The next token to be formatted.
paren_level: The level of nesting inside (), [], and {}.
lowest_level_on_line: The lowest paren_level on the current line.
stack: A stack (of _ParenState) keeping track of properties applying to
parenthesis levels.
comp_stack: A stack (of ComprehensionState) keeping track of properties
applying to comprehensions.
param_list_stack: A stack (of ParameterListState) keeping track of
properties applying to function parameter lists.
ignore_stack_for_comparison: Ignore the stack of _ParenState for state
comparison.
column_limit: The column limit specified by the style.
"""
def __init__(self, line, first_indent):
  """Build the state that exists once the first token has been positioned.

  The new state describes 'line' with its first token sitting at column
  'first_indent'.

  Arguments:
    line: (LogicalLine) The logical line being formatted.
    first_indent: (int) Column at which the first token is placed.
  """
  # What is being formatted and where the cursor currently is.
  self.next_token = line.first
  self.line = line
  self.first_indent = first_indent
  self.column = first_indent

  # Bracket-nesting bookkeeping starts at the outermost level.
  self.paren_level = 0
  self.lowest_level_on_line = 0

  # The bottom stack entry receives first_indent for both _ParenState
  # arguments; the comprehension and parameter-list stacks start empty.
  self.stack = [_ParenState(first_indent, first_indent)]
  self.comp_stack = []
  self.param_list_stack = []
  self.ignore_stack_for_comparison = False

  self.column_limit = style.Get('COLUMN_LIMIT')
def Clone(self):
  """Produce a copy of this state that can be mutated independently.

  Plain fields and the token/line references are carried over by assignment,
  while every entry of the three stacks is duplicated through its own
  Clone() method.

  Returns:
    A new FormatDecisionState.
  """
  duplicate = FormatDecisionState(self.line, self.first_indent)

  for name in ('next_token', 'column', 'line', 'paren_level'):
    setattr(duplicate, name, getattr(self, name))

  duplicate.line.depth = self.line.depth

  for name in ('lowest_level_on_line', 'ignore_stack_for_comparison',
               'first_indent'):
    setattr(duplicate, name, getattr(self, name))

  # The stacks hold mutable per-level state, so each entry gets its own copy.
  for name in ('stack', 'comp_stack', 'param_list_stack'):
    setattr(duplicate, name, [entry.Clone() for entry in getattr(self, name)])

  return duplicate
def __eq__(self, other):
# Note: 'first_indent' is implicit in the stack. Also, we ignore 'previous',
# because it shouldn't have a bearing on this comparison. (I.e., it will
# report equal if 'next_token' does.)
return (self.next_token == other.next_token and
self.column == other.column and
self.paren_level == other.paren_level and
self.line.depth == other.line.depth and
self.lowest_level_on_line == other.lowest_level_on_line and
(self.ignore_stack_for_comparison or
other.ignore_stack_for_comparison or self.stack == other.stack and
self.comp_stack == other.comp_stack and
self.param_list_stack == other.param_list_stack))
def __ne__(self, other):
return not self == other
def __hash__(self):
return hash((self.next_token, self.column, self.paren_level,
self.line.depth, self.lowest_level_on_line))
def __repr__(self):
return ('column::%d, next_token::%s, paren_level::%d, stack::[\n\t%s' %
(self.column, repr(self.next_token), self.paren_level,
'\n\t'.join(repr(s) for s in self.stack) + ']'))
def CanSplit(self, must_split):
"""Determine if we can split before the next token.
Arguments:
must_split: (bool) A newline was required before this token.
Returns:
True if the line can be split before the next token.
"""
current = self.next_token
previous = current.previous_token
if current.is_pseudo:
return False
if (not must_split and subtypes.DICTIONARY_KEY_PART in current.subtypes and
subtypes.DICTIONARY_KEY not in current.subtypes and
not style.Get('ALLOW_MULTILINE_DICTIONARY_KEYS')):
# In some situations, a dictionary may be multiline, but pylint doesn't
# like it. So don't allow it unless forced to.
return False
if (not must_split and subtypes.DICTIONARY_VALUE in current.subtypes and
not style.Get('ALLOW_SPLIT_BEFORE_DICT_VALUE')):
return False
if previous and previous.value == '(' and current.value == ')':
# Don't split an empty function call list if we aren't splitting before
# dict values.
token = previous.previous_token
while token:
prev = token.previous_token
if not prev or prev.name not in {'NAME', 'DOT'}:
break
token = token.previous_token
if token and subtypes.DICTIONARY_VALUE in token.subtypes:
if not style.Get('ALLOW_SPLIT_BEFORE_DICT_VALUE'):
return False
if previous and previous.value == '.' and current.value == '.':
return False
return current.can_break_before
def MustSplit(self):
  """Returns True if the line must split before the next token.

  The checks run in a fixed order and the first one that reaches a decision
  wins. Pseudo tokens never force a split, a token flagged
  'must_break_before' always does, and a token without a predecessor never
  does. After that come the comma-splitting style knobs and closing-bracket
  handling, then the list, dict/set and argument-list heuristics, and finally
  the checks that compare the original line numbers of the tokens.

  Besides returning a value, some branches set 'split_before_closing_bracket'
  on the top entry of 'self.stack'.

  Returns:
    A truthy value if a newline is required before 'self.next_token' and a
    falsy value otherwise. Most branches return a bool, but a few return a
    style setting or a stack attribute directly.
  """
  current = self.next_token
  previous = current.previous_token

  if current.is_pseudo:
    return False

  if current.must_break_before:
    return True

  # Every check below inspects 'previous', so stop here if there is none.
  if not previous:
    return False

  if style.Get('SPLIT_ALL_COMMA_SEPARATED_VALUES') and previous.value == ',':
    return True

  if (style.Get('SPLIT_ALL_TOP_LEVEL_COMMA_SEPARATED_VALUES') and
      previous.value == ','):
    # Avoid breaking in a container that fits in the current line if possible
    opening = _GetOpeningBracket(current)

    # Can't find opening bracket, behave the same way as
    # SPLIT_ALL_COMMA_SEPARATED_VALUES.
    if not opening:
      return True

    if current.is_comment:
      # Don't require splitting before a comment, since it may be related to
      # the current line.
      return False

    # Allow the fallthrough code to handle the closing bracket.
    if current != opening.matching_bracket:
      # If the container doesn't fit in the current line, must split
      return not self._ContainerFitsOnStartLine(opening)

  if (self.stack[-1].split_before_closing_bracket and
      (current.value in '}]' and style.Get('SPLIT_BEFORE_CLOSING_BRACKET') or
       current.value in '}])' and style.Get('INDENT_CLOSING_BRACKETS'))):
    # Split before the closing bracket if we can.
    if subtypes.SUBSCRIPT_BRACKET not in current.subtypes:
      return current.node_split_penalty != split_penalty.UNBREAKABLE

  # A ')' directly after a trailing comma forces a split, unless the bracket
  # pair is a single-element tuple.
  if (current.value == ')' and previous.value == ',' and
      not _IsSingleElementTuple(current.matching_bracket)):
    return True

  # Prevent splitting before the first argument in compound statements
  # with the exception of function declarations.
  if (style.Get('SPLIT_BEFORE_FIRST_ARGUMENT') and
      _IsCompoundStatement(self.line.first) and
      not _IsFunctionDef(self.line.first)):
    return False

  ###########################################################################
  # List Splitting
  if (style.Get('DEDENT_CLOSING_BRACKETS') or
      style.Get('INDENT_CLOSING_BRACKETS') or
      style.Get('SPLIT_BEFORE_FIRST_ARGUMENT')):
    # Look at the closing bracket if 'current' is one, otherwise at the token
    # in front of 'current'.
    bracket = current if current.ClosesScope() else previous
    if subtypes.SUBSCRIPT_BRACKET not in bracket.subtypes:
      if bracket.OpensScope():
        if style.Get('COALESCE_BRACKETS'):
          if current.OpensScope():
            # Prefer to keep all opening brackets together.
            return False

        # Pick the token up to which the contents must fit on the line.
        if (not _IsLastScopeInLine(bracket) or
            logical_line.IsSurroundedByBrackets(bracket)):
          last_token = bracket.matching_bracket
        else:
          last_token = _LastTokenInLine(bracket.matching_bracket)

        if not self._FitsOnLine(bracket, last_token):
          # Split before the first element if the whole list can't fit on a
          # single line.
          self.stack[-1].split_before_closing_bracket = True
          return True

      elif (style.Get('DEDENT_CLOSING_BRACKETS') or
            style.Get('INDENT_CLOSING_BRACKETS')) and current.ClosesScope():
        # Split before and dedent the closing bracket.
        return self.stack[-1].split_before_closing_bracket

  if (style.Get('SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN') and
      current.is_name):
    # An expression that's surrounded by parens gets split after the opening
    # parenthesis.
    def SurroundedByParens(token):
      """Check if it's an expression surrounded by parentheses."""
      while token:
        if token.value == ',':
          return False
        if token.value == ')':
          # Only counts when the ')' is the very last token.
          return not token.next_token
        if token.OpensScope():
          # Jump over nested bracket pairs in one step.
          token = token.matching_bracket.next_token
        else:
          token = token.next_token
      return False

    if (previous.value == '(' and not previous.is_pseudo and
        not logical_line.IsSurroundedByBrackets(previous)):
      pptoken = previous.previous_token
      if (pptoken and not pptoken.is_name and not pptoken.is_keyword and
          SurroundedByParens(current)):
        return True

  if (current.is_name or current.is_string) and previous.value == ',':
    # If the list has function calls in it and the full list itself cannot
    # fit on the line, then we want to split. Otherwise, we'll get something
    # like this:
    #
    #     X = [
    #         Bar(xxx='some string',
    #             yyy='another long string',
    #             zzz='a third long string'), Bar(
    #                 xxx='some string',
    #                 yyy='another long string',
    #                 zzz='a third long string')
    #     ]
    #
    # or when a string formatting syntax.
    func_call_or_string_format = False
    tok = current.next_token
    if current.is_name:
      # Skip the dotted name; a following '(' makes it a function call.
      while tok and (tok.is_name or tok.value == '.'):
        tok = tok.next_token
      func_call_or_string_format = tok and tok.value == '('
    elif current.is_string:
      # Skip adjacent string tokens; a following '%' makes it a format
      # expression.
      while tok and tok.is_string:
        tok = tok.next_token
      func_call_or_string_format = tok and tok.value == '%'
    if func_call_or_string_format:
      open_bracket = logical_line.IsSurroundedByBrackets(current)
      if open_bracket:
        if open_bracket.value in '[{':
          if not self._FitsOnLine(open_bracket,
                                  open_bracket.matching_bracket):
            return True
        elif tok.value == '(':
          if not self._FitsOnLine(current, tok.matching_bracket):
            return True

  if (current.OpensScope() and previous.value == ',' and
      subtypes.DICTIONARY_KEY not in current.next_token.subtypes):
    # If we have a list of tuples, then we can get a similar look as above. If
    # the full list cannot fit on the line, then we want a split.
    open_bracket = logical_line.IsSurroundedByBrackets(current)
    if (open_bracket and open_bracket.value in '[{' and
        subtypes.SUBSCRIPT_BRACKET not in open_bracket.subtypes):
      if not self._FitsOnLine(current, current.matching_bracket):
        return True

  ###########################################################################
  # Dict/Set Splitting
  if (style.Get('EACH_DICT_ENTRY_ON_SEPARATE_LINE') and
      subtypes.DICTIONARY_KEY in current.subtypes and not current.is_comment):
    # Place each dictionary entry onto its own line.
    if previous.value == '{' and previous.previous_token:
      opening = _GetOpeningBracket(previous.previous_token)
      if (opening and opening.value == '(' and opening.previous_token and
          opening.previous_token.is_name):
        # This is a dictionary that's an argument to a function.
        if (self._FitsOnLine(previous, previous.matching_bracket) and
            previous.matching_bracket.next_token and
            (not opening.matching_bracket.next_token or
             opening.matching_bracket.next_token.value != '.') and
            _ScopeHasNoCommas(previous)):
          # Don't split before the key if:
          #   - The dictionary fits on a line, and
          #   - The function call isn't part of a builder-style call and
          #   - The dictionary has one entry and no trailing comma
          return False
    return True

  if (style.Get('SPLIT_BEFORE_DICT_SET_GENERATOR') and
      subtypes.DICT_SET_GENERATOR in current.subtypes):
    # Split before a dict/set generator.
    return True

  if (subtypes.DICTIONARY_VALUE in current.subtypes or
      (previous.is_pseudo and previous.value == '(' and
       not current.is_comment)):
    # Split before the dictionary value if we can't fit every dictionary
    # entry on its own line.
    if not current.OpensScope():
      opening = _GetOpeningBracket(current)
      if not self._EachDictEntryFitsOnOneLine(opening):
        # The style setting itself is the answer here.
        return style.Get('ALLOW_SPLIT_BEFORE_DICT_VALUE')

  if previous.value == '{':
    # Split if the dict/set cannot fit on one line and ends in a comma.
    closing = previous.matching_bracket
    if (not self._FitsOnLine(previous, closing) and
        closing.previous_token.value == ','):
      self.stack[-1].split_before_closing_bracket = True
      return True

  ###########################################################################
  # Argument List Splitting
  if (style.Get('SPLIT_BEFORE_NAMED_ASSIGNS') and not current.is_comment and
      subtypes.DEFAULT_OR_NAMED_ASSIGN_ARG_LIST in current.subtypes):
    if (previous.value not in {'=', ':', '*', '**'} and
        current.value not in ':=,)' and not _IsFunctionDefinition(previous)):
      # If we're going to split the lines because of named arguments, then we
      # want to split after the opening bracket as well. But not when this is
      # part of a function definition.
      if previous.value == '(':
        # Make sure we don't split after the opening bracket if the
        # continuation indent is greater than the opening bracket:
        #
        #  a(
        #      b=1,
        #      c=2)
        if (self._FitsOnLine(previous, previous.matching_bracket) and
            logical_line.IsSurroundedByBrackets(previous)):
          # An argument to a function is a function call with named
          # assigns.
          return False

        # Don't split if not required
        if (not style.Get('SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN') and
            not style.Get('SPLIT_BEFORE_FIRST_ARGUMENT')):
          return False

        # Split only when the text since the last recorded space is wider
        # than one continuation indent.
        column = self.column - self.stack[-1].last_space
        return column > style.Get('CONTINUATION_INDENT_WIDTH')

      opening = _GetOpeningBracket(current)
      if opening:
        return not self._ContainerFitsOnStartLine(opening)

  if (current.value not in '{)' and previous.value == '(' and
      self._ArgumentListHasDictionaryEntry(current)):
    return True

  if style.Get('SPLIT_ARGUMENTS_WHEN_COMMA_TERMINATED'):
    # Split before arguments in a function call or definition if the
    # arguments are terminated by a comma.
    opening = _GetOpeningBracket(current)
    if opening and opening.previous_token and opening.previous_token.is_name:
      if previous.value in '(,':
        if opening.matching_bracket.previous_token.value == ',':
          return True

  if ((current.is_name or current.value in {'*', '**'}) and
      previous.value == ','):
    # If we have a function call within an argument list and it won't fit on
    # the remaining line, but it will fit on a line by itself, then go ahead
    # and split before the call.
    opening = _GetOpeningBracket(current)
    if (opening and opening.value == '(' and opening.previous_token and
        (opening.previous_token.is_name or
         opening.previous_token.value in {'*', '**'})):
      is_func_call = False
      # 'opening' is reused: scan forward from 'current' over names, stars
      # and dots until a '(' (a call) or any other token is reached.
      opening = current
      while opening:
        if opening.value == '(':
          is_func_call = True
          break
        if (not (opening.is_name or opening.value in {'*', '**'}) and
            opening.value != '.'):
          break
        opening = opening.next_token

      if is_func_call:
        if (not self._FitsOnLine(current, opening.matching_bracket) or
            (opening.matching_bracket.next_token and
             opening.matching_bracket.next_token.value != ',' and
             not opening.matching_bracket.next_token.ClosesScope())):
          return True

  pprevious = previous.previous_token

  # A function call with a dictionary as its first argument may result in
  # unreadable formatting if the dictionary spans multiple lines. The
  # dictionary itself is formatted just fine, but the remaining arguments are
  # indented too far:
  #
  #     function_call({
  #         KEY_1: 'value one',
  #         KEY_2: 'value two',
  #     },
  #                   default=False)
  if (current.value == '{' and previous.value == '(' and pprevious and
      pprevious.is_name):
    dict_end = current.matching_bracket
    next_token = dict_end.next_token
    if next_token.value == ',' and not self._FitsOnLine(current, dict_end):
      return True

  if (current.is_name and pprevious and pprevious.is_name and
      previous.value == '('):

    if (not self._FitsOnLine(previous, previous.matching_bracket) and
        _IsFunctionCallWithArguments(current)):
      # There is a function call, with more than 1 argument, where the first
      # argument is itself a function call with arguments that does not fit
      # into the line. In this specific case, if we split after the first
      # argument's opening '(', then the formatting will look bad for the
      # rest of the arguments. E.g.:
      #
      #     outer_function_call(inner_function_call(
      #         inner_arg1, inner_arg2),
      #                         outer_arg1, outer_arg2)
      #
      # Instead, enforce a split before that argument to keep things looking
      # good.
      if (style.Get('SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN') or
          style.Get('SPLIT_BEFORE_FIRST_ARGUMENT')):
        return True

      # Same forward scan for a call as in the comma case above.
      opening = _GetOpeningBracket(current)
      if (opening and opening.value == '(' and opening.previous_token and
          (opening.previous_token.is_name or
           opening.previous_token.value in {'*', '**'})):
        is_func_call = False
        opening = current
        while opening:
          if opening.value == '(':
            is_func_call = True
            break
          if (not (opening.is_name or opening.value in {'*', '**'}) and
              opening.value != '.'):
            break
          opening = opening.next_token

        if is_func_call:
          if (not self._FitsOnLine(current, opening.matching_bracket) or
              (opening.matching_bracket.next_token and
               opening.matching_bracket.next_token.value != ',' and
               not opening.matching_bracket.next_token.ClosesScope())):
            return True

  if (previous.OpensScope() and not current.OpensScope() and
      not current.is_comment and
      subtypes.SUBSCRIPT_BRACKET not in previous.subtypes):
    if pprevious and not pprevious.is_keyword and not pprevious.is_name:
      # We want to split if there's a comment in the container.
      token = current
      while token != previous.matching_bracket:
        if token.is_comment:
          return True
        token = token.next_token
    if previous.value == '(':
      pptoken = previous.previous_token
      if not pptoken or not pptoken.is_name:
        # Split after the opening of a tuple if it doesn't fit on the current
        # line and it's not a function call.
        if self._FitsOnLine(previous, previous.matching_bracket):
          return False
      elif not self._FitsOnLine(previous, previous.matching_bracket):
        if len(previous.container_elements) == 1:
          return False

        # Check each element against the one that follows it; the closing
        # bracket is appended so the last element is checked too.
        elements = previous.container_elements + [previous.matching_bracket]
        i = 1
        while i < len(elements):
          if (not elements[i - 1].OpensScope() and
              not self._FitsOnLine(elements[i - 1], elements[i])):
            return True
          i += 1

        if (self.column_limit - self.column) / float(self.column_limit) < 0.3:
          # Try not to squish all of the arguments off to the right.
          return True
    else:
      # Split after the opening of a container if it doesn't fit on the
      # current line.
      if not self._FitsOnLine(previous, previous.matching_bracket):
        return True

  ###########################################################################
  # Original Formatting Splitting
  # These checks rely upon the original formatting. This is in order to
  # attempt to keep hand-written code in the same condition as it was before.
  # However, this may cause the formatter to fail to be idempotent.
  if (style.Get('SPLIT_BEFORE_BITWISE_OPERATOR') and current.value in '&|' and
      previous.lineno < current.lineno):
    # Retain the split before a bitwise operator.
    return True

  if (current.is_comment and
      previous.lineno < current.lineno - current.value.count('\n')):
    # If a comment comes in the middle of a logical line (like an if
    # conditional with comments interspersed), then we want to split if the
    # original comments were on a separate line.
    return True

  return False
def AddTokenToState(self, newline, dry_run, must_split=False):
"""Add a token to the format decision state.
Allow the heuristic to try out adding the token with and without a newline.
Later on, the algorithm will determine which one has the lowest penalty.
Arguments:
newline: (bool) Add the token on a new line if True.
dry_run: (bool) Don't commit whitespace changes to the FormatToken if
True.
must_split: (bool) A newline was required before this token.
Returns:
The penalty of splitting after the current token.
"""
self._PushParameterListState(newline)
penalty = 0
if newline:
penalty = self._AddTokenOnNewline(dry_run, must_split)
else:
self._AddTokenOnCurrentLine(dry_run)
penalty += self._CalculateComprehensionState(newline)
penalty += self._CalculateParameterListState(newline)
return self.MoveStateToNextToken() + penalty
def _AddTokenOnCurrentLine(self, dry_run):
"""Puts the token on the current line.
Appends the next token to the state and updates information necessary for
indentation.
Arguments:
dry_run: (bool) Commit whitespace changes to the FormatToken if True.
"""
current = self.next_token
previous = current.previous_token
spaces = current.spaces_required_before
if isinstance(spaces, list):
# Don't set the value here, as we need to look at the lines near
# this one to determine the actual horizontal alignment value.
spaces = 0
if not dry_run:
current.AddWhitespacePrefix(newlines_before=0, spaces=spaces)
if previous.OpensScope():
if not current.is_comment:
# Align closing scopes that are on a newline with the opening scope:
#
# foo = [a,
# b,
# ]
self.stack[-1].closing_scope_indent = self.column - 1
if style.Get('ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT'):
self.stack[-1].closing_scope_indent += 1
self.stack[-1].indent = self.column + spaces
else:
self.stack[-1].closing_scope_indent = (
self.stack[-1].indent - style.Get('CONTINUATION_INDENT_WIDTH'))
self.column += spaces
def _AddTokenOnNewline(self, dry_run, must_split):
"""Adds a line break and necessary indentation.
Appends the next token to the state and updates information necessary for
indentation.
Arguments:
dry_run: (bool) Don't commit whitespace changes to the FormatToken if
True.
must_split: (bool) A newline was required before this token.
Returns:
The split penalty for splitting after the current state.
"""
current = self.next_token
previous = current.previous_token
self.column = self._GetNewlineColumn()
if not dry_run:
indent_level = self.line.depth
spaces = self.column
if spaces:
spaces -= indent_level * style.Get('INDENT_WIDTH')
current.AddWhitespacePrefix(
newlines_before=1, spaces=spaces, indent_level=indent_level)
if not current.is_comment:
self.stack[-1].last_space = self.column
self.lowest_level_on_line = self.paren_level
if (previous.OpensScope() or
(previous.is_comment and previous.previous_token is not None and
previous.previous_token.OpensScope())):
dedent = (style.Get('CONTINUATION_INDENT_WIDTH'),
0)[style.Get('INDENT_CLOSING_BRACKETS')]
self.stack[-1].closing_scope_indent = (
max(0, self.stack[-1].indent - dedent))
self.stack[-1].split_before_closing_bracket = True
# Calculate the split penalty.
penalty = current.split_penalty
if must_split:
# Don't penalize for a must split.
return penalty
if previous.is_pseudo and previous.value == '(':
# Small penalty for splitting after a pseudo paren.
penalty += 50
# Add a penalty for each increasing newline we add, but don't penalize for
# splitting before an if-expression or list comprehension.
if current.value not in {'if', 'for'}:
last = self.stack[-1]
last.num_line_splits += 1
penalty += (
style.Get('SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT') *
last.num_line_splits)
if current.OpensScope() and previous.OpensScope():
# Prefer to keep opening brackets coalesced (unless it's at the beginning
# of a function call).
pprev = previous.previous_token
if not pprev or not pprev.is_name:
penalty += 10
return penalty + 10
def MoveStateToNextToken(self):
"""Calculate format decision state information and move onto the next token.
Before moving onto the next token, we first calculate the format decision
state given the current token and its formatting decisions. Then the format
decision state is set up so that the next token can be added.
Returns:
The penalty for the number of characters over the column limit.
"""
current = self.next_token
if not current.OpensScope() and not current.ClosesScope():
self.lowest_level_on_line = min(self.lowest_level_on_line,
self.paren_level)
# If we encounter an opening bracket, we add a level to our stack to prepare
# for the subsequent tokens.
if current.OpensScope():
last = self.stack[-1]
new_indent = style.Get('CONTINUATION_INDENT_WIDTH') + last.last_space
self.stack.append(_ParenState(new_indent, self.stack[-1].last_space))
self.paren_level += 1
# If we encounter a closing bracket, we can remove a level from our
# parenthesis stack.
if len(self.stack) > 1 and current.ClosesScope():
if subtypes.DICTIONARY_KEY_PART in current.subtypes:
self.stack[-2].last_space = self.stack[-2].indent
else:
self.stack[-2].last_space = self.stack[-1].last_space
self.stack.pop()
self.paren_level -= 1
is_multiline_string = current.is_string and '\n' in current.value
if is_multiline_string:
# This is a multiline string. Only look at the first line.
self.column += len(current.value.split('\n')[0])
elif not current.is_pseudo:
self.column += len(current.value)
self.next_token = self.next_token.next_token
# Calculate the penalty for overflowing the column limit.
penalty = 0
if (not current.is_pylint_comment and not current.is_pytype_comment and
not current.is_copybara_comment and self.column > self.column_limit):
excess_characters = self.column - self.column_limit
penalty += style.Get('SPLIT_PENALTY_EXCESS_CHARACTER') * excess_characters
if is_multiline_string:
# If this is a multiline string, the column is actually the
# end of the last line in the string.
self.column = len(current.value.split('\n')[-1])
return penalty
def _CalculateComprehensionState(self, newline):
  """Makes required changes to comprehension state.

  Depending on the next token this pops the innermost entry of
  'self.comp_stack' (when the token is that entry's closing bracket), pushes
  a new ComprehensionState (when the token starts a COMP_EXPR run), or
  records split information on the innermost entry.

  Args:
    newline: Whether the current token is to be added on a newline.

  Returns:
    The penalty for the token-newline combination given the current
    comprehension state.
  """
  current = self.next_token
  previous = current.previous_token
  top_of_stack = self.comp_stack[-1] if self.comp_stack else None
  penalty = 0

  if top_of_stack is not None:
    # Check if the token terminates the current comprehension.
    if current == top_of_stack.closing_bracket:
      last = self.comp_stack.pop()
      # Lightly penalize comprehensions that are split across multiple lines.
      if last.has_interior_split:
        penalty += style.Get('SPLIT_PENALTY_COMPREHENSION')

      return penalty

    # Any newline inside an open comprehension marks it as split.
    if newline:
      top_of_stack.has_interior_split = True

  # The first token of a COMP_EXPR run (the token before it is not part of
  # one) opens a new comprehension state.
  if (subtypes.COMP_EXPR in current.subtypes and
      subtypes.COMP_EXPR not in previous.subtypes):
    self.comp_stack.append(object_state.ComprehensionState(current))
    return penalty

  # NOTE(review): 'top_of_stack' is dereferenced below without a None check;
  # presumably a COMP_EXPR token always precedes COMP_FOR/COMP_IF - confirm.
  if current.value == 'for' and subtypes.COMP_FOR in current.subtypes:
    if top_of_stack.for_token is not None:
      # Treat nested comprehensions like normal comp_if expressions.
      # Example:
      #     my_comp = [
      #         a.qux + b.qux
      #         for a in foo
      #   -->   for b in bar   <--
      #         if a.zut + b.zut
      #     ]
      if (style.Get('SPLIT_COMPLEX_COMPREHENSION') and
          top_of_stack.has_split_at_for != newline and
          (top_of_stack.has_split_at_for or
           not top_of_stack.HasTrivialExpr())):
        penalty += split_penalty.UNBREAKABLE
    else:
      # First 'for' of this comprehension: remember it and whether it was
      # placed on a new line.
      top_of_stack.for_token = current
      top_of_stack.has_split_at_for = newline

      # Try to keep trivial expressions on the same line as the comp_for.
      if (style.Get('SPLIT_COMPLEX_COMPREHENSION') and newline and
          top_of_stack.HasTrivialExpr()):
        penalty += split_penalty.CONNECTED

  if (subtypes.COMP_IF in current.subtypes and
      subtypes.COMP_IF not in previous.subtypes):
    # Penalize breaking at comp_if when it doesn't match the newline structure
    # in the rest of the comprehension.
    if (style.Get('SPLIT_COMPLEX_COMPREHENSION') and
        top_of_stack.has_split_at_for != newline and
        (top_of_stack.has_split_at_for or not top_of_stack.HasTrivialExpr())):
      penalty += split_penalty.UNBREAKABLE

  return penalty
def _PushParameterListState(self, newline):
  """Open a parameter list state when a function definition's list begins.

  Nothing happens unless the token in front of the next token satisfies
  _IsFunctionDefinition().

  Args:
    newline: Whether the current token is to be added on a newline.
  """
  opening = self.next_token.previous_token
  if not _IsFunctionDefinition(opening):
    return

  # Offset of the opening token plus the indent of the enclosing stack level.
  first_param_column = opening.total_length + self.stack[-2].indent
  state = object_state.ParameterListState(opening, newline, first_param_column)
  self.param_list_stack.append(state)
def _CalculateParameterListState(self, newline):
  """Makes required changes to parameter list state.

  Three situations are handled, each with its own early return: the token
  right after the opening bracket of a function definition, the closing
  bracket of the innermost parameter list (which also pops that list from
  'self.param_list_stack'), and any other token inside a parameter list.

  Args:
    newline: Whether the current token is to be added on a newline.

  Returns:
    The penalty for the token-newline combination given the current
    parameter state.
  """
  current = self.next_token
  previous = current.previous_token
  penalty = 0

  if _IsFunctionDefinition(previous):
    first_param_column = previous.total_length + self.stack[-2].indent
    if not newline:
      param_list = self.param_list_stack[-1]
      if param_list.parameters and param_list.has_typed_return:
        last_param = param_list.parameters[-1].first_token
        last_token = _LastTokenInLine(previous.matching_bracket)
        # Length of everything from the last parameter's first token up to
        # the last token in the line.
        total_length = last_token.total_length
        total_length -= last_param.total_length - len(last_param.value)
        if total_length + self.column > self.column_limit:
          # If we need to split before the trailing code of a function
          # definition with return types, then also split before the opening
          # parameter so that the trailing bit isn't indented on a line by
          # itself:
          #
          #   def rrrrrrrrrrrrrrrrrrrrrr(ccccccccccccccccccccccc: Tuple[Text]
          #                             ) -> List[Tuple[Text, Text]]:
          #     pass
          penalty += split_penalty.VERY_STRONGLY_CONNECTED
      return penalty

    if first_param_column <= self.column:
      # Make sure we don't split after the opening bracket if the
      # continuation indent is greater than the opening bracket:
      #
      #  a(
      #      b=1,
      #      c=2)
      penalty += split_penalty.VERY_STRONGLY_CONNECTED
    return penalty

  # Outside of any parameter list there is nothing to adjust.
  if not self.param_list_stack:
    return penalty

  param_list = self.param_list_stack[-1]
  if current == self.param_list_stack[-1].closing_bracket:
    self.param_list_stack.pop()  # We're done with this state.
    if newline and param_list.has_typed_return:
      if param_list.split_before_closing_bracket:
        # Negative penalty: this split is being encouraged.
        penalty -= split_penalty.STRONGLY_CONNECTED
      elif param_list.LastParamFitsOnLine(self.column):
        penalty += split_penalty.STRONGLY_CONNECTED

    if (not newline and param_list.has_typed_return and
        param_list.has_split_before_first_param):
      # Prefer splitting before the closing bracket if there's a return type
      # and we've already split before the first parameter.
      penalty += split_penalty.STRONGLY_CONNECTED

    return penalty

  if not param_list.parameters:
    return penalty

  if newline:
    # Discourage a split when everything from the first parameter to the
    # last token in the line would fit on one line.
    if self._FitsOnLine(param_list.parameters[0].first_token,
                        _LastTokenInLine(param_list.closing_bracket)):
      penalty += split_penalty.STRONGLY_CONNECTED

  if (not newline and style.Get('SPLIT_BEFORE_NAMED_ASSIGNS') and
      param_list.has_default_values and
      current != param_list.parameters[0].first_token and
      current != param_list.closing_bracket and
      subtypes.PARAMETER_START in current.subtypes):
    # If we want to split before parameters when there are named assigns,
    # then add a penalty for not splitting.
    penalty += split_penalty.STRONGLY_CONNECTED

  return penalty
def _IndentWithContinuationAlignStyle(self, column):
if column == 0:
return column
align_style = style.Get('CONTINUATION_ALIGN_STYLE')
if align_style == 'FIXED':
return ((self.line.depth * style.Get('INDENT_WIDTH')) +
style.Get('CONTINUATION_INDENT_WIDTH'))
if align_style == 'VALIGN-RIGHT':
indent_width = style.Get('INDENT_WIDTH')
return indent_width * int((column + indent_width - 1) / indent_width)
return column
def _GetNewlineColumn(self):
  """Return the new column on the newline.

  The checks below are evaluated in order and the first one that applies
  decides the result; if none applies, the indent of the innermost bracket
  state (after continuation alignment) is used.

  Returns:
    The column for self.next_token when it is placed on a new line.
  """
  current = self.next_token
  previous = current.previous_token
  top_of_stack = self.stack[-1]

  if isinstance(current.spaces_required_before, list):
    # Don't set the value here, as we need to look at the lines near
    # this one to determine the actual horizontal alignment value.
    return 0
  elif current.spaces_required_before > 2 or self.line.disable:
    # Use the spacing recorded on the token itself.
    return current.spaces_required_before

  # Fallback value: the innermost bracket state's indent, adjusted for the
  # configured continuation alignment style.
  cont_aligned_indent = self._IndentWithContinuationAlignStyle(
      top_of_stack.indent)

  if current.OpensScope():
    # Inside brackets use the aligned indent; otherwise use the indent of the
    # line's first token.
    return cont_aligned_indent if self.paren_level else self.first_indent

  if current.ClosesScope():
    # NOTE(review): `previous` is dereferenced without a None check here,
    # unlike the string check further down - presumably a closing bracket is
    # never the first token of a line; confirm.
    if (previous.OpensScope() or
        (previous.is_comment and previous.previous_token is not None and
         previous.previous_token.OpensScope())):
      # The closing bracket directly follows its opening bracket (with at
      # most a comment in between): step back one continuation indent, but
      # never below column 0.
      return max(0,
                 top_of_stack.indent - style.Get('CONTINUATION_INDENT_WIDTH'))
    return top_of_stack.closing_scope_indent

  if (previous and previous.is_string and current.is_string and
      subtypes.DICTIONARY_VALUE in current.subtypes):
    # Two adjacent strings in a dictionary value: line the second one up with
    # the column of the first.
    return previous.column

  if style.Get('INDENT_DICTIONARY_VALUE'):
    if previous and (previous.value == ':' or previous.is_pseudo):
      if subtypes.DICTIONARY_VALUE in current.subtypes:
        # Note: this returns the raw indent, not cont_aligned_indent.
        return top_of_stack.indent

  if (not self.param_list_stack and _IsCompoundStatement(self.line.first) and
      (not (style.Get('DEDENT_CLOSING_BRACKETS') or
            style.Get('INDENT_CLOSING_BRACKETS')) or
       style.Get('SPLIT_BEFORE_FIRST_ARGUMENT'))):
    # token_indent is the width of the last line of the first token's
    # whitespace prefix plus one INDENT_WIDTH (presumably the column where
    # the statement's body would start). If the bracket indent coincides with
    # it, add a continuation indent on top.
    token_indent = (
        len(self.line.first.whitespace_prefix.split('\n')[-1]) +
        style.Get('INDENT_WIDTH'))
    if token_indent == top_of_stack.indent:
      return token_indent + style.Get('CONTINUATION_INDENT_WIDTH')

  if (self.param_list_stack and
      not self.param_list_stack[-1].SplitBeforeClosingBracket(
          top_of_stack.indent) and top_of_stack.indent
      == ((self.line.depth + 1) * style.Get('INDENT_WIDTH'))):
    # Inside a parameter list whose indent equals (depth + 1) * INDENT_WIDTH
    # and which does not split before its closing bracket: a token starting a
    # parameter (or following a comment that does) gets an extra continuation
    # indent.
    # NOTE(review): `previous` is dereferenced without a None check here.
    if (subtypes.PARAMETER_START in current.subtypes or
        (previous.is_comment and
         subtypes.PARAMETER_START in previous.subtypes)):
      return top_of_stack.indent + style.Get('CONTINUATION_INDENT_WIDTH')

  return cont_aligned_indent
def _FitsOnLine(self, start, end):
  """Report whether the span from start to end fits on the current line.

  Args:
    start: The first token of the span.
    end: The last token of the span.

  Returns:
    True if the span's length, added to self.column, does not exceed
    self.column_limit.
  """
  # A pseudo start token contributes no width of its own.
  start_width = 0 if start.is_pseudo else len(start.value)
  span = end.total_length - start.total_length + start_width
  return self.column + span <= self.column_limit
def _EachDictEntryFitsOnOneLine(self, opening):
  """Determine if each dict elems can fit on one line.

  Args:
    opening: The opening bracket token of the dictionary.

  Returns:
    False as soon as an entry is found whose length plus self.stack[-2].indent
    reaches the column limit; True otherwise (see the boundary note in the
    loop below).
  """

  def PreviousNonCommentToken(tok):
    """Return the closest token before tok that is not a comment."""
    tok = tok.previous_token
    while tok.is_comment:
      tok = tok.previous_token
    return tok

  def ImplicitStringConcatenation(tok):
    """Return true if more than one string token follows consecutively."""
    num_strings = 0
    if tok.is_pseudo:
      # Start counting after the pseudo token.
      tok = tok.next_token
    while tok.is_string:
      num_strings += 1
      tok = tok.next_token
    return num_strings > 1

  def DictValueIsContainer(opening, closing):
    """Return true if the dictionary value is a container."""
    if not opening or not closing:
      return False
    # Walk back over pseudo tokens to the token preceding the container.
    colon = opening.previous_token
    while colon:
      if not colon.is_pseudo:
        break
      colon = colon.previous_token
    if not colon or colon.value != ':':
      return False
    key = colon.previous_token
    if not key:
      return False
    return subtypes.DICTIONARY_KEY_PART in key.subtypes

  closing = opening.matching_bracket
  entry_start = opening.next_token
  current = opening.next_token.next_token

  while current and current != closing:
    if subtypes.DICTIONARY_KEY in current.subtypes:
      # A new key begins here, so the previous entry ends just before it
      # (ignoring comments and the separating comma).
      prev = PreviousNonCommentToken(current)
      if prev.value == ',':
        prev = PreviousNonCommentToken(prev.previous_token)
      if not DictValueIsContainer(prev.matching_bracket, prev):
        length = prev.total_length - entry_start.total_length
        length += len(entry_start.value)
        # NOTE(review): this rejects length + indent == column_limit, whereas
        # the check for the last entry at the end of the function accepts
        # it - confirm which boundary is intended.
        if length + self.stack[-2].indent >= self.column_limit:
          return False
      entry_start = current
    if current.OpensScope():
      # NOTE(review): `and` binds tighter than `or`, so the DICTIONARY_VALUE
      # test only applies to the pseudo-token alternative, not to a literal
      # '{' - confirm that matches the intent of the parentheses.
      if ((current.value == '{' or
           (current.is_pseudo and current.next_token.value == '{') and
           subtypes.DICTIONARY_VALUE in current.subtypes) or
          ImplicitStringConcatenation(current)):
        # A dictionary entry that cannot fit on a single line shouldn't matter
        # to this calculation. If it can't fit on a single line, then the
        # opening should be on the same line as the key and the rest on
        # newlines after it. But the other entries should be on single lines
        # if possible.
        if current.matching_bracket:
          current = current.matching_bracket
        # Skip ahead to the next key, or finish if the dictionary ends first.
        while current:
          if current == closing:
            return True
          if subtypes.DICTIONARY_KEY in current.subtypes:
            entry_start = current
            break
          current = current.next_token
      else:
        # Jump over the nested scope; its closing bracket is stepped past on
        # the next iteration.
        current = current.matching_bracket
    else:
      current = current.next_token

  # At this point, current is the closing bracket. Go back one to get the end
  # of the dictionary entry.
  current = PreviousNonCommentToken(current)
  length = current.total_length - entry_start.total_length
  length += len(entry_start.value)
  return length + self.stack[-2].indent <= self.column_limit
def _ArgumentListHasDictionaryEntry(self, token):
  """Check if the function argument list has a dictionary as an arg.

  Args:
    token: The token at which to start scanning.

  Returns:
    False if token is not an argument to a function or if a closing bracket
    (or the end of the tokens) is reached before any '{'. Otherwise, whether
    the first '{' found, measured to its matching bracket and offset by
    self.stack[-2].indent, exceeds the column limit.
  """
  if not _IsArgumentToFunction(token):
    return False

  cursor = token
  while cursor:
    if cursor.value == '{':
      dict_length = cursor.matching_bracket.total_length - cursor.total_length
      return dict_length + self.stack[-2].indent > self.column_limit
    if cursor.ClosesScope():
      return False
    if cursor.OpensScope():
      # Skip the nested scope; the step below moves past its closing bracket.
      cursor = cursor.matching_bracket
    cursor = cursor.next_token
  return False
def _ContainerFitsOnStartLine(self, opening):
  """Check if the container can fit on its starting line.

  Args:
    opening: The opening bracket token of the container.

  Returns:
    True if the length between the opening bracket and its matching bracket,
    plus the indent of the innermost bracket state, is within the column
    limit.
  """
  closing = opening.matching_bracket
  container_length = closing.total_length - opening.total_length
  return container_length + self.stack[-1].indent <= self.column_limit
# Keywords that this module treats as the start of a compound statement.
_COMPOUND_STMTS = frozenset(
    ('class', 'def', 'elif', 'except', 'for', 'if', 'while', 'with'))


def _IsCompoundStatement(token):
  """Return whether token starts one of the _COMPOUND_STMTS statements.

  A leading 'async' is skipped, so the keyword following it is the one tested.
  """
  keyword = token.next_token if token.value == 'async' else token
  return keyword.value in _COMPOUND_STMTS
def _IsFunctionDef(token):
  """Return whether token is 'def', optionally preceded by 'async'."""
  if token.value != 'async':
    return token.value == 'def'
  # Look past the 'async' keyword.
  return token.next_token.value == 'def'
def _IsFunctionCallWithArguments(token):
  """Check whether a '(' reached from token is followed by an argument.

  Scans forward over tokens named NAME, DOT or EQUAL until a '(' is found.

  Args:
    token: The token at which to start scanning; may be None.

  Returns:
    A truthy value if a '(' is found and the token after it is not ')'. A
    falsy value otherwise (False, or None when nothing follows the '(').
  """
  cursor = token
  while cursor:
    if cursor.value == '(':
      first_arg = cursor.next_token
      return first_arg and first_arg.value != ')'
    if cursor.name not in ('NAME', 'DOT', 'EQUAL'):
      return False
    cursor = cursor.next_token
  return False
def _IsArgumentToFunction(token):
  """Check whether token sits inside the parentheses of a named call.

  Args:
    token: The token to examine.

  Returns:
    A truthy value if logical_line.IsSurroundedByBrackets yields a '(' whose
    preceding token is a name; a falsy value otherwise.
  """
  enclosing = logical_line.IsSurroundedByBrackets(token)
  if enclosing and enclosing.value == '(':
    callee = enclosing.previous_token
    return callee and callee.is_name
  return False
def _GetOpeningBracket(current):
  """Get the opening bracket containing the current token.

  Args:
    current: The token whose enclosing bracket is wanted.

  Returns:
    If current is a non-pseudo bracket with a matching bracket, the opening
    bracket of that pair. Otherwise the first non-pseudo opening bracket found
    while walking backwards (skipping over complete nested scopes), or None if
    there is none.
  """
  if current.matching_bracket and not current.is_pseudo:
    return current if current.OpensScope() else current.matching_bracket

  while current:
    if current.ClosesScope():
      # Jump to the start of the nested scope; the step below moves past it.
      current = current.matching_bracket
    elif current.is_pseudo:
      # NOTE(review): together with the step below, a pseudo token moves the
      # cursor back twice, so the token right before it is never examined -
      # confirm this is intended.
      current = current.previous_token
    elif current.OpensScope():
      return current
    if not current:
      # The reassignment above ran off the start of the line (or hit an
      # unmatched bracket): there is no enclosing bracket. Without this guard
      # the step below would raise AttributeError on None.
      break
    current = current.previous_token
  return None
def _LastTokenInLine(current):
  """Walk forward from current to the last token, stopping at any comment.

  Returns:
    The first comment token reached (possibly current itself), or the token
    that has no next token.
  """
  tail = current
  while True:
    if tail.is_comment:
      break
    following = tail.next_token
    if not following:
      break
    tail = following
  return tail
def _IsFunctionDefinition(current):
  """Check whether current is the '(' that follows a FUNC_DEF token.

  Returns:
    A truthy value if current is '(' and the previous token carries the
    FUNC_DEF subtype; a falsy value otherwise (None when '(' has no previous
    token).
  """
  before = current.previous_token
  if current.value != '(':
    return False
  if not before:
    return before
  return subtypes.FUNC_DEF in before.subtypes
def _IsLastScopeInLine(current):
  """Check that no scope is opened after current's matching bracket.

  Args:
    current: A bracket token.

  Returns:
    False if any token after current.matching_bracket opens a scope; True
    otherwise, including when current has no matching bracket.
  """
  closing = current.matching_bracket
  if not closing:
    return True
  following = closing.next_token
  while following:
    if following.OpensScope():
      return False
    following = following.next_token
  return True
def _IsSingleElementTuple(token):
  """Check if it's a single-element tuple.

  Counts the commas directly inside the scope opened by token; commas inside
  nested scopes are skipped.

  Args:
    token: The opening bracket of the scope to inspect.

  Returns:
    True if the scope contains exactly one such comma.
  """
  closing = token.matching_bracket
  cursor = token.next_token
  comma_count = 0
  while cursor != closing:
    if cursor.value == ',':
      comma_count += 1
    if cursor.OpensScope():
      # Jump to the end of the nested scope.
      cursor = cursor.matching_bracket
    else:
      cursor = cursor.next_token
  return comma_count == 1
def _ScopeHasNoCommas(token):
  """Check if the scope has no commas.

  Only commas directly inside the scope opened by token are considered;
  nested scopes are skipped.

  Args:
    token: The opening bracket of the scope to inspect.

  Returns:
    False as soon as such a comma is found, True otherwise.
  """
  closing = token.matching_bracket
  cursor = token.next_token
  while cursor != closing:
    if cursor.value == ',':
      return False
    if cursor.OpensScope():
      # Jump to the end of the nested scope.
      cursor = cursor.matching_bracket
    else:
      cursor = cursor.next_token
  return True
class _ParenState(object):
  """Maintains the state of the bracket enclosures.

  A stack of _ParenState objects is kept so that we know how to indent relative
  to the brackets.

  Two states compare equal when all five attributes below are equal.

  Attributes:
    indent: The column position to which a specified parenthesis level needs to
      be indented.
    last_space: The column position of the last space on each level.
    closing_scope_indent: The column position of the closing indentation.
    split_before_closing_bracket: Whether a newline needs to be inserted before
      the closing bracket. We only want to insert a newline before the closing
      bracket if there also was a newline after the beginning left bracket.
    num_line_splits: Number of line splits this _ParenState contains already.
      Each subsequent line split gets an increasing penalty.
  """

  # TODO(morbo): This doesn't track "bin packing."

  def __init__(self, indent, last_space):
    """Create a state with the given indent and last_space columns.

    The remaining attributes start at 0 / False.
    """
    self.indent = indent
    self.last_space = last_space
    self.closing_scope_indent = 0
    self.split_before_closing_bracket = False
    self.num_line_splits = 0

  def Clone(self):
    """Return a new _ParenState with the same attribute values."""
    state = _ParenState(self.indent, self.last_space)
    state.closing_scope_indent = self.closing_scope_indent
    state.split_before_closing_bracket = self.split_before_closing_bracket
    state.num_line_splits = self.num_line_splits
    return state

  def __repr__(self):
    return '[indent::%d, last_space::%d, closing_scope_indent::%d]' % (
        self.indent, self.last_space, self.closing_scope_indent)

  def _Key(self):
    """Return the tuple of attributes that defines equality and hashing."""
    return (self.indent, self.last_space, self.closing_scope_indent,
            self.split_before_closing_bracket, self.num_line_splits)

  def __eq__(self, other):
    # Compare the fields themselves rather than their hashes: equal hashes do
    # not imply equal states, and an unrelated object that happens to share
    # the hash must not compare equal.
    if not isinstance(other, _ParenState):
      return NotImplemented
    return self._Key() == other._Key()

  def __ne__(self, other):
    return not self == other

  def __hash__(self, *args, **kwargs):
    # Extra arguments are accepted and ignored, as before.
    return hash(self._Key())