core/src/main/java/com/google/googlejavaformat/java/JavaInput.java - platform/external/google-java-format - Git at Google

 /*
  * Copyright 2015 Google Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
  * in compliance with the License. You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software distributed under the License
  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
  * or implied. See the License for the specific language governing permissions and limitations under
  * the License.
  */

 package com.google.googlejavaformat.java;

 import static com.google.common.base.Preconditions.checkNotNull;
 import static com.google.common.collect.Iterables.getLast;
 import static java.nio.charset.StandardCharsets.UTF_8;

 import com.google.common.base.MoreObjects;
 import com.google.common.base.Verify;
 import com.google.common.collect.DiscreteDomain;
 import com.google.common.collect.ImmutableCollection;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableRangeMap;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Iterators;
 import com.google.common.collect.Range;
 import com.google.common.collect.RangeSet;
 import com.google.common.collect.TreeRangeSet;
 import com.google.googlejavaformat.Input;
 import com.google.googlejavaformat.Newlines;
 import com.google.googlejavaformat.java.JavacTokens.RawTok;
 import com.sun.tools.javac.file.JavacFileManager;
 import com.sun.tools.javac.parser.Tokens.TokenKind;
 import com.sun.tools.javac.tree.JCTree.JCCompilationUnit;
 import com.sun.tools.javac.util.Context;
 import com.sun.tools.javac.util.Log;
 import com.sun.tools.javac.util.Log.DeferredDiagnosticHandler;
 import com.sun.tools.javac.util.Options;
 import java.io.IOException;
 import java.net.URI;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Iterator;
 import java.util.List;
 import java.util.regex.Pattern;
 import javax.tools.Diagnostic;
 import javax.tools.DiagnosticCollector;
 import javax.tools.DiagnosticListener;
 import javax.tools.JavaFileObject;
 import javax.tools.JavaFileObject.Kind;
 import javax.tools.SimpleJavaFileObject;

 /** {@code JavaInput} extends {@link Input} to represent a Java input document. */
 public final class JavaInput extends Input {
   /**
    * A {@code JavaInput} is a sequence of {@link Tok}s that cover the Java input. A {@link Tok} is
    * either a token (if {@code isToken()}), or a non-token, which is a comment (if {@code
    * isComment()}) or a newline (if {@code isNewline()}) or a maximal sequence of other whitespace
    * characters (if {@code isSpaces()}). Each {@link Tok} contains a sequence of characters, an
    * index (sequential starting at {@code 0} for tokens and comments, else {@code -1}), and a
    * ({@code 0}-origin) position in the input. The concatenation of the texts of all the {@link
    * Tok}s equals the input. Each Input ends with a token EOF {@link Tok}, with empty text.
    *
    * <p>A {@code /*} comment possibly contains newlines; a {@code //} comment does not contain the
    * terminating newline character, but is followed by a newline {@link Tok}.
    */
   static final class Tok implements Input.Tok {
     private final int index;
     private final String originalText;
     private final String text;
     private final int position;
     private final int columnI;
     private final boolean isToken;
     private final TokenKind kind;

     /**
      * The {@code Tok} constructor.
      *
      * @param index its index
      * @param originalText its original text, before removing Unicode escapes
      * @param text its text after removing Unicode escapes
      * @param position its {@code 0}-origin position in the input
      * @param columnI its {@code 0}-origin column number in the input
      * @param isToken whether the {@code Tok} is a token
      * @param kind the token kind
      */
     Tok(
         int index,
         String originalText,
         String text,
         int position,
         int columnI,
         boolean isToken,
         TokenKind kind) {
       this.index = index;
       this.originalText = originalText;
       this.text = text;
       this.position = position;
       this.columnI = columnI;
       this.isToken = isToken;
       this.kind = kind;
     }

     /** Returns the index this tok was constructed with. */
     @Override
     public int getIndex() {
       return index;
     }

     /** Returns the text of this tok after removing Unicode escapes. */
     @Override
     public String getText() {
       return text;
     }

     /** Returns the text of this tok as it was given before removing Unicode escapes. */
     @Override
     public String getOriginalText() {
       return originalText;
     }

     /** Returns the length of the original text, not of the unescaped text. */
     @Override
     public int length() {
       return originalText.length();
     }

     /** Returns the {@code 0}-origin position this tok was constructed with. */
     @Override
     public int getPosition() {
       return position;
     }

     /** Returns the {@code 0}-origin column this tok was constructed with. */
     @Override
     public int getColumn() {
       return columnI;
     }

     /** Returns whether this tok was constructed as a token (as opposed to a non-token). */
     boolean isToken() {
       return isToken;
     }

     /** Returns whether {@link Newlines#isNewline} accepts this tok's text. */
     @Override
     public boolean isNewline() {
       return Newlines.isNewline(text);
     }

     /** Returns whether this tok's text starts with {@code //}. */
     @Override
     public boolean isSlashSlashComment() {
       return text.startsWith("//");
     }

     /** Returns whether this tok's text starts with {@code /*}. */
     @Override
     public boolean isSlashStarComment() {
       return text.startsWith("/*");
     }

     /**
      * Returns whether this tok's text is longer than four characters, starts with {@code /**}, and
      * does not continue with a third {@code *}.
      */
     @Override
     public boolean isJavadocComment() {
       // comments like `/***` are also javadoc, but their formatting probably won't be improved
       // by the javadoc formatter
       //
       // The length test runs first so that charAt is never reached for a text that is exactly
       // "/**", where index 3 would be out of bounds.
       return text.length() > 4 && text.startsWith("/**") && text.charAt("/**".length()) != '*';
     }

     /** Returns whether this tok is either kind of comment. */
     @Override
     public boolean isComment() {
       return isSlashSlashComment() || isSlashStarComment();
     }

     @Override
     public String toString() {
       return MoreObjects.toStringHelper(this)
           .add("index", index)
           .add("text", text)
           .add("position", position)
           .add("columnI", columnI)
           .add("isToken", isToken)
           .toString();
     }

     /** Returns the token kind this tok was constructed with; may be {@code null}. */
     public TokenKind kind() {
       return kind;
     }
   }

   /**
    * A {@link Token} contains a token {@link Tok} and its associated non-tokens; each non-token
    * {@link Tok} belongs to one {@link Token}. Each {@link Token} has an immutable list of its
    * non-tokens that appear before it, and another list of its non-tokens that appear after it. The
    * concatenation of the texts of all the {@link Token}s' {@link Tok}s, each preceded by the texts
    * of its {@code toksBefore} and followed by the texts of its {@code toksAfter}, equals the input.
    */
   static final class Token implements Input.Token {
     private final Tok tok;
     private final ImmutableList<Tok> toksBefore;
     private final ImmutableList<Tok> toksAfter;

     /**
      * Token constructor.
      *
      * @param toksBefore the earlier non-token {link Tok}s assigned to this {@code Token}
      * @param tok this token {@link Tok}
      * @param toksAfter the later non-token {link Tok}s assigned to this {@code Token}
      */
     Token(List<Tok> toksBefore, Tok tok, List<Tok> toksAfter) {
       this.toksBefore = ImmutableList.copyOf(toksBefore);
       this.tok = tok;
       this.toksAfter = ImmutableList.copyOf(toksAfter);
     }

     /**
      * Get the token's {@link Tok}.
      *
      * @return the token's {@link Tok}
      */
     @Override
     public Tok getTok() {
       return tok;
     }

     /**
      * Get the earlier {@link Tok}s assigned to this {@code Token}.
      *
      * @return the earlier {@link Tok}s assigned to this {@code Token}
      */
     @Override
     public ImmutableList<? extends Input.Tok> getToksBefore() {
       return toksBefore;
     }

     /**
      * Get the later {@link Tok}s assigned to this {@code Token}.
      *
      * @return the later {@link Tok}s assigned to this {@code Token}
      */
     @Override
     public ImmutableList<? extends Input.Tok> getToksAfter() {
       return toksAfter;
     }

     @Override
     public String toString() {
       return MoreObjects.toStringHelper(this)
           .add("tok", tok)
           .add("toksBefore", toksBefore)
           .add("toksAfter", toksAfter)
           .toString();
     }
   }

   private final String text; // The input.
   private int kN; // The number of numbered toks (tokens or comments), excluding the EOF.

   /*
    * The following lists record the sequential indices of the {@code Tok}s on each input line. (Only
    * tokens and comments have sequential indices.) Tokens and {@code //} comments lie on just one
    * line; {@code /*} comments can lie on multiple lines. These data structures (along with
    * equivalent ones for the formatted output) let us compute correspondences between the input and
    * output.
    */

   private final ImmutableMap<Integer, Integer> positionToColumnMap; // Map Tok position to column.
   private final ImmutableList<Token> tokens; // The Tokens for this input.
   private final ImmutableRangeMap<Integer, Token> positionTokenMap; // Map position to Token.

   /** Map from Tok index to the associated Token. */
   private final Token[] kToToken;

   /**
    * Input constructor.
    *
    * @param text the input text
    * @throws FormatterException if the input cannot be parsed
    */
   public JavaInput(String text) throws FormatterException {
     this.text = checkNotNull(text);
     setLines(ImmutableList.copyOf(Newlines.lineIterator(text)));
     ImmutableList<Tok> toks = buildToks(text);
     positionToColumnMap = makePositionToColumnMap(toks);
     tokens = buildTokens(toks);

     // Map the closed character span of each Token (first tok through last tok) to that Token.
     ImmutableRangeMap.Builder<Integer, Token> spans = ImmutableRangeMap.builder();
     for (Token token : tokens) {
       Input.Tok last = JavaOutput.endTok(token);
       // A last tok with empty text covers no characters, so the span stops at its position.
       int inclusiveEnd =
           last.getText().isEmpty() ? last.getPosition() : last.getPosition() + last.length() - 1;
       int start = JavaOutput.startTok(token).getPosition();
       spans.put(Range.closed(start, inclusiveEnd), token);
     }
     positionTokenMap = spans.build();

     // kN is the index of the EOF tok, so one extra slot is needed to hold it.
     kToToken = new Token[kN + 1];
     for (Token token : tokens) {
       // Only numbered toks (index >= 0) get an entry; unnumbered ones carry index -1.
       for (Input.Tok before : token.getToksBefore()) {
         int beforeIndex = before.getIndex();
         if (beforeIndex >= 0) {
           kToToken[beforeIndex] = token;
         }
       }
       kToToken[token.getTok().getIndex()] = token;
       for (Input.Tok after : token.getToksAfter()) {
         int afterIndex = after.getIndex();
         if (afterIndex >= 0) {
           kToToken[afterIndex] = token;
         }
       }
     }
   }

   /** Builds an immutable map from each tok's position to that tok's column. */
   private static ImmutableMap<Integer, Integer> makePositionToColumnMap(List<Tok> toks) {
     ImmutableMap.Builder<Integer, Integer> columnsByPosition = ImmutableMap.builder();
     toks.forEach(tok -> columnsByPosition.put(tok.getPosition(), tok.getColumn()));
     return columnsByPosition.build();
   }

   /**
    * Get the input text, exactly as it was passed to the constructor.
    *
    * @return the input text
    */
   @Override
   public String getText() {
     return text;
   }

   /**
    * Get the map from tok position to tok column that the constructor built from the lexed toks.
    *
    * @return the immutable position-to-column map
    */
   @Override
   public ImmutableMap<Integer, Integer> getPositionToColumnMap() {
     return positionToColumnMap;
   }

   /**
    * Lex the whole input with no extra stop tokens, record the index of the final tok in {@code
    * kN}, and hand the toks to {@code computeRanges}.
    */
   private ImmutableList<Tok> buildToks(String text) throws FormatterException {
     ImmutableList<Tok> lexed = buildToks(text, ImmutableSet.of());
     Tok finalTok = getLast(lexed);
     kN = finalTok.getIndex();
     computeRanges(lexed);
     return lexed;
   }

   /**
    * Lex the input and build the list of toks.
    *
    * <p>Whitespace is split into one tok per line ending and one per stretch of other whitespace,
    * and operators and punctuation are split into one-character toks. Every tok records its running
    * character position and column. If lexing produces any error diagnostic, the result is a single
    * EOF tok.
    *
    * @param text the text to be lexed.
    * @param stopTokens a set of tokens which should cause lexing to stop. If one of these is found,
    *     the returned list will include tokens up to but not including that token.
    * @return the toks in input order, always ending with an empty-text EOF tok
    * @throws FormatterException if a tok that has to be split into several pieces has text that
    *     differs from its original text (reported as Unicode escapes in whitespace or
    *     multi-character operators)
    */
   static ImmutableList<Tok> buildToks(String text, ImmutableSet<TokenKind> stopTokens)
       throws FormatterException {
     // EOF always stops lexing, in addition to whatever the caller asked for.
     stopTokens = ImmutableSet.<TokenKind>builder().addAll(stopTokens).add(TokenKind.EOF).build();
     Context context = new Context();
     Options.instance(context).put("--enable-preview", "true");
     // The instance is not used directly; presumably it registers itself with the context.
     new JavacFileManager(context, true, UTF_8);
     DiagnosticCollector<JavaFileObject> diagnosticCollector = new DiagnosticCollector<>();
     context.put(DiagnosticListener.class, diagnosticCollector);
     Log log = Log.instance(context);
     log.useSource(
         new SimpleJavaFileObject(URI.create("Source.java"), Kind.SOURCE) {
           @Override
           public CharSequence getCharContent(boolean ignoreEncodingErrors) throws IOException {
             return text;
           }
         });
     DeferredDiagnosticHandler diagnostics = new DeferredDiagnosticHandler(log);
     ImmutableList<RawTok> rawToks = JavacTokens.getTokens(text, context, stopTokens);
     if (diagnostics.getDiagnostics().stream().anyMatch(d -> d.getKind() == Diagnostic.Kind.ERROR)) {
       return ImmutableList.of(new Tok(0, "", "", 0, 0, true, null)); // EOF
     }
     int kN = 0; // Next index to hand out to a numbered tok.
     List<Tok> toks = new ArrayList<>();
     int charI = 0; // Running character position of the next tok.
     int columnI = 0; // Running column of the next tok.
     for (RawTok t : rawToks) {
       if (stopTokens.contains(t.kind())) {
         break;
       }
       int charI0 = t.pos();
       // Get string, possibly with Unicode escapes.
       String originalTokText = text.substring(charI0, t.endPos());
       String tokText =
           t.kind() == TokenKind.STRINGLITERAL
               ? t.stringVal() // Unicode escapes removed.
               : originalTokText;
       char tokText0 = tokText.charAt(0); // The token's first character.
       final boolean isToken; // Is this tok a token?
       final boolean isNumbered; // Is this tok numbered? (tokens and comments)
       String extraNewline = null; // Extra newline at end?
       List<String> strings = new ArrayList<>(); // The pieces this raw tok is split into.
       if (Character.isWhitespace(tokText0)) {
         // Whitespace: emit each line ending and each non-empty run of other whitespace separately.
         isToken = false;
         isNumbered = false;
         Iterator<String> it = Newlines.lineIterator(originalTokText);
         while (it.hasNext()) {
           String line = it.next();
           String newline = Newlines.getLineEnding(line);
           if (newline != null) {
             String spaces = line.substring(0, line.length() - newline.length());
             if (!spaces.isEmpty()) {
               strings.add(spaces);
             }
             strings.add(newline);
           } else if (!line.isEmpty()) {
             strings.add(line);
           }
         }
       } else if (tokText.startsWith("'") || tokText.startsWith("\"")) {
         // Character or string literal: kept whole.
         isToken = true;
         isNumbered = true;
         strings.add(originalTokText);
       } else if (tokText.startsWith("//") || tokText.startsWith("/*")) {
         // For compatibility with an earlier lexer, the newline after a // comment is its own tok.
         if (tokText.startsWith("//")
             && (originalTokText.endsWith("\n") || originalTokText.endsWith("\r"))) {
           extraNewline = Newlines.getLineEnding(originalTokText);
           tokText = tokText.substring(0, tokText.length() - extraNewline.length());
           originalTokText =
               originalTokText.substring(0, originalTokText.length() - extraNewline.length());
         }
         // Comments are numbered but are not tokens.
         isToken = false;
         isNumbered = true;
         strings.add(originalTokText);
       } else if (Character.isJavaIdentifierStart(tokText0)
           || Character.isDigit(tokText0)
           || (tokText0 == '.' && tokText.length() > 1 && Character.isDigit(tokText.charAt(1)))) {
         // Identifier, keyword, or numeric literal (a dot may begin a number, as in .2D).
         isToken = true;
         isNumbered = true;
         strings.add(tokText);
       } else {
         // Other tokens ("+" or "++" or ">>" are broken into one-character toks, because ">>"
         // cannot be lexed without syntactic knowledge. This implementation fails if the token
         // contains Unicode escapes.
         isToken = true;
         isNumbered = true;
         for (char c : tokText.toCharArray()) {
           strings.add(String.valueOf(c));
         }
       }
       if (strings.size() == 1) {
         toks.add(
             new Tok(
                 isNumbered ? kN++ : -1,
                 originalTokText,
                 tokText,
                 charI,
                 columnI,
                 isToken,
                 t.kind()));
         charI += originalTokText.length();
         columnI = updateColumn(columnI, originalTokText);

       } else {
         if (!tokText.equals(originalTokText)) {
           throw new FormatterException(
               "Unicode escapes not allowed in whitespace or multi-character operators");
         }
         for (String str : strings) {
           toks.add(new Tok(isNumbered ? kN++ : -1, str, str, charI, columnI, isToken, null));
           charI += str.length();
           // Advance by this piece only. Advancing by the whole raw tok for every piece would
           // over-count the column after operators such as "++" and give pieces that follow a
           // line ending the wrong column.
           columnI = updateColumn(columnI, str);
         }
       }
       if (extraNewline != null) {
         toks.add(new Tok(-1, extraNewline, extraNewline, charI, columnI, false, null));
         columnI = 0;
         charI += extraNewline.length();
       }
     }
     toks.add(new Tok(kN, "", "", charI, columnI, true, null)); // EOF tok.
     return ImmutableList.copyOf(toks);
   }

   /**
    * Returns the column reached after appending {@code originalTokText} at column {@code columnI}.
    *
    * <p>Uses the last value produced by {@code Newlines.lineOffsetIterator} (presumably the offset
    * at which the final line of the text starts). When that offset is positive the column restarts
    * from it; otherwise the text's length is added to the current column.
    */
   private static int updateColumn(int columnI, String originalTokText) {
     int lastLineStart = Iterators.getLast(Newlines.lineOffsetIterator(originalTokText));
     return lastLineStart > 0
         ? originalTokText.length() - lastLineStart
         : columnI + originalTokText.length();
   }

   /**
    * Groups the lexed toks into {@link Token}s: each token tok together with the non-token toks
    * assigned before it and after it.
    *
    * <p>The unguarded {@code toks.get(k)} calls rely on the list ending with a token tok;
    * {@code buildToks} always appends an EOF tok that is a token.
    *
    * @param toks the toks in input order
    * @return the Tokens in input order
    */
   private static ImmutableList<Token> buildTokens(List<Tok> toks) {
     ImmutableList.Builder<Token> tokens = ImmutableList.builder();
     int k = 0;
     int kN = toks.size();

     // Remaining non-tokens before the token go here.
     ImmutableList.Builder<Tok> toksBefore = ImmutableList.builder();

     OUTERMOST:
     while (k < kN) {
       // Collect every non-token up to the next token as a "before" tok.
       while (!toks.get(k).isToken()) {
         Tok tok = toks.get(k++);
         toksBefore.add(tok);
         if (isParamComment(tok)) {
           while (toks.get(k).isNewline()) {
             // drop newlines after parameter comments
             k++;
           }
         }
       }
       // The token that the Token under construction is built around.
       Tok tok = toks.get(k++);

       // Non-tokens starting on the same line go here too.
       ImmutableList.Builder<Tok> toksAfter = ImmutableList.builder();
       OUTER:
       while (k < kN && !toks.get(k).isToken()) {
         // Don't attach inline comments to certain leading tokens, e.g. for `f(/*flag1=*/true)`.
         //
         // Attaching inline comments to the right token is hard, and this barely
         // scratches the surface. But it's enough to do a better job with parameter
         // name comments.
         //
         // TODO(cushon): find a better strategy.
         if (toks.get(k).isSlashStarComment()) {
           switch (tok.getText()) {
             case "(":
             case "<":
             case ".":
               break OUTER;
             default:
               break;
           }
         }
         // A javadoc comment right after ';' is left for the following token.
         if (toks.get(k).isJavadocComment()) {
           switch (tok.getText()) {
             case ";":
               break OUTER;
             default:
               break;
           }
         }
         // A parameter comment ends the current Token and becomes the first "before" tok of the
         // next one.
         if (isParamComment(toks.get(k))) {
           tokens.add(new Token(toksBefore.build(), tok, toksAfter.build()));
           toksBefore = ImmutableList.<Tok>builder().add(toks.get(k++));
           // drop newlines after parameter comments
           while (toks.get(k).isNewline()) {
             k++;
           }
           continue OUTERMOST;
         }
         Tok nonTokenAfter = toks.get(k++);
         toksAfter.add(nonTokenAfter);
         // Stop attaching "after" toks once one of them contains a break (per
         // Newlines.containsBreaks); later non-tokens go to the next Token.
         if (Newlines.containsBreaks(nonTokenAfter.getText())) {
           break;
         }
       }
       tokens.add(new Token(toksBefore.build(), tok, toksAfter.build()));
       toksBefore = ImmutableList.builder();
     }
     return tokens.build();
   }

   // Matches a whole block comment whose body is a name (letters, digits, whitespace, '_' or '-')
   // followed by '=' and optional whitespace, as in the comment of `f(/*flag1=*/true)`.
   // Compiled once rather than on every call, since buildTokens calls isParamComment in a loop.
   private static final Pattern PARAM_COMMENT =
       Pattern.compile("\\/\\*[A-Za-z0-9\\s_\\-]+=\\s*\\*\\/");

   /** Returns whether {@code tok} is a block comment whose entire text is a parameter-name comment. */
   private static boolean isParamComment(Tok tok) {
     return tok.isSlashStarComment() && PARAM_COMMENT.matcher(tok.getText()).matches();
   }

   /**
    * Convert from an offset and length flag pair to a token range.
    *
    * <p>A negative {@code length} yields the empty range. A {@code length} of {@code 0} is treated
    * as {@code 1}. The result spans from the first to the last {@link Token} whose character range
    * intersects the requested characters, or is empty if there is none.
    *
    * @param offset the {@code 0}-based offset in characters
    * @param length the length in characters
    * @return the {@code 0}-based {@link Range} of tokens
    * @throws FormatterException if offset + length is outside the file
    */
   Range<Integer> characterRangeToTokenRange(int offset, int length) throws FormatterException {
     // Add in long arithmetic: an int sum could wrap around to a negative value and slip past
     // this check, only to fail later with an unrelated exception.
     long requiredLength = (long) offset + length;
     if (requiredLength > text.length()) {
       throw new FormatterException(
           String.format(
               "error: invalid length %d, offset + length (%d) is outside the file",
               length, requiredLength));
     }
     if (length < 0) {
       return EMPTY_RANGE;
     }
     if (length == 0) {
       // 0 stands for "format the line under the cursor"
       length = 1;
     }
     ImmutableCollection<Token> enclosed =
         getPositionTokenMap()
             .subRangeMap(Range.closedOpen(offset, offset + length))
             .asMapOfRanges()
             .values();
     if (enclosed.isEmpty()) {
       return EMPTY_RANGE;
     }
     // Token indices are reported half-open: first token's index up to one past the last token's.
     return Range.closedOpen(
         enclosed.iterator().next().getTok().getIndex(), getLast(enclosed).getTok().getIndex() + 1);
   }

   /**
    * Get the number of numbered toks (tokens and comments), not counting the EOF tok.
    *
    * @return the number of numbered toks before the EOF tok, which is also the EOF tok's index
    */
   @Override
   public int getkN() {
     return kN;
   }

   /**
    * Get the Token by index.
    *
    * @param k the index of a numbered tok (a token or a comment); the lookup table has
    *     {@code kN + 1} entries
    * @return the {@link Token} that the tok with index {@code k} was assigned to
    */
   @Override
   public Token getToken(int k) {
     return kToToken[k];
   }

   /**
    * Get the input tokens.
    *
    * @return the immutable list of {@link Token}s built by the constructor, in input order
    */
   @Override
   public ImmutableList<? extends Input.Token> getTokens() {
     return tokens;
   }

   /**
    * Get the range map from character position to {@link Token}. Each key is the closed range from
    * the position of a Token's first tok to the last character of its last tok. Used to look for
    * tokens following a given one, and to implement the --offset and --length flags to reformat a
    * character range in the input file.
    *
    * @return the immutable range map from position to {@link Token}
    */
   @Override
   public ImmutableRangeMap<Integer, Token> getPositionTokenMap() {
     return positionTokenMap;
   }

   /** Describes this input by its tokens followed by the superclass's own description. */
   @Override
   public String toString() {
     MoreObjects.ToStringHelper description = MoreObjects.toStringHelper(this);
     description.add("tokens", tokens);
     description.add("super", super.toString());
     return description.toString();
   }

   private JCCompilationUnit unit;

   /**
    * Looks up the line number of {@code inputPosition} in the compilation unit's line map. Fails
    * verification if no compilation unit has been set.
    */
   @Override
   public int getLineNumber(int inputPosition) {
     JCCompilationUnit checkedUnit =
         Verify.verifyNotNull(unit, "Expected compilation unit to be set.");
     return checkedUnit.getLineMap().getLineNumber(inputPosition);
   }

   /**
    * Looks up the column number of {@code inputPosition} in the compilation unit's line map. Fails
    * verification if no compilation unit has been set.
    */
   @Override
   public int getColumnNumber(int inputPosition) {
     JCCompilationUnit checkedUnit =
         Verify.verifyNotNull(unit, "Expected compilation unit to be set.");
     return checkedUnit.getLineMap().getColumnNumber(inputPosition);
   }

   // TODO(cushon): refactor JavaInput so the CompilationUnit can be passed into
   // the constructor.
   /**
    * Sets the compilation unit whose line map backs {@link #getLineNumber} and {@link
    * #getColumnNumber}. Both of those fail verification until this has been called with a non-null
    * unit.
    *
    * @param unit the compilation unit for this input
    */
   public void setCompilationUnit(JCCompilationUnit unit) {
     this.unit = unit;
   }

   /**
    * Converts each character range to a token range and collects the results in one range set.
    *
    * @param characterRanges the character ranges to convert; each is first put in canonical form
    *     over the integers
    * @return the set of corresponding token ranges
    * @throws FormatterException if any range reaches outside the file
    */
   public RangeSet<Integer> characterRangesToTokenRanges(Collection<Range<Integer>> characterRanges)
       throws FormatterException {
     RangeSet<Integer> result = TreeRangeSet.create();
     for (Range<Integer> requested : characterRanges) {
       Range<Integer> canonicalRange = requested.canonical(DiscreteDomain.integers());
       int offset = canonicalRange.lowerEndpoint();
       int length = canonicalRange.upperEndpoint() - offset;
       result.add(characterRangeToTokenRange(offset, length));
     }
     return result;
   }
 }
	/*
	* Copyright 2015 Google Inc.
	*
	* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
	* in compliance with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software distributed under the License
	* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
	* or implied. See the License for the specific language governing permissions and limitations under
	* the License.
	*/

	package com.google.googlejavaformat.java;

	import static com.google.common.base.Preconditions.checkNotNull;
	import static com.google.common.collect.Iterables.getLast;
	import static java.nio.charset.StandardCharsets.UTF_8;

	import com.google.common.base.MoreObjects;
	import com.google.common.base.Verify;
	import com.google.common.collect.DiscreteDomain;
	import com.google.common.collect.ImmutableCollection;
	import com.google.common.collect.ImmutableList;
	import com.google.common.collect.ImmutableMap;
	import com.google.common.collect.ImmutableRangeMap;
	import com.google.common.collect.ImmutableSet;
	import com.google.common.collect.Iterators;
	import com.google.common.collect.Range;
	import com.google.common.collect.RangeSet;
	import com.google.common.collect.TreeRangeSet;
	import com.google.googlejavaformat.Input;
	import com.google.googlejavaformat.Newlines;
	import com.google.googlejavaformat.java.JavacTokens.RawTok;
	import com.sun.tools.javac.file.JavacFileManager;
	import com.sun.tools.javac.parser.Tokens.TokenKind;
	import com.sun.tools.javac.tree.JCTree.JCCompilationUnit;
	import com.sun.tools.javac.util.Context;
	import com.sun.tools.javac.util.Log;
	import com.sun.tools.javac.util.Log.DeferredDiagnosticHandler;
	import com.sun.tools.javac.util.Options;
	import java.io.IOException;
	import java.net.URI;
	import java.util.ArrayList;
	import java.util.Collection;
	import java.util.Iterator;
	import java.util.List;
	import javax.tools.Diagnostic;
	import javax.tools.DiagnosticCollector;
	import javax.tools.DiagnosticListener;
	import javax.tools.JavaFileObject;
	import javax.tools.JavaFileObject.Kind;
	import javax.tools.SimpleJavaFileObject;

	/** {@code JavaInput} extends {@link Input} to represent a Java input document. */
	public final class JavaInput extends Input {
	/**
	* A {@code JavaInput} is a sequence of {@link Tok}s that cover the Java input. A {@link Tok} is
	* either a token (if {@code isToken()}), or a non-token, which is a comment (if {@code
	* isComment()}) or a newline (if {@code isNewline()}) or a maximal sequence of other whitespace
	* characters (if {@code isSpaces()}). Each {@link Tok} contains a sequence of characters, an
	* index (sequential starting at {@code 0} for tokens and comments, else {@code -1}), and a
	* ({@code 0}-origin) position in the input. The concatenation of the texts of all the {@link
	* Tok}s equals the input. Each Input ends with a token EOF {@link Tok}, with empty text.
	*
	* <p>A {@code /*} comment possibly contains newlines; a {@code //} comment does not contain the
	* terminating newline character, but is followed by a newline {@link Tok}.
	*/
	static final class Tok implements Input.Tok {
	private final int index;
	private final String originalText;
	private final String text;
	private final int position;
	private final int columnI;
	private final boolean isToken;
	private final TokenKind kind;

	/**
	* The {@code Tok} constructor.
	*
	* @param index its index
	* @param originalText its original text, before removing Unicode escapes
	* @param text its text after removing Unicode escapes
	* @param position its {@code 0}-origin position in the input
	* @param columnI its {@code 0}-origin column number in the input
	* @param isToken whether the {@code Tok} is a token
	* @param kind the token kind
	*/
	Tok(
	int index,
	String originalText,
	String text,
	int position,
	int columnI,
	boolean isToken,
	TokenKind kind) {
	this.index = index;
	this.originalText = originalText;
	this.text = text;
	this.position = position;
	this.columnI = columnI;
	this.isToken = isToken;
	this.kind = kind;
	}

	@Override
	public int getIndex() {
	return index;
	}

	@Override
	public String getText() {
	return text;
	}

	@Override
	public String getOriginalText() {
	return originalText;
	}

	@Override
	public int length() {
	return originalText.length();
	}

	@Override
	public int getPosition() {
	return position;
	}

	@Override
	public int getColumn() {
	return columnI;
	}

	boolean isToken() {
	return isToken;
	}

	@Override
	public boolean isNewline() {
	return Newlines.isNewline(text);
	}

	@Override
	public boolean isSlashSlashComment() {
	return text.startsWith("//");
	}

	@Override
	public boolean isSlashStarComment() {
	return text.startsWith("/*");
	}

	@Override
	public boolean isJavadocComment() {
	// comments like `/***` are also javadoc, but their formatting probably won't be improved
	// by the javadoc formatter
	return text.startsWith("/") && text.charAt("/".length()) != '*' && text.length() > 4;
	}

	@Override
	public boolean isComment() {
	return isSlashSlashComment() \|\| isSlashStarComment();
	}

	@Override
	public String toString() {
	return MoreObjects.toStringHelper(this)
	.add("index", index)
	.add("text", text)
	.add("position", position)
	.add("columnI", columnI)
	.add("isToken", isToken)
	.toString();
	}

	public TokenKind kind() {
	return kind;
	}
	}

	/**
	* A {@link Token} contains a token {@link Tok} and its associated non-tokens; each non-token
	* {@link Tok} belongs to one {@link Token}. Each {@link Token} has an immutable list of its
	* non-tokens that appear before it, and another list of its non-tokens that appear after it. The
	* concatenation of the texts of all the {@link Token}s' {@link Tok}s, each preceded by the texts
	* of its {@code toksBefore} and followed by the texts of its {@code toksAfter}, equals the input.
	*/
	static final class Token implements Input.Token {
	private final Tok tok;
	private final ImmutableList<Tok> toksBefore;
	private final ImmutableList<Tok> toksAfter;

	/**
	* Token constructor.
	*
	* @param toksBefore the earlier non-token {link Tok}s assigned to this {@code Token}
	* @param tok this token {@link Tok}
	* @param toksAfter the later non-token {link Tok}s assigned to this {@code Token}
	*/
	Token(List<Tok> toksBefore, Tok tok, List<Tok> toksAfter) {
	this.toksBefore = ImmutableList.copyOf(toksBefore);
	this.tok = tok;
	this.toksAfter = ImmutableList.copyOf(toksAfter);
	}

	/**
	* Get the token's {@link Tok}.
	*
	* @return the token's {@link Tok}
	*/
	@Override
	public Tok getTok() {
	return tok;
	}

	/**
	* Get the earlier {@link Tok}s assigned to this {@code Token}.
	*
	* @return the earlier {@link Tok}s assigned to this {@code Token}
	*/
	@Override
	public ImmutableList<? extends Input.Tok> getToksBefore() {
	return toksBefore;
	}

	/**
	* Get the later {@link Tok}s assigned to this {@code Token}.
	*
	* @return the later {@link Tok}s assigned to this {@code Token}
	*/
	@Override
	public ImmutableList<? extends Input.Tok> getToksAfter() {
	return toksAfter;
	}

	@Override
	public String toString() {
	return MoreObjects.toStringHelper(this)
	.add("tok", tok)
	.add("toksBefore", toksBefore)
	.add("toksAfter", toksAfter)
	.toString();
	}
	}

	private final String text; // The input.
	private int kN; // The number of numbered toks (tokens or comments), excluding the EOF.

	/*
	* The following lists record the sequential indices of the {@code Tok}s on each input line. (Only
	* tokens and comments have sequential indices.) Tokens and {@code //} comments lie on just one
	* line; {@code /*} comments can lie on multiple lines. These data structures (along with
	* equivalent ones for the formatted output) let us compute correspondences between the input and
	* output.
	*/

	private final ImmutableMap<Integer, Integer> positionToColumnMap; // Map Tok position to column.
	private final ImmutableList<Token> tokens; // The Tokens for this input.
	private final ImmutableRangeMap<Integer, Token> positionTokenMap; // Map position to Token.

	/** Map from Tok index to the associated Token. */
	private final Token[] kToToken;

	/**
	 * Builds a {@code JavaInput} from source text: records the lines, lexes the text into toks,
	 * groups the toks into {@code Token}s, and indexes those {@code Token}s both by character
	 * position and by tok index.
	 *
	 * @param text the input text
	 * @throws FormatterException if the input cannot be parsed
	 */
	public JavaInput(String text) throws FormatterException {
		this.text = checkNotNull(text);
		setLines(ImmutableList.copyOf(Newlines.lineIterator(text)));
		ImmutableList<Tok> lexed = buildToks(text);
		positionToColumnMap = makePositionToColumnMap(lexed);
		tokens = buildTokens(lexed);

		// Key each Token by the closed character range from its start tok to its end tok.
		ImmutableRangeMap.Builder<Integer, Token> byPosition = ImmutableRangeMap.builder();
		for (Token token : tokens) {
			Input.Tok lastTok = JavaOutput.endTok(token);
			int lastPosition = lastTok.getPosition();
			// An empty end tok contributes only its own position; otherwise extend to its last char.
			int upperBound =
					lastTok.getText().isEmpty() ? lastPosition : lastPosition + lastTok.length() - 1;
			int lowerBound = JavaOutput.startTok(token).getPosition();
			byPosition.put(Range.closed(lowerBound, upperBound), token);
		}
		positionTokenMap = byPosition.build();

		// adjust kN for EOF
		kToToken = new Token[kN + 1];
		for (Token token : tokens) {
			// Unnumbered toks carry a negative index and have no slot in the table.
			for (Input.Tok before : token.getToksBefore()) {
				int index = before.getIndex();
				if (index >= 0) {
					kToToken[index] = token;
				}
			}
			kToToken[token.getTok().getIndex()] = token;
			for (Input.Tok after : token.getToksAfter()) {
				int index = after.getIndex();
				if (index >= 0) {
					kToToken[index] = token;
				}
			}
		}
	}

	/**
	 * Builds the map from each tok's character position to the column recorded for that tok.
	 *
	 * @param toks the toks to index
	 * @return an immutable map with one entry per tok, in the order the toks were given
	 */
	private static ImmutableMap<Integer, Integer> makePositionToColumnMap(List<Tok> toks) {
		ImmutableMap.Builder<Integer, Integer> columnsByPosition = ImmutableMap.builder();
		toks.forEach(tok -> columnsByPosition.put(tok.getPosition(), tok.getColumn()));
		return columnsByPosition.build();
	}

	/**
	* Get the input text.
	*
	* <p>This is the exact string that was passed to the constructor.
	*
	* @return the input text
	*/
	@Override
	public String getText() {
	return text;
	}

	/**
	* Get the map from {@code Tok} position to column.
	*
	* <p>The map holds one entry per {@code Tok} produced by lexing the input; both the position
	* and the column are the values recorded on the {@code Tok} itself.
	*
	* @return the immutable map from each {@code Tok}'s position to its column
	*/
	@Override
	public ImmutableMap<Integer, Integer> getPositionToColumnMap() {
	return positionToColumnMap;
	}

	/**
	 * Lexes the whole input with no extra stop tokens, records {@code kN} from the index of the
	 * final tok, and computes the ranges for the resulting toks.
	 *
	 * @param text the text to be lexed
	 * @return the toks for the input
	 * @throws FormatterException if lexing fails
	 */
	private ImmutableList<Tok> buildToks(String text) throws FormatterException {
		ImmutableList<Tok> lexed = buildToks(text, ImmutableSet.of());
		// The two-argument overload always appends the EOF tok last; its index becomes kN.
		Tok finalTok = getLast(lexed);
		kN = finalTok.getIndex();
		computeRanges(lexed);
		return lexed;
	}

	/**
	* Lex the input and build the list of toks.
	*
	* @param text the text to be lexed.
	* @param stopTokens a set of tokens which should cause lexing to stop. If one of these is found,
	* the returned list will include tokens up to but not including that token.
	*/
	static ImmutableList<Tok> buildToks(String text, ImmutableSet<TokenKind> stopTokens)
	throws FormatterException {
	stopTokens = ImmutableSet.<TokenKind>builder().addAll(stopTokens).add(TokenKind.EOF).build();
	Context context = new Context();
	Options.instance(context).put("--enable-preview", "true");
	new JavacFileManager(context, true, UTF_8);
	DiagnosticCollector<JavaFileObject> diagnosticCollector = new DiagnosticCollector<>();
	context.put(DiagnosticListener.class, diagnosticCollector);
	Log log = Log.instance(context);
	log.useSource(
	new SimpleJavaFileObject(URI.create("Source.java"), Kind.SOURCE) {
	@Override
	public CharSequence getCharContent(boolean ignoreEncodingErrors) throws IOException {
	return text;
	}
	});
	DeferredDiagnosticHandler diagnostics = new DeferredDiagnosticHandler(log);
	ImmutableList<RawTok> rawToks = JavacTokens.getTokens(text, context, stopTokens);
	if (diagnostics.getDiagnostics().stream().anyMatch(d -> d.getKind() == Diagnostic.Kind.ERROR)) {
	return ImmutableList.of(new Tok(0, "", "", 0, 0, true, null)); // EOF
	}
	int kN = 0;
	List<Tok> toks = new ArrayList<>();
	int charI = 0;
	int columnI = 0;
	for (RawTok t : rawToks) {
	if (stopTokens.contains(t.kind())) {
	break;
	}
	int charI0 = t.pos();
	// Get string, possibly with Unicode escapes.
	String originalTokText = text.substring(charI0, t.endPos());
	String tokText =
	t.kind() == TokenKind.STRINGLITERAL
	? t.stringVal() // Unicode escapes removed.
	: originalTokText;
	char tokText0 = tokText.charAt(0); // The token's first character.
	final boolean isToken; // Is this tok a token?
	final boolean isNumbered; // Is this tok numbered? (tokens and comments)
	String extraNewline = null; // Extra newline at end?
	List<String> strings = new ArrayList<>();
	if (Character.isWhitespace(tokText0)) {
	isToken = false;
	isNumbered = false;
	Iterator<String> it = Newlines.lineIterator(originalTokText);
	while (it.hasNext()) {
	String line = it.next();
	String newline = Newlines.getLineEnding(line);
	if (newline != null) {
	String spaces = line.substring(0, line.length() - newline.length());
	if (!spaces.isEmpty()) {
	strings.add(spaces);
	}
	strings.add(newline);
	} else if (!line.isEmpty()) {
	strings.add(line);
	}
	}
	} else if (tokText.startsWith("'") \|\| tokText.startsWith("\"")) {
	isToken = true;
	isNumbered = true;
	strings.add(originalTokText);
	} else if (tokText.startsWith("//") \|\| tokText.startsWith("/*")) {
	// For compatibility with an earlier lexer, the newline after a // comment is its own tok.
	if (tokText.startsWith("//")
	&& (originalTokText.endsWith("\n") \|\| originalTokText.endsWith("\r"))) {
	extraNewline = Newlines.getLineEnding(originalTokText);
	tokText = tokText.substring(0, tokText.length() - extraNewline.length());
	originalTokText =
	originalTokText.substring(0, originalTokText.length() - extraNewline.length());
	}
	isToken = false;
	isNumbered = true;
	strings.add(originalTokText);
	} else if (Character.isJavaIdentifierStart(tokText0)
	\|\| Character.isDigit(tokText0)
	\|\| (tokText0 == '.' && tokText.length() > 1 && Character.isDigit(tokText.charAt(1)))) {
	// Identifier, keyword, or numeric literal (a dot may begin a number, as in .2D).
	isToken = true;
	isNumbered = true;
	strings.add(tokText);
	} else {
	// Other tokens ("+" or "++" or ">>" are broken into one-character toks, because ">>"
	// cannot be lexed without syntactic knowledge. This implementation fails if the token
	// contains Unicode escapes.
	isToken = true;
	isNumbered = true;
	for (char c : tokText.toCharArray()) {
	strings.add(String.valueOf(c));
	}
	}
	if (strings.size() == 1) {
	toks.add(
	new Tok(
	isNumbered ? kN++ : -1,
	originalTokText,
	tokText,
	charI,
	columnI,
	isToken,
	t.kind()));
	charI += originalTokText.length();
	columnI = updateColumn(columnI, originalTokText);

	} else {
	if (strings.size() != 1 && !tokText.equals(originalTokText)) {
	throw new FormatterException(
	"Unicode escapes not allowed in whitespace or multi-character operators");
	}
	for (String str : strings) {
	toks.add(new Tok(isNumbered ? kN++ : -1, str, str, charI, columnI, isToken, null));
	charI += str.length();
	columnI = updateColumn(columnI, originalTokText);
	}
	}
	if (extraNewline != null) {
	toks.add(new Tok(-1, extraNewline, extraNewline, charI, columnI, false, null));
	columnI = 0;
	charI += extraNewline.length();
	}
	}
	toks.add(new Tok(kN, "", "", charI, columnI, true, null)); // EOF tok.
	return ImmutableList.copyOf(toks);
	}

	/**
	 * Computes the column that follows a piece of text starting at the given column.
	 *
	 * <p>If the last offset reported by {@code Newlines.lineOffsetIterator} is positive, the column
	 * restarts and is counted from that offset (presumably the start of the text's last line; verify
	 * against {@code Newlines}). Otherwise the text's length is added to the starting column.
	 *
	 * @param columnI the column at which the text starts
	 * @param originalTokText the text being stepped over
	 * @return the column immediately after the text
	 */
	private static int updateColumn(int columnI, String originalTokText) {
		int lastOffset = Iterators.getLast(Newlines.lineOffsetIterator(originalTokText));
		int textLength = originalTokText.length();
		return lastOffset > 0 ? textLength - lastOffset : columnI + textLength;
	}

	/**
	* Groups the flat list of toks into {@code Token}s.
	*
	* <p>Each {@code Token} consists of one tok for which {@code isToken()} is true, the non-token
	* toks collected before it, and the non-token toks that follow it up to and including the first
	* one whose text contains a line break. Exceptions to the "attach after" rule:
	*
	* <ul>
	*   <li>a slash-star comment is not attached after {@code (}, {@code <} or {@code .};
	*   <li>a Javadoc comment is not attached after {@code ;};
	*   <li>a parameter comment (see {@code isParamComment}) ends the current {@code Token} and
	*       becomes the first "before" tok of the next one, and newlines directly after it are
	*       dropped.
	* </ul>
	*
	* <p>The inner loops index into {@code toks} without a bounds check, so this relies on the list
	* ending with a tok for which {@code isToken()} is true and {@code isNewline()} is false
	* (presumably the EOF tok appended by {@code buildToks}; confirm against {@code Tok}).
	*
	* @param toks the toks to group, in input order
	* @return the resulting {@code Token}s, in input order
	*/
	private static ImmutableList<Token> buildTokens(List<Tok> toks) {
	ImmutableList.Builder<Token> tokens = ImmutableList.builder();
	int k = 0;
	int kN = toks.size();

	// Remaining non-tokens before the token go here.
	ImmutableList.Builder<Tok> toksBefore = ImmutableList.builder();

	OUTERMOST:
	while (k < kN) {
	// Collect every non-token up to the next real token as "before" toks.
	while (!toks.get(k).isToken()) {
	Tok tok = toks.get(k++);
	toksBefore.add(tok);
	if (isParamComment(tok)) {
	while (toks.get(k).isNewline()) {
	// drop newlines after parameter comments
	k++;
	}
	}
	}
	// The real token this Token is built around.
	Tok tok = toks.get(k++);

	// Non-tokens starting on the same line go here too.
	ImmutableList.Builder<Tok> toksAfter = ImmutableList.builder();
	OUTER:
	while (k < kN && !toks.get(k).isToken()) {
	// Don't attach inline comments to certain leading tokens, e.g. for `f(/*flag1=*/true)`.
	//
	// Attaching inline comments to the right token is hard, and this barely
	// scratches the surface. But it's enough to do a better job with parameter
	// name comments.
	//
	// TODO(cushon): find a better strategy.
	if (toks.get(k).isSlashStarComment()) {
	switch (tok.getText()) {
	case "(":
	case "<":
	case ".":
	break OUTER;
	default:
	break;
	}
	}
	if (toks.get(k).isJavadocComment()) {
	switch (tok.getText()) {
	case ";":
	break OUTER;
	default:
	break;
	}
	}
	if (isParamComment(toks.get(k))) {
	// Finish the current Token here; the parameter comment starts the next Token's "before" list.
	tokens.add(new Token(toksBefore.build(), tok, toksAfter.build()));
	toksBefore = ImmutableList.<Tok>builder().add(toks.get(k++));
	// drop newlines after parameter comments
	while (toks.get(k).isNewline()) {
	k++;
	}
	continue OUTERMOST;
	}
	Tok nonTokenAfter = toks.get(k++);
	toksAfter.add(nonTokenAfter);
	// A line break ends the "after" list; later non-tokens belong to the next Token.
	if (Newlines.containsBreaks(nonTokenAfter.getText())) {
	break;
	}
	}
	tokens.add(new Token(toksBefore.build(), tok, toksAfter.build()));
	toksBefore = ImmutableList.builder();
	}
	return tokens.build();
	}

	/**
	 * Returns whether the tok is a parameter-name comment: a slash-star comment whose entire body
	 * is one or more letters, digits, whitespace, underscores or hyphens, followed by {@code =} and
	 * optional whitespace (for example a comment containing just {@code flag1=}).
	 *
	 * @param tok the tok to test
	 * @return true if {@code tok} is a slash-star comment of that form
	 */
	private static boolean isParamComment(Tok tok) {
		// The pattern must begin with a literal "/*" and end with a literal "*/"; the previous
		// pattern had misplaced escapes and could never match a slash-star comment.
		return tok.isSlashStarComment()
				&& tok.getText().matches("/\\*[A-Za-z0-9\\s_\\-]+=\\s*\\*/");
	}

	/**
	 * Translates a character offset and length into the range of tok indices of the {@code Token}s
	 * that overlap those characters.
	 *
	 * @param offset the {@code 0}-based offset in characters
	 * @param length the length in characters; {@code 0} is treated as a single character
	 * @return the {@code 0}-based {@link Range} of tokens, or {@code EMPTY_RANGE} if the length is
	 *     negative or no {@code Token} overlaps the characters
	 * @throws FormatterException if offset + length is outside the file
	 */
	Range<Integer> characterRangeToTokenRange(int offset, int length) throws FormatterException {
		int endOffset = offset + length;
		if (endOffset > text.length()) {
			String message =
					String.format(
							"error: invalid length %d, offset + length (%d) is outside the file",
							length, endOffset);
			throw new FormatterException(message);
		}
		if (length < 0) {
			return EMPTY_RANGE;
		}
		// 0 stands for "format the line under the cursor"
		int span = (length == 0) ? 1 : length;
		Range<Integer> characters = Range.closedOpen(offset, offset + span);
		ImmutableCollection<Token> overlapping =
				getPositionTokenMap().subRangeMap(characters).asMapOfRanges().values();
		if (overlapping.isEmpty()) {
			return EMPTY_RANGE;
		}
		int firstIndex = overlapping.iterator().next().getTok().getIndex();
		int lastIndex = getLast(overlapping).getTok().getIndex();
		return Range.closedOpen(firstIndex, lastIndex + 1);
	}

	/**
	* Get the number of toks.
	*
	* <p>This is the index of the EOF tok, which equals the number of numbered toks (tokens and
	* comments) that precede it.
	*
	* @return the number of toks, excluding the EOF tok
	*/
	@Override
	public int getkN() {
	return kN;
	}

	/**
	* Get the Token by index.
	*
	* <p>The lookup table has {@code kN + 1} entries, so {@code k} must lie in {@code [0, kN]}.
	*
	* @param k the token index
	* @return the {@code Token} that the tok with index {@code k} was assigned to, either as its main
	*     tok or as one of its before/after toks
	*/
	@Override
	public Token getToken(int k) {
	return kToToken[k];
	}

	/**
	* Get the input tokens.
	*
	* <p>The list is immutable and was built once, in the constructor.
	*
	* @return the input tokens
	*/
	@Override
	public ImmutableList<? extends Input.Token> getTokens() {
	return tokens;
	}

	/**
	* Get the range map from position to {@link Token}. Used to look for tokens following a given
	* one, and to implement the --offset and --length flags to reformat a character range in the
	* input file.
	*
	* <p>Each {@link Token} is keyed by the closed range of character positions from its start tok
	* to the last character of its end tok.
	*
	* @return the immutable range map from position to {@link Token}
	*/
	@Override
	public ImmutableRangeMap<Integer, Token> getPositionTokenMap() {
	return positionTokenMap;
	}

	/** Renders the tokens followed by the superclass's own string form, for debugging. */
	@Override
	public String toString() {
		MoreObjects.ToStringHelper description = MoreObjects.toStringHelper(this);
		description.add("tokens", tokens);
		description.add("super", super.toString());
		return description.toString();
	}

	// Compilation unit used by getLineNumber and getColumnNumber. Not set by the constructor; it
	// stays null until setCompilationUnit is called.
	private JCCompilationUnit unit;

	/**
	 * Looks up the line number of an input position in the compilation unit's line map.
	 *
	 * <p>Fails verification if no compilation unit has been set.
	 *
	 * @param inputPosition the character position in the input
	 * @return the line number the line map reports for that position
	 */
	@Override
	public int getLineNumber(int inputPosition) {
		JCCompilationUnit checkedUnit =
				Verify.verifyNotNull(unit, "Expected compilation unit to be set.");
		return checkedUnit.getLineMap().getLineNumber(inputPosition);
	}

	/**
	 * Looks up the column number of an input position in the compilation unit's line map.
	 *
	 * <p>Fails verification if no compilation unit has been set.
	 *
	 * @param inputPosition the character position in the input
	 * @return the column number the line map reports for that position
	 */
	@Override
	public int getColumnNumber(int inputPosition) {
		JCCompilationUnit checkedUnit =
				Verify.verifyNotNull(unit, "Expected compilation unit to be set.");
		return checkedUnit.getLineMap().getColumnNumber(inputPosition);
	}

	// TODO(cushon): refactor JavaInput so the CompilationUnit can be passed into
	// the constructor.
	/**
	* Sets the compilation unit whose line map backs {@code getLineNumber} and
	* {@code getColumnNumber}. Both of those verify that a unit has been set, so this must be called
	* before either of them.
	*
	* @param unit the compilation unit for this input
	*/
	public void setCompilationUnit(JCCompilationUnit unit) {
	this.unit = unit;
	}

	/**
	 * Converts a collection of character ranges into the set of token ranges they cover.
	 *
	 * <p>Each range is first put into canonical form over the integers, and then converted as an
	 * offset (its lower endpoint) and a length (upper endpoint minus lower endpoint).
	 *
	 * @param characterRanges the character ranges to convert
	 * @return the union of the corresponding token ranges
	 * @throws FormatterException if any range extends outside the file
	 */
	public RangeSet<Integer> characterRangesToTokenRanges(Collection<Range<Integer>> characterRanges)
			throws FormatterException {
		RangeSet<Integer> tokenRanges = TreeRangeSet.create();
		for (Range<Integer> requested : characterRanges) {
			Range<Integer> canonical = requested.canonical(DiscreteDomain.integers());
			int offset = canonical.lowerEndpoint();
			int length = canonical.upperEndpoint() - offset;
			tokenRanges.add(characterRangeToTokenRange(offset, length));
		}
		return tokenRanges;
	}
	}