core/src/main/java/com/google/googlejavaformat/java/JavacTokens.java - platform/external/google-java-format - Git at Google

 /*
  * Copyright 2016 Google Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
  * in compliance with the License. You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software distributed under the License
  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
  * or implied. See the License for the specific language governing permissions and limitations under
  * the License.
  */

 package com.google.googlejavaformat.java;

 import static com.google.common.base.Preconditions.checkArgument;

 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
 import com.sun.tools.javac.parser.JavaTokenizer;
 import com.sun.tools.javac.parser.Scanner;
 import com.sun.tools.javac.parser.ScannerFactory;
 import com.sun.tools.javac.parser.Tokens.Comment;
 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
 import com.sun.tools.javac.parser.Tokens.Token;
 import com.sun.tools.javac.parser.Tokens.TokenKind;
 import com.sun.tools.javac.parser.UnicodeReader;
 import com.sun.tools.javac.util.Context;
 import java.util.Set;

 /** A wrapper around javac's lexer. */
 class JavacTokens {

   /** The lexer eats terminal comments, so feed it one we don't care about. */
   // TODO(b/33103797): fix javac and remove the work-around
   private static final CharSequence EOF_COMMENT = "\n//EOF";

   /** An unprocessed input token, including whitespace and comments. */
   static class RawTok {
     private final String stringVal;
     private final TokenKind kind;
     private final int pos;
     private final int endPos;

     RawTok(String stringVal, TokenKind kind, int pos, int endPos) {
       this.stringVal = stringVal;
       this.kind = kind;
       this.pos = pos;
       this.endPos = endPos;
     }

     /** The token kind, or {@code null} for whitespace and comments. */
     public TokenKind kind() {
       return kind;
     }

     /** The start position. */
     public int pos() {
       return pos;
     }

     /** The end position. */
     public int endPos() {
       return endPos;
     }

     /** The escaped string value of a literal, or {@code null} for other tokens. */
     public String stringVal() {
       return stringVal;
     }
   }

   /** Lex the input and return a list of {@link RawTok}s. */
   public static ImmutableList<RawTok> getTokens(
       String source, Context context, Set<TokenKind> stopTokens) {
     if (source == null) {
       return ImmutableList.of();
     }
     ScannerFactory fac = ScannerFactory.instance(context);
     char[] buffer = (source + EOF_COMMENT).toCharArray();
     Scanner scanner =
         new AccessibleScanner(fac, new CommentSavingTokenizer(fac, buffer, buffer.length));
     ImmutableList.Builder<RawTok> tokens = ImmutableList.builder();
     int end = source.length();
     int last = 0;
     do {
       scanner.nextToken();
       Token t = scanner.token();
       if (t.comments != null) {
         for (Comment c : Lists.reverse(t.comments)) {
           if (last < c.getSourcePos(0)) {
             tokens.add(new RawTok(null, null, last, c.getSourcePos(0)));
           }
           tokens.add(
               new RawTok(null, null, c.getSourcePos(0), c.getSourcePos(0) + c.getText().length()));
           last = c.getSourcePos(0) + c.getText().length();
         }
       }
       if (stopTokens.contains(t.kind)) {
         if (t.kind != TokenKind.EOF) {
           end = t.pos;
         }
         break;
       }
       if (last < t.pos) {
         tokens.add(new RawTok(null, null, last, t.pos));
       }
       tokens.add(
           new RawTok(
               t.kind == TokenKind.STRINGLITERAL ? "\"" + t.stringVal() + "\"" : null,
               t.kind,
               t.pos,
               t.endPos));
       last = t.endPos;
     } while (scanner.token().kind != TokenKind.EOF);
     if (last < end) {
       tokens.add(new RawTok(null, null, last, end));
     }
     return tokens.build();
   }

   /** A {@link JavaTokenizer} that saves comments. */
   static class CommentSavingTokenizer extends JavaTokenizer {
     CommentSavingTokenizer(ScannerFactory fac, char[] buffer, int length) {
       super(fac, buffer, length);
     }

     @Override
     protected Comment processComment(int pos, int endPos, CommentStyle style) {
       char[] buf = reader.getRawCharacters(pos, endPos);
       return new CommentWithTextAndPosition(
           pos, endPos, new AccessibleReader(fac, buf, buf.length), style);
     }
   }

   /** A {@link Comment} that saves its text and start position. */
   static class CommentWithTextAndPosition implements Comment {

     private final int pos;
     private final int endPos;
     private final AccessibleReader reader;
     private final CommentStyle style;

     private String text = null;

     public CommentWithTextAndPosition(
         int pos, int endPos, AccessibleReader reader, CommentStyle style) {
       this.pos = pos;
       this.endPos = endPos;
       this.reader = reader;
       this.style = style;
     }

     /**
      * Returns the source position of the character at index {@code index} in the comment text.
      *
      * <p>The handling of javadoc comments in javac has more logic to skip over leading whitespace
      * and '*' characters when indexing into doc comments, but we don't need any of that.
      */
     @Override
     public int getSourcePos(int index) {
       checkArgument(
           0 <= index && index < (endPos - pos),
           "Expected %s in the range [0, %s)",
           index,
           endPos - pos);
       return pos + index;
     }

     @Override
     public CommentStyle getStyle() {
       return style;
     }

     @Override
     public String getText() {
       String text = this.text;
       if (text == null) {
         this.text = text = new String(reader.getRawCharacters());
       }
       return text;
     }

     /**
      * We don't care about {@code @deprecated} javadoc tags (see the DepAnn check).
      *
      * @return false
      */
     @Override
     public boolean isDeprecated() {
       return false;
     }

     @Override
     public String toString() {
       return String.format("Comment: '%s'", getText());
     }
   }

   // Scanner(ScannerFactory, JavaTokenizer) is package-private
   static class AccessibleScanner extends Scanner {
     protected AccessibleScanner(ScannerFactory fac, JavaTokenizer tokenizer) {
       super(fac, tokenizer);
     }
   }

   // UnicodeReader(ScannerFactory, char[], int) is package-private
   static class AccessibleReader extends UnicodeReader {
     protected AccessibleReader(ScannerFactory fac, char[] buffer, int length) {
       super(fac, buffer, length);
     }
   }
 }
	/*
	* Copyright 2016 Google Inc.
	*
	* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
	* in compliance with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software distributed under the License
	* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
	* or implied. See the License for the specific language governing permissions and limitations under
	* the License.
	*/

	package com.google.googlejavaformat.java;

	import static com.google.common.base.Preconditions.checkArgument;

	import com.google.common.collect.ImmutableList;
	import com.google.common.collect.Lists;
	import com.sun.tools.javac.parser.JavaTokenizer;
	import com.sun.tools.javac.parser.Scanner;
	import com.sun.tools.javac.parser.ScannerFactory;
	import com.sun.tools.javac.parser.Tokens.Comment;
	import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
	import com.sun.tools.javac.parser.Tokens.Token;
	import com.sun.tools.javac.parser.Tokens.TokenKind;
	import com.sun.tools.javac.parser.UnicodeReader;
	import com.sun.tools.javac.util.Context;
	import java.util.Set;

	/** A wrapper around javac's lexer. */
	class JavacTokens {

	/** The lexer eats terminal comments, so feed it one we don't care about. */
	// TODO(b/33103797): fix javac and remove the work-around
	private static final CharSequence EOF_COMMENT = "\n//EOF";

	/** An unprocessed input token, including whitespace and comments. */
	static class RawTok {
	private final String stringVal;
	private final TokenKind kind;
	private final int pos;
	private final int endPos;

	RawTok(String stringVal, TokenKind kind, int pos, int endPos) {
	this.stringVal = stringVal;
	this.kind = kind;
	this.pos = pos;
	this.endPos = endPos;
	}

	/** The token kind, or {@code null} for whitespace and comments. */
	public TokenKind kind() {
	return kind;
	}

	/** The start position. */
	public int pos() {
	return pos;
	}

	/** The end position. */
	public int endPos() {
	return endPos;
	}

	/** The escaped string value of a literal, or {@code null} for other tokens. */
	public String stringVal() {
	return stringVal;
	}
	}

	/** Lex the input and return a list of {@link RawTok}s. */
	public static ImmutableList<RawTok> getTokens(
	String source, Context context, Set<TokenKind> stopTokens) {
	if (source == null) {
	return ImmutableList.of();
	}
	ScannerFactory fac = ScannerFactory.instance(context);
	char[] buffer = (source + EOF_COMMENT).toCharArray();
	Scanner scanner =
	new AccessibleScanner(fac, new CommentSavingTokenizer(fac, buffer, buffer.length));
	ImmutableList.Builder<RawTok> tokens = ImmutableList.builder();
	int end = source.length();
	int last = 0;
	do {
	scanner.nextToken();
	Token t = scanner.token();
	if (t.comments != null) {
	for (Comment c : Lists.reverse(t.comments)) {
	if (last < c.getSourcePos(0)) {
	tokens.add(new RawTok(null, null, last, c.getSourcePos(0)));
	}
	tokens.add(
	new RawTok(null, null, c.getSourcePos(0), c.getSourcePos(0) + c.getText().length()));
	last = c.getSourcePos(0) + c.getText().length();
	}
	}
	if (stopTokens.contains(t.kind)) {
	if (t.kind != TokenKind.EOF) {
	end = t.pos;
	}
	break;
	}
	if (last < t.pos) {
	tokens.add(new RawTok(null, null, last, t.pos));
	}
	tokens.add(
	new RawTok(
	t.kind == TokenKind.STRINGLITERAL ? "\"" + t.stringVal() + "\"" : null,
	t.kind,
	t.pos,
	t.endPos));
	last = t.endPos;
	} while (scanner.token().kind != TokenKind.EOF);
	if (last < end) {
	tokens.add(new RawTok(null, null, last, end));
	}
	return tokens.build();
	}

	/** A {@link JavaTokenizer} that saves comments. */
	static class CommentSavingTokenizer extends JavaTokenizer {
	CommentSavingTokenizer(ScannerFactory fac, char[] buffer, int length) {
	super(fac, buffer, length);
	}

	@Override
	protected Comment processComment(int pos, int endPos, CommentStyle style) {
	char[] buf = reader.getRawCharacters(pos, endPos);
	return new CommentWithTextAndPosition(
	pos, endPos, new AccessibleReader(fac, buf, buf.length), style);
	}
	}

	/** A {@link Comment} that saves its text and start position. */
	static class CommentWithTextAndPosition implements Comment {

	private final int pos;
	private final int endPos;
	private final AccessibleReader reader;
	private final CommentStyle style;

	private String text = null;

	public CommentWithTextAndPosition(
	int pos, int endPos, AccessibleReader reader, CommentStyle style) {
	this.pos = pos;
	this.endPos = endPos;
	this.reader = reader;
	this.style = style;
	}

	/**
	* Returns the source position of the character at index {@code index} in the comment text.
	*
	* <p>The handling of javadoc comments in javac has more logic to skip over leading whitespace
	* and '*' characters when indexing into doc comments, but we don't need any of that.
	*/
	@Override
	public int getSourcePos(int index) {
	checkArgument(
	0 <= index && index < (endPos - pos),
	"Expected %s in the range [0, %s)",
	index,
	endPos - pos);
	return pos + index;
	}

	@Override
	public CommentStyle getStyle() {
	return style;
	}

	@Override
	public String getText() {
	String text = this.text;
	if (text == null) {
	this.text = text = new String(reader.getRawCharacters());
	}
	return text;
	}

	/**
	* We don't care about {@code @deprecated} javadoc tags (see the DepAnn check).
	*
	* @return false
	*/
	@Override
	public boolean isDeprecated() {
	return false;
	}

	@Override
	public String toString() {
	return String.format("Comment: '%s'", getText());
	}
	}

	// Scanner(ScannerFactory, JavaTokenizer) is package-private
	static class AccessibleScanner extends Scanner {
	protected AccessibleScanner(ScannerFactory fac, JavaTokenizer tokenizer) {
	super(fac, tokenizer);
	}
	}

	// UnicodeReader(ScannerFactory, char[], int) is package-private
	static class AccessibleReader extends UnicodeReader {
	protected AccessibleReader(ScannerFactory fac, char[] buffer, int length) {
	super(fac, buffer, length);
	}
	}
	}