| /* |
| * Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. Oracle designates this |
| * particular file as subject to the "Classpath" exception as provided |
| * by Oracle in the LICENSE file that accompanied this code. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| */ |
| |
| package com.sun.tools.javac.parser; |
| |
| import com.sun.tools.javac.code.Source; |
| import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle; |
| import com.sun.tools.javac.util.*; |
| |
| import java.nio.CharBuffer; |
| |
| import static com.sun.tools.javac.parser.Tokens.*; |
| import static com.sun.tools.javac.util.LayoutCharacters.*; |
| |
| /** The lexical analyzer maps an input stream consisting of |
| * ASCII characters and Unicode escapes into a token sequence. |
| * |
| * <p><b>This is NOT part of any supported API. |
| * If you write code that depends on this, you do so at your own risk. |
| * This code and its internal interfaces are subject to change or |
| * deletion without notice.</b> |
| */ |
| public class JavaTokenizer { |
| |
| private static final boolean scannerDebug = false; |
| |
| /** Allow binary literals. |
| */ |
| private boolean allowBinaryLiterals; |
| |
| /** Allow underscores in literals. |
| */ |
| private boolean allowUnderscoresInLiterals; |
| |
| /** The source language setting. |
| */ |
| private Source source; |
| |
| /** The log to be used for error reporting. |
| */ |
| private final Log log; |
| |
| /** The token factory. */ |
| private final Tokens tokens; |
| |
| /** The token kind, set by nextToken(). |
| */ |
| protected TokenKind tk; |
| |
| /** The token's radix, set by nextToken(). |
| */ |
| protected int radix; |
| |
| /** The token's name, set by nextToken(). |
| */ |
| protected Name name; |
| |
| /** The position where a lexical error occurred; |
| */ |
| protected int errPos = Position.NOPOS; |
| |
| /** The Unicode reader (low-level stream reader). |
| */ |
| protected UnicodeReader reader; |
| |
| protected ScannerFactory fac; |
| |
| private static final boolean hexFloatsWork = hexFloatsWork(); |
| private static boolean hexFloatsWork() { |
| try { |
| Float.valueOf("0x1.0p1"); |
| return true; |
| } catch (NumberFormatException ex) { |
| return false; |
| } |
| } |
| |
/**
 * Create a tokenizer that reads from a character buffer. The buffer is
 * wrapped in a new {@link UnicodeReader} created with the same factory.
 *
 * <p>NOTE(review): the previous comment on this constructor described an
 * input array and an input length (the input might be modified; copying is
 * avoided when the length is smaller than the array or the last element is
 * white space). Presumably that applies to the reader's backing array --
 * confirm against {@code UnicodeReader}.
 *
 * @param fac the factory which created this tokenizer
 * @param buf the input characters, might be modified
 */
protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
    this(fac, new UnicodeReader(fac, buf));
}
| |
/**
 * Create a tokenizer that reads from a character array. The array is
 * wrapped in a new {@link UnicodeReader} created with the same factory.
 *
 * <p>NOTE(review): presumably the reader might modify the array, and avoids
 * copying it when {@code inputLength < buf.length} or the last element is
 * white space -- confirm against {@code UnicodeReader}.
 *
 * @param fac         the factory which created this tokenizer
 * @param buf         the input characters
 * @param inputLength the number of characters of {@code buf} to read
 */
protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
    this(fac, new UnicodeReader(fac, buf, inputLength));
}
| |
/**
 * Create a tokenizer on top of an existing reader. The log, token factory
 * and source level are taken from the factory, and the two literal feature
 * switches are initialised from the source level.
 *
 * @param fac    the factory which created this tokenizer
 * @param reader the character source to tokenize
 */
protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
    this.fac = fac;
    this.reader = reader;

    // Collaborators supplied by the factory.
    this.log = fac.log;
    this.tokens = fac.tokens;
    this.source = fac.source;

    // Language-level dependent literal features.
    this.allowBinaryLiterals = this.source.allowBinaryLiterals();
    this.allowUnderscoresInLiterals = this.source.allowUnderscoresInLiterals();
}
| |
| /** Report an error at the given position using the provided arguments. |
| */ |
| protected void lexError(int pos, String key, Object... args) { |
| log.error(pos, key, args); |
| tk = TokenKind.ERROR; |
| errPos = pos; |
| } |
| |
| /** Read next character in character or string literal and copy into sbuf. |
| */ |
| private void scanLitChar(int pos) { |
| if (reader.ch == '\\') { |
| if (reader.peekChar() == '\\' && !reader.isUnicode()) { |
| reader.skipChar(); |
| reader.putChar('\\', true); |
| } else { |
| reader.scanChar(); |
| switch (reader.ch) { |
| case '0': case '1': case '2': case '3': |
| case '4': case '5': case '6': case '7': |
| char leadch = reader.ch; |
| int oct = reader.digit(pos, 8); |
| reader.scanChar(); |
| if ('0' <= reader.ch && reader.ch <= '7') { |
| oct = oct * 8 + reader.digit(pos, 8); |
| reader.scanChar(); |
| if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') { |
| oct = oct * 8 + reader.digit(pos, 8); |
| reader.scanChar(); |
| } |
| } |
| reader.putChar((char)oct); |
| break; |
| case 'b': |
| reader.putChar('\b', true); break; |
| case 't': |
| reader.putChar('\t', true); break; |
| case 'n': |
| reader.putChar('\n', true); break; |
| case 'f': |
| reader.putChar('\f', true); break; |
| case 'r': |
| reader.putChar('\r', true); break; |
| case '\'': |
| reader.putChar('\'', true); break; |
| case '\"': |
| reader.putChar('\"', true); break; |
| case '\\': |
| reader.putChar('\\', true); break; |
| default: |
| lexError(reader.bp, "illegal.esc.char"); |
| } |
| } |
| } else if (reader.bp != reader.buflen) { |
| reader.putChar(true); |
| } |
| } |
| |
| private void scanDigits(int pos, int digitRadix) { |
| char saveCh; |
| int savePos; |
| do { |
| if (reader.ch != '_') { |
| reader.putChar(false); |
| } else { |
| if (!allowUnderscoresInLiterals) { |
| lexError(pos, "unsupported.underscore.lit", source.name); |
| allowUnderscoresInLiterals = true; |
| } |
| } |
| saveCh = reader.ch; |
| savePos = reader.bp; |
| reader.scanChar(); |
| } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_'); |
| if (saveCh == '_') |
| lexError(savePos, "illegal.underscore"); |
| } |
| |
| /** Read fractional part of hexadecimal floating point number. |
| */ |
| private void scanHexExponentAndSuffix(int pos) { |
| if (reader.ch == 'p' || reader.ch == 'P') { |
| reader.putChar(true); |
| skipIllegalUnderscores(); |
| if (reader.ch == '+' || reader.ch == '-') { |
| reader.putChar(true); |
| } |
| skipIllegalUnderscores(); |
| if (reader.digit(pos, 10) >= 0) { |
| scanDigits(pos, 10); |
| if (!hexFloatsWork) |
| lexError(pos, "unsupported.cross.fp.lit"); |
| } else |
| lexError(pos, "malformed.fp.lit"); |
| } else { |
| lexError(pos, "malformed.fp.lit"); |
| } |
| if (reader.ch == 'f' || reader.ch == 'F') { |
| reader.putChar(true); |
| tk = TokenKind.FLOATLITERAL; |
| radix = 16; |
| } else { |
| if (reader.ch == 'd' || reader.ch == 'D') { |
| reader.putChar(true); |
| } |
| tk = TokenKind.DOUBLELITERAL; |
| radix = 16; |
| } |
| } |
| |
| /** Read fractional part of floating point number. |
| */ |
| private void scanFraction(int pos) { |
| skipIllegalUnderscores(); |
| if (reader.digit(pos, 10) >= 0) { |
| scanDigits(pos, 10); |
| } |
| int sp1 = reader.sp; |
| if (reader.ch == 'e' || reader.ch == 'E') { |
| reader.putChar(true); |
| skipIllegalUnderscores(); |
| if (reader.ch == '+' || reader.ch == '-') { |
| reader.putChar(true); |
| } |
| skipIllegalUnderscores(); |
| if (reader.digit(pos, 10) >= 0) { |
| scanDigits(pos, 10); |
| return; |
| } |
| lexError(pos, "malformed.fp.lit"); |
| reader.sp = sp1; |
| } |
| } |
| |
| /** Read fractional part and 'd' or 'f' suffix of floating point number. |
| */ |
| private void scanFractionAndSuffix(int pos) { |
| radix = 10; |
| scanFraction(pos); |
| if (reader.ch == 'f' || reader.ch == 'F') { |
| reader.putChar(true); |
| tk = TokenKind.FLOATLITERAL; |
| } else { |
| if (reader.ch == 'd' || reader.ch == 'D') { |
| reader.putChar(true); |
| } |
| tk = TokenKind.DOUBLELITERAL; |
| } |
| } |
| |
| /** Read fractional part and 'd' or 'f' suffix of floating point number. |
| */ |
| private void scanHexFractionAndSuffix(int pos, boolean seendigit) { |
| radix = 16; |
| Assert.check(reader.ch == '.'); |
| reader.putChar(true); |
| skipIllegalUnderscores(); |
| if (reader.digit(pos, 16) >= 0) { |
| seendigit = true; |
| scanDigits(pos, 16); |
| } |
| if (!seendigit) |
| lexError(pos, "invalid.hex.number"); |
| else |
| scanHexExponentAndSuffix(pos); |
| } |
| |
| private void skipIllegalUnderscores() { |
| if (reader.ch == '_') { |
| lexError(reader.bp, "illegal.underscore"); |
| while (reader.ch == '_') |
| reader.scanChar(); |
| } |
| } |
| |
| /** Read a number. |
| * @param radix The radix of the number; one of 2, 8, 10, 16. |
| */ |
| private void scanNumber(int pos, int radix) { |
| // for octal, allow base-10 digit in case it's a float literal |
| this.radix = radix; |
| int digitRadix = (radix == 8 ? 10 : radix); |
| int firstDigit = reader.digit(pos, Math.max(10, digitRadix)); |
| boolean seendigit = firstDigit >= 0; |
| boolean seenValidDigit = firstDigit >= 0 && firstDigit < digitRadix; |
| if (seendigit) { |
| scanDigits(pos, digitRadix); |
| } |
| if (radix == 16 && reader.ch == '.') { |
| scanHexFractionAndSuffix(pos, seendigit); |
| } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) { |
| scanHexExponentAndSuffix(pos); |
| } else if (digitRadix == 10 && reader.ch == '.') { |
| reader.putChar(true); |
| scanFractionAndSuffix(pos); |
| } else if (digitRadix == 10 && |
| (reader.ch == 'e' || reader.ch == 'E' || |
| reader.ch == 'f' || reader.ch == 'F' || |
| reader.ch == 'd' || reader.ch == 'D')) { |
| scanFractionAndSuffix(pos); |
| } else { |
| if (!seenValidDigit) { |
| switch (radix) { |
| case 2: |
| lexError(pos, "invalid.binary.number"); |
| break; |
| case 16: |
| lexError(pos, "invalid.hex.number"); |
| break; |
| } |
| } |
| if (reader.ch == 'l' || reader.ch == 'L') { |
| reader.scanChar(); |
| tk = TokenKind.LONGLITERAL; |
| } else { |
| tk = TokenKind.INTLITERAL; |
| } |
| } |
| } |
| |
| /** Read an identifier. |
| */ |
| private void scanIdent() { |
| boolean isJavaIdentifierPart; |
| char high; |
| reader.putChar(true); |
| do { |
| switch (reader.ch) { |
| case 'A': case 'B': case 'C': case 'D': case 'E': |
| case 'F': case 'G': case 'H': case 'I': case 'J': |
| case 'K': case 'L': case 'M': case 'N': case 'O': |
| case 'P': case 'Q': case 'R': case 'S': case 'T': |
| case 'U': case 'V': case 'W': case 'X': case 'Y': |
| case 'Z': |
| case 'a': case 'b': case 'c': case 'd': case 'e': |
| case 'f': case 'g': case 'h': case 'i': case 'j': |
| case 'k': case 'l': case 'm': case 'n': case 'o': |
| case 'p': case 'q': case 'r': case 's': case 't': |
| case 'u': case 'v': case 'w': case 'x': case 'y': |
| case 'z': |
| case '$': case '_': |
| case '0': case '1': case '2': case '3': case '4': |
| case '5': case '6': case '7': case '8': case '9': |
| break; |
| case '\u0000': case '\u0001': case '\u0002': case '\u0003': |
| case '\u0004': case '\u0005': case '\u0006': case '\u0007': |
| case '\u0008': case '\u000E': case '\u000F': case '\u0010': |
| case '\u0011': case '\u0012': case '\u0013': case '\u0014': |
| case '\u0015': case '\u0016': case '\u0017': |
| case '\u0018': case '\u0019': case '\u001B': |
| case '\u007F': |
| reader.scanChar(); |
| continue; |
| case '\u001A': // EOI is also a legal identifier part |
| if (reader.bp >= reader.buflen) { |
| name = reader.name(); |
| tk = tokens.lookupKind(name); |
| return; |
| } |
| reader.scanChar(); |
| continue; |
| default: |
| if (reader.ch < '\u0080') { |
| // all ASCII range chars already handled, above |
| isJavaIdentifierPart = false; |
| } else { |
| if (Character.isIdentifierIgnorable(reader.ch)) { |
| reader.scanChar(); |
| continue; |
| } else { |
| int codePoint = reader.peekSurrogates(); |
| if (codePoint >= 0) { |
| if (isJavaIdentifierPart = Character.isJavaIdentifierPart(codePoint)) { |
| reader.putChar(true); |
| } |
| } else { |
| isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch); |
| } |
| } |
| } |
| if (!isJavaIdentifierPart) { |
| name = reader.name(); |
| tk = tokens.lookupKind(name); |
| return; |
| } |
| } |
| reader.putChar(true); |
| } while (true); |
| } |
| |
| /** Return true if reader.ch can be part of an operator. |
| */ |
| private boolean isSpecial(char ch) { |
| switch (ch) { |
| case '!': case '%': case '&': case '*': case '?': |
| case '+': case '-': case ':': case '<': case '=': |
| case '>': case '^': case '|': case '~': |
| case '@': |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| /** Read longest possible sequence of special characters and convert |
| * to token. |
| */ |
| private void scanOperator() { |
| while (true) { |
| reader.putChar(false); |
| Name newname = reader.name(); |
| TokenKind tk1 = tokens.lookupKind(newname); |
| if (tk1 == TokenKind.IDENTIFIER) { |
| reader.sp--; |
| break; |
| } |
| tk = tk1; |
| reader.scanChar(); |
| if (!isSpecial(reader.ch)) break; |
| } |
| } |
| |
| /** Read token. |
| */ |
| public Token readToken() { |
| |
| reader.sp = 0; |
| name = null; |
| radix = 0; |
| |
| int pos = 0; |
| int endPos = 0; |
| List<Comment> comments = null; |
| |
| try { |
| loop: while (true) { |
| pos = reader.bp; |
| switch (reader.ch) { |
| case ' ': // (Spec 3.6) |
| case '\t': // (Spec 3.6) |
| case FF: // (Spec 3.6) |
| do { |
| reader.scanChar(); |
| } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF); |
| processWhiteSpace(pos, reader.bp); |
| break; |
| case LF: // (Spec 3.4) |
| reader.scanChar(); |
| processLineTerminator(pos, reader.bp); |
| break; |
| case CR: // (Spec 3.4) |
| reader.scanChar(); |
| if (reader.ch == LF) { |
| reader.scanChar(); |
| } |
| processLineTerminator(pos, reader.bp); |
| break; |
| case 'A': case 'B': case 'C': case 'D': case 'E': |
| case 'F': case 'G': case 'H': case 'I': case 'J': |
| case 'K': case 'L': case 'M': case 'N': case 'O': |
| case 'P': case 'Q': case 'R': case 'S': case 'T': |
| case 'U': case 'V': case 'W': case 'X': case 'Y': |
| case 'Z': |
| case 'a': case 'b': case 'c': case 'd': case 'e': |
| case 'f': case 'g': case 'h': case 'i': case 'j': |
| case 'k': case 'l': case 'm': case 'n': case 'o': |
| case 'p': case 'q': case 'r': case 's': case 't': |
| case 'u': case 'v': case 'w': case 'x': case 'y': |
| case 'z': |
| case '$': case '_': |
| scanIdent(); |
| break loop; |
| case '0': |
| reader.scanChar(); |
| if (reader.ch == 'x' || reader.ch == 'X') { |
| reader.scanChar(); |
| skipIllegalUnderscores(); |
| scanNumber(pos, 16); |
| } else if (reader.ch == 'b' || reader.ch == 'B') { |
| if (!allowBinaryLiterals) { |
| lexError(pos, "unsupported.binary.lit", source.name); |
| allowBinaryLiterals = true; |
| } |
| reader.scanChar(); |
| skipIllegalUnderscores(); |
| scanNumber(pos, 2); |
| } else { |
| reader.putChar('0'); |
| if (reader.ch == '_') { |
| int savePos = reader.bp; |
| do { |
| reader.scanChar(); |
| } while (reader.ch == '_'); |
| if (reader.digit(pos, 10) < 0) { |
| lexError(savePos, "illegal.underscore"); |
| } |
| } |
| scanNumber(pos, 8); |
| } |
| break loop; |
| case '1': case '2': case '3': case '4': |
| case '5': case '6': case '7': case '8': case '9': |
| scanNumber(pos, 10); |
| break loop; |
| case '.': |
| reader.scanChar(); |
| if (reader.digit(pos, 10) >= 0) { |
| reader.putChar('.'); |
| scanFractionAndSuffix(pos); |
| } else if (reader.ch == '.') { |
| int savePos = reader.bp; |
| reader.putChar('.'); reader.putChar('.', true); |
| if (reader.ch == '.') { |
| reader.scanChar(); |
| reader.putChar('.'); |
| tk = TokenKind.ELLIPSIS; |
| } else { |
| lexError(savePos, "illegal.dot"); |
| } |
| } else { |
| tk = TokenKind.DOT; |
| } |
| break loop; |
| case ',': |
| reader.scanChar(); tk = TokenKind.COMMA; break loop; |
| case ';': |
| reader.scanChar(); tk = TokenKind.SEMI; break loop; |
| case '(': |
| reader.scanChar(); tk = TokenKind.LPAREN; break loop; |
| case ')': |
| reader.scanChar(); tk = TokenKind.RPAREN; break loop; |
| case '[': |
| reader.scanChar(); tk = TokenKind.LBRACKET; break loop; |
| case ']': |
| reader.scanChar(); tk = TokenKind.RBRACKET; break loop; |
| case '{': |
| reader.scanChar(); tk = TokenKind.LBRACE; break loop; |
| case '}': |
| reader.scanChar(); tk = TokenKind.RBRACE; break loop; |
| case '/': |
| reader.scanChar(); |
| if (reader.ch == '/') { |
| do { |
| reader.scanCommentChar(); |
| } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen); |
| if (reader.bp < reader.buflen) { |
| comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE)); |
| } |
| break; |
| } else if (reader.ch == '*') { |
| boolean isEmpty = false; |
| reader.scanChar(); |
| CommentStyle style; |
| if (reader.ch == '*') { |
| style = CommentStyle.JAVADOC; |
| reader.scanCommentChar(); |
| if (reader.ch == '/') { |
| isEmpty = true; |
| } |
| } else { |
| style = CommentStyle.BLOCK; |
| } |
| while (!isEmpty && reader.bp < reader.buflen) { |
| if (reader.ch == '*') { |
| reader.scanChar(); |
| if (reader.ch == '/') break; |
| } else { |
| reader.scanCommentChar(); |
| } |
| } |
| if (reader.ch == '/') { |
| reader.scanChar(); |
| comments = addComment(comments, processComment(pos, reader.bp, style)); |
| break; |
| } else { |
| lexError(pos, "unclosed.comment"); |
| break loop; |
| } |
| } else if (reader.ch == '=') { |
| tk = TokenKind.SLASHEQ; |
| reader.scanChar(); |
| } else { |
| tk = TokenKind.SLASH; |
| } |
| break loop; |
| case '\'': |
| reader.scanChar(); |
| if (reader.ch == '\'') { |
| lexError(pos, "empty.char.lit"); |
| reader.scanChar(); |
| } else { |
| if (reader.ch == CR || reader.ch == LF) |
| lexError(pos, "illegal.line.end.in.char.lit"); |
| scanLitChar(pos); |
| if (reader.ch == '\'') { |
| reader.scanChar(); |
| tk = TokenKind.CHARLITERAL; |
| } else { |
| lexError(pos, "unclosed.char.lit"); |
| } |
| } |
| break loop; |
| case '\"': |
| reader.scanChar(); |
| while (reader.ch != '\"' && reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen) |
| scanLitChar(pos); |
| if (reader.ch == '\"') { |
| tk = TokenKind.STRINGLITERAL; |
| reader.scanChar(); |
| } else { |
| lexError(pos, "unclosed.str.lit"); |
| } |
| break loop; |
| default: |
| if (isSpecial(reader.ch)) { |
| scanOperator(); |
| } else { |
| boolean isJavaIdentifierStart; |
| int codePoint = -1; |
| if (reader.ch < '\u0080') { |
| // all ASCII range chars already handled, above |
| isJavaIdentifierStart = false; |
| } else { |
| codePoint = reader.peekSurrogates(); |
| if (codePoint >= 0) { |
| if (isJavaIdentifierStart = Character.isJavaIdentifierStart(codePoint)) { |
| reader.putChar(true); |
| } |
| } else { |
| isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch); |
| } |
| } |
| if (isJavaIdentifierStart) { |
| scanIdent(); |
| } else if (reader.digit(pos, 10) >= 0) { |
| scanNumber(pos, 10); |
| } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5 |
| tk = TokenKind.EOF; |
| pos = reader.buflen; |
| } else { |
| String arg; |
| |
| if (codePoint >= 0) { |
| char high = reader.ch; |
| reader.scanChar(); |
| arg = String.format("\\u%04x\\u%04x", (int) high, (int)reader.ch); |
| } else { |
| arg = (32 < reader.ch && reader.ch < 127) ? |
| String.format("%s", reader.ch) : |
| String.format("\\u%04x", (int)reader.ch); |
| } |
| lexError(pos, "illegal.char", arg); |
| reader.scanChar(); |
| } |
| } |
| break loop; |
| } |
| } |
| endPos = reader.bp; |
| switch (tk.tag) { |
| case DEFAULT: return new Token(tk, pos, endPos, comments); |
| case NAMED: return new NamedToken(tk, pos, endPos, name, comments); |
| case STRING: return new StringToken(tk, pos, endPos, reader.chars(), comments); |
| case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments); |
| default: throw new AssertionError(); |
| } |
| } |
| finally { |
| if (scannerDebug) { |
| System.out.println("nextToken(" + pos |
| + "," + endPos + ")=|" + |
| new String(reader.getRawCharacters(pos, endPos)) |
| + "|"); |
| } |
| } |
| } |
| //where |
| List<Comment> addComment(List<Comment> comments, Comment comment) { |
| return comments == null ? |
| List.of(comment) : |
| comments.prepend(comment); |
| } |
| |
| /** Return the position where a lexical error occurred; |
| */ |
| public int errPos() { |
| return errPos; |
| } |
| |
| /** Set the position where a lexical error occurred; |
| */ |
| public void errPos(int pos) { |
| errPos = pos; |
| } |
| |
| /** |
| * Called when a complete comment has been scanned. pos and endPos |
| * will mark the comment boundary. |
| */ |
| protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) { |
| if (scannerDebug) |
| System.out.println("processComment(" + pos |
| + "," + endPos + "," + style + ")=|" |
| + new String(reader.getRawCharacters(pos, endPos)) |
| + "|"); |
| char[] buf = reader.getRawCharacters(pos, endPos); |
| return new BasicComment<>(new UnicodeReader(fac, buf, buf.length), style); |
| } |
| |
| /** |
| * Called when a complete whitespace run has been scanned. pos and endPos |
| * will mark the whitespace boundary. |
| */ |
| protected void processWhiteSpace(int pos, int endPos) { |
| if (scannerDebug) |
| System.out.println("processWhitespace(" + pos |
| + "," + endPos + ")=|" + |
| new String(reader.getRawCharacters(pos, endPos)) |
| + "|"); |
| } |
| |
| /** |
| * Called when a line terminator has been processed. |
| */ |
| protected void processLineTerminator(int pos, int endPos) { |
| if (scannerDebug) |
| System.out.println("processTerminator(" + pos |
| + "," + endPos + ")=|" + |
| new String(reader.getRawCharacters(pos, endPos)) |
| + "|"); |
| } |
| |
| /** Build a map for translating between line numbers and |
| * positions in the input. |
| * |
| * @return a LineMap */ |
| public Position.LineMap getLineMap() { |
| return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false); |
| } |
| |
| |
| /** |
| * Scan a documentation comment; determine if a deprecated tag is present. |
| * Called once the initial /, * have been skipped, positioned at the second * |
| * (which is treated as the beginning of the first line). |
| * Stops positioned at the closing '/'. |
| */ |
| protected static class BasicComment<U extends UnicodeReader> implements Comment { |
| |
| CommentStyle cs; |
| U comment_reader; |
| |
| protected boolean deprecatedFlag = false; |
| protected boolean scanned = false; |
| |
| protected BasicComment(U comment_reader, CommentStyle cs) { |
| this.comment_reader = comment_reader; |
| this.cs = cs; |
| } |
| |
| public String getText() { |
| return null; |
| } |
| |
| public int getSourcePos(int pos) { |
| return -1; |
| } |
| |
| public CommentStyle getStyle() { |
| return cs; |
| } |
| |
| public boolean isDeprecated() { |
| if (!scanned && cs == CommentStyle.JAVADOC) { |
| scanDocComment(); |
| } |
| return deprecatedFlag; |
| } |
| |
| @SuppressWarnings("fallthrough") |
| protected void scanDocComment() { |
| try { |
| boolean deprecatedPrefix = false; |
| |
| comment_reader.bp += 3; // '/**' |
| comment_reader.ch = comment_reader.buf[comment_reader.bp]; |
| |
| forEachLine: |
| while (comment_reader.bp < comment_reader.buflen) { |
| |
| // Skip optional WhiteSpace at beginning of line |
| while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) { |
| comment_reader.scanCommentChar(); |
| } |
| |
| // Skip optional consecutive Stars |
| while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') { |
| comment_reader.scanCommentChar(); |
| if (comment_reader.ch == '/') { |
| return; |
| } |
| } |
| |
| // Skip optional WhiteSpace after Stars |
| while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) { |
| comment_reader.scanCommentChar(); |
| } |
| |
| deprecatedPrefix = false; |
| // At beginning of line in the JavaDoc sense. |
| if (!deprecatedFlag) { |
| String deprecated = "@deprecated"; |
| int i = 0; |
| while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) { |
| comment_reader.scanCommentChar(); |
| i++; |
| if (i == deprecated.length()) { |
| deprecatedPrefix = true; |
| break; |
| } |
| } |
| } |
| |
| if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) { |
| if (Character.isWhitespace(comment_reader.ch)) { |
| deprecatedFlag = true; |
| } else if (comment_reader.ch == '*') { |
| comment_reader.scanCommentChar(); |
| if (comment_reader.ch == '/') { |
| deprecatedFlag = true; |
| return; |
| } |
| } |
| } |
| |
| // Skip rest of line |
| while (comment_reader.bp < comment_reader.buflen) { |
| switch (comment_reader.ch) { |
| case '*': |
| comment_reader.scanCommentChar(); |
| if (comment_reader.ch == '/') { |
| return; |
| } |
| break; |
| case CR: // (Spec 3.4) |
| comment_reader.scanCommentChar(); |
| if (comment_reader.ch != LF) { |
| continue forEachLine; |
| } |
| /* fall through to LF case */ |
| case LF: // (Spec 3.4) |
| comment_reader.scanCommentChar(); |
| continue forEachLine; |
| default: |
| comment_reader.scanCommentChar(); |
| } |
| } // rest of line |
| } // forEachLine |
| return; |
| } finally { |
| scanned = true; |
| } |
| } |
| } |
| } |