| /* |
| * Copyright (c) 1994, 2004, Oracle and/or its affiliates. All rights reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. Oracle designates this |
| * particular file as subject to the "Classpath" exception as provided |
| * by Oracle in the LICENSE file that accompanied this code. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| */ |
| |
| package sun.tools.java; |
| |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.util.Hashtable; |
| |
| /** |
| * A Scanner for Java tokens. Errors are reported |
| * to the environment object.<p> |
| * |
| * The scanner keeps track of the current token, |
| * the value of the current token (if any), and the start |
| * position of the current token.<p> |
| * |
| * The scan() method advances the scanner to the next |
| * token in the input.<p> |
| * |
| * The match() method is used to quickly match opening |
| * brackets (ie: '(', '{', or '[') with their closing |
| * counter part. This is useful during error recovery.<p> |
| * |
| * An position consists of: ((linenr << WHEREOFFSETBITS) | offset) |
| * this means that both the line number and the exact offset into |
| * the file are encoded in each position value.<p> |
| * |
| * The compiler treats either "\n", "\r" or "\r\n" as the |
| * end of a line.<p> |
| * |
| * WARNING: The contents of this source file are not part of any |
| * supported API. Code that depends on them does so at its own risk: |
| * they are subject to change or removal without notice. |
| * |
| * @author Arthur van Hoff |
| */ |
| |
| public |
| class Scanner implements Constants { |
| /** |
| * The increment for each character. |
| */ |
| public static final long OFFSETINC = 1; |
| |
| /** |
| * The increment for each line. |
| */ |
| public static final long LINEINC = 1L << WHEREOFFSETBITS; |
| |
| /** |
| * End of input |
| */ |
| public static final int EOF = -1; |
| |
| /** |
| * Where errors are reported |
| */ |
| public Environment env; |
| |
| /** |
| * Input reader |
| */ |
| protected ScannerInputReader in; |
| |
| /** |
| * If true, present all comments as tokens. |
| * Contents are not saved, but positions are recorded accurately, |
| * so the comment can be recovered from the text. |
| * Line terminations are also returned as comment tokens, |
| * and may be distinguished by their start and end positions, |
| * which are equal (meaning, these tokens contain no chars). |
| */ |
| public boolean scanComments = false; |
| |
| /** |
| * Current token |
| */ |
| public int token; |
| |
| /** |
| * The position of the current token |
| */ |
| public long pos; |
| |
| /** |
| * The position of the previous token |
| */ |
| public long prevPos; |
| |
| /** |
| * The current character |
| */ |
| protected int ch; |
| |
| /* |
| * Token values. |
| */ |
| public char charValue; |
| public int intValue; |
| public long longValue; |
| public float floatValue; |
| public double doubleValue; |
| public String stringValue; |
| public Identifier idValue; |
| public int radix; // Radix, when reading int or long |
| |
| /* |
| * A doc comment preceding the most recent token |
| */ |
| public String docComment; |
| |
| /* |
| * A growable character buffer. |
| */ |
| private int count; |
| private char buffer[] = new char[1024]; |
| private void growBuffer() { |
| char newBuffer[] = new char[buffer.length * 2]; |
| System.arraycopy(buffer, 0, newBuffer, 0, buffer.length); |
| buffer = newBuffer; |
| } |
| |
| // The following two methods have been hand-inlined in |
| // scanDocComment. If you make changes here, you should |
| // check to see if scanDocComment also needs modification. |
| private void putc(int ch) { |
| if (count == buffer.length) { |
| growBuffer(); |
| } |
| buffer[count++] = (char)ch; |
| } |
| |
| private String bufferString() { |
| return new String(buffer, 0, count); |
| } |
| |
| /** |
| * Create a scanner to scan an input stream. |
| */ |
| public Scanner(Environment env, InputStream in) throws IOException { |
| this.env = env; |
| useInputStream(in); |
| } |
| |
| /** |
| * Setup input from the given input stream, |
| * and scan the first token from it. |
| */ |
| protected void useInputStream(InputStream in) throws IOException { |
| try { |
| this.in = new ScannerInputReader(env, in); |
| } catch (Exception e) { |
| env.setCharacterEncoding(null); |
| this.in = new ScannerInputReader(env, in); |
| } |
| |
| ch = this.in.read(); |
| prevPos = this.in.pos; |
| |
| scan(); |
| } |
| |
| /** |
| * Create a scanner to scan an input stream. |
| */ |
| protected Scanner(Environment env) { |
| this.env = env; |
| // Expect the subclass to call useInputStream at the right time. |
| } |
| |
| /** |
| * Define a keyword. |
| */ |
| private static void defineKeyword(int val) { |
| Identifier.lookup(opNames[val]).setType(val); |
| } |
| |
| /** |
| * Initialized keyword and token Hashtables |
| */ |
| static { |
| // Statement keywords |
| defineKeyword(FOR); |
| defineKeyword(IF); |
| defineKeyword(ELSE); |
| defineKeyword(WHILE); |
| defineKeyword(DO); |
| defineKeyword(SWITCH); |
| defineKeyword(CASE); |
| defineKeyword(DEFAULT); |
| defineKeyword(BREAK); |
| defineKeyword(CONTINUE); |
| defineKeyword(RETURN); |
| defineKeyword(TRY); |
| defineKeyword(CATCH); |
| defineKeyword(FINALLY); |
| defineKeyword(THROW); |
| |
| // Type defineKeywords |
| defineKeyword(BYTE); |
| defineKeyword(CHAR); |
| defineKeyword(SHORT); |
| defineKeyword(INT); |
| defineKeyword(LONG); |
| defineKeyword(FLOAT); |
| defineKeyword(DOUBLE); |
| defineKeyword(VOID); |
| defineKeyword(BOOLEAN); |
| |
| // Expression keywords |
| defineKeyword(INSTANCEOF); |
| defineKeyword(TRUE); |
| defineKeyword(FALSE); |
| defineKeyword(NEW); |
| defineKeyword(THIS); |
| defineKeyword(SUPER); |
| defineKeyword(NULL); |
| |
| // Declaration keywords |
| defineKeyword(IMPORT); |
| defineKeyword(CLASS); |
| defineKeyword(EXTENDS); |
| defineKeyword(IMPLEMENTS); |
| defineKeyword(INTERFACE); |
| defineKeyword(PACKAGE); |
| defineKeyword(THROWS); |
| |
| // Modifier keywords |
| defineKeyword(PRIVATE); |
| defineKeyword(PUBLIC); |
| defineKeyword(PROTECTED); |
| defineKeyword(STATIC); |
| defineKeyword(TRANSIENT); |
| defineKeyword(SYNCHRONIZED); |
| defineKeyword(NATIVE); |
| defineKeyword(ABSTRACT); |
| defineKeyword(VOLATILE); |
| defineKeyword(FINAL); |
| defineKeyword(STRICTFP); |
| |
| // reserved keywords |
| defineKeyword(CONST); |
| defineKeyword(GOTO); |
| } |
| |
| /** |
| * Scan a comment. This method should be |
| * called once the initial /, * and the next |
| * character have been read. |
| */ |
| private void skipComment() throws IOException { |
| while (true) { |
| switch (ch) { |
| case EOF: |
| env.error(pos, "eof.in.comment"); |
| return; |
| |
| case '*': |
| if ((ch = in.read()) == '/') { |
| ch = in.read(); |
| return; |
| } |
| break; |
| |
| default: |
| ch = in.read(); |
| break; |
| } |
| } |
| } |
| |
| /** |
| * Scan a doc comment. This method should be called |
| * once the initial /, * and * have been read. It gathers |
| * the content of the comment (witout leading spaces and '*'s) |
| * in the string buffer. |
| */ |
| private String scanDocComment() throws IOException { |
| // Note: this method has been hand-optimized to yield |
| // better performance. This was done after it was noted |
| // that javadoc spent a great deal of its time here. |
| // This should also help the performance of the compiler |
| // as well -- it scans the doc comments to find |
| // @deprecated tags. |
| // |
| // The logic of the method has been completely rewritten |
| // to avoid the use of flags that need to be looked at |
| // for every character read. Members that are accessed |
| // more than once have been stored in local variables. |
| // The methods putc() and bufferString() have been |
| // inlined by hand. Extra cases have been added to |
| // switch statements to trick the compiler into generating |
| // a tableswitch instead of a lookupswitch. |
| // |
| // This implementation aims to preserve the previous |
| // behavior of this method. |
| |
| int c; |
| |
| // Put `in' in a local variable. |
| final ScannerInputReader in = this.in; |
| |
| // We maintain the buffer locally rather than calling putc(). |
| char[] buffer = this.buffer; |
| int count = 0; |
| |
| // We are called pointing at the second star of the doc |
| // comment: |
| // |
| // Input: /** the rest of the comment ... */ |
| // ^ |
| // |
| // We rely on this in the code below. |
| |
| // Consume any number of stars. |
| while ((c = in.read()) == '*') |
| ; |
| |
| // Is the comment of the form /**/, /***/, /****/, etc.? |
| if (c == '/') { |
| // Set ch and return |
| ch = in.read(); |
| return ""; |
| } |
| |
| // Skip a newline on the first line of the comment. |
| if (c == '\n') { |
| c = in.read(); |
| } |
| |
| outerLoop: |
| // The outerLoop processes the doc comment, looping once |
| // for each line. For each line, it first strips off |
| // whitespace, then it consumes any stars, then it |
| // puts the rest of the line into our buffer. |
| while (true) { |
| |
| // The wsLoop consumes whitespace from the beginning |
| // of each line. |
| wsLoop: |
| while (true) { |
| switch (c) { |
| case ' ': |
| case '\t': |
| // We could check for other forms of whitespace |
| // as well, but this is left as is for minimum |
| // disturbance of functionality. |
| // |
| // Just skip whitespace. |
| c = in.read(); |
| break; |
| |
| // We have added extra cases here to trick the |
| // compiler into using a tableswitch instead of |
| // a lookupswitch. They can be removed without |
| // a change in meaning. |
| case 10: case 11: case 12: case 13: case 14: case 15: |
| case 16: case 17: case 18: case 19: case 20: case 21: |
| case 22: case 23: case 24: case 25: case 26: case 27: |
| case 28: case 29: case 30: case 31: |
| default: |
| // We've seen something that isn't whitespace, |
| // jump out. |
| break wsLoop; |
| } |
| } // end wsLoop. |
| |
| // Are there stars here? If so, consume them all |
| // and check for the end of comment. |
| if (c == '*') { |
| // Skip all of the stars... |
| do { |
| c = in.read(); |
| } while (c == '*'); |
| |
| // ...then check for the closing slash. |
| if (c == '/') { |
| // We're done with the doc comment. |
| // Set ch and break out. |
| ch = in.read(); |
| break outerLoop; |
| } |
| } |
| |
| // The textLoop processes the rest of the characters |
| // on the line, adding them to our buffer. |
| textLoop: |
| while (true) { |
| switch (c) { |
| case EOF: |
| // We've seen a premature EOF. Break out |
| // of the loop. |
| env.error(pos, "eof.in.comment"); |
| ch = EOF; |
| break outerLoop; |
| |
| case '*': |
| // Is this just a star? Or is this the |
| // end of a comment? |
| c = in.read(); |
| if (c == '/') { |
| // This is the end of the comment, |
| // set ch and return our buffer. |
| ch = in.read(); |
| break outerLoop; |
| } |
| // This is just an ordinary star. Add it to |
| // the buffer. |
| if (count == buffer.length) { |
| growBuffer(); |
| buffer = this.buffer; |
| } |
| buffer[count++] = '*'; |
| break; |
| |
| case '\n': |
| // We've seen a newline. Add it to our |
| // buffer and break out of this loop, |
| // starting fresh on a new line. |
| if (count == buffer.length) { |
| growBuffer(); |
| buffer = this.buffer; |
| } |
| buffer[count++] = '\n'; |
| c = in.read(); |
| break textLoop; |
| |
| // Again, the extra cases here are a trick |
| // to get the compiler to generate a tableswitch. |
| case 0: case 1: case 2: case 3: case 4: case 5: |
| case 6: case 7: case 8: case 11: case 12: case 13: |
| case 14: case 15: case 16: case 17: case 18: case 19: |
| case 20: case 21: case 22: case 23: case 24: case 25: |
| case 26: case 27: case 28: case 29: case 30: case 31: |
| case 32: case 33: case 34: case 35: case 36: case 37: |
| case 38: case 39: case 40: |
| default: |
| // Add the character to our buffer. |
| if (count == buffer.length) { |
| growBuffer(); |
| buffer = this.buffer; |
| } |
| buffer[count++] = (char)c; |
| c = in.read(); |
| break; |
| } |
| } // end textLoop |
| } // end outerLoop |
| |
| // We have scanned our doc comment. It is stored in |
| // buffer. The previous implementation of scanDocComment |
| // stripped off all trailing spaces and stars from the comment. |
| // We will do this as well, so as to cause a minimum of |
| // disturbance. Is this what we want? |
| if (count > 0) { |
| int i = count - 1; |
| trailLoop: |
| while (i > -1) { |
| switch (buffer[i]) { |
| case ' ': |
| case '\t': |
| case '*': |
| i--; |
| break; |
| // And again, the extra cases here are a trick |
| // to get the compiler to generate a tableswitch. |
| case 0: case 1: case 2: case 3: case 4: case 5: |
| case 6: case 7: case 8: case 10: case 11: case 12: |
| case 13: case 14: case 15: case 16: case 17: case 18: |
| case 19: case 20: case 21: case 22: case 23: case 24: |
| case 25: case 26: case 27: case 28: case 29: case 30: |
| case 31: case 33: case 34: case 35: case 36: case 37: |
| case 38: case 39: case 40: |
| default: |
| break trailLoop; |
| } |
| } |
| count = i + 1; |
| |
| // Return the text of the doc comment. |
| return new String(buffer, 0, count); |
| } else { |
| return ""; |
| } |
| } |
| |
| /** |
| * Scan a number. The first digit of the number should be the current |
| * character. We may be scanning hex, decimal, or octal at this point |
| */ |
| private void scanNumber() throws IOException { |
| boolean seenNonOctal = false; |
| boolean overflow = false; |
| boolean seenDigit = false; // used to detect invalid hex number 0xL |
| radix = (ch == '0' ? 8 : 10); |
| long value = ch - '0'; |
| count = 0; |
| putc(ch); // save character in buffer |
| numberLoop: |
| for (;;) { |
| switch (ch = in.read()) { |
| case '.': |
| if (radix == 16) |
| break numberLoop; // an illegal character |
| scanReal(); |
| return; |
| |
| case '8': case '9': |
| // We can't yet throw an error if reading an octal. We might |
| // discover we're really reading a real. |
| seenNonOctal = true; |
| case '0': case '1': case '2': case '3': |
| case '4': case '5': case '6': case '7': |
| seenDigit = true; |
| putc(ch); |
| if (radix == 10) { |
| overflow = overflow || (value * 10)/10 != value; |
| value = (value * 10) + (ch - '0'); |
| overflow = overflow || (value - 1 < -1); |
| } else if (radix == 8) { |
| overflow = overflow || (value >>> 61) != 0; |
| value = (value << 3) + (ch - '0'); |
| } else { |
| overflow = overflow || (value >>> 60) != 0; |
| value = (value << 4) + (ch - '0'); |
| } |
| break; |
| |
| case 'd': case 'D': case 'e': case 'E': case 'f': case 'F': |
| if (radix != 16) { |
| scanReal(); |
| return; |
| } |
| // fall through |
| case 'a': case 'A': case 'b': case 'B': case 'c': case 'C': |
| seenDigit = true; |
| putc(ch); |
| if (radix != 16) |
| break numberLoop; // an illegal character |
| overflow = overflow || (value >>> 60) != 0; |
| value = (value << 4) + 10 + |
| Character.toLowerCase((char)ch) - 'a'; |
| break; |
| |
| case 'l': case 'L': |
| ch = in.read(); // skip over 'l' |
| longValue = value; |
| token = LONGVAL; |
| break numberLoop; |
| |
| case 'x': case 'X': |
| // if the first character is a '0' and this is the second |
| // letter, then read in a hexadecimal number. Otherwise, error. |
| if (count == 1 && radix == 8) { |
| radix = 16; |
| seenDigit = false; |
| break; |
| } else { |
| // we'll get an illegal character error |
| break numberLoop; |
| } |
| |
| default: |
| intValue = (int)value; |
| token = INTVAL; |
| break numberLoop; |
| } |
| } // while true |
| |
| // We have just finished reading the number. The next thing better |
| // not be a letter or digit. |
| // Note: There will be deprecation warnings against these uses |
| // of Character.isJavaLetterOrDigit and Character.isJavaLetter. |
| // Do not fix them yet; allow the compiler to run on pre-JDK1.1 VMs. |
| if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') { |
| env.error(in.pos, "invalid.number"); |
| do { ch = in.read(); } |
| while (Character.isJavaLetterOrDigit((char)ch) || ch == '.'); |
| intValue = 0; |
| token = INTVAL; |
| } else if (radix == 8 && seenNonOctal) { |
| // A bogus octal literal. |
| intValue = 0; |
| token = INTVAL; |
| env.error(pos, "invalid.octal.number"); |
| } else if (radix == 16 && seenDigit == false) { |
| // A hex literal with no digits, 0xL, for example. |
| intValue = 0; |
| token = INTVAL; |
| env.error(pos, "invalid.hex.number"); |
| } else { |
| if (token == INTVAL) { |
| // Check for overflow. Note that base 10 literals |
| // have different rules than base 8 and 16. |
| overflow = overflow || |
| (value & 0xFFFFFFFF00000000L) != 0 || |
| (radix == 10 && value > 2147483648L); |
| |
| if (overflow) { |
| intValue = 0; |
| |
| // Give a specific error message which tells |
| // the user the range. |
| switch (radix) { |
| case 8: |
| env.error(pos, "overflow.int.oct"); |
| break; |
| case 10: |
| env.error(pos, "overflow.int.dec"); |
| break; |
| case 16: |
| env.error(pos, "overflow.int.hex"); |
| break; |
| default: |
| throw new CompilerError("invalid radix"); |
| } |
| } |
| } else { |
| if (overflow) { |
| longValue = 0; |
| |
| // Give a specific error message which tells |
| // the user the range. |
| switch (radix) { |
| case 8: |
| env.error(pos, "overflow.long.oct"); |
| break; |
| case 10: |
| env.error(pos, "overflow.long.dec"); |
| break; |
| case 16: |
| env.error(pos, "overflow.long.hex"); |
| break; |
| default: |
| throw new CompilerError("invalid radix"); |
| } |
| } |
| } |
| } |
| } |
| |
| /** |
| * Scan a float. We are either looking at the decimal, or we have already |
| * seen it and put it into the buffer. We haven't seen an exponent. |
| * Scan a float. Should be called with the current character is either |
| * the 'e', 'E' or '.' |
| */ |
| private void scanReal() throws IOException { |
| boolean seenExponent = false; |
| boolean isSingleFloat = false; |
| char lastChar; |
| if (ch == '.') { |
| putc(ch); |
| ch = in.read(); |
| } |
| |
| numberLoop: |
| for ( ; ; ch = in.read()) { |
| switch (ch) { |
| case '0': case '1': case '2': case '3': case '4': |
| case '5': case '6': case '7': case '8': case '9': |
| putc(ch); |
| break; |
| |
| case 'e': case 'E': |
| if (seenExponent) |
| break numberLoop; // we'll get a format error |
| putc(ch); |
| seenExponent = true; |
| break; |
| |
| case '+': case '-': |
| lastChar = buffer[count - 1]; |
| if (lastChar != 'e' && lastChar != 'E') |
| break numberLoop; // this isn't an error, though! |
| putc(ch); |
| break; |
| |
| case 'f': case 'F': |
| ch = in.read(); // skip over 'f' |
| isSingleFloat = true; |
| break numberLoop; |
| |
| case 'd': case 'D': |
| ch = in.read(); // skip over 'd' |
| // fall through |
| default: |
| break numberLoop; |
| } // sswitch |
| } // loop |
| |
| // we have just finished reading the number. The next thing better |
| // not be a letter or digit. |
| if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') { |
| env.error(in.pos, "invalid.number"); |
| do { ch = in.read(); } |
| while (Character.isJavaLetterOrDigit((char)ch) || ch == '.'); |
| doubleValue = 0; |
| token = DOUBLEVAL; |
| } else { |
| token = isSingleFloat ? FLOATVAL : DOUBLEVAL; |
| try { |
| lastChar = buffer[count - 1]; |
| if (lastChar == 'e' || lastChar == 'E' |
| || lastChar == '+' || lastChar == '-') { |
| env.error(in.pos -1, "float.format"); |
| } else if (isSingleFloat) { |
| String string = bufferString(); |
| floatValue = Float.valueOf(string).floatValue(); |
| if (Float.isInfinite(floatValue)) { |
| env.error(pos, "overflow.float"); |
| } else if (floatValue == 0 && !looksLikeZero(string)) { |
| env.error(pos, "underflow.float"); |
| } |
| } else { |
| String string = bufferString(); |
| doubleValue = Double.valueOf(string).doubleValue(); |
| if (Double.isInfinite(doubleValue)) { |
| env.error(pos, "overflow.double"); |
| } else if (doubleValue == 0 && !looksLikeZero(string)) { |
| env.error(pos, "underflow.double"); |
| } |
| } |
| } catch (NumberFormatException ee) { |
| env.error(pos, "float.format"); |
| doubleValue = 0; |
| floatValue = 0; |
| } |
| } |
| return; |
| } |
| |
| // We have a token that parses as a number. Is this token possibly zero? |
| // i.e. does it have a non-zero value in the mantissa? |
| private static boolean looksLikeZero(String token) { |
| int length = token.length(); |
| for (int i = 0; i < length; i++) { |
| switch (token.charAt(i)) { |
| case 0: case '.': |
| continue; |
| case '1': case '2': case '3': case '4': case '5': |
| case '6': case '7': case '8': case '9': |
| return false; |
| case 'e': case 'E': case 'f': case 'F': |
| return true; |
| } |
| } |
| return true; |
| } |
| |
| /** |
| * Scan an escape character. |
| * @return the character or -1 if it escaped an |
| * end-of-line. |
| */ |
| private int scanEscapeChar() throws IOException { |
| long p = in.pos; |
| |
| switch (ch = in.read()) { |
| case '0': case '1': case '2': case '3': |
| case '4': case '5': case '6': case '7': { |
| int n = ch - '0'; |
| for (int i = 2 ; i > 0 ; i--) { |
| switch (ch = in.read()) { |
| case '0': case '1': case '2': case '3': |
| case '4': case '5': case '6': case '7': |
| n = (n << 3) + ch - '0'; |
| break; |
| |
| default: |
| if (n > 0xFF) { |
| env.error(p, "invalid.escape.char"); |
| } |
| return n; |
| } |
| } |
| ch = in.read(); |
| if (n > 0xFF) { |
| env.error(p, "invalid.escape.char"); |
| } |
| return n; |
| } |
| |
| case 'r': ch = in.read(); return '\r'; |
| case 'n': ch = in.read(); return '\n'; |
| case 'f': ch = in.read(); return '\f'; |
| case 'b': ch = in.read(); return '\b'; |
| case 't': ch = in.read(); return '\t'; |
| case '\\': ch = in.read(); return '\\'; |
| case '\"': ch = in.read(); return '\"'; |
| case '\'': ch = in.read(); return '\''; |
| } |
| |
| env.error(p, "invalid.escape.char"); |
| ch = in.read(); |
| return -1; |
| } |
| |
| /** |
| * Scan a string. The current character |
| * should be the opening " of the string. |
| */ |
| private void scanString() throws IOException { |
| token = STRINGVAL; |
| count = 0; |
| ch = in.read(); |
| |
| // Scan a String |
| while (true) { |
| switch (ch) { |
| case EOF: |
| env.error(pos, "eof.in.string"); |
| stringValue = bufferString(); |
| return; |
| |
| case '\r': |
| case '\n': |
| ch = in.read(); |
| env.error(pos, "newline.in.string"); |
| stringValue = bufferString(); |
| return; |
| |
| case '"': |
| ch = in.read(); |
| stringValue = bufferString(); |
| return; |
| |
| case '\\': { |
| int c = scanEscapeChar(); |
| if (c >= 0) { |
| putc((char)c); |
| } |
| break; |
| } |
| |
| default: |
| putc(ch); |
| ch = in.read(); |
| break; |
| } |
| } |
| } |
| |
| /** |
| * Scan a character. The current character should be |
| * the opening ' of the character constant. |
| */ |
| private void scanCharacter() throws IOException { |
| token = CHARVAL; |
| |
| switch (ch = in.read()) { |
| case '\\': |
| int c = scanEscapeChar(); |
| charValue = (char)((c >= 0) ? c : 0); |
| break; |
| |
| case '\'': |
| // There are two standard problems this case deals with. One |
| // is the malformed single quote constant (i.e. the programmer |
| // uses ''' instead of '\'') and the other is the empty |
| // character constant (i.e. ''). Just consume any number of |
| // single quotes and emit an error message. |
| charValue = 0; |
| env.error(pos, "invalid.char.constant"); |
| ch = in.read(); |
| while (ch == '\'') { |
| ch = in.read(); |
| } |
| return; |
| |
| case '\r': |
| case '\n': |
| charValue = 0; |
| env.error(pos, "invalid.char.constant"); |
| return; |
| |
| default: |
| charValue = (char)ch; |
| ch = in.read(); |
| break; |
| } |
| |
| if (ch == '\'') { |
| ch = in.read(); |
| } else { |
| env.error(pos, "invalid.char.constant"); |
| while (true) { |
| switch (ch) { |
| case '\'': |
| ch = in.read(); |
| return; |
| case ';': |
| case '\n': |
| case EOF: |
| return; |
| default: |
| ch = in.read(); |
| } |
| } |
| } |
| } |
| |
| /** |
| * Scan an Identifier. The current character should |
| * be the first character of the identifier. |
| */ |
| private void scanIdentifier() throws IOException { |
| count = 0; |
| |
| while (true) { |
| putc(ch); |
| switch (ch = in.read()) { |
| case 'a': case 'b': case 'c': case 'd': case 'e': |
| case 'f': case 'g': case 'h': case 'i': case 'j': |
| case 'k': case 'l': case 'm': case 'n': case 'o': |
| case 'p': case 'q': case 'r': case 's': case 't': |
| case 'u': case 'v': case 'w': case 'x': case 'y': |
| case 'z': |
| case 'A': case 'B': case 'C': case 'D': case 'E': |
| case 'F': case 'G': case 'H': case 'I': case 'J': |
| case 'K': case 'L': case 'M': case 'N': case 'O': |
| case 'P': case 'Q': case 'R': case 'S': case 'T': |
| case 'U': case 'V': case 'W': case 'X': case 'Y': |
| case 'Z': |
| case '0': case '1': case '2': case '3': case '4': |
| case '5': case '6': case '7': case '8': case '9': |
| case '$': case '_': |
| break; |
| |
| default: |
| if (!Character.isJavaLetterOrDigit((char)ch)) { |
| idValue = Identifier.lookup(bufferString()); |
| token = idValue.getType(); |
| return; |
| } |
| } |
| } |
| } |
| |
| /** |
| * The ending position of the current token |
| */ |
| // Note: This should be part of the pos itself. |
| public long getEndPos() { |
| return in.pos; |
| } |
| |
| /** |
| * If the current token is IDENT, return the identifier occurrence. |
| * It will be freshly allocated. |
| */ |
| public IdentifierToken getIdToken() { |
| return (token != IDENT) ? null : new IdentifierToken(pos, idValue); |
| } |
| |
| /** |
| * Scan the next token. |
| * @return the position of the previous token. |
| */ |
| public long scan() throws IOException { |
| return xscan(); |
| } |
| |
| protected long xscan() throws IOException { |
| final ScannerInputReader in = this.in; |
| long retPos = pos; |
| prevPos = in.pos; |
| docComment = null; |
| while (true) { |
| pos = in.pos; |
| |
| switch (ch) { |
| case EOF: |
| token = EOF; |
| return retPos; |
| |
| case '\n': |
| if (scanComments) { |
| ch = ' '; |
| // Avoid this path the next time around. |
| // Do not just call in.read; we want to present |
| // a null token (and also avoid read-ahead). |
| token = COMMENT; |
| return retPos; |
| } |
| case ' ': |
| case '\t': |
| case '\f': |
| ch = in.read(); |
| break; |
| |
| case '/': |
| switch (ch = in.read()) { |
| case '/': |
| // Parse a // comment |
| while (((ch = in.read()) != EOF) && (ch != '\n')); |
| if (scanComments) { |
| token = COMMENT; |
| return retPos; |
| } |
| break; |
| |
| case '*': |
| ch = in.read(); |
| if (ch == '*') { |
| docComment = scanDocComment(); |
| } else { |
| skipComment(); |
| } |
| if (scanComments) { |
| return retPos; |
| } |
| break; |
| |
| case '=': |
| ch = in.read(); |
| token = ASGDIV; |
| return retPos; |
| |
| default: |
| token = DIV; |
| return retPos; |
| } |
| break; |
| |
| case '"': |
| scanString(); |
| return retPos; |
| |
| case '\'': |
| scanCharacter(); |
| return retPos; |
| |
| case '0': case '1': case '2': case '3': case '4': |
| case '5': case '6': case '7': case '8': case '9': |
| scanNumber(); |
| return retPos; |
| |
| case '.': |
| switch (ch = in.read()) { |
| case '0': case '1': case '2': case '3': case '4': |
| case '5': case '6': case '7': case '8': case '9': |
| count = 0; |
| putc('.'); |
| scanReal(); |
| break; |
| default: |
| token = FIELD; |
| } |
| return retPos; |
| |
| case '{': |
| ch = in.read(); |
| token = LBRACE; |
| return retPos; |
| |
| case '}': |
| ch = in.read(); |
| token = RBRACE; |
| return retPos; |
| |
| case '(': |
| ch = in.read(); |
| token = LPAREN; |
| return retPos; |
| |
| case ')': |
| ch = in.read(); |
| token = RPAREN; |
| return retPos; |
| |
| case '[': |
| ch = in.read(); |
| token = LSQBRACKET; |
| return retPos; |
| |
| case ']': |
| ch = in.read(); |
| token = RSQBRACKET; |
| return retPos; |
| |
| case ',': |
| ch = in.read(); |
| token = COMMA; |
| return retPos; |
| |
| case ';': |
| ch = in.read(); |
| token = SEMICOLON; |
| return retPos; |
| |
| case '?': |
| ch = in.read(); |
| token = QUESTIONMARK; |
| return retPos; |
| |
| case '~': |
| ch = in.read(); |
| token = BITNOT; |
| return retPos; |
| |
| case ':': |
| ch = in.read(); |
| token = COLON; |
| return retPos; |
| |
| case '-': |
| switch (ch = in.read()) { |
| case '-': |
| ch = in.read(); |
| token = DEC; |
| return retPos; |
| |
| case '=': |
| ch = in.read(); |
| token = ASGSUB; |
| return retPos; |
| } |
| token = SUB; |
| return retPos; |
| |
| case '+': |
| switch (ch = in.read()) { |
| case '+': |
| ch = in.read(); |
| token = INC; |
| return retPos; |
| |
| case '=': |
| ch = in.read(); |
| token = ASGADD; |
| return retPos; |
| } |
| token = ADD; |
| return retPos; |
| |
| case '<': |
| switch (ch = in.read()) { |
| case '<': |
| if ((ch = in.read()) == '=') { |
| ch = in.read(); |
| token = ASGLSHIFT; |
| return retPos; |
| } |
| token = LSHIFT; |
| return retPos; |
| |
| case '=': |
| ch = in.read(); |
| token = LE; |
| return retPos; |
| } |
| token = LT; |
| return retPos; |
| |
| case '>': |
| switch (ch = in.read()) { |
| case '>': |
| switch (ch = in.read()) { |
| case '=': |
| ch = in.read(); |
| token = ASGRSHIFT; |
| return retPos; |
| |
| case '>': |
| if ((ch = in.read()) == '=') { |
| ch = in.read(); |
| token = ASGURSHIFT; |
| return retPos; |
| } |
| token = URSHIFT; |
| return retPos; |
| } |
| token = RSHIFT; |
| return retPos; |
| |
| case '=': |
| ch = in.read(); |
| token = GE; |
| return retPos; |
| } |
| token = GT; |
| return retPos; |
| |
| case '|': |
| switch (ch = in.read()) { |
| case '|': |
| ch = in.read(); |
| token = OR; |
| return retPos; |
| |
| case '=': |
| ch = in.read(); |
| token = ASGBITOR; |
| return retPos; |
| } |
| token = BITOR; |
| return retPos; |
| |
| case '&': |
| switch (ch = in.read()) { |
| case '&': |
| ch = in.read(); |
| token = AND; |
| return retPos; |
| |
| case '=': |
| ch = in.read(); |
| token = ASGBITAND; |
| return retPos; |
| } |
| token = BITAND; |
| return retPos; |
| |
| case '=': |
| if ((ch = in.read()) == '=') { |
| ch = in.read(); |
| token = EQ; |
| return retPos; |
| } |
| token = ASSIGN; |
| return retPos; |
| |
| case '%': |
| if ((ch = in.read()) == '=') { |
| ch = in.read(); |
| token = ASGREM; |
| return retPos; |
| } |
| token = REM; |
| return retPos; |
| |
| case '^': |
| if ((ch = in.read()) == '=') { |
| ch = in.read(); |
| token = ASGBITXOR; |
| return retPos; |
| } |
| token = BITXOR; |
| return retPos; |
| |
| case '!': |
| if ((ch = in.read()) == '=') { |
| ch = in.read(); |
| token = NE; |
| return retPos; |
| } |
| token = NOT; |
| return retPos; |
| |
| case '*': |
| if ((ch = in.read()) == '=') { |
| ch = in.read(); |
| token = ASGMUL; |
| return retPos; |
| } |
| token = MUL; |
| return retPos; |
| |
| case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': |
| case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': |
| case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': |
| case 's': case 't': case 'u': case 'v': case 'w': case 'x': |
| case 'y': case 'z': |
| case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': |
| case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': |
| case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': |
| case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': |
| case 'Y': case 'Z': |
| case '$': case '_': |
| scanIdentifier(); |
| return retPos; |
| |
| case '\u001a': |
| // Our one concession to DOS. |
| if ((ch = in.read()) == EOF) { |
| token = EOF; |
| return retPos; |
| } |
| env.error(pos, "funny.char"); |
| ch = in.read(); |
| break; |
| |
| |
| default: |
| if (Character.isJavaLetter((char)ch)) { |
| scanIdentifier(); |
| return retPos; |
| } |
| env.error(pos, "funny.char"); |
| ch = in.read(); |
| break; |
| } |
| } |
| } |
| |
| /** |
| * Scan to a matching '}', ']' or ')'. The current token must be |
| * a '{', '[' or '('; |
| */ |
| public void match(int open, int close) throws IOException { |
| int depth = 1; |
| |
| while (true) { |
| scan(); |
| if (token == open) { |
| depth++; |
| } else if (token == close) { |
| if (--depth == 0) { |
| return; |
| } |
| } else if (token == EOF) { |
| env.error(pos, "unbalanced.paren"); |
| return; |
| } |
| } |
| } |
| } |