/*
 * Copyright 2000-2013 JetBrains s.r.o.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| package com.intellij.lang.java.lexer; |
| |
| import com.intellij.lexer.LexerBase; |
| import com.intellij.openapi.diagnostic.Logger; |
| import com.intellij.pom.java.LanguageLevel; |
| import com.intellij.psi.JavaTokenType; |
| import com.intellij.psi.TokenType; |
| import com.intellij.psi.impl.source.tree.JavaDocElementType; |
| import com.intellij.psi.tree.IElementType; |
| import com.intellij.util.text.CharArrayUtil; |
| import gnu.trove.THashSet; |
| import org.jetbrains.annotations.NotNull; |
| |
| import java.io.IOException; |
| import java.util.Set; |
| |
| public class JavaLexer extends LexerBase { |
| private static final HashTable[] TABLES = new HashTable[]{ |
| new HashTable(LanguageLevel.JDK_1_5), |
| new HashTable(LanguageLevel.JDK_1_4), |
| new HashTable(LanguageLevel.JDK_1_3) |
| }; |
| |
| private static HashTable getTable(final LanguageLevel level) { |
| for (HashTable table : TABLES) { |
| if (level.isAtLeast(table.myLevel)) { |
| return table; |
| } |
| } |
| throw new IllegalArgumentException("Unsupported level: " + level); |
| } |
| |
| public static boolean isKeyword(String id, LanguageLevel level) { |
| return getTable(level).contains(id); |
| } |
| |
// Generated lexer used for every token that is not recognized by the hand-written fast paths.
private final _JavaLexer myFlexLexer;
// Keyword table matching the language level passed to the constructor.
private final HashTable myTable;
// Text being lexed, as passed to start().
private CharSequence myBuffer;
// Result of CharArrayUtil.fromSequenceWithoutCopying(myBuffer); every reader falls back to
// myBuffer.charAt() when this is null.
private char[] myBufferArray;
// Start offset of the current token (returned by getTokenStart()).
private int myBufferIndex;
// Offset at which lexing stops.
private int myBufferEndOffset;
// Positioned after the last symbol of the current token.
private int myTokenEndOffset;
// Type of the current token; null until the token is located, and null at the end of input.
private IElementType myTokenType;

/**
 * Creates a lexer for the given language level: instantiates the generated lexer for that
 * level and selects the matching keyword table.
 *
 * @param level the language level to lex with
 * @throws IllegalArgumentException if no keyword table exists for {@code level}
 */
public JavaLexer(@NotNull LanguageLevel level) {
  myFlexLexer = new _JavaLexer(level);
  myTable = getTable(level);
}
| |
| private static final class HashTable { |
| private static final int NUM_ENTRIES = 999; |
| private static final Logger LOG = Logger.getInstance("com.intellij.Lexer.JavaLexer"); |
| |
| private final LanguageLevel myLevel; |
| private final char[][] myTable = new char[NUM_ENTRIES][]; |
| private final IElementType[] myKeywords = new IElementType[NUM_ENTRIES]; |
| private final Set<String> myKeywordsInSet = new THashSet<String>(); |
| |
| private void add(String s, IElementType tokenType) { |
| char[] chars = s.toCharArray(); |
| int hashCode = chars[0] * 2; |
| for (int j = 1; j < chars.length; j++) { |
| hashCode += chars[j]; |
| } |
| int modHashCode = hashCode % NUM_ENTRIES; |
| LOG.assertTrue(myTable[modHashCode] == null); |
| |
| myTable[modHashCode] = chars; |
| myKeywords[modHashCode] = tokenType; |
| myKeywordsInSet.add(s); |
| } |
| |
| public boolean contains(String s) { |
| return myKeywordsInSet.contains(s); |
| } |
| |
| private boolean contains(int hashCode, final char[] bufferArray, final CharSequence buffer, int offset) { |
| int modHashCode = hashCode % NUM_ENTRIES; |
| final char[] kwd = myTable[modHashCode]; |
| if (kwd == null) return false; |
| |
| if (bufferArray != null) { |
| for (int j = 0; j < kwd.length; j++) { |
| if (bufferArray[j + offset] != kwd[j]) return false; |
| } |
| } else { |
| for (int j = 0; j < kwd.length; j++) { |
| if (buffer.charAt(j + offset) != kwd[j]) return false; |
| } |
| } |
| return true; |
| } |
| |
| private IElementType getTokenType(int hashCode) { |
| return myKeywords[hashCode % NUM_ENTRIES]; |
| } |
| |
| @SuppressWarnings({"HardCodedStringLiteral"}) |
| private HashTable(final LanguageLevel level) { |
| myLevel = level; |
| if (level.isAtLeast(LanguageLevel.JDK_1_4)) { |
| add("assert", JavaTokenType.ASSERT_KEYWORD); |
| if (level.isAtLeast(LanguageLevel.JDK_1_5)) { |
| add("enum", JavaTokenType.ENUM_KEYWORD); |
| } |
| } |
| add("abstract", JavaTokenType.ABSTRACT_KEYWORD); |
| add("default", JavaTokenType.DEFAULT_KEYWORD); |
| add("if", JavaTokenType.IF_KEYWORD); |
| add("private", JavaTokenType.PRIVATE_KEYWORD); |
| add("this", JavaTokenType.THIS_KEYWORD); |
| add("boolean", JavaTokenType.BOOLEAN_KEYWORD); |
| add("do", JavaTokenType.DO_KEYWORD); |
| add("implements", JavaTokenType.IMPLEMENTS_KEYWORD); |
| add("protected", JavaTokenType.PROTECTED_KEYWORD); |
| add("throw", JavaTokenType.THROW_KEYWORD); |
| add("break", JavaTokenType.BREAK_KEYWORD); |
| add("double", JavaTokenType.DOUBLE_KEYWORD); |
| add("import", JavaTokenType.IMPORT_KEYWORD); |
| add("public", JavaTokenType.PUBLIC_KEYWORD); |
| add("throws", JavaTokenType.THROWS_KEYWORD); |
| add("byte", JavaTokenType.BYTE_KEYWORD); |
| add("else", JavaTokenType.ELSE_KEYWORD); |
| add("instanceof", JavaTokenType.INSTANCEOF_KEYWORD); |
| add("return", JavaTokenType.RETURN_KEYWORD); |
| add("transient", JavaTokenType.TRANSIENT_KEYWORD); |
| add("case", JavaTokenType.CASE_KEYWORD); |
| add("extends", JavaTokenType.EXTENDS_KEYWORD); |
| add("int", JavaTokenType.INT_KEYWORD); |
| add("short", JavaTokenType.SHORT_KEYWORD); |
| add("try", JavaTokenType.TRY_KEYWORD); |
| add("catch", JavaTokenType.CATCH_KEYWORD); |
| add("final", JavaTokenType.FINAL_KEYWORD); |
| add("interface", JavaTokenType.INTERFACE_KEYWORD); |
| add("static", JavaTokenType.STATIC_KEYWORD); |
| add("void", JavaTokenType.VOID_KEYWORD); |
| add("char", JavaTokenType.CHAR_KEYWORD); |
| add("finally", JavaTokenType.FINALLY_KEYWORD); |
| add("long", JavaTokenType.LONG_KEYWORD); |
| add("strictfp", JavaTokenType.STRICTFP_KEYWORD); |
| add("volatile", JavaTokenType.VOLATILE_KEYWORD); |
| add("class", JavaTokenType.CLASS_KEYWORD); |
| add("float", JavaTokenType.FLOAT_KEYWORD); |
| add("native", JavaTokenType.NATIVE_KEYWORD); |
| add("super", JavaTokenType.SUPER_KEYWORD); |
| add("while", JavaTokenType.WHILE_KEYWORD); |
| add("const", JavaTokenType.CONST_KEYWORD); |
| add("for", JavaTokenType.FOR_KEYWORD); |
| add("new", JavaTokenType.NEW_KEYWORD); |
| add("switch", JavaTokenType.SWITCH_KEYWORD); |
| add("continue", JavaTokenType.CONTINUE_KEYWORD); |
| add("goto", JavaTokenType.GOTO_KEYWORD); |
| add("package", JavaTokenType.PACKAGE_KEYWORD); |
| add("synchronized", JavaTokenType.SYNCHRONIZED_KEYWORD); |
| add("true", JavaTokenType.TRUE_KEYWORD); |
| add("false", JavaTokenType.FALSE_KEYWORD); |
| add("null", JavaTokenType.NULL_KEYWORD); |
| } |
| } |
| |
| @Override |
| public final void start(@NotNull CharSequence buffer, int startOffset, int endOffset, int initialState) { |
| myBuffer = buffer; |
| myBufferArray = CharArrayUtil.fromSequenceWithoutCopying(buffer); |
| myBufferIndex = startOffset; |
| myBufferEndOffset = endOffset; |
| myTokenType = null; |
| myTokenEndOffset = startOffset; |
| myFlexLexer.reset(myBuffer, startOffset, endOffset, 0); |
| } |
| |
| @Override |
| public int getState() { |
| return 0; |
| } |
| |
| @Override |
| public final IElementType getTokenType() { |
| if (myTokenType == null) _locateToken(); |
| |
| return myTokenType; |
| } |
| |
| @Override |
| public final int getTokenStart() { |
| return myBufferIndex; |
| } |
| |
| @Override |
| public final int getTokenEnd() { |
| if (myTokenType == null) _locateToken(); |
| return myTokenEndOffset; |
| } |
| |
| |
| @Override |
| public final void advance() { |
| if (myTokenType == null) _locateToken(); |
| myTokenType = null; |
| } |
| |
| private void _locateToken() { |
| if (myTokenEndOffset == myBufferEndOffset) { |
| myTokenType = null; |
| myBufferIndex = myBufferEndOffset; |
| return; |
| } |
| |
| myBufferIndex = myTokenEndOffset; |
| |
| final char c = myBufferArray != null ? myBufferArray[myBufferIndex]:myBuffer.charAt(myBufferIndex); |
| switch (c) { |
| default: |
| flexLocateToken(); |
| break; |
| |
| case ' ': |
| case '\t': |
| case '\n': |
| case '\r': |
| case '\f': |
| myTokenType = TokenType.WHITE_SPACE; |
| myTokenEndOffset = getWhitespaces(myBufferIndex + 1); |
| break; |
| |
| case '/': |
| if (myBufferIndex + 1 >= myBufferEndOffset) { |
| myTokenType = JavaTokenType.DIV; |
| myTokenEndOffset = myBufferEndOffset; |
| } |
| else { |
| final char nextChar = myBufferArray != null ? myBufferArray[myBufferIndex + 1]:myBuffer.charAt(myBufferIndex + 1); |
| |
| if (nextChar == '/') { |
| myTokenType = JavaTokenType.END_OF_LINE_COMMENT; |
| myTokenEndOffset = getLineTerminator(myBufferIndex + 2); |
| } |
| else if (nextChar == '*') { |
| if (myBufferIndex + 2 >= myBufferEndOffset || |
| (myBufferArray != null ? myBufferArray[myBufferIndex + 2]:myBuffer.charAt(myBufferIndex + 2)) != '*' || |
| (myBufferIndex + 3 < myBufferEndOffset && |
| (myBufferArray != null ? myBufferArray[myBufferIndex + 3]:myBuffer.charAt(myBufferIndex + 3)) == '/')) { |
| myTokenType = JavaTokenType.C_STYLE_COMMENT; |
| myTokenEndOffset = getClosingComment(myBufferIndex + 2); |
| } |
| else { |
| myTokenType = JavaDocElementType.DOC_COMMENT; |
| myTokenEndOffset = getClosingComment(myBufferIndex + 3); |
| } |
| } |
| else if (c > 127 && Character.isJavaIdentifierStart(c)) { |
| myTokenEndOffset = getIdentifier(myBufferIndex + 1); |
| } |
| else { |
| flexLocateToken(); |
| } |
| } |
| break; |
| |
| case '"': |
| case '\'': |
| myTokenType = c == '"' ? JavaTokenType.STRING_LITERAL : JavaTokenType.CHARACTER_LITERAL; |
| myTokenEndOffset = getClosingParenthesis(myBufferIndex + 1, c); |
| } |
| |
| if (myTokenEndOffset > myBufferEndOffset) { |
| myTokenEndOffset = myBufferEndOffset; |
| } |
| } |
| |
| private int getWhitespaces(int pos) { |
| if (pos >= myBufferEndOffset) return myBufferEndOffset; |
| final CharSequence lBuffer = myBuffer; |
| final char[] lBufferArray = myBufferArray; |
| |
| char c = lBufferArray != null ? lBufferArray[pos]:lBuffer.charAt(pos); |
| |
| while (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f') { |
| pos++; |
| if (pos == myBufferEndOffset) return pos; |
| c = lBufferArray != null ? lBufferArray[pos]:lBuffer.charAt(pos); |
| } |
| |
| return pos; |
| } |
| |
| private void flexLocateToken() { |
| try { |
| myFlexLexer.goTo(myBufferIndex); |
| myTokenType = myFlexLexer.advance(); |
| myTokenEndOffset = myFlexLexer.getTokenEnd(); |
| } |
| catch (IOException e) { |
| // Can't be |
| } |
| } |
| |
| private int getClosingParenthesis(int offset, char c) { |
| int pos = offset; |
| final int lBufferEnd = myBufferEndOffset; |
| if (pos >= lBufferEnd) return lBufferEnd; |
| |
| final CharSequence lBuffer = myBuffer; |
| final char[] lBufferArray = myBufferArray; |
| char cur = lBufferArray != null ? lBufferArray[pos]:lBuffer.charAt(pos); |
| |
| while (true) { |
| while (cur != c && cur != '\n' && cur != '\r' && cur != '\\') { |
| pos++; |
| if (pos >= lBufferEnd) return lBufferEnd; |
| cur = lBufferArray != null ? lBufferArray[pos]:lBuffer.charAt(pos); |
| } |
| |
| if (cur == '\\') { |
| pos++; |
| if (pos >= lBufferEnd) return lBufferEnd; |
| cur = lBufferArray != null ? lBufferArray[pos]:lBuffer.charAt(pos); |
| if (cur == '\n' || cur == '\r') continue; |
| pos++; |
| if (pos >= lBufferEnd) return lBufferEnd; |
| cur = lBufferArray != null ? lBufferArray[pos]:lBuffer.charAt(pos); |
| } |
| else if (cur == c) { |
| break; |
| } |
| else { |
| pos--; |
| break; |
| } |
| } |
| |
| return pos + 1; |
| } |
| |
| private int getClosingComment(int offset) { |
| int pos = offset; |
| |
| final int lBufferEnd = myBufferEndOffset; |
| final CharSequence lBuffer = myBuffer; |
| final char[] lBufferArray = myBufferArray; |
| |
| while (pos < lBufferEnd - 1) { |
| final char c = lBufferArray != null ? lBufferArray[pos]:lBuffer.charAt(pos); |
| |
| if (c == '*' && (lBufferArray != null ? lBufferArray[pos + 1]:lBuffer.charAt(pos + 1)) == '/') { |
| break; |
| } |
| pos++; |
| } |
| |
| return pos + 2; |
| } |
| |
| private int getLineTerminator(int offset) { |
| int pos = offset; |
| final int lBufferEnd = myBufferEndOffset; |
| final CharSequence lBuffer = myBuffer; |
| final char[] lBufferArray = myBufferArray; |
| |
| while (pos < lBufferEnd) { |
| final char c = lBufferArray != null ? lBufferArray[pos]:lBuffer.charAt(pos); |
| if (c == '\r' || c == '\n') break; |
| pos++; |
| } |
| |
| return pos; |
| } |
| |
| private int getIdentifier(int offset) { |
| final CharSequence lBuffer = myBuffer; |
| final char[] lBufferArray = myBufferArray; |
| |
| int hashCode = (lBufferArray != null ? lBufferArray[offset - 1]:lBuffer.charAt(offset - 1)) * 2; |
| final int lBufferEnd = myBufferEndOffset; |
| |
| int pos = offset; |
| if (pos < lBufferEnd) { |
| char c = lBufferArray != null ? lBufferArray[pos]:lBuffer.charAt(pos); |
| |
| while (c >= 'a' && c <= 'z' || |
| c >= 'A' && c <= 'Z' || |
| c >= '0' && c <= '9' || |
| c == '_' || |
| c == '$' || |
| c > 127 && Character.isJavaIdentifierPart(c)) { |
| pos++; |
| hashCode += c; |
| |
| if (pos == lBufferEnd) break; |
| c = lBufferArray != null ? lBufferArray[pos]:lBuffer.charAt(pos); |
| } |
| } |
| |
| if (myTable.contains(hashCode, lBufferArray, lBuffer, offset - 1)) { |
| myTokenType = myTable.getTokenType(hashCode); |
| } |
| else { |
| myTokenType = JavaTokenType.IDENTIFIER; |
| } |
| |
| return pos; |
| } |
| |
| @NotNull |
| @Override |
| public CharSequence getBufferSequence() { |
| return myBuffer; |
| } |
| |
| @Override |
| public final int getBufferEnd() { |
| return myBufferEndOffset; |
| } |
| } |