| /* |
| * Copyright 2007 Sascha Weinreuter |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.intellij.plugins.relaxNG.compact.lexer; |
| |
| import com.intellij.lexer.LexerBase; |
| import com.intellij.openapi.diagnostic.Logger; |
| import com.intellij.openapi.util.io.FileUtil; |
| import com.intellij.psi.TokenType; |
| import com.intellij.psi.tree.IElementType; |
| import com.intellij.util.text.CharArrayCharSequence; |
| import com.intellij.util.text.CharArrayUtil; |
| import gnu.trove.TIntIntHashMap; |
| import org.intellij.plugins.relaxNG.compact.RncTokenTypes; |
| import org.jetbrains.annotations.NotNull; |
| import org.jetbrains.annotations.Nullable; |
| import org.kohsuke.rngom.parse.compact.*; |
| |
| import java.io.CharArrayReader; |
| import java.io.FileReader; |
| import java.io.IOException; |
| import java.io.Reader; |
| import java.lang.reflect.Field; |
| import java.net.URL; |
| import java.security.CodeSource; |
| import java.util.Arrays; |
| import java.util.LinkedList; |
| |
| /** |
| * An adapter to use the lexer ("TokenManager") generated from a javacc grammar. |
| * |
| * Not sure if it was easier to write this than hacking my own lexer... |
| */ |
| public class CompactSyntaxLexerAdapter extends LexerBase { |
| private static final Logger LOG = Logger.getInstance(CompactSyntaxLexerAdapter.class.getName()); |
| |
| private static final Field myStateField; |
| |
| static { |
| try { |
| myStateField = CompactSyntaxTokenManager.class.getDeclaredField("curLexState"); |
| myStateField.setAccessible(true); |
| } catch (NoSuchFieldException e) { |
| throw new Error(e); |
| } |
| } |
| |
| private static final Token START = new Token(); |
| |
| private CompactSyntaxTokenManager myLexer; |
| private final LinkedList<Token> myTokenQueue = new LinkedList<Token>(); |
| private Token myCurrentToken; |
| private int myCurrentOffset; |
| private int myCurrentEnd; |
| |
| private IElementType myCurrentTokenType; |
| private CharSequence myBuffer; |
| private int myEndOffset; |
| private TIntIntHashMap myLengthMap; |
| |
| @Override |
| public void advance() { |
| try { |
| myCurrentToken = nextToken(); |
| myCurrentOffset = myCurrentEnd; |
| |
| if (myCurrentToken != null) { |
| |
| myCurrentEnd = myCurrentOffset + myCurrentToken.image.length(); |
| for (int i = myCurrentOffset; i < myCurrentEnd; i++) { |
| myCurrentEnd += myLengthMap.get(i); |
| } |
| |
| if (myCurrentToken.kind == CompactSyntaxConstants.EOF) { |
| assert myCurrentOffset == myEndOffset : "actual: " + myCurrentOffset + ", expected: " + myEndOffset; |
| myCurrentToken = null; |
| } |
| } |
| |
| // if (myCurrentToken != null) { |
| // System.out.println("token = <" + RncTokenTypes.get(myCurrentToken.kind).toString() + "> [" + myCurrentToken.image + "]"); |
| // } |
| } catch (TokenMgrError e) { |
| LOG.error(e); |
| myCurrentToken = null; |
| } |
| |
| if (myCurrentToken == null) { |
| myCurrentTokenType = null; |
| } else { |
| myCurrentTokenType = RncTokenTypes.get(myCurrentToken.kind); |
| |
| // collapse whitespace tokens into TokenType.WHITE_SPACE [IDEA-12106] |
| if (RncTokenTypes.WHITESPACE.contains(myCurrentTokenType)) { |
| myCurrentTokenType = TokenType.WHITE_SPACE; |
| } |
| } |
| } |
| |
| private Token nextToken() { |
| if (myTokenQueue.size() > 0) { |
| return myTokenQueue.removeFirst(); |
| } |
| |
| final Token t = myLexer.getNextToken(); |
| if (t.specialToken != null) { |
| myTokenQueue.addFirst(t); |
| for (Token s = t.specialToken; s != null; s = s.specialToken) { |
| myTokenQueue.addFirst(s); |
| } |
| return myTokenQueue.removeFirst(); |
| } else { |
| return t; |
| } |
| } |
| |
| @Deprecated |
| public char[] getBuffer() { |
| return CharArrayUtil.fromSequence(myBuffer); |
| } |
| |
| @NotNull |
| @Override |
| public CharSequence getBufferSequence() { |
| return myBuffer; |
| } |
| |
| @Override |
| public int getBufferEnd() { |
| return myEndOffset; |
| } |
| |
| @Override |
| public int getState() { |
| try { |
| return (Integer)myStateField.get(myLexer); |
| } catch (Exception e) { |
| return -1; |
| } |
| } |
| |
| @Override |
| public int getTokenEnd() { |
| return myCurrentEnd; |
| } |
| |
| @Override |
| public int getTokenStart() { |
| return myCurrentToken == null ? 0 : myCurrentOffset; |
| } |
| |
| @Override |
| @Nullable |
| public IElementType getTokenType() { |
| if (myCurrentToken == null) { |
| return null; |
| } else { |
| return myCurrentTokenType; |
| } |
| } |
| |
| @Deprecated |
| public void start(char[] buffer, int startOffset, int endOffset, int initialState) { |
| myBuffer = new CharArrayCharSequence(buffer, startOffset, endOffset); |
| |
| final CharArrayReader reader = new CharArrayReader(buffer, startOffset, endOffset - startOffset); |
| init(startOffset, endOffset, reader, initialState); |
| } |
| |
| @Override |
| @SuppressWarnings("IOResourceOpenedButNotSafelyClosed") |
| public void start(@NotNull CharSequence buffer, int startOffset, int endOffset, int initialState) { |
| myBuffer = buffer; |
| |
| final Reader reader = new CharSequenceReader(buffer, startOffset, endOffset); |
| init(startOffset, endOffset, reader, initialState); |
| } |
| |
| @SuppressWarnings("IOResourceOpenedButNotSafelyClosed") |
| private void init(int startOffset, int endOffset, Reader reader, int initialState) { |
| myEndOffset = endOffset; |
| myLengthMap = new TIntIntHashMap(); |
| |
| myLexer = createTokenManager(initialState, new EscapePreprocessor(reader, startOffset, myLengthMap)); |
| |
| myCurrentToken = START; |
| myCurrentOffset = startOffset; |
| myCurrentEnd = startOffset; |
| myTokenQueue.clear(); |
| advance(); |
| } |
| |
| private static CompactSyntaxTokenManager createTokenManager(int initialState, EscapePreprocessor preprocessor) { |
| try { |
| return new CompactSyntaxTokenManager(new SimpleCharStream(preprocessor, 1, 1), initialState); |
| } catch (NoSuchMethodError e) { |
| final Class<CompactSyntaxTokenManager> managerClass = CompactSyntaxTokenManager.class; |
| LOG.error("Unsupported version of RNGOM in classpath. Please check your IDEA and JDK installation.", e, |
| "Actual parameter types: " + Arrays.toString(managerClass.getConstructors()[0].getParameterTypes()), |
| "Location of " + managerClass.getName() + ": " + getSourceLocation(managerClass), |
| "Location of " + CharStream.class.getName() + ": " + getSourceLocation(CharStream.class)); |
| throw e; |
| } |
| } |
| |
| private static String getSourceLocation(Class<?> clazz) { |
| final CodeSource source = clazz.getProtectionDomain().getCodeSource(); |
| if (source != null) { |
| final URL location = source.getLocation(); |
| if (location != null) { |
| return location.toExternalForm(); |
| } |
| } |
| final String name = clazz.getName().replace('.', '/') + ".class"; |
| final ClassLoader loader = clazz.getClassLoader(); |
| final URL resource = loader != null ? loader.getResource(name) : ClassLoader.getSystemResource(name); |
| return resource != null ? resource.toExternalForm() : "<unknown>"; |
| } |
| |
| public static void main(String[] args) throws IOException { |
| final CompactSyntaxLexerAdapter lexer = new CompactSyntaxLexerAdapter(); |
| lexer.start(new CharArrayCharSequence(FileUtil.adaptiveLoadText(new FileReader(args[0])))); |
| while (lexer.getTokenType() != null) { |
| System.out.println("token = " + lexer.getTokenType()); |
| final int start = lexer.getTokenStart(); |
| System.out.println("start = " + start); |
| final int end = lexer.getTokenEnd(); |
| System.out.println("end = " + end); |
| final CharSequence t = lexer.getBufferSequence().subSequence(start, end); |
| System.out.println("t = " + t); |
| lexer.advance(); |
| } |
| } |
| |
| // adapted from com.intellij.util.text.CharSequenceReader with start- and endOffset support |
| static class CharSequenceReader extends Reader { |
| private final CharSequence myText; |
| private final int myEndOffset; |
| private int myCurPos; |
| |
| public CharSequenceReader(final CharSequence text, int startOffset, int endOffset) { |
| myText = text; |
| myEndOffset = endOffset; |
| myCurPos = startOffset; |
| } |
| |
| @Override |
| public void close() { |
| } |
| |
| @Override |
| public int read(char[] cbuf, int off, int len) { |
| if ((off < 0) || (off > cbuf.length) || (len < 0) || ((off + len) > cbuf.length) || ((off + len) < 0)) { |
| throw new IndexOutOfBoundsException(); |
| } else if (len == 0) { |
| return 0; |
| } |
| |
| if (myText instanceof CharArrayCharSequence) { // Optimization |
| final int readChars = ((CharArrayCharSequence)myText).readCharsTo(myCurPos, cbuf, off, len); |
| if (readChars < 0) return -1; |
| myCurPos += readChars; |
| return readChars; |
| } |
| |
| int charsToCopy = Math.min(len, myEndOffset - myCurPos); |
| if (charsToCopy <= 0) return -1; |
| |
| for (int n = 0; n < charsToCopy; n++) { |
| cbuf[n + off] = myText.charAt(n + myCurPos); |
| } |
| |
| myCurPos += charsToCopy; |
| return charsToCopy; |
| } |
| |
| @Override |
| public int read() { |
| if (myCurPos >= myEndOffset) return -1; |
| return myText.charAt(myCurPos++); |
| } |
| } |
| } |