blob: d759d3291763b8cfb961cba6c517563d2264b418 [file] [log] [blame]
/*
* Copyright 2007 Sascha Weinreuter
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.intellij.plugins.relaxNG.compact.lexer;
import gnu.trove.TIntArrayList;
import gnu.trove.TIntIntHashMap;
import org.kohsuke.rngom.util.Utf16;
import java.io.FilterReader;
import java.io.IOException;
import java.io.Reader;
/**
* A reader that deals with escape sequences in RNC files (\x{xx}) and keeps track of their positions to build correct
* token ranges in the lexer.
* <p/>
* Created by IntelliJ IDEA.
* User: sweinreuter
* Date: 05.08.2007
*/
class EscapePreprocessor extends FilterReader {
private final TIntArrayList myQueuedChars;
private final TIntIntHashMap myLengthMap;
private int myOffset;
public EscapePreprocessor(Reader reader, int startOffset, TIntIntHashMap map) {
super(reader);
myOffset = startOffset;
myQueuedChars = new TIntArrayList();
myLengthMap = map;
}
@Override
public int read(char cbuf[], int off, int len) throws IOException {
final int i = read();
if (i == -1) {
return -1;
}
cbuf[off] = (char)i; // not really efficient, but acceptable since we're usually not having to deal with megabytes of RNC files...
return 1;
}
@Override
public int read() throws IOException {
if (myQueuedChars.size() > 0) {
return consume();
}
final int i = super.read();
if (i == -1) {
return -1;
}
myOffset++;
switch (i) {
case '\r':
assert false : "Unexpected newline character"; // IDEA document's are supposed to newlines normalized to \n
if (peek() == '\n') {
consume();
myLengthMap.put(myOffset - 1, 2);
}
case '\n':
return '\u0000';
case '\\':
int n = 0;
int x;
while ((x = peek()) == 'x') {
n++;
}
if (n > 0 && x == '{') {
n++;
} else {
return i;
}
int value = 0;
while (isHexChar((char)(x = peek()))) {
n++;
value <<= 4;
value |= Character.digit(x, 16);
}
if (x == '}') {
n++;
}
consume(n);
myLengthMap.put(myOffset - 1, n);
myOffset += n;
if (value <= 0xFFFF) {
return value;
}
myQueuedChars.add(Utf16.surrogate2(value));
return Utf16.surrogate1(value);
}
return i;
}
private static boolean isHexChar(char i) {
if (Character.isDigit(i)) return true;
final char c = Character.toLowerCase(i);
return c >= 'a' && c <= 'f';
}
private int consume() {
if (myQueuedChars.size() > 0) {
myOffset++;
return myQueuedChars.remove(0);
}
return -1;
}
private void consume(int n) {
myQueuedChars.remove(0, n);
}
private int peek() throws IOException {
final int i = super.read();
if (i == -1) {
return -1;
}
myQueuedChars.add(i);
return i;
}
}