xml/relaxng/src/org/intellij/plugins/relaxNG/compact/lexer/EscapePreprocessor.java - platform/tools/idea - Git at Google

 /*
  * Copyright 2007 Sascha Weinreuter
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.intellij.plugins.relaxNG.compact.lexer;

 import gnu.trove.TIntArrayList;
 import gnu.trove.TIntIntHashMap;
 import org.kohsuke.rngom.util.Utf16;

 import java.io.FilterReader;
 import java.io.IOException;
 import java.io.Reader;

 /**
  * A reader that deals with escape sequences in RNC files (\x{xx}) and keeps track of their positions to build correct
  * token ranges in the lexer.
  * <p/>
  * Every newline read from the underlying reader is delivered as a NUL character, and every recognized escape sequence
  * is delivered as the character it denotes (as a surrogate pair for values above 0xFFFF). For each escape, the number
  * of source characters that followed the backslash is recorded in the length map passed to the constructor, keyed by
  * the offset of the backslash.
  * <p/>
  * Created by IntelliJ IDEA.
  * User: sweinreuter
  * Date: 05.08.2007
  */
 class EscapePreprocessor extends FilterReader {
   /** Characters already pulled from the underlying reader (or synthesized) that have not been delivered yet. */
   private final TIntArrayList myQueuedChars;
   /** Maps the offset of an escape's backslash to the number of source characters consumed after that backslash. */
   private final TIntIntHashMap myLengthMap;

   /** Running offset: the start offset plus the number of source characters accounted for so far. */
   private int myOffset;

   /**
    * @param reader      the underlying source of characters
    * @param startOffset the offset the first character of <code>reader</code> is considered to be at
    * @param map         the map that receives the lengths of the processed escape sequences
    */
   public EscapePreprocessor(Reader reader, int startOffset, TIntIntHashMap map) {
     super(reader);
     myOffset = startOffset;
     myQueuedChars = new TIntArrayList();
     myLengthMap = map;
   }

   /**
    * Reads at most one processed character into <code>cbuf[off]</code>.
    *
    * @return 0 if <code>len</code> is 0, -1 at the end of the stream, 1 otherwise
    */
   @Override
   public int read(char cbuf[], int off, int len) throws IOException {
     if (len == 0) {
       // Reader contract: a zero-length request must not consume input or touch the buffer.
       return 0;
     }
     final int i = read();
     if (i == -1) {
       return -1;
     }
     cbuf[off] = (char)i; // not really efficient, but acceptable since we're usually not having to deal with megabytes of RNC files...
     return 1;
   }

   /**
    * Reads a single processed character.
    *
    * @return the next character with newlines replaced by NUL and escape sequences resolved, or -1 at the end of the stream
    */
   @Override
   public int read() throws IOException {
     // Deliver look-ahead characters (or a pending low surrogate) before touching the underlying reader again.
     if (myQueuedChars.size() > 0) {
       return consume();
     }
     final int i = super.read();
     if (i == -1) {
       return -1;
     }
     myOffset++;

     switch (i) {
       case '\r':
         assert false : "Unexpected newline character";  // IDEA documents are supposed to have newlines normalized to \n
         if (peek() == '\n') {
           consume();
           myLengthMap.put(myOffset - 1, 2);
         }
         // intentional fall-through: a carriage return is reported exactly like a line feed
       case '\n':
         return '\u0000';

       case '\\':
         return readEscape();
     }

     return i;
   }

   /**
    * Handles the characters following a backslash that has just been read: one or more 'x', an opening brace, hex
    * digits and an optional closing brace.
    * <p/>
    * NOTE(review): a sequence without any hex digit or without the closing brace is still treated as an escape, and
    * values that overflow an int or exceed 0x10FFFF are not rejected - confirm whether the lexer relies on this.
    *
    * @return the backslash itself if no escape follows, otherwise the escaped character (or its high surrogate)
    */
   private int readEscape() throws IOException {
     int n = 0;
     int x;
     while ((x = peek()) == 'x') {
       n++;
     }
     if (n == 0 || x != '{') {
       // Not an escape: whatever has been peeked stays queued and is delivered by the following reads.
       return '\\';
     }
     n++; // the opening brace

     int value = 0;
     while (isHexChar(x = peek())) {
       n++;
       value = (value << 4) | Character.digit(x, 16);
     }
     if (x == '}') {
       n++;
     }
     // Drop the characters that make up the escape; a terminator other than '}' remains queued.
     consume(n);

     myLengthMap.put(myOffset - 1, n);
     myOffset += n;

     if (value <= 0xFFFF) {
       return value;
     }

     // The low surrogate has to directly follow the high one, so it goes in front of any character that is still queued.
     myQueuedChars.insert(0, Utf16.surrogate2(value));
     return Utf16.surrogate1(value);
   }

   /** Returns whether <code>c</code> is an ASCII hexadecimal digit; -1 (end of stream) is not. */
   private static boolean isHexChar(int c) {
     return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
   }

   /** Removes and returns the first queued character, advancing the offset; returns -1 if nothing is queued. */
   private int consume() {
     if (myQueuedChars.size() > 0) {
       myOffset++;
       return myQueuedChars.remove(0);
     }
     return -1;
   }

   /** Discards the first <code>n</code> queued characters without changing the offset. */
   private void consume(int n) {
     myQueuedChars.remove(0, n);
   }

   /** Reads the next character from the underlying reader and queues it; returns -1 (queuing nothing) at the end of the stream. */
   private int peek() throws IOException {
     final int i = super.read();
     if (i == -1) {
       return -1;
     }
     myQueuedChars.add(i);
     return i;
   }
 }
	/*
	* Copyright 2007 Sascha Weinreuter
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.intellij.plugins.relaxNG.compact.lexer;

	import gnu.trove.TIntArrayList;
	import gnu.trove.TIntIntHashMap;
	import org.kohsuke.rngom.util.Utf16;

	import java.io.FilterReader;
	import java.io.IOException;
	import java.io.Reader;

	/**
	* A reader that deals with escape sequences in RNC files (\x{xx}) and keeps track of their positions to build correct
	* token ranges in the lexer.
	* <p/>
	* Created by IntelliJ IDEA.
	* User: sweinreuter
	* Date: 05.08.2007
	*/
	class EscapePreprocessor extends FilterReader {
	private final TIntArrayList myQueuedChars;
	private final TIntIntHashMap myLengthMap;

	private int myOffset;

	public EscapePreprocessor(Reader reader, int startOffset, TIntIntHashMap map) {
	super(reader);
	myOffset = startOffset;
	myQueuedChars = new TIntArrayList();
	myLengthMap = map;
	}

	@Override
	public int read(char cbuf[], int off, int len) throws IOException {
	final int i = read();
	if (i == -1) {
	return -1;
	}
	cbuf[off] = (char)i; // not really efficient, but acceptable since we're usually not having to deal with megabytes of RNC files...
	return 1;
	}

	@Override
	public int read() throws IOException {
	if (myQueuedChars.size() > 0) {
	return consume();
	}
	final int i = super.read();
	if (i == -1) {
	return -1;
	}
	myOffset++;

	switch (i) {
	case '\r':
	assert false : "Unexpected newline character"; // IDEA document's are supposed to newlines normalized to \n
	if (peek() == '\n') {
	consume();
	myLengthMap.put(myOffset - 1, 2);
	}
	case '\n':
	return '\u0000';

	case '\\':
	int n = 0;
	int x;
	while ((x = peek()) == 'x') {
	n++;
	}
	if (n > 0 && x == '{') {
	n++;
	} else {
	return i;
	}
	int value = 0;
	while (isHexChar((char)(x = peek()))) {
	n++;
	value <<= 4;
	value \|= Character.digit(x, 16);
	}
	if (x == '}') {
	n++;
	}
	consume(n);

	myLengthMap.put(myOffset - 1, n);
	myOffset += n;

	if (value <= 0xFFFF) {
	return value;
	}

	myQueuedChars.add(Utf16.surrogate2(value));
	return Utf16.surrogate1(value);
	}

	return i;
	}

	private static boolean isHexChar(char i) {
	if (Character.isDigit(i)) return true;
	final char c = Character.toLowerCase(i);
	return c >= 'a' && c <= 'f';
	}

	private int consume() {
	if (myQueuedChars.size() > 0) {
	myOffset++;
	return myQueuedChars.remove(0);
	}
	return -1;
	}

	private void consume(int n) {
	myQueuedChars.remove(0, n);
	}

	private int peek() throws IOException {
	final int i = super.read();
	if (i == -1) {
	return -1;
	}
	myQueuedChars.add(i);
	return i;
	}
	}