src/test/java/org/pyyaml/CanonicalScanner.java - platform/external/snakeyaml - Git at Google

 /*
  * See LICENSE file in distribution for copyright and licensing information.
  */
 package org.pyyaml;

 import java.util.ArrayList;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;

 import org.yaml.snakeyaml.error.Mark;
 import org.yaml.snakeyaml.scanner.Scanner;
 import org.yaml.snakeyaml.scanner.ScannerImpl;
 import org.yaml.snakeyaml.tokens.AliasToken;
 import org.yaml.snakeyaml.tokens.AnchorToken;
 import org.yaml.snakeyaml.tokens.DirectiveToken;
 import org.yaml.snakeyaml.tokens.DocumentStartToken;
 import org.yaml.snakeyaml.tokens.FlowEntryToken;
 import org.yaml.snakeyaml.tokens.FlowMappingEndToken;
 import org.yaml.snakeyaml.tokens.FlowMappingStartToken;
 import org.yaml.snakeyaml.tokens.FlowSequenceEndToken;
 import org.yaml.snakeyaml.tokens.FlowSequenceStartToken;
 import org.yaml.snakeyaml.tokens.KeyToken;
 import org.yaml.snakeyaml.tokens.ScalarToken;
 import org.yaml.snakeyaml.tokens.StreamEndToken;
 import org.yaml.snakeyaml.tokens.StreamStartToken;
 import org.yaml.snakeyaml.tokens.TagToken;
 import org.yaml.snakeyaml.tokens.Token;
 import org.yaml.snakeyaml.tokens.ValueToken;

 public class CanonicalScanner implements Scanner {
     private static final String DIRECTIVE = "%YAML 1.1";
     private final static Map<Character, Integer> QUOTE_CODES = ScannerImpl.ESCAPE_CODES;

     private final static Map<Character, String> QUOTE_REPLACES = ScannerImpl.ESCAPE_REPLACEMENTS;

     private String data;
     private int index;
     public LinkedList<Token> tokens;
     private boolean scanned;
     private Mark mark;

     public CanonicalScanner(String data) {
         this.data = data + "\0";
         this.index = 0;
         this.tokens = new LinkedList<Token>();
         this.scanned = false;
         this.mark = new Mark("test", 0, 0, 0, data, 0);
     }

     public boolean checkToken(List<Class<? extends Token>> choices) {
         if (!scanned) {
             scan();
         }
         if (!tokens.isEmpty()) {
             if (choices.isEmpty()) {
                 return true;
             }
             Token first = this.tokens.get(0);
             for (Class<? extends Token> choice : choices) {
                 if (choice.isInstance(first)) {
                     return true;
                 }
             }
         }
         return false;
     }

     public boolean checkToken(Class<? extends Token> choice) {
         List<Class<? extends Token>> list = new ArrayList<Class<? extends Token>>();
         list.add(choice);
         return checkToken(list);
     }

     public Token peekToken() {
         if (!scanned) {
             scan();
         }
         if (!tokens.isEmpty()) {
             return this.tokens.get(0);
         }
         return null;
     }

     public Token getToken() {
         if (!scanned) {
             scan();
         }
         return this.tokens.poll();
     }

     public Token getToken(Class<? extends Token> choice) {
         Token token = getToken();
         if (choice != null && !choice.isInstance(token)) {
             throw new CanonicalException("unexpected token " + token);
         }
         return token;
     }

     private void scan() {
         this.tokens.add(new StreamStartToken(mark, mark));
         boolean stop = false;
         while (!stop) {
             findToken();
             char ch = data.charAt(index);
             switch (ch) {
             case '\0':
                 tokens.add(new StreamEndToken(mark, mark));
                 stop = true;
                 break;

             case '%':
                 tokens.add(scanDirective());
                 break;

             case '-':
                 if ("---".equals(data.substring(index, index + 3))) {
                     index += 3;
                     tokens.add(new DocumentStartToken(mark, mark));
                 }
                 break;

             case '[':
                 index++;
                 tokens.add(new FlowSequenceStartToken(mark, mark));
                 break;

             case '{':
                 index++;
                 tokens.add(new FlowMappingStartToken(mark, mark));
                 break;

             case ']':
                 index++;
                 tokens.add(new FlowSequenceEndToken(mark, mark));
                 break;

             case '}':
                 index++;
                 tokens.add(new FlowMappingEndToken(mark, mark));
                 break;

             case '?':
                 index++;
                 tokens.add(new KeyToken(mark, mark));
                 break;

             case ':':
                 index++;
                 tokens.add(new ValueToken(mark, mark));
                 break;

             case ',':
                 index++;
                 tokens.add(new FlowEntryToken(mark, mark));
                 break;

             case '*':
                 tokens.add(scanAlias());
                 break;

             case '&':
                 tokens.add(scanAlias());
                 break;

             case '!':
                 tokens.add(scanTag());
                 break;

             case '"':
                 tokens.add(scanScalar());
                 break;

             default:
                 throw new CanonicalException("invalid token");
             }
         }
         scanned = true;
     }

     private Token scanDirective() {
         String chunk1 = data.substring(index, index + DIRECTIVE.length());
         char chunk2 = data.charAt(index + DIRECTIVE.length());
         if (DIRECTIVE.equals(chunk1) && "\n\0".indexOf(chunk2) != -1) {
             index += DIRECTIVE.length();
             List<Integer> implicit = new ArrayList<Integer>(2);
             implicit.add(new Integer(1));
             implicit.add(new Integer(1));
             return new DirectiveToken("YAML", implicit, mark, mark);
         } else {
             throw new CanonicalException("invalid directive");
         }
     }

     private Token scanAlias() {
         boolean isTokenClassAlias;
         if (data.charAt(index) == '*') {
             isTokenClassAlias = true;
         } else {
             isTokenClassAlias = false;
         }
         index++;
         int start = index;
         while (", \n\0".indexOf(data.charAt(index)) == -1) {
             index++;
         }
         String value = data.substring(start, index);
         Token token;
         if (isTokenClassAlias) {
             token = new AliasToken(value, mark, mark);
         } else {
             token = new AnchorToken(value, mark, mark);
         }
         return token;
     }

     private Token scanTag() {
         index++;
         int start = index;
         while (" \n\0".indexOf(data.charAt(index)) == -1) {
             index++;
         }
         String value = data.substring(start, index);
         if (value.length() == 0) {
             value = "!";
         } else if (value.charAt(0) == '!') {
             value = "tag:yaml.org,2002:" + value.substring(1);
         } else if (value.charAt(0) == '<' && value.charAt(value.length() - 1) == '>') {
             value = value.substring(1, value.length() - 1);
         } else {
             value = "!" + value;
         }
         return new TagToken(new String[] { "", value }, mark, mark);
     }

     private Token scanScalar() {
         index++;
         StringBuffer chunks = new StringBuffer();
         int start = index;
         boolean ignoreSpaces = false;
         while (data.charAt(index) != '"') {
             if (data.charAt(index) == '\\') {
                 ignoreSpaces = false;
                 chunks.append(data.substring(start, index));
                 index++;
                 char ch = data.charAt(index);
                 index++;
                 if (ch == '\n') {
                     ignoreSpaces = true;
                 } else if (QUOTE_CODES.keySet().contains(ch)) {
                     int length = QUOTE_CODES.get(ch);
                     int code = Integer.parseInt(data.substring(index, index + length), 16);
                     chunks.append(String.valueOf((char) code));
                     index += length;
                 } else {
                     if (!QUOTE_REPLACES.keySet().contains(ch)) {
                         throw new CanonicalException("invalid escape code");
                     }
                     chunks.append(QUOTE_REPLACES.get(ch));
                 }
                 start = index;
             } else if (data.charAt(index) == '\n') {
                 chunks.append(data.substring(start, index));
                 chunks.append(" ");
                 index++;
                 start = index;
                 ignoreSpaces = true;
             } else if (ignoreSpaces && data.charAt(index) == ' ') {
                 index++;
                 start = index;
             } else {
                 ignoreSpaces = false;
                 index++;
             }
         }
         chunks.append(data.substring(start, index));
         index++;
         return new ScalarToken(chunks.toString(), mark, mark, false);
     }

     private void findToken() {
         boolean found = false;
         while (!found) {
             while (" \t".indexOf(data.charAt(index)) != -1) {
                 index++;
             }
             if (data.charAt(index) == '#') {
                 while (data.charAt(index) != '\n') {
                     index++;
                 }
             }
             if (data.charAt(index) == '\n') {
                 index++;
             } else {
                 found = true;
             }
         }
     }
 }
	/*
	* See LICENSE file in distribution for copyright and licensing information.
	*/
	package org.pyyaml;

	import java.util.ArrayList;
	import java.util.LinkedList;
	import java.util.List;
	import java.util.Map;

	import org.yaml.snakeyaml.error.Mark;
	import org.yaml.snakeyaml.scanner.Scanner;
	import org.yaml.snakeyaml.scanner.ScannerImpl;
	import org.yaml.snakeyaml.tokens.AliasToken;
	import org.yaml.snakeyaml.tokens.AnchorToken;
	import org.yaml.snakeyaml.tokens.DirectiveToken;
	import org.yaml.snakeyaml.tokens.DocumentStartToken;
	import org.yaml.snakeyaml.tokens.FlowEntryToken;
	import org.yaml.snakeyaml.tokens.FlowMappingEndToken;
	import org.yaml.snakeyaml.tokens.FlowMappingStartToken;
	import org.yaml.snakeyaml.tokens.FlowSequenceEndToken;
	import org.yaml.snakeyaml.tokens.FlowSequenceStartToken;
	import org.yaml.snakeyaml.tokens.KeyToken;
	import org.yaml.snakeyaml.tokens.ScalarToken;
	import org.yaml.snakeyaml.tokens.StreamEndToken;
	import org.yaml.snakeyaml.tokens.StreamStartToken;
	import org.yaml.snakeyaml.tokens.TagToken;
	import org.yaml.snakeyaml.tokens.Token;
	import org.yaml.snakeyaml.tokens.ValueToken;

	public class CanonicalScanner implements Scanner {
	private static final String DIRECTIVE = "%YAML 1.1";
	private final static Map<Character, Integer> QUOTE_CODES = ScannerImpl.ESCAPE_CODES;

	private final static Map<Character, String> QUOTE_REPLACES = ScannerImpl.ESCAPE_REPLACEMENTS;

	private String data;
	private int index;
	public LinkedList<Token> tokens;
	private boolean scanned;
	private Mark mark;

	public CanonicalScanner(String data) {
	this.data = data + "\0";
	this.index = 0;
	this.tokens = new LinkedList<Token>();
	this.scanned = false;
	this.mark = new Mark("test", 0, 0, 0, data, 0);
	}

	public boolean checkToken(List<Class<? extends Token>> choices) {
	if (!scanned) {
	scan();
	}
	if (!tokens.isEmpty()) {
	if (choices.isEmpty()) {
	return true;
	}
	Token first = this.tokens.get(0);
	for (Class<? extends Token> choice : choices) {
	if (choice.isInstance(first)) {
	return true;
	}
	}
	}
	return false;
	}

	public boolean checkToken(Class<? extends Token> choice) {
	List<Class<? extends Token>> list = new ArrayList<Class<? extends Token>>();
	list.add(choice);
	return checkToken(list);
	}

	public Token peekToken() {
	if (!scanned) {
	scan();
	}
	if (!tokens.isEmpty()) {
	return this.tokens.get(0);
	}
	return null;
	}

	public Token getToken() {
	if (!scanned) {
	scan();
	}
	return this.tokens.poll();
	}

	public Token getToken(Class<? extends Token> choice) {
	Token token = getToken();
	if (choice != null && !choice.isInstance(token)) {
	throw new CanonicalException("unexpected token " + token);
	}
	return token;
	}

	private void scan() {
	this.tokens.add(new StreamStartToken(mark, mark));
	boolean stop = false;
	while (!stop) {
	findToken();
	char ch = data.charAt(index);
	switch (ch) {
	case '\0':
	tokens.add(new StreamEndToken(mark, mark));
	stop = true;
	break;

	case '%':
	tokens.add(scanDirective());
	break;

	case '-':
	if ("---".equals(data.substring(index, index + 3))) {
	index += 3;
	tokens.add(new DocumentStartToken(mark, mark));
	}
	break;

	case '[':
	index++;
	tokens.add(new FlowSequenceStartToken(mark, mark));
	break;

	case '{':
	index++;
	tokens.add(new FlowMappingStartToken(mark, mark));
	break;

	case ']':
	index++;
	tokens.add(new FlowSequenceEndToken(mark, mark));
	break;

	case '}':
	index++;
	tokens.add(new FlowMappingEndToken(mark, mark));
	break;

	case '?':
	index++;
	tokens.add(new KeyToken(mark, mark));
	break;

	case ':':
	index++;
	tokens.add(new ValueToken(mark, mark));
	break;

	case ',':
	index++;
	tokens.add(new FlowEntryToken(mark, mark));
	break;

	case '*':
	tokens.add(scanAlias());
	break;

	case '&':
	tokens.add(scanAlias());
	break;

	case '!':
	tokens.add(scanTag());
	break;

	case '"':
	tokens.add(scanScalar());
	break;

	default:
	throw new CanonicalException("invalid token");
	}
	}
	scanned = true;
	}

	private Token scanDirective() {
	String chunk1 = data.substring(index, index + DIRECTIVE.length());
	char chunk2 = data.charAt(index + DIRECTIVE.length());
	if (DIRECTIVE.equals(chunk1) && "\n\0".indexOf(chunk2) != -1) {
	index += DIRECTIVE.length();
	List<Integer> implicit = new ArrayList<Integer>(2);
	implicit.add(new Integer(1));
	implicit.add(new Integer(1));
	return new DirectiveToken("YAML", implicit, mark, mark);
	} else {
	throw new CanonicalException("invalid directive");
	}
	}

	private Token scanAlias() {
	boolean isTokenClassAlias;
	if (data.charAt(index) == '*') {
	isTokenClassAlias = true;
	} else {
	isTokenClassAlias = false;
	}
	index++;
	int start = index;
	while (", \n\0".indexOf(data.charAt(index)) == -1) {
	index++;
	}
	String value = data.substring(start, index);
	Token token;
	if (isTokenClassAlias) {
	token = new AliasToken(value, mark, mark);
	} else {
	token = new AnchorToken(value, mark, mark);
	}
	return token;
	}

	private Token scanTag() {
	index++;
	int start = index;
	while (" \n\0".indexOf(data.charAt(index)) == -1) {
	index++;
	}
	String value = data.substring(start, index);
	if (value.length() == 0) {
	value = "!";
	} else if (value.charAt(0) == '!') {
	value = "tag:yaml.org,2002:" + value.substring(1);
	} else if (value.charAt(0) == '<' && value.charAt(value.length() - 1) == '>') {
	value = value.substring(1, value.length() - 1);
	} else {
	value = "!" + value;
	}
	return new TagToken(new String[] { "", value }, mark, mark);
	}

	private Token scanScalar() {
	index++;
	StringBuffer chunks = new StringBuffer();
	int start = index;
	boolean ignoreSpaces = false;
	while (data.charAt(index) != '"') {
	if (data.charAt(index) == '\\') {
	ignoreSpaces = false;
	chunks.append(data.substring(start, index));
	index++;
	char ch = data.charAt(index);
	index++;
	if (ch == '\n') {
	ignoreSpaces = true;
	} else if (QUOTE_CODES.keySet().contains(ch)) {
	int length = QUOTE_CODES.get(ch);
	int code = Integer.parseInt(data.substring(index, index + length), 16);
	chunks.append(String.valueOf((char) code));
	index += length;
	} else {
	if (!QUOTE_REPLACES.keySet().contains(ch)) {
	throw new CanonicalException("invalid escape code");
	}
	chunks.append(QUOTE_REPLACES.get(ch));
	}
	start = index;
	} else if (data.charAt(index) == '\n') {
	chunks.append(data.substring(start, index));
	chunks.append(" ");
	index++;
	start = index;
	ignoreSpaces = true;
	} else if (ignoreSpaces && data.charAt(index) == ' ') {
	index++;
	start = index;
	} else {
	ignoreSpaces = false;
	index++;
	}
	}
	chunks.append(data.substring(start, index));
	index++;
	return new ScalarToken(chunks.toString(), mark, mark, false);
	}

	private void findToken() {
	boolean found = false;
	while (!found) {
	while (" \t".indexOf(data.charAt(index)) != -1) {
	index++;
	}
	if (data.charAt(index) == '#') {
	while (data.charAt(index) != '\n') {
	index++;
	}
	}
	if (data.charAt(index) == '\n') {
	index++;
	} else {
	found = true;
	}
	}
	}
	}