| /* |
| * Copyright (C) 2010 Google Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package com.google.clearsilver.jsilver.data; |
| |
| import com.google.clearsilver.jsilver.resourceloader.ResourceLoader; |
| |
| import java.io.IOException; |
| import java.io.LineNumberReader; |
| import java.io.Reader; |
| import java.util.ArrayList; |
| import java.util.Iterator; |
| import java.util.Stack; |
| |
| /** |
| * Parser for HDF based on the following grammar by Brandon Long. |
| * |
| * COMMAND := (INCLUDE | COMMENT | HDF_SET | HDF_DESCEND | HDF_ASCEND ) INCLUDE := #include |
| * "FILENAME" EOL COMMENT := # .* EOL HDF_DESCEND := HDF_NAME_ATTRS { EOL HDF_ASCEND := } EOL |
| * HDF_SET := (HDF_ASSIGN | HDF_MULTILINE_ASSIGN | HDF_COPY | HDF_LINK) HDF_ASSIGN := HDF_NAME_ATTRS |
| * = .* EOL HDF_MULTILINE_ASSIGN := HDF_NAME_ATTRS << EOM_MARKER EOL (.* EOL)* EOM_MARKER EOL |
| * HDF_COPY := HDF_NAME_ATTRS := HDF_NAME EOL HDF_LINK := HDF_NAME_ATTRS : HDF_NAME EOL |
| * HDF_NAME_ATTRS := (HDF_NAME | HDF_NAME [HDF_ATTRS]) HDF_ATTRS := (HDF_ATTR | HDF_ATTR, HDF_ATTRS) |
| * HDF_ATTR := (HDF_ATTR_KEY | HDF_ATTR_KEY = [^\s,\]]+ | HDF_ATTR_KEY = DQUOTED_STRING) |
| * HDF_ATTR_KEY := [0-9a-zA-Z]+ DQUOTED_STRING := "([^\\"]|\\[ntr]|\\.)*" HDF_NAME := (HDF_SUB_NAME |
| * | HDF_SUB_NAME\.HDF_NAME) HDF_SUB_NAME := [0-9a-zA-Z_]+ EOM_MARKER := \S.*\S EOL := \n |
| */ |
| public class NewHdfParser implements Parser { |
| |
| private final StringInternStrategy internStrategy; |
| |
| /** |
| * Special exception used to detect when we unexpectedly run out of characters on the line. |
| */ |
| private static class OutOfCharsException extends Exception {} |
| |
| /** |
| * Object used to hold the name and attributes of an HDF node before we are ready to commit it to |
| * the Data object. |
| */ |
| private static class HdfNameAttrs { |
| String name; |
| ArrayList<String> attrs = null; |
| int endOfSequence; |
| |
| void reset(String newname) { |
| // TODO: think about moving interning here instead of parser code |
| this.name = newname; |
| if (attrs != null) { |
| attrs.clear(); |
| } |
| endOfSequence = 0; |
| } |
| |
| void addAttribute(String key, String value) { |
| if (attrs == null) { |
| attrs = new ArrayList<String>(10); |
| } |
| attrs.ensureCapacity(attrs.size() + 2); |
| // TODO: think about moving interning here instead of parser code |
| attrs.add(key); |
| attrs.add(value); |
| } |
| |
| Data toData(Data data) { |
| Data child = data.createChild(name); |
| if (attrs != null) { |
| Iterator<String> it = attrs.iterator(); |
| while (it.hasNext()) { |
| String key = it.next(); |
| String value = it.next(); |
| child.setAttribute(key, value); |
| } |
| } |
| return child; |
| } |
| } |
| |
| static final String UNNAMED_INPUT = "[UNNAMED_INPUT]"; |
| |
| /** |
| * State information that we pass through the parse methods. Allows parser to be reentrant as all |
| * the state is passed through method calls. |
| */ |
| static class ParseState { |
| final Stack<Data> context = new Stack<Data>(); |
| final Data output; |
| final LineNumberReader lineReader; |
| final ErrorHandler errorHandler; |
| final ResourceLoader resourceLoader; |
| final NewHdfParser hdfParser; |
| final boolean ignoreAttributes; |
| final HdfNameAttrs hdfNameAttrs; |
| final UniqueStack<String> includeStack; |
| final String parsedFileName; |
| |
| String line; |
| Data currentNode; |
| |
| private ParseState(Data output, LineNumberReader lineReader, ErrorHandler errorHandler, |
| ResourceLoader resourceLoader, NewHdfParser hdfParser, String parsedFileName, |
| boolean ignoreAttributes, HdfNameAttrs hdfNameAttrs, UniqueStack<String> includeStack) { |
| this.lineReader = lineReader; |
| this.errorHandler = errorHandler; |
| this.output = output; |
| currentNode = output; |
| this.resourceLoader = resourceLoader; |
| this.hdfParser = hdfParser; |
| this.parsedFileName = parsedFileName; |
| this.ignoreAttributes = ignoreAttributes; |
| this.hdfNameAttrs = hdfNameAttrs; |
| this.includeStack = includeStack; |
| } |
| |
| public static ParseState createNewParseState(Data output, Reader reader, |
| ErrorHandler errorHandler, ResourceLoader resourceLoader, NewHdfParser hdfParser, |
| String parsedFileName, boolean ignoreAttributes) { |
| |
| if (parsedFileName == null) { |
| parsedFileName = UNNAMED_INPUT; |
| } |
| UniqueStack<String> includeStack = new UniqueStack<String>(); |
| includeStack.push(parsedFileName); |
| |
| return new ParseState(output, new LineNumberReader(reader), errorHandler, resourceLoader, |
| hdfParser, parsedFileName, ignoreAttributes, new HdfNameAttrs(), includeStack); |
| } |
| |
| public static ParseState createParseStateForIncludedFile(ParseState originalState, |
| String includeFileName, Reader includeFileReader) { |
| return new ParseState(originalState.output, new LineNumberReader(includeFileReader), |
| originalState.errorHandler, originalState.resourceLoader, originalState.hdfParser, |
| originalState.parsedFileName, originalState.ignoreAttributes, new HdfNameAttrs(), |
| originalState.includeStack); |
| } |
| } |
| |
| |
| /** |
| * Constructor for {@link NewHdfParser}. |
| * |
| * @param internPool - {@link StringInternStrategy} instance used to optimize the HDF parsing. |
| */ |
| public NewHdfParser(StringInternStrategy internPool) { |
| this.internStrategy = internPool; |
| } |
| |
| private static class NewHdfParserFactory implements ParserFactory { |
| private final StringInternStrategy stringInternStrategy; |
| |
| public NewHdfParserFactory(StringInternStrategy stringInternStrategy) { |
| this.stringInternStrategy = stringInternStrategy; |
| } |
| |
| @Override |
| public Parser newInstance() { |
| return new NewHdfParser(stringInternStrategy); |
| } |
| } |
| |
| /** |
| * Creates a {@link ParserFactory} instance. |
| * |
| * <p> |
| * Provided {@code stringInternStrategy} instance will be used by shared all {@link Parser} |
| * objects created by the factory and used to optimize the HDF parsing process by reusing the |
| * String for keys and values. |
| * |
| * @param stringInternStrategy - {@link StringInternStrategy} instance used to optimize the HDF |
| * parsing. |
| * @return an instance of {@link ParserFactory} implementation. |
| */ |
| public static ParserFactory newFactory(StringInternStrategy stringInternStrategy) { |
| return new NewHdfParserFactory(stringInternStrategy); |
| } |
| |
| public void parse(Reader reader, Data output, Parser.ErrorHandler errorHandler, |
| ResourceLoader resourceLoader, String dataFileName, boolean ignoreAttributes) |
| throws IOException { |
| |
| parse(ParseState.createNewParseState(output, reader, errorHandler, resourceLoader, this, |
| dataFileName, ignoreAttributes)); |
| } |
| |
| private void parse(ParseState state) throws IOException { |
| while ((state.line = state.lineReader.readLine()) != null) { |
| String seq = stripWhitespace(state.line); |
| try { |
| parseCommand(seq, state); |
| } catch (OutOfCharsException e) { |
| reportError(state, "End of line was prematurely reached. Parse error."); |
| } |
| } |
| } |
| |
| private static final String INCLUDE_WS = "#include "; |
| |
| private void parseCommand(String seq, ParseState state) throws IOException, OutOfCharsException { |
| if (seq.length() == 0) { |
| // Empty line. |
| return; |
| } |
| if (charAt(seq, 0) == '#') { |
| // If there isn't a match on include then this is a comment and we do nothing. |
| if (matches(seq, 0, INCLUDE_WS)) { |
| // This is an include command |
| int start = skipLeadingWhitespace(seq, INCLUDE_WS.length()); |
| parseInclude(seq, start, state); |
| } |
| return; |
| } else if (charAt(seq, 0) == '}') { |
| if (skipLeadingWhitespace(seq, 1) != seq.length()) { |
| reportError(state, "Extra chars after '}'"); |
| return; |
| } |
| handleAscend(state); |
| } else { |
| parseHdfElement(seq, state); |
| } |
| } |
| |
| private void parseInclude(String seq, int start, ParseState state) throws IOException, |
| OutOfCharsException { |
| int end = seq.length(); |
| if (charAt(seq, start) == '"') { |
| if (charAt(seq, end - 1) == '"') { |
| start++; |
| end--; |
| } else { |
| reportError(state, "Missing '\"' at end of include"); |
| return; |
| } |
| } |
| handleInclude(seq.substring(start, end), state); |
| } |
| |
| private static final int NO_MATCH = -1; |
| |
| private void parseHdfElement(String seq, ParseState state) throws IOException, |
| OutOfCharsException { |
| // Re-use a single element to avoid repeated allocations/trashing (serious |
| // performance impact, 5% of real service performance) |
| HdfNameAttrs element = state.hdfNameAttrs; |
| if (!parseHdfNameAttrs(element, seq, 0, state)) { |
| return; |
| } |
| int index = skipLeadingWhitespace(seq, element.endOfSequence); |
| switch (charAt(seq, index)) { |
| case '{': |
| // Descend |
| if (index + 1 != seq.length()) { |
| reportError(state, "No characters expected after '{'"); |
| return; |
| } |
| handleDescend(state, element); |
| return; |
| case '=': |
| // Assignment |
| index = skipLeadingWhitespace(seq, index + 1); |
| String value = internStrategy.intern(seq.substring(index, seq.length())); |
| handleAssign(state, element, value); |
| return; |
| case ':': |
| if (charAt(seq, index + 1) == '=') { |
| // Copy |
| index = skipLeadingWhitespace(seq, index + 2); |
| String src = parseHdfName(seq, index); |
| if (src == null) { |
| reportError(state, "Invalid HDF name"); |
| return; |
| } |
| if (index + src.length() != seq.length()) { |
| reportError(state, "No characters expected after '{'"); |
| return; |
| } |
| handleCopy(state, element, src); |
| } else { |
| // Link |
| index = skipLeadingWhitespace(seq, index + 1); |
| String src = parseHdfName(seq, index); |
| if (src == null) { |
| reportError(state, "Invalid HDF name"); |
| return; |
| } |
| if (index + src.length() != seq.length()) { |
| reportError(state, "No characters expected after '{'"); |
| return; |
| } |
| handleLink(state, element, src); |
| } |
| return; |
| case '<': |
| if (charAt(seq, index + 1) != '<') { |
| reportError(state, "Expected '<<'"); |
| } |
| index = skipLeadingWhitespace(seq, index + 2); |
| String eomMarker = seq.substring(index, seq.length()); |
| // TODO: think about moving interning to handleAssign() |
| String multilineValue = internStrategy.intern(parseMultilineValue(state, eomMarker)); |
| if (multilineValue == null) { |
| return; |
| } |
| handleAssign(state, element, multilineValue); |
| return; |
| default: |
| reportError(state, "No valid operator"); |
| return; |
| } |
| } |
| |
| /** |
| * This method parses out an HDF element name and any optional attributes into a caller-supplied |
| * HdfNameAttrs object. It returns a {@code boolean} with whether it succeeded to parse. |
| */ |
| private boolean parseHdfNameAttrs(HdfNameAttrs destination, String seq, int index, |
| ParseState state) throws OutOfCharsException { |
| String hdfName = parseHdfName(seq, index); |
| if (hdfName == null) { |
| reportError(state, "Invalid HDF name"); |
| return false; |
| } |
| destination.reset(hdfName); |
| index = skipLeadingWhitespace(seq, index + hdfName.length()); |
| int end = parseAttributes(seq, index, state, destination); |
| if (end == NO_MATCH) { |
| // Error already reported below. |
| return false; |
| } else { |
| destination.endOfSequence = end; |
| return true; |
| } |
| } |
| |
| /** |
| * Parses a valid hdf path name. |
| */ |
| private String parseHdfName(String seq, int index) throws OutOfCharsException { |
| int end = index; |
| while (end < seq.length() && isHdfNameChar(charAt(seq, end))) { |
| end++; |
| } |
| if (end == index) { |
| return null; |
| } |
| return internStrategy.intern(seq.substring(index, end)); |
| } |
| |
| /** |
| * Looks for optional attributes and adds them to the HdfNameAttrs object passed into the method. |
| */ |
| private int parseAttributes(String seq, int index, ParseState state, HdfNameAttrs element) |
| throws OutOfCharsException { |
| if (charAt(seq, index) != '[') { |
| // No attributes to parse |
| return index; |
| } |
| index = skipLeadingWhitespace(seq, index + 1); |
| |
| // If we don't care about attributes, just skip over them. |
| if (state.ignoreAttributes) { |
| while (charAt(seq, index) != ']') { |
| index++; |
| } |
| return index + 1; |
| } |
| |
| boolean first = true; |
| do { |
| if (first) { |
| first = false; |
| } else if (charAt(seq, index) == ',') { |
| index = skipLeadingWhitespace(seq, index + 1); |
| } else { |
| reportError(state, "Error parsing attribute list"); |
| } |
| index = parseAttribute(seq, index, state, element); |
| if (index == NO_MATCH) { |
| // reportError called by parseAttribute already. |
| return NO_MATCH; |
| } |
| index = skipLeadingWhitespace(seq, index); |
| } while (charAt(seq, index) != ']'); |
| return index + 1; |
| } |
| |
| private static final String DEFAULT_ATTR_VALUE = "1"; |
| |
| /** |
| * Parse out a single HDF attribute. If there is no explicit value, use default value of "1" like |
| * in C clearsilver. Returns NO_MATCH if it fails to parse an attribute. |
| */ |
| private int parseAttribute(String seq, int index, ParseState state, HdfNameAttrs element) |
| throws OutOfCharsException { |
| int end = parseAttributeKey(seq, index); |
| if (index == end) { |
| reportError(state, "No valid attribute key"); |
| return NO_MATCH; |
| } |
| String attrKey = internStrategy.intern(seq.substring(index, end)); |
| index = skipLeadingWhitespace(seq, end); |
| if (charAt(seq, index) != '=') { |
| // No value for this attribute key. Use default value of "1" |
| element.addAttribute(attrKey, DEFAULT_ATTR_VALUE); |
| return index; |
| } |
| // We need to parse out the attribute value. |
| index = skipLeadingWhitespace(seq, index + 1); |
| if (charAt(seq, index) == '"') { |
| index++; |
| StringBuilder sb = new StringBuilder(); |
| end = parseQuotedAttributeValue(seq, index, sb); |
| if (end == NO_MATCH) { |
| reportError(state, "Unable to parse quoted attribute value"); |
| return NO_MATCH; |
| } |
| String attrValue = internStrategy.intern(sb.toString()); |
| element.addAttribute(attrKey, attrValue); |
| end++; |
| } else { |
| // Simple attribute that has no whitespace. |
| String attrValue = parseAttributeValue(seq, index, state); |
| if (attrValue == null || attrValue.length() == 0) { |
| reportError(state, "No attribute for key " + attrKey); |
| return NO_MATCH; |
| } |
| |
| attrValue = internStrategy.intern(attrValue); |
| element.addAttribute(attrKey, attrValue); |
| end = index + attrValue.length(); |
| } |
| return end; |
| } |
| |
| /** |
| * Returns the range in the sequence starting at start that corresponds to a valid attribute key. |
| */ |
| private int parseAttributeKey(String seq, int index) throws OutOfCharsException { |
| while (isAlphaNumericChar(charAt(seq, index))) { |
| index++; |
| } |
| return index; |
| } |
| |
| /** |
| * Parses a quoted attribute value. Unescapes octal characters and \n, \r, \t, \", etc. |
| */ |
| private int parseQuotedAttributeValue(String seq, int index, StringBuilder sb) |
| throws OutOfCharsException { |
| char c; |
| while ((c = charAt(seq, index)) != '"') { |
| if (c == '\\') { |
| // Escaped character. Look for 1 to 3 digits in a row as octal or n,t,r. |
| index++; |
| char next = charAt(seq, index); |
| if (isNumericChar(next)) { |
| // Parse the next 1 to 3 characters if they are digits. Treat it as an octal code. |
| int val = next - '0'; |
| if (isNumericChar(charAt(seq, index + 1))) { |
| index++; |
| val = val * 8 + (charAt(seq, index) - '0'); |
| if (isNumericChar(charAt(seq, index + 1))) { |
| index++; |
| val = val * 8 + (charAt(seq, index) - '0'); |
| } |
| } |
| c = (char) val; |
| } else if (next == 'n') { |
| c = '\n'; |
| } else if (next == 't') { |
| c = '\t'; |
| } else if (next == 'r') { |
| c = '\r'; |
| } else { |
| // Regular escaped char like " or / |
| c = next; |
| } |
| } |
| sb.append(c); |
| index++; |
| } |
| return index; |
| } |
| |
| /** |
| * Parses a simple attribute value that cannot have any whitespace or specific punctuation |
| * reserved by the HDF grammar. |
| */ |
| private String parseAttributeValue(String seq, int index, ParseState state) |
| throws OutOfCharsException { |
| int end = index; |
| char c = charAt(seq, end); |
| while (c != ',' && c != ']' && c != '"' && !Character.isWhitespace(c)) { |
| end++; |
| c = charAt(seq, end); |
| } |
| return seq.substring(index, end); |
| } |
| |
| private String parseMultilineValue(ParseState state, String eomMarker) throws IOException { |
| StringBuilder sb = new StringBuilder(256); |
| String line; |
| while ((line = state.lineReader.readLine()) != null) { |
| if (line.startsWith(eomMarker) |
| && skipLeadingWhitespace(line, eomMarker.length()) == line.length()) { |
| return sb.toString(); |
| } else { |
| sb.append(line).append('\n'); |
| } |
| } |
| reportError(state, "EOM " + eomMarker + " never found"); |
| return null; |
| } |
| |
| // ////////////////////////////////////////////////////////////////////////// |
| // |
| // Handlers |
| |
| private void handleDescend(ParseState state, HdfNameAttrs element) { |
| Data child = handleNodeCreation(state.currentNode, element); |
| state.context.push(state.currentNode); |
| state.currentNode = child; |
| } |
| |
| private Data handleNodeCreation(Data node, HdfNameAttrs element) { |
| return element.toData(node); |
| } |
| |
| private void handleAssign(ParseState state, HdfNameAttrs element, String value) { |
| // TODO: think about moving interning here |
| Data child = handleNodeCreation(state.currentNode, element); |
| child.setValue(value); |
| } |
| |
| private void handleCopy(ParseState state, HdfNameAttrs element, String srcName) { |
| Data child = handleNodeCreation(state.currentNode, element); |
| Data src = state.output.getChild(srcName); |
| if (src != null) { |
| child.setValue(src.getValue()); |
| } else { |
| child.setValue(""); |
| } |
| } |
| |
| private void handleLink(ParseState state, HdfNameAttrs element, String srcName) { |
| Data child = handleNodeCreation(state.currentNode, element); |
| child.setSymlink(state.output.createChild(srcName)); |
| } |
| |
| private void handleAscend(ParseState state) { |
| if (state.context.isEmpty()) { |
| reportError(state, "Too many '}'"); |
| return; |
| } |
| state.currentNode = state.context.pop(); |
| } |
| |
| private void handleInclude(String seq, ParseState state) throws IOException { |
| String includeFileName = internStrategy.intern(seq); |
| |
| // Load the file |
| Reader reader = state.resourceLoader.open(includeFileName); |
| if (reader == null) { |
| reportError(state, "Unable to find file " + includeFileName); |
| return; |
| } |
| |
| // Check whether we are in include loop |
| if (!state.includeStack.push(includeFileName)) { |
| reportError(state, createIncludeStackTraceMessage(state.includeStack, includeFileName)); |
| return; |
| } |
| |
| // Parse the file |
| state.hdfParser.parse(ParseState |
| .createParseStateForIncludedFile(state, includeFileName, reader)); |
| |
| if (!includeFileName.equals(state.includeStack.pop())) { |
| // Include stack trace is corrupted |
| throw new IllegalStateException("Unable to find on include stack: " + includeFileName); |
| } |
| } |
| |
| private String createIncludeStackTraceMessage(UniqueStack<String> includeStack, |
| String includeFileName) { |
| StringBuilder message = new StringBuilder(); |
| message.append("File included twice: "); |
| message.append(includeFileName); |
| |
| message.append(" Include stack: "); |
| for (String fileName : includeStack) { |
| message.append(fileName); |
| message.append(" -> "); |
| } |
| message.append(includeFileName); |
| return message.toString(); |
| } |
| |
| // ///////////////////////////////////////////////////////////////////////// |
| // |
| // Character values |
| |
| private static boolean isNumericChar(char c) { |
| if ('0' <= c && c <= '9') { |
| return true; |
| } else { |
| return false; |
| } |
| } |
| |
| private static boolean isAlphaNumericChar(char c) { |
| if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9')) { |
| return true; |
| } else { |
| return false; |
| } |
| } |
| |
| private static boolean isHdfNameChar(char c) { |
| if (isAlphaNumericChar(c) || c == '_' || c == '.') { |
| return true; |
| } else { |
| return false; |
| } |
| } |
| |
| private static String stripWhitespace(String seq) { |
| int start = skipLeadingWhitespace(seq, 0); |
| int end = seq.length() - 1; |
| while (end > start && Character.isWhitespace(seq.charAt(end))) { |
| --end; |
| } |
| if (start == 0 && end == seq.length() - 1) { |
| return seq; |
| } else { |
| return seq.substring(start, end + 1); |
| } |
| } |
| |
| private static int skipLeadingWhitespace(String seq, int index) { |
| while (index < seq.length() && Character.isWhitespace(seq.charAt(index))) { |
| index++; |
| } |
| return index; |
| } |
| |
| /** |
| * Determines if a character sequence appears in the given sequence starting at a specified index. |
| * |
| * @param seq the sequence that we want to see if it contains the string match. |
| * @param start the index into seq where we want to check for match |
| * @param match the String we want to look for in the sequence. |
| * @return {@code true} if the string match appears in seq starting at the index start, {@code |
| * false} otherwise. |
| */ |
| private static boolean matches(String seq, int start, String match) { |
| if (seq.length() - start < match.length()) { |
| return false; |
| } |
| for (int i = 0; i < match.length(); i++) { |
| if (match.charAt(i) != seq.charAt(start + i)) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| /** |
| * Reads the character at the specified index in the given String. Throws an exception to be |
| * caught above if the index is out of range. |
| */ |
| private static char charAt(String seq, int index) throws OutOfCharsException { |
| if (0 <= index && index < seq.length()) { |
| return seq.charAt(index); |
| } else { |
| throw new OutOfCharsException(); |
| } |
| } |
| |
| |
| private static void reportError(ParseState state, String errorMessage) { |
| if (state.errorHandler != null) { |
| state.errorHandler.error(state.lineReader.getLineNumber(), state.line, state.parsedFileName, |
| errorMessage); |
| } else { |
| throw new RuntimeException("Parse Error on line " + state.lineReader.getLineNumber() + ": " |
| + errorMessage + " : " + state.line); |
| } |
| } |
| } |