| /* |
| * Copyright (C) 2010 Google Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package com.google.clearsilver.jsilver.autoescape; |
| |
| import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR; |
| import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_CSS; |
| import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_JS; |
| import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_UNQUOTED_JS; |
| import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_URI; |
| import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_URI_START; |
| import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_HTML; |
| import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_JS; |
| import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_JS_UNQUOTED; |
| import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_STYLE; |
| import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR; |
| import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_CSS; |
| import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_JS; |
| import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_UNQUOTED_JS; |
| import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_URI; |
| import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_URI_START; |
| import com.google.clearsilver.jsilver.exceptions.JSilverAutoEscapingException; |
| import com.google.streamhtmlparser.ExternalState; |
| import com.google.streamhtmlparser.HtmlParser; |
| import com.google.streamhtmlparser.HtmlParserFactory; |
| import com.google.streamhtmlparser.ParseException; |
| |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.Map; |
| |
| /** |
| * Encapsulates auto escaping logic. |
| */ |
| public class AutoEscapeContext { |
| /** |
| * Map of content-type to corresponding {@code HtmlParser.Mode}, used by {@code setContentType} to |
| * specify the content type of provided input. Valid values and the corresponding mode are: <br> |
| * <table> |
| * <tr> |
| * <td>text/html</td> |
| * <td>HtmlParser.Mode.HTML</td> |
| * </tr> |
| * <tr> |
| * <td>text/plain</td> |
| * <td>HtmlParser.Mode.HTML</td> |
| * </tr> |
| * <tr> |
| * <td>application/javascript</td> |
| * <td>HtmlParser.Mode.JS</td> |
| * </tr> |
| * <tr> |
| * <td>application/json</td> |
| * <td>HtmlParser.Mode.JS</td> |
| * </tr> |
| * <tr> |
| * <td>text/javascript</td> |
| * <td>HtmlParser.Mode.JS</td> |
| * </tr> |
| * <tr> |
| * <td>text/css</td> |
| * <td>HtmlParser.Mode.CSS</td> |
| * </tr> |
| * </table> |
| * |
| * @see #setContentType |
| */ |
| public static final Map<String, HtmlParser.Mode> CONTENT_TYPE_LIST; |
| |
| // These options are used to provide extra information to HtmlParserFactory.createParserInMode or |
| // HtmlParserFactory.createParserInAttribute, which is required for certain modes. |
| private static final HashSet<HtmlParserFactory.AttributeOptions> quotedJsAttributeOption; |
| private static final HashSet<HtmlParserFactory.AttributeOptions> partialUrlAttributeOption; |
| private static final HashSet<HtmlParserFactory.ModeOptions> jsModeOption; |
| |
| private HtmlParser htmlParser; |
| |
| static { |
| quotedJsAttributeOption = new HashSet<HtmlParserFactory.AttributeOptions>(); |
| quotedJsAttributeOption.add(HtmlParserFactory.AttributeOptions.JS_QUOTED); |
| |
| partialUrlAttributeOption = new HashSet<HtmlParserFactory.AttributeOptions>(); |
| partialUrlAttributeOption.add(HtmlParserFactory.AttributeOptions.URL_PARTIAL); |
| |
| jsModeOption = new HashSet<HtmlParserFactory.ModeOptions>(); |
| jsModeOption.add(HtmlParserFactory.ModeOptions.JS_QUOTED); |
| |
| CONTENT_TYPE_LIST = new HashMap<String, HtmlParser.Mode>(); |
| CONTENT_TYPE_LIST.put("text/html", HtmlParser.Mode.HTML); |
| CONTENT_TYPE_LIST.put("text/plain", HtmlParser.Mode.HTML); |
| CONTENT_TYPE_LIST.put("application/javascript", HtmlParser.Mode.JS); |
| CONTENT_TYPE_LIST.put("application/json", HtmlParser.Mode.JS); |
| CONTENT_TYPE_LIST.put("text/javascript", HtmlParser.Mode.JS); |
| CONTENT_TYPE_LIST.put("text/css", HtmlParser.Mode.CSS); |
| } |
| |
| /** |
| * Name of resource being auto escaped. Will be used in error and display messages. |
| */ |
| private String resourceName; |
| |
| public AutoEscapeContext() { |
| this(EscapeMode.ESCAPE_AUTO, null); |
| } |
| |
| /** |
| * Create a new context in the state represented by mode. |
| * |
| * @param mode EscapeMode object. |
| */ |
| public AutoEscapeContext(EscapeMode mode) { |
| this(mode, null); |
| } |
| |
| /** |
| * Create a new context in the state represented by mode. If a non-null resourceName is provided, |
| * it will be used in displaying error messages. |
| * |
| * @param mode The initial EscapeMode for this context |
| * @param resourceName Name of the resource being auto escaped. |
| */ |
| public AutoEscapeContext(EscapeMode mode, String resourceName) { |
| this.resourceName = resourceName; |
| htmlParser = createHtmlParser(mode); |
| } |
| |
| /** |
| * Create a new context that is a copy of the current state of this context. |
| * |
| * @return New {@code AutoEscapeContext} that is a snapshot of the current state of this context. |
| */ |
| public AutoEscapeContext cloneCurrentEscapeContext() { |
| AutoEscapeContext autoEscapeContext = new AutoEscapeContext(); |
| autoEscapeContext.resourceName = resourceName; |
| autoEscapeContext.htmlParser = HtmlParserFactory.createParser(htmlParser); |
| return autoEscapeContext; |
| } |
| |
| /** |
| * Sets the current position in the resource being auto escaped. Useful for generating detailed |
| * error messages. |
| * |
| * @param line line number. |
| * @param column column number within line. |
| */ |
| public void setCurrentPosition(int line, int column) { |
| htmlParser.setLineNumber(line); |
| htmlParser.setColumnNumber(column); |
| } |
| |
| /** |
| * Returns the name of the resource currently being auto escaped. |
| */ |
| public String getResourceName() { |
| return resourceName; |
| } |
| |
| /** |
| * Returns the current line number within the resource being auto escaped. |
| */ |
| public int getLineNumber() { |
| return htmlParser.getLineNumber(); |
| } |
| |
| /** |
| * Returns the current column number within the resource being auto escaped. |
| */ |
| public int getColumnNumber() { |
| return htmlParser.getColumnNumber(); |
| } |
| |
| private HtmlParser createHtmlParser(EscapeMode mode) { |
| switch (mode) { |
| case ESCAPE_AUTO: |
| case ESCAPE_AUTO_HTML: |
| return HtmlParserFactory.createParser(); |
| |
| case ESCAPE_AUTO_JS_UNQUOTED: |
| // <script>START HERE |
| return HtmlParserFactory.createParserInMode(HtmlParser.Mode.JS, null); |
| |
| case ESCAPE_AUTO_JS: |
| // <script> var a = 'START HERE |
| return HtmlParserFactory.createParserInMode(HtmlParser.Mode.JS, jsModeOption); |
| |
| case ESCAPE_AUTO_STYLE: |
| // <style>START HERE |
| return HtmlParserFactory.createParserInMode(HtmlParser.Mode.CSS, null); |
| |
| case ESCAPE_AUTO_ATTR: |
| // <input text="START HERE |
| return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.REGULAR, true, null); |
| |
| case ESCAPE_AUTO_UNQUOTED_ATTR: |
| // <input text=START HERE |
| return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.REGULAR, false, null); |
| |
| case ESCAPE_AUTO_ATTR_URI: |
| // <a href="http://www.google.com/a?START HERE |
| return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.URI, true, |
| partialUrlAttributeOption); |
| |
| case ESCAPE_AUTO_UNQUOTED_ATTR_URI: |
| // <a href=http://www.google.com/a?START HERE |
| return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.URI, false, |
| partialUrlAttributeOption); |
| |
| case ESCAPE_AUTO_ATTR_URI_START: |
| // <a href="START HERE |
| return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.URI, true, null); |
| |
| case ESCAPE_AUTO_UNQUOTED_ATTR_URI_START: |
| // <a href=START HERE |
| return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.URI, false, null); |
| |
| case ESCAPE_AUTO_ATTR_JS: |
| // <input onclick="doClick('START HERE |
| return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.JS, true, |
| quotedJsAttributeOption); |
| |
| case ESCAPE_AUTO_ATTR_UNQUOTED_JS: |
| // <input onclick="doClick(START HERE |
| return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.JS, true, null); |
| |
| case ESCAPE_AUTO_UNQUOTED_ATTR_JS: |
| // <input onclick=doClick('START HERE |
| throw new JSilverAutoEscapingException( |
| "Attempting to start HTML parser in unsupported mode" + mode, resourceName); |
| |
| case ESCAPE_AUTO_UNQUOTED_ATTR_UNQUOTED_JS: |
| // <input onclick=doClick(START HERE |
| return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.JS, false, null); |
| |
| case ESCAPE_AUTO_ATTR_CSS: |
| // <input style="START HERE |
| return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.STYLE, true, null); |
| |
| case ESCAPE_AUTO_UNQUOTED_ATTR_CSS: |
| // <input style=START HERE |
| return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.STYLE, false, null); |
| |
| default: |
| throw new JSilverAutoEscapingException("Attempting to start HTML parser in invalid mode" |
| + mode, resourceName); |
| } |
| } |
| |
| /** |
| * Parse the given data and update internal state accordingly. |
| * |
| * @param data Input to parse, usually the contents of a template. |
| */ |
| public void parseData(String data) { |
| try { |
| htmlParser.parse(data); |
| } catch (ParseException e) { |
| // ParseException displays the proper position, so do not store line and column |
| // number here. |
| throw new JSilverAutoEscapingException("Error in HtmlParser: " + e, resourceName); |
| } |
| } |
| |
| /** |
| * Lets the AutoEscapeContext know that some input was skipped. |
| * |
| * This method will usually be called for variables in the input stream. The AutoEscapeContext is |
| * told that the input stream contained some additional data but does not get to see the data. It |
| * can adjust its internal state accordingly. |
| */ |
| public void insertText() { |
| try { |
| htmlParser.insertText(); |
| } catch (ParseException e) { |
| throw new JSilverAutoEscapingException("Error during insertText(): " + e, resourceName, |
| htmlParser.getLineNumber(), htmlParser.getColumnNumber()); |
| } |
| } |
| |
| /** |
| * Determines whether an included template that begins in state {@code start} is allowed to end in |
| * state {@code end}. Usually included templates are only allowed to end in the same context they |
| * begin in. This lets auto escaping parse the remainder of the parent template without needing to |
| * know the ending context of the included template. However, there is one exception where auto |
| * escaping will allow a different ending context: if the included template is a URI attribute |
| * value, it is allowed to change context from {@code ATTR_URI_START} to {@code ATTR_URI}. This |
| * does not cause any issues because the including template will call {@code insertText} when it |
| * encounters the include command, and {@code insertText} will cause the HTML parser to switch its |
| * internal state in the same way. |
| */ |
| public boolean isPermittedStateChangeForIncludes(AutoEscapeState start, AutoEscapeState end) { |
| return start.equals(end) |
| || (start.equals(AutoEscapeState.ATTR_URI_START) && end.equals(AutoEscapeState.ATTR_URI)) |
| || (start.equals(AutoEscapeState.UNQUOTED_ATTR_URI_START) && end |
| .equals(AutoEscapeState.UNQUOTED_ATTR_URI)); |
| } |
| |
| /** |
| * Determine the correct escaping to apply for a variable. |
| * |
| * Looks at the current state of the htmlParser, and determines what escaping to apply to a |
| * variable in this state. |
| * |
| * @return Name of escaping function to use in this state. |
| */ |
| public String getEscapingFunctionForCurrentState() { |
| return getCurrentState().getFunctionName(); |
| } |
| |
| /** |
| * Returns the EscapeMode which will bring AutoEscapeContext into this state. |
| * |
| * Initializing a new AutoEscapeContext with this EscapeMode will bring it into the state that the |
| * current AutoEscapeContext object is in. |
| * |
| * @return An EscapeMode object. |
| */ |
| public EscapeMode getEscapeModeForCurrentState() { |
| return getCurrentState().getEscapeMode(); |
| } |
| |
| /** |
| * Calls the HtmlParser API to determine current state. |
| * |
| * This function is mostly a wrapper around the HtmlParser API. It gathers all the necessary |
| * information using that API and returns a single enum representing the current state. |
| * |
| * @return AutoEscapeState enum representing the current state. |
| */ |
| public AutoEscapeState getCurrentState() { |
| ExternalState state = htmlParser.getState(); |
| String tag = htmlParser.getTag(); |
| |
| // Currently we do not do any escaping inside CSS blocks, so ignore them. |
| if (state.equals(HtmlParser.STATE_CSS_FILE) || tag.equals("style")) { |
| |
| return AutoEscapeState.STYLE; |
| } |
| |
| // Handle variables inside <script> tags. |
| if (htmlParser.inJavascript() && !state.equals(HtmlParser.STATE_VALUE)) { |
| if (htmlParser.isJavascriptQuoted()) { |
| // <script> var a = "<?cs var: Blah ?>"; </script> |
| return AutoEscapeState.JS; |
| } else { |
| // <script> var a = <?cs var: Blah ?>; </script> |
| // No quotes around the variable, hence it can inject arbitrary javascript. |
| // So severely restrict the values it may contain. |
| return AutoEscapeState.JS_UNQUOTED; |
| } |
| } |
| |
| // Inside an HTML tag or attribute name |
| if (state.equals(HtmlParser.STATE_ATTR) || state.equals(HtmlParser.STATE_TAG)) { |
| return AutoEscapeState.ATTR; |
| // TODO: Need a strict validation function for tag and attribute names. |
| } else if (state.equals(HtmlParser.STATE_VALUE)) { |
| // Inside an HTML attribute value |
| return getCurrentAttributeState(); |
| } else if (state.equals(HtmlParser.STATE_COMMENT) || state.equals(HtmlParser.STATE_TEXT)) { |
| // Default is assumed to be HTML body |
| // <b>Hello <?cs var: UserName ?></b> : |
| return AutoEscapeState.HTML; |
| } |
| |
| throw new JSilverAutoEscapingException("Invalid state received from HtmlParser: " |
| + state.toString(), resourceName, htmlParser.getLineNumber(), htmlParser.getColumnNumber()); |
| } |
| |
| private AutoEscapeState getCurrentAttributeState() { |
| HtmlParser.ATTR_TYPE type = htmlParser.getAttributeType(); |
| boolean attrQuoted = htmlParser.isAttributeQuoted(); |
| |
| switch (type) { |
| case REGULAR: |
| // <input value="<?cs var: Blah ?>"> : |
| if (attrQuoted) { |
| return AutoEscapeState.ATTR; |
| } else { |
| return AutoEscapeState.UNQUOTED_ATTR; |
| } |
| |
| case URI: |
| if (htmlParser.isUrlStart()) { |
| // <a href="<?cs var: X ?>"> |
| if (attrQuoted) { |
| return AutoEscapeState.ATTR_URI_START; |
| } else { |
| return AutoEscapeState.UNQUOTED_ATTR_URI_START; |
| } |
| } else { |
| // <a href="http://www.google.com/a?x=<?cs var: X ?>"> |
| if (attrQuoted) { |
| // TODO: Html escaping because that is what Clearsilver does right now. |
| // May change this to url escaping soon. |
| return AutoEscapeState.ATTR_URI; |
| } else { |
| return AutoEscapeState.UNQUOTED_ATTR_URI; |
| } |
| } |
| |
| case JS: |
| if (htmlParser.isJavascriptQuoted()) { |
| /* |
| * Note: js_escape() hex encodes all html metacharacters. Therefore it is safe to not do |
| * an HTML escape around this. |
| */ |
| if (attrQuoted) { |
| // <input onclick="alert('<?cs var:Blah ?>');"> |
| return AutoEscapeState.ATTR_JS; |
| } else { |
| // <input onclick=alert('<?cs var: Blah ?>');> |
| return AutoEscapeState.UNQUOTED_ATTR_JS; |
| } |
| } else { |
| if (attrQuoted) { |
| /* <input onclick="alert(<?cs var:Blah ?>);"> */ |
| return AutoEscapeState.ATTR_UNQUOTED_JS; |
| } else { |
| |
| /* <input onclick=alert(<?cs var:Blah ?>);> */ |
| return AutoEscapeState.UNQUOTED_ATTR_UNQUOTED_JS; |
| } |
| } |
| |
| case STYLE: |
| // <input style="border:<?cs var: FancyBorder ?>"> : |
| if (attrQuoted) { |
| return AutoEscapeState.ATTR_CSS; |
| } else { |
| return AutoEscapeState.UNQUOTED_ATTR_CSS; |
| } |
| |
| default: |
| throw new JSilverAutoEscapingException("Invalid attribute type in HtmlParser: " + type, |
| resourceName, htmlParser.getLineNumber(), htmlParser.getColumnNumber()); |
| } |
| } |
| |
| /** |
| * Resets the state of the underlying html parser to a state consistent with the {@code |
| * contentType} provided. This method should be used when the starting auto escaping context of a |
| * resource cannot be determined from its contents - for example, a CSS stylesheet or a javascript |
| * source file. |
| * |
| * @param contentType MIME type header representing the content being parsed. |
| * @see #CONTENT_TYPE_LIST |
| */ |
| public void setContentType(String contentType) { |
| HtmlParser.Mode mode = CONTENT_TYPE_LIST.get(contentType); |
| if (mode == null) { |
| throw new JSilverAutoEscapingException("Invalid content type specified: " + contentType, |
| resourceName, htmlParser.getLineNumber(), htmlParser.getColumnNumber()); |
| |
| } |
| htmlParser.resetMode(mode); |
| } |
| |
| /** |
| * Enum representing states of the data being parsed. |
| * |
| * This enumeration lists all the states in which autoescaping would have some effect. |
| * |
| */ |
| public static enum AutoEscapeState { |
| HTML("html", ESCAPE_AUTO_HTML), JS("js", ESCAPE_AUTO_JS), STYLE("css", ESCAPE_AUTO_STYLE), JS_UNQUOTED( |
| "js_check_number", ESCAPE_AUTO_JS_UNQUOTED), ATTR("html", ESCAPE_AUTO_ATTR), UNQUOTED_ATTR( |
| "html_unquoted", ESCAPE_AUTO_UNQUOTED_ATTR), ATTR_URI("html", ESCAPE_AUTO_ATTR_URI), UNQUOTED_ATTR_URI( |
| "html_unquoted", ESCAPE_AUTO_UNQUOTED_ATTR_URI), ATTR_URI_START("url_validate", |
| ESCAPE_AUTO_ATTR_URI_START), UNQUOTED_ATTR_URI_START("url_validate_unquoted", |
| ESCAPE_AUTO_UNQUOTED_ATTR_URI_START), ATTR_JS("js", ESCAPE_AUTO_ATTR_JS), ATTR_UNQUOTED_JS( |
| "js_check_number", ESCAPE_AUTO_ATTR_UNQUOTED_JS), UNQUOTED_ATTR_JS("js_attr_unquoted", |
| ESCAPE_AUTO_UNQUOTED_ATTR_JS), UNQUOTED_ATTR_UNQUOTED_JS("js_check_number", |
| ESCAPE_AUTO_UNQUOTED_ATTR_UNQUOTED_JS), ATTR_CSS("css", ESCAPE_AUTO_ATTR_CSS), UNQUOTED_ATTR_CSS( |
| "css_unquoted", ESCAPE_AUTO_UNQUOTED_ATTR_CSS); |
| |
| private final String functionName; |
| private final EscapeMode escapeMode; |
| |
| private AutoEscapeState(String functionName, EscapeMode mode) { |
| this.functionName = functionName; |
| this.escapeMode = mode; |
| } |
| |
| public String getFunctionName() { |
| return functionName; |
| } |
| |
| public EscapeMode getEscapeMode() { |
| return escapeMode; |
| } |
| } |
| } |