// blob: 718417d1657c3ba6bbe0340c270a916f01373353
// Copyright (c) 2011, Mike Samuel
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the OWASP nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
package org.owasp.html;
final class CssGrammar {
/**
 * Skips over malformed input so that parsing can resume at a plausible
 * declaration boundary.
 *
 * <p>Tokens are consumed until one of the following happens:
 * <ul>
 *   <li>a semicolon is seen, which is consumed before returning;</li>
 *   <li>a closing bracket balances an opening bracket seen during this call,
 *       in which case the iterator is left positioned on that closing
 *       bracket;</li>
 *   <li>a closing bracket is seen that has no matching opener in this call,
 *       which is consumed before returning;</li>
 *   <li>the iterator runs out of tokens.</li>
 * </ul>
 *
 * @param it the token iterator to move forward.
 */
private static void errorRecoveryUntilSemiOrCloseBracket(
    CssTokens.TokenIterator it) {
  // Number of brackets opened, and not yet closed, since recovery started.
  int openBrackets = 0;
  while (it.hasNext()) {
    switch (it.type()) {
      case SEMICOLON:
        it.advance();
        return;
      case LEFT_CURLY:
      case LEFT_PAREN:
      case LEFT_SQUARE:
        openBrackets++;
        break;
      case RIGHT_CURLY:
      case RIGHT_PAREN:
      case RIGHT_SQUARE:
        openBrackets--;
        if (openBrackets == 0) {
          // Balanced: stop while still positioned on the closing bracket.
          return;
        }
        if (openBrackets < 0) {
          // Unmatched closer: step past it before stopping.
          it.advance();
          return;
        }
        break;
      default:
        break;
    }
    it.advance();
  }
}
/**
 * Lexes a group of CSS declarations and reports each property found to the
 * handler.
 *
 * <p>A property is recognised as an identifier followed (ignoring spaces) by
 * a colon.  For each one the handler receives {@code startProperty} with the
 * lower-cased name, then the events produced for the value tokens, then
 * {@code endProperty}.  Anything that does not look like the start of a
 * property is skipped via error recovery.
 *
 * @param css the CSS text to parse.
 * @param handler receives the parse events.
 */
static void parsePropertyGroup(String css, PropertyHandler handler) {
  CssTokens.TokenIterator cursor = CssTokens.lex(css).iterator();
  while (cursor.hasTokenAfterSpace()) {
    String propertyName = null;
    boolean looksLikeProperty = false;
    // A candidate property starts with an identifier ...
    if (cursor.type() == CssTokens.TokenType.IDENT) {
      propertyName = cursor.next();
      // ... which must be followed by a colon.
      looksLikeProperty =
          cursor.hasTokenAfterSpace() && ":".equals(cursor.token());
    }
    if (!looksLikeProperty) {
      errorRecoveryUntilSemiOrCloseBracket(cursor);
      continue;
    }
    // Step over the colon and hand the value tokens to the handler.
    cursor.advance();
    handler.startProperty(Strings.toLowerCase(propertyName));
    parsePropertyValue(cursor, handler);
    handler.endProperty();
  }
}
/**
 * Reports the tokens of one property value to the handler.
 *
 * <p>Processing stops after a semicolon has been consumed or when the
 * iterator has no more tokens.  A function token causes the spliced
 * sub-iterator to be parsed recursively between {@code startFunction} and
 * {@code endFunction} events.
 *
 * @param it the iterator positioned at the first token of the value.
 * @param handler receives one event per significant token.
 */
private static void parsePropertyValue(
    CssTokens.TokenIterator it, PropertyHandler handler) {
  while (it.hasNext()) {
    CssTokens.TokenType kind = it.type();
    String text = it.token();
    switch (kind) {
      case SEMICOLON:
        // End of this value: consume the terminator and stop.
        it.advance();
        return;
      case FUNCTION: {
        CssTokens.TokenIterator arguments = it.spliceToEnd();
        handler.startFunction(text);
        parsePropertyValue(arguments, handler);
        handler.endFunction(text);
        // Go straight to the next iteration without the trailing advance().
        continue;
      }
      case LEFT_CURLY:
      case LEFT_PAREN:
      case LEFT_SQUARE:
      case RIGHT_CURLY:
      case RIGHT_PAREN:
      case RIGHT_SQUARE:
      case COMMA:
      case COLON:
      case DELIM:
        handler.punctuation(text);
        break;
      case DIMENSION:
      case NUMBER:
      case PERCENTAGE:
        handler.quantity(text);
        break;
      case IDENT:
        handler.identifier(text);
        break;
      case STRING:
        handler.quotedString(text);
        break;
      case URL:
        handler.url(text);
        break;
      case HASH_UNRESTRICTED: {
        // Only hashes whose token text is 4 or 7 characters long are reported.
        int length = text.length();
        if (length == 4 || length == 7) {
          handler.hash(text);
        }
        break;
      }
      case AT:
      case BAD_DIMENSION:
      case COLUMN:
      case DOT_IDENT:
      case HASH_ID:
      case MATCH:
      case UNICODE_RANGE:
      case WHITESPACE:
        // Consumed without notifying the handler.
        break;
    }
    it.advance();
  }
}
/**
 * Decodes any escape sequences and strips any quotes from the input.
 *
 * <p>If the token is at least two characters long and starts and ends with
 * the same quote character ({@code "} or {@code '}), those two delimiters are
 * removed.  Within the remaining content:
 * <ul>
 *   <li>{@code \} followed by one to six hex digits decodes to the code point
 *       with that value; a single space or tab directly after the digits is
 *       part of the escape and is dropped.  A value of zero, a surrogate, or
 *       a value above {@code U+10FFFF} decodes to {@code U+FFFD}.</li>
 *   <li>{@code \} followed by any other character decodes to that
 *       character.</li>
 *   <li>{@code \} with no character left in the content to escape is kept
 *       verbatim rather than causing an exception.</li>
 * </ul>
 *
 * @param token the raw CSS token text; must not be null.
 * @return the decoded content.  The same {@code token} instance is returned
 *     when no quotes were stripped and no escape was decoded.
 */
static String cssContent(String token) {
  int n = token.length();  // Exclusive end of the content within token.
  int pos = 0;             // Start of the content not yet copied to sb.
  StringBuilder sb = null;
  if (n >= 2) {
    char ch0 = token.charAt(0);
    if ((ch0 == '"' || ch0 == '\'') && ch0 == token.charAt(n - 1)) {
      pos = 1;
      --n;
      sb = new StringBuilder(n);
    }
  }
  // CSS limits a hex escape to at most six hex digits.
  final int maxHexDigits = 6;
  for (int esc; (esc = token.indexOf('\\', pos)) >= 0;) {
    int end = esc + 2;
    // A backslash that is the last character of the content has nothing to
    // escape.  Stop decoding so that it is copied through literally below
    // instead of reading past the content or swallowing the closing quote.
    if (end > n) { break; }
    if (sb == null) { sb = new StringBuilder(n); }
    sb.append(token, pos, esc);
    int codepoint = token.charAt(end - 1);
    if (isHex(codepoint)) {
      // Parse \hhhhh<opt-break> where hhhhh is one to six hex digits
      // and <opt-break> is an optional space or tab character that can be
      // used to separate an escape sequence from a following literal hex
      // digit.
      int hexLimit = Math.min(n, esc + 1 + maxHexDigits);
      while (end < hexLimit && isHex(token.charAt(end))) { ++end; }
      // At most six hex digits, so this always fits in an int.
      codepoint = Integer.parseInt(token.substring(esc + 1, end), 16);
      boolean isSurrogate = Character.MIN_SURROGATE <= codepoint
          && codepoint <= Character.MAX_SURROGATE;
      if (codepoint == 0 || isSurrogate
          || codepoint > Character.MAX_CODE_POINT) {
        // Not a usable scalar value.  Substitute the replacement character
        // instead of letting appendCodePoint throw or emitting NUL.
        codepoint = 0xfffd;
      }
      if (end < n) {
        char ch = token.charAt(end);
        if (ch == ' ' || ch == '\t') {  // Ignorable hex follower.
          ++end;
        }
      }
    }
    sb.appendCodePoint(codepoint);
    pos = end;
  }
  if (sb == null) { return token; }
  return sb.append(token, pos, n).toString();
}

/**
 * Tests whether a code point is an ASCII hexadecimal digit.
 *
 * @param codepoint the code point to test.
 * @return true for {@code 0-9}, {@code A-F} and {@code a-f}.
 */
private static boolean isHex(int codepoint) {
  return ('0' <= codepoint && codepoint <= '9')
      || ('A' <= codepoint && codepoint <= 'F')
      || ('a' <= codepoint && codepoint <= 'f');
}
/**
 * Receives the events produced while parsing a group of CSS properties.
 * For each property the parser calls {@link #startProperty}, then one event
 * per reported value token, then {@link #endProperty}.
 */
interface PropertyHandler {
/** Called when a property name followed by a colon has been recognised; the name is passed lower-cased. */
void startProperty(String propertyName);
/** Called for a dimension, number or percentage token in a property value. */
void quantity(String token);
/** Called for an identifier token in a property value. */
void identifier(String token);
/** Called for an unrestricted hash token whose text is 4 or 7 characters long. */
void hash(String token);
/** Called for a string token in a property value. */
void quotedString(String token);
/** Called for a URL token in a property value. */
void url(String token);
/** Called for bracket, comma, colon and other delimiter tokens in a property value. */
void punctuation(String token);
/** Called with a function token before the function's spliced argument tokens are parsed. */
void startFunction(String token);
/** Called with the same function token once its argument tokens have been parsed. */
void endFunction(String token);
/** Called once the value of the current property has been parsed. */
void endProperty();
}
}