src/main/org/owasp/html/CssGrammar.java - platform/external/owasp/sanitizer - Git at Google

 // Copyright (c) 2011, Mike Samuel
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
 // are met:
 //
 // Redistributions of source code must retain the above copyright
 // notice, this list of conditions and the following disclaimer.
 // Redistributions in binary form must reproduce the above copyright
 // notice, this list of conditions and the following disclaimer in the
 // documentation and/or other materials provided with the distribution.
 // Neither the name of the OWASP nor the names of its contributors may
 // be used to endorse or promote products derived from this software
 // without specific prior written permission.
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 // POSSIBILITY OF SUCH DAMAGE.

 package org.owasp.html;

 /**
  * Helpers for walking the tokens of a CSS property group (a run of
  * {@code name: value;} declarations) and for decoding the text of CSS tokens.
  */
 final class CssGrammar {

   /** CSS allows at most this many hex digits after the backslash of an escape. */
   private static final int MAX_HEX_DIGITS = 6;

   /** Substituted for hex escapes that do not name a valid Unicode code point. */
   private static final int REPLACEMENT_CODEPOINT = 0xfffd;

   /**
    * Skips tokens after a malformed declaration so parsing can resume.
    *
    * <p>Recovery stops at the first of:
    * <ul>
    *   <li>a semicolon (at any bracket depth), which is consumed;
    *   <li>a closing bracket that balances an opening bracket seen during this
    *       call, which is left as the current token;
    *   <li>a closing bracket with no opener seen during this call, which is
    *       consumed;
    *   <li>the end of the token stream.
    * </ul>
    *
    * @param it the iterator to advance; it is mutated in place.
    */
   private static void errorRecoveryUntilSemiOrCloseBracket(
       CssTokens.TokenIterator it) {
     int bracketDepth = 0;
     for (; it.hasNext(); it.advance()) {
       switch (it.type()) {
         case SEMICOLON:
           it.advance();
           return;
         case LEFT_CURLY:
         case LEFT_PAREN:
         case LEFT_SQUARE:
           ++bracketDepth;
           break;
         case RIGHT_CURLY:
         case RIGHT_PAREN:
         case RIGHT_SQUARE:
           --bracketDepth;
           if (bracketDepth <= 0) {
             // Only step over the bracket when it closes something that was
             // opened before recovery started (depth went negative).
             if (bracketDepth != 0) { it.advance(); }
             return;
           }
           break;
         default:
           break;
       }
     }
   }

   /**
    * Lexes {@code css} and reports each {@code name: value} declaration found
    * to {@code handler}.
    *
    * <p>For every declaration the handler receives {@code startProperty} with
    * the lower-cased property name, then the callbacks for the value tokens,
    * then {@code endProperty}.  Input that does not start with an identifier
    * followed by a colon is skipped via error recovery.
    *
    * @param css the text of the property group.
    * @param handler receives the parse events.
    */
   static void parsePropertyGroup(String css, PropertyHandler handler) {
     // Split tokens by semicolons/curly-braces, then by first colon,
     // dropping spaces and comments to identify property names and token runs
     // that form the value.

     CssTokens tokens = CssTokens.lex(css);
     CssTokens.TokenIterator it = tokens.iterator();
     propertyNameLoop:
     while (it.hasTokenAfterSpace()) {
       // Check that we have an identifier that might be a property name.
       if (it.type() != CssTokens.TokenType.IDENT) {
         errorRecoveryUntilSemiOrCloseBracket(it);
         continue;
       }

       String name = it.next();

       // Look for a colon.
       if (!(it.hasTokenAfterSpace() && ":".equals(it.token()))) {
         errorRecoveryUntilSemiOrCloseBracket(it);
         continue propertyNameLoop;
       }
       it.advance();

       handler.startProperty(Strings.toLowerCase(name));
       parsePropertyValue(it, handler);
       handler.endProperty();
     }
   }

   /**
    * Reports the tokens of one property value to {@code handler}, stopping
    * after a semicolon (which is consumed) or when {@code it} is exhausted.
    *
    * <p>Function tokens are handled recursively: the iterator returned by
    * {@code it.spliceToEnd()} is parsed between {@code startFunction} and
    * {@code endFunction} calls.  NOTE(review): this presumes
    * {@code spliceToEnd()} also moves {@code it} past the function, since the
    * usual advance is skipped for that case -- confirm against CssTokens.
    *
    * @param it positioned at the first token of the value; mutated in place.
    * @param handler receives one callback per significant token.
    */
   private static void parsePropertyValue(
       CssTokens.TokenIterator it, PropertyHandler handler) {
     propertyValueLoop:
     while (it.hasNext()) {
       CssTokens.TokenType type = it.type();
       String token = it.token();
       switch (type) {
         case SEMICOLON:
           it.advance();
           break propertyValueLoop;
         case FUNCTION:
           CssTokens.TokenIterator actuals = it.spliceToEnd();
           handler.startFunction(token);
           parsePropertyValue(actuals, handler);
           handler.endFunction(token);
           continue;  // Skip the advance over token.
         case IDENT:
           handler.identifier(token);
           break;
         case HASH_UNRESTRICTED:
           // Only "#" plus 3 or 6 characters is reported; other lengths are
           // silently dropped.
           if (token.length() == 4 || token.length() == 7) {
             handler.hash(token);
           }
           break;
         case STRING:
           handler.quotedString(token);
           break;
         case URL:
           handler.url(token);
           break;
         case DIMENSION:
         case NUMBER:
         case PERCENTAGE:
           handler.quantity(token);
           break;
         // Token types that are deliberately not reported to the handler.
         case AT:
         case BAD_DIMENSION:
         case COLUMN:
         case DOT_IDENT:
         case HASH_ID:
         case MATCH:
         case UNICODE_RANGE:
         case WHITESPACE:
           break;
         case LEFT_CURLY:
         case LEFT_PAREN:
         case LEFT_SQUARE:
         case RIGHT_CURLY:
         case RIGHT_PAREN:
         case RIGHT_SQUARE:
         case COMMA:
         case COLON:
         case DELIM:
           handler.punctuation(token);
           break;
       }
       it.advance();
     }
   }

   /**
    * Decodes any escape sequences and strips any quotes from the input.
    *
    * <ul>
    *   <li>If the token has at least two characters and starts and ends with
    *       the same quote character ({@code "} or {@code '}), both quotes are
    *       removed.
    *   <li>A backslash followed by one to six hex digits decodes to that code
    *       point; one space or tab directly after the digits is consumed as
    *       part of the escape.  Values that are not valid Unicode code points
    *       decode to U+FFFD.
    *   <li>A backslash followed by any other character decodes to that
    *       character.
    *   <li>A backslash that is the last character of the content has nothing
    *       to escape and is dropped.
    * </ul>
    *
    * @param token the raw token text; must not be null.
    * @return the decoded content, or {@code token} itself when it has neither
    *     surrounding quotes nor escapes.
    */
   static String cssContent(String token) {
     int n = token.length();
     int pos = 0;
     StringBuilder sb = null;
     if (n >= 2) {
       char ch0 = token.charAt(0);
       if ((ch0 == '"' || ch0 == '\'') && ch0 == token.charAt(n - 1)) {
         // From here on n is the index of the closing quote, i.e. the
         // exclusive end of the content.
         pos = 1;
         --n;
         sb = new StringBuilder(n);
       }
     }
     for (int esc; (esc = token.indexOf('\\', pos)) >= 0;) {
       if (esc >= n) { break; }
       if (sb == null) { sb = new StringBuilder(n); }
       sb.append(token, pos, esc);
       int end = esc + 2;
       if (end > n) {
         // Dangling backslash at the end of the content.  Reading the
         // "escaped" character would run past the content (or past the
         // string), so drop the backslash and stop.
         pos = n;
         break;
       }
       int codepoint = token.charAt(esc + 1);
       if (isHex(codepoint)) {
         // Parse \hhhhhh<opt-break> where hhhhhh is one to six hex digits
         // and <opt-break> is an optional space or tab character that can be
         // used to separate an escape sequence from a following literal hex
         // digit.
         int hexLimit = Math.min(n, esc + 1 + MAX_HEX_DIGITS);
         while (end < hexLimit && isHex(token.charAt(end))) { ++end; }
         // At most six hex digits, so this cannot overflow an int.
         codepoint = Integer.parseInt(token.substring(esc + 1, end), 16);
         if (!Character.isValidCodePoint(codepoint)) {
           // appendCodePoint would throw on values above U+10FFFF.
           codepoint = REPLACEMENT_CODEPOINT;
         }
         if (end < n) {
           char ch = token.charAt(end);
           if (ch == ' ' || ch == '\t') {  // Ignorable hex follower.
             ++end;
           }
         }
       }
       sb.appendCodePoint(codepoint);
       pos = end;
     }
     if (sb == null) { return token; }
     return sb.append(token, pos, n).toString();
   }

   /** Returns true if {@code codepoint} is an ASCII hex digit, in either case. */
   private static boolean isHex(int codepoint) {
     return ('0' <= codepoint && codepoint <= '9')
         || ('A' <= codepoint && codepoint <= 'F')
         || ('a' <= codepoint && codepoint <= 'f');
   }

   /**
    * Receives the events produced by {@link #parsePropertyGroup}.  Each token
    * argument is the token text as provided by the token iterator.
    */
   interface PropertyHandler {
     /** Called with the lower-cased name when a {@code name:} pair is found. */
     void startProperty(String propertyName);
     /** Called for DIMENSION, NUMBER and PERCENTAGE tokens. */
     void quantity(String token);
     /** Called for IDENT tokens inside a value. */
     void identifier(String token);
     /** Called for HASH_UNRESTRICTED tokens whose text is 4 or 7 chars long. */
     void hash(String token);
     /** Called for STRING tokens. */
     void quotedString(String token);
     /** Called for URL tokens. */
     void url(String token);
     /** Called for bracket, COMMA, COLON and DELIM tokens. */
     void punctuation(String token);
     /** Called with the FUNCTION token before its arguments are reported. */
     void startFunction(String token);
     /** Called with the same FUNCTION token after its arguments. */
     void endFunction(String token);
     /** Called once the value of the current property has been reported. */
     void endProperty();
   }

 }
	// Copyright (c) 2011, Mike Samuel
	// All rights reserved.
	//
	// Redistribution and use in source and binary forms, with or without
	// modification, are permitted provided that the following conditions
	// are met:
	//
	// Redistributions of source code must retain the above copyright
	// notice, this list of conditions and the following disclaimer.
	// Redistributions in binary form must reproduce the above copyright
	// notice, this list of conditions and the following disclaimer in the
	// documentation and/or other materials provided with the distribution.
	// Neither the name of the OWASP nor the names of its contributors may
	// be used to endorse or promote products derived from this software
	// without specific prior written permission.
	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
	// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
	// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
	// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
	// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
	// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
	// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
	// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
	// POSSIBILITY OF SUCH DAMAGE.

	package org.owasp.html;

	final class CssGrammar {

	private static void errorRecoveryUntilSemiOrCloseBracket(
	CssTokens.TokenIterator it) {
	int bracketDepth = 0;
	for (; it.hasNext(); it.advance()) {
	switch (it.type()) {
	case SEMICOLON:
	it.advance();
	return;
	case LEFT_CURLY:
	case LEFT_PAREN:
	case LEFT_SQUARE:
	++bracketDepth;
	break;
	case RIGHT_CURLY:
	case RIGHT_PAREN:
	case RIGHT_SQUARE:
	--bracketDepth;
	if (bracketDepth <= 0) {
	if (bracketDepth != 0) { it.advance(); }
	return;
	}
	break;
	default:
	break;
	}
	}
	}

	static void parsePropertyGroup(String css, PropertyHandler handler) {
	// Split tokens by semicolons/curly-braces, then by first colon,
	// dropping spaces and comments to identify property names and token runs
	// that form the value.

	CssTokens tokens = CssTokens.lex(css);
	CssTokens.TokenIterator it = tokens.iterator();
	propertyNameLoop:
	while (it.hasTokenAfterSpace()) {
	// Check that we have an identifier that might be a property name.
	if (it.type() != CssTokens.TokenType.IDENT) {
	errorRecoveryUntilSemiOrCloseBracket(it);
	continue;
	}

	String name = it.next();

	// Look for a colon.
	if (!(it.hasTokenAfterSpace() && ":".equals(it.token()))) {
	errorRecoveryUntilSemiOrCloseBracket(it);
	continue propertyNameLoop;
	}
	it.advance();

	handler.startProperty(Strings.toLowerCase(name));
	parsePropertyValue(it, handler);
	handler.endProperty();
	}
	}

	private static void parsePropertyValue(
	CssTokens.TokenIterator it, PropertyHandler handler) {
	propertyValueLoop:
	while (it.hasNext()) {
	CssTokens.TokenType type = it.type();
	String token = it.token();
	switch (type) {
	case SEMICOLON:
	it.advance();
	break propertyValueLoop;
	case FUNCTION:
	CssTokens.TokenIterator actuals = it.spliceToEnd();
	handler.startFunction(token);
	parsePropertyValue(actuals, handler);
	handler.endFunction(token);
	continue; // Skip the advance over token.
	case IDENT:
	handler.identifier(token);
	break;
	case HASH_UNRESTRICTED:
	if (token.length() == 4 \|\| token.length() == 7) {
	handler.hash(token);
	}
	break;
	case STRING:
	handler.quotedString(token);
	break;
	case URL:
	handler.url(token);
	break;
	case DIMENSION:
	case NUMBER:
	case PERCENTAGE:
	handler.quantity(token);
	break;
	case AT:
	case BAD_DIMENSION:
	case COLUMN:
	case DOT_IDENT:
	case HASH_ID:
	case MATCH:
	case UNICODE_RANGE:
	case WHITESPACE:
	break;
	case LEFT_CURLY:
	case LEFT_PAREN:
	case LEFT_SQUARE:
	case RIGHT_CURLY:
	case RIGHT_PAREN:
	case RIGHT_SQUARE:
	case COMMA:
	case COLON:
	case DELIM:
	handler.punctuation(token);
	break;
	}
	it.advance();
	}
	}

	/**
	* Decodes any escape sequences and strips any quotes from the input.
	*/
	static String cssContent(String token) {
	int n = token.length();
	int pos = 0;
	StringBuilder sb = null;
	if (n >= 2) {
	char ch0 = token.charAt(0);
	if (ch0 == '"' \|\| ch0 == '\'') {
	if (ch0 == token.charAt(n - 1)) {
	pos = 1;
	--n;
	sb = new StringBuilder(n);
	}
	}
	}
	for (int esc; (esc = token.indexOf('\\', pos)) >= 0;) {
	int end = esc + 2;
	if (esc > n) { break; }
	if (sb == null) { sb = new StringBuilder(n); }
	sb.append(token, pos, esc);
	int codepoint = token.charAt(end - 1);
	if (isHex(codepoint)) {
	// Parse \hhhhh<opt-break> where hhhhh is one or more hex digits
	// and <opt-break> is an optional space or tab character that can be
	// used to separate an escape sequence from a following literal hex
	// digit.
	while (end < n && isHex(token.charAt(end))) { ++end; }
	try {
	codepoint = Integer.parseInt(token.substring(esc + 1, end), 16);
	} catch (RuntimeException ex) {
	codepoint = 0xfffd; // Unknown codepoint.
	}
	if (end < n) {
	char ch = token.charAt(end);
	if (ch == ' ' \|\| ch == '\t') { // Ignorable hex follower.
	++end;
	}
	}
	}
	sb.appendCodePoint(codepoint);
	pos = end;
	}
	if (sb == null) { return token; }
	return sb.append(token, pos, n).toString();
	}

	private static boolean isHex(int codepoint) {
	return ('0' <= codepoint && codepoint <= '9')
	\|\| ('A' <= codepoint && codepoint <= 'F')
	\|\| ('a' <= codepoint && codepoint <= 'f');
	}

	interface PropertyHandler {
	void startProperty(String propertyName);
	void quantity(String token);
	void identifier(String token);
	void hash(String token);
	void quotedString(String token);
	void url(String token);
	void punctuation(String token);
	void startFunction(String token);
	void endFunction(String token);
	void endProperty();
	}

	}