| // Copyright (c) 2013, Mike Samuel |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions |
| // are met: |
| // |
| // Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // Redistributions in binary form must reproduce the above copyright |
| // notice, this list of conditions and the following disclaimer in the |
| // documentation and/or other materials provided with the distribution. |
| // Neither the name of the OWASP nor the names of its contributors may |
| // be used to endorse or promote products derived from this software |
| // without specific prior written permission. |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
| // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
| // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
| // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
| // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
| // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| // POSSIBILITY OF SUCH DAMAGE. |
| |
| package org.owasp.html; |
| |
| import java.util.ArrayList; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.NoSuchElementException; |
| |
| import javax.annotation.Nullable; |
| |
| import com.google.common.collect.ImmutableMap; |
| |
| /** |
| * Given a string of CSS, produces a string of normalized CSS with certain |
| * useful properties detailed below. |
| * <ul> |
| * <li>All runs of white-space and comment tokens (including CDO and CDC) |
| * have been replaced with a single space character.</li> |
| * <li>All strings are quoted and escapes are escaped according to the |
| * following scheme: |
| * <table> |
| * <tr><td>NUL</td> <td><code>\0</code></tr> |
| * <tr><td>line feed</td> <td><code>\a</code></tr> |
| * <tr><td>vertical feed</td> <td><code>\c</code></tr> |
| * <tr><td>carriage return</td><td><code>\d</code></tr> |
| * <tr><td>double quote</td> <td><code>\22</code></tr> |
| * <tr><td>ampersand &</td><td><code>\26</code></tr> |
| * <tr><td>single quote</td> <td><code>\27</code></tr> |
| * <tr><td>left-angle <</td><td><code>\3c</code></tr> |
| * <tr><td>rt-angle ></td> <td><code>\3e</code></tr> |
| * <tr><td>back slash</td> <td><code>\\</code></tr> |
| * <tr><td>all others</td> <td>raw</td></tr> |
| * </table> |
| * </li> |
| * <li>All <code>url(…)</code> tokens are quoted. |
| * <li>All keywords, identifiers, and hex literals are lower-case and have |
| * embedded escape sequences decoded, except that .</li> |
| * <li>All brackets nest properly.</li> |
| * <li>Does not contain any case-insensitive variant of the sequences |
| * {@code <!--}, {@code -->}, {@code <![CDATA[}, {@code ]]>}, or |
| * {@code </style}.</li> |
| * <li>All delimiters that can start longer tokens are followed by a space. |
| * </ul> |
| */ |
| final class CssTokens implements Iterable<String> { |
| |
| public final String normalizedCss; |
| public final Brackets brackets; |
| private final int[] tokenBreaks; |
| private final TokenType[] tokenTypes; |
| |
| public TokenIterator start() { |
| return new TokenIterator(tokenTypes.length); |
| } |
| |
| public TokenIterator iterator() { return start(); } |
| |
| public static CssTokens lex(String css) { |
| Lexer lexer = new Lexer(css); |
| lexer.lex(); |
| return lexer.build(); |
| } |
| |
| /** A cursor into a list of tokens. */ |
| public final class TokenIterator implements Iterator<String> { |
| private int tokenIndex = 0; |
| private final int limit; |
| |
| TokenIterator(int limit) { |
| this.limit = limit; |
| } |
| |
| public boolean hasNext() { |
| return hasToken(); |
| } |
| |
| public String next() { |
| String token = token(); |
| advance(); |
| return token; |
| } |
| |
| public @Nullable TokenIterator spliceToEnd() { |
| if (!hasNext()) { throw new NoSuchElementException(); } |
| int end = brackets.partner(tokenIndex); |
| if (end < 0) { |
| return null; |
| } |
| TokenIterator between = new TokenIterator(end); |
| between.tokenIndex = tokenIndex + 1; |
| tokenIndex = end + 1; |
| return between; |
| } |
| |
| public int tokenIndex() { |
| return tokenIndex; |
| } |
| |
| public int startOffset() { |
| return tokenBreaks[tokenIndex]; |
| } |
| |
| public int endOffset() { |
| return tokenBreaks[tokenIndex+1]; |
| } |
| |
| public String token() { |
| return normalizedCss.substring(startOffset(), endOffset()); |
| } |
| |
| public boolean hasToken() { |
| return tokenIndex < limit; |
| } |
| |
| public boolean hasTokenAfterSpace() { |
| while (hasToken()) { |
| if (type() != TokenType.WHITESPACE) { return true; } |
| advance(); |
| } |
| return false; |
| } |
| |
| /** The type of the current token. */ |
| public TokenType type() { |
| return tokenTypes[tokenIndex]; |
| } |
| |
| public void seek(int tokenIndex) { |
| this.tokenIndex = tokenIndex; |
| } |
| |
| public void advance() { |
| if (!hasToken()) { throw new NoSuchElementException(); } |
| ++tokenIndex; |
| } |
| |
| public void backup() { |
| if (tokenIndex == 0) { throw new NoSuchElementException(); } |
| --tokenIndex; |
| } |
| |
| public void remove() throws UnsupportedOperationException { |
| throw new UnsupportedOperationException(); |
| } |
| } |
| |
| private CssTokens( |
| String normalizedCss, Brackets brackets, int[] tokenBreaks, |
| TokenType[] tokenTypes) { |
| this.normalizedCss = normalizedCss; |
| this.brackets = brackets; |
| this.tokenBreaks = tokenBreaks; |
| this.tokenTypes = tokenTypes; |
| } |
| |
| public enum TokenType { |
| /** An identifier. */ |
| IDENT, |
| /** An identifier prefixed with a period. */ |
| DOT_IDENT, |
| /** A function name and opening bracket. */ |
| FUNCTION, |
| /** An {@code @<identifier>} directive token. */ |
| AT, |
| /** A hash token that contains non-hex characters. */ |
| HASH_ID, |
| /** A hash token that could be a color literal. */ |
| HASH_UNRESTRICTED, |
| /** A quoted string. */ |
| STRING, |
| /** A URL of the form <code>url("...")</code>. */ |
| URL, |
| /** A single character. */ |
| DELIM, |
| /** A scalar numeric value. */ |
| NUMBER, |
| /** A percentage. */ |
| PERCENTAGE, |
| /** A numeric value with a unit suffix. */ |
| DIMENSION, |
| /** A numeric value with an unknown unit suffix. */ |
| BAD_DIMENSION, |
| /** {@code U+<hex-or-qmark>} */ |
| UNICODE_RANGE, |
| /** |
| * include-match, dash-match, prefix-match, suffix-match, substring-match |
| */ |
| MATCH, |
| /** {@code ||} */ |
| COLUMN, |
| /** A run of white-space, comment, CDO, and CDC tokens. */ |
| WHITESPACE, |
| /** {@code :} */ |
| COLON, |
| /** {@code ;} */ |
| SEMICOLON, |
| /** {@code ,} */ |
| COMMA, |
| /** {@code [} */ |
| LEFT_SQUARE, |
| /** {@code ]} */ |
| RIGHT_SQUARE, |
| /** {@code (} */ |
| LEFT_PAREN, |
| /** {@code )} */ |
| RIGHT_PAREN, |
| /** <code>{</code> */ |
| LEFT_CURLY, |
| /** <code>}</code> */ |
| RIGHT_CURLY, |
| ; |
| } |
| |
| /** |
| * Maps tokens to their partners. A close bracket token like {@code (} may |
| * have a partner token like {@code )} if properly nested, and vice-versa. |
| */ |
| static final class Brackets { |
| /** |
| * For each token index, the index of the indexed token's partner or -1 if |
| * it has none. |
| */ |
| private final int[] brackets; |
| |
| private Brackets(int[] brackets) { |
| this.brackets = brackets; |
| } |
| |
| /** The index of the partner token or -1 if none. */ |
| int partner(int tokenIndex) { |
| int bracketIndex = bracketIndexForToken(tokenIndex); |
| if (bracketIndex < 0) { return -1; } |
| return brackets[(bracketIndex << 1) + 1]; |
| } |
| |
| int bracketIndexForToken(int target) { |
| // Binary search by leftmost element of pair. |
| int left = 0; |
| int right = brackets.length >> 1; |
| while (left < right) { |
| int mid = left + ((right - left) >> 1); |
| int value = brackets[mid << 1]; |
| if (value == target) { return mid; } |
| if (value < target) { |
| left = mid + 1; |
| } else { |
| right = mid; |
| } |
| } |
| return -1; |
| } |
| } |
| |
| private static final int[] ZERO_INTS = new int[0]; |
| |
| private static final TokenType[] ZERO_TYPES = new TokenType[0]; |
| |
| private static final Brackets EMPTY_BRACKETS = new Brackets(ZERO_INTS); |
| |
| private static final CssTokens EMPTY = new CssTokens( |
| "", EMPTY_BRACKETS, ZERO_INTS, ZERO_TYPES); |
| |
| /** |
| * Tokenizes according to section 4 of http://dev.w3.org/csswg/css-syntax/ |
| */ |
| private static final class Lexer { |
| private final String css; |
| private final StringBuilder sb; |
| private int pos = 0; |
| private final int cssLimit; |
| |
| private List<TokenType> tokenTypes = null; |
| private int[] tokenBreaks = new int[128]; |
| private int tokenBreaksLimit = 0; |
| |
| /** |
| * For each bracket, 2 ints: the token index of the bracket, and the token |
| * index of its partner. |
| * The array is sorted by the first int. |
| * The second int is -1 when the bracket has not yet been closed. |
| */ |
| private int[] brackets = ZERO_INTS; |
| /** |
| * The number of elements in {@link #brackets} that are valid. |
| * {@code brackets[bracketsLimit:]} is zeroed space that the list can grow |
| * into. |
| */ |
| private int bracketsLimit = 0; |
| /** |
| * For each bracket that has not been closed, 2 ints: |
| * its index in {@link #brackets} and the character of its close bracket |
| * as an int. |
| * This is a bracket stack so the array is sorted by the first int. |
| */ |
| private int[] open = ZERO_INTS; |
| /** |
| * The number of elements in {@link #open} that are valid. |
| * {@code open[openLimit:]} is garbage space that the stack can grow into. |
| */ |
| private int openLimit = 0; |
| |
| Lexer(String css) { |
| this.css = css; |
| this.sb = new StringBuilder(); |
| this.cssLimit = css.length(); |
| } |
| |
| TokenType openBracket(char bracketChar) { |
| char close; |
| TokenType type; |
| switch (bracketChar) { |
| case '(': close = ')'; type = TokenType.LEFT_PAREN; break; |
| case '[': close = ']'; type = TokenType.LEFT_SQUARE; break; |
| case '{': close = '}'; type = TokenType.LEFT_CURLY; break; |
| default: |
| throw new AssertionError("Invalid open bracket " + bracketChar); |
| } |
| brackets = expandIfNecessary(brackets, bracketsLimit, 2); |
| open = expandIfNecessary(open, openLimit, 2); |
| open[openLimit++] = bracketsLimit; |
| open[openLimit++] = close; |
| brackets[bracketsLimit++] = tokenBreaksLimit; |
| brackets[bracketsLimit++] = -1; |
| sb.append(bracketChar); |
| return type; |
| } |
| |
| void closeBracket(char bracketChar) { |
| int openLimitAfterClose = openLimit; |
| do { |
| if (openLimitAfterClose == 0) { |
| // Drop an orphaned close bracket. |
| breakOutput(); |
| return; |
| } |
| openLimitAfterClose -= 2; |
| } while (bracketChar != open[openLimitAfterClose + 1]); |
| closeBrackets(openLimitAfterClose); |
| } |
| |
| private void closeBrackets(int openLimitAfterClose) { |
| // Make sure we've got space on brackets. |
| int spaceNeeded = openLimit - openLimitAfterClose; |
| brackets = expandIfNecessary(brackets, bracketsLimit, spaceNeeded); |
| |
| int closeTokenIndex = tokenBreaksLimit; |
| while (openLimit > openLimitAfterClose) { |
| // Pop the stack. |
| int closeBracket = open[--openLimit]; |
| int openBracketIndex = open[--openLimit]; |
| int openTokenIndex = brackets[openBracketIndex]; |
| // Update open bracket to point to its partner. |
| brackets[openBracketIndex + 1] = closeTokenIndex; |
| // Emit the close bracket. |
| brackets[bracketsLimit++] = closeTokenIndex; |
| brackets[bracketsLimit++] = openTokenIndex; |
| sb.appendCodePoint(closeBracket); |
| closeTokenIndex++; |
| } |
| } |
| |
| CssTokens build() { |
| // Close any still open brackets. |
| { |
| int startOfCloseBrackets = sb.length(); |
| closeBrackets(0); |
| emitMergedTokens(startOfCloseBrackets, sb.length()); |
| } |
| |
| if (tokenTypes == null) { return EMPTY; } |
| int[] bracketsTrunc = truncateOrShare(brackets, bracketsLimit); |
| |
| // Strip any trailing space off, since it may have been inserted by a |
| // breakAfter call anyway. |
| int cssEnd = sb.length(); |
| if (cssEnd > 0 && sb.charAt(cssEnd - 1) == ' ') { |
| --cssEnd; |
| tokenTypes.remove(--tokenBreaksLimit); |
| } |
| String normalizedCss = sb.substring(0, cssEnd); |
| |
| // Store the last character on the tokenBreaksList to simplify finding the |
| // end of a token. |
| tokenBreaks = expandIfNecessary(tokenBreaks, tokenBreaksLimit, 1); |
| tokenBreaks[tokenBreaksLimit++] = normalizedCss.length(); |
| |
| int[] tokenBreaksTrunc = truncateOrShare(tokenBreaks, tokenBreaksLimit); |
| TokenType[] tokenTypesArr = tokenTypes.toArray(ZERO_TYPES); |
| |
| return new CssTokens( |
| normalizedCss, new Brackets(bracketsTrunc), |
| tokenBreaksTrunc, tokenTypesArr); |
| } |
| |
| void lex() { |
| // Fast-track no content. |
| consumeIgnorable(); |
| sb.setLength(0); |
| if (pos == cssLimit) { return; } |
| |
| tokenTypes = new ArrayList<TokenType>(); |
| |
| String css = this.css; |
| int cssLimit = this.cssLimit; |
| while (pos < cssLimit) { |
| assert this.tokenBreaksLimit == this.tokenTypes.size() |
| : "token and types out of sync at " + tokenBreaksLimit |
| + " in `" + css + "`"; |
| // SPEC: 4. Tokenization |
| // The output of the tokenization step is a stream of zero |
| // or more of the following tokens: <ident>, <function>, |
| // <at-keyword>, <hash>, <string>, <bad-string>, <url>, |
| // <bad-url>, <delim>, <number>, <percentage>, |
| // <dimension>, <unicode-range>, <include-match>, |
| // <dash-match>, <prefix-match>, <suffix-match>, |
| // <substring-match>, <column>, <whitespace>, <CDO>, |
| // <CDC>, <colon>, <semicolon>, <comma>, <[>, <]>, |
| // <(>, <)>, <{>, and <}>. |
| |
| // IMPLEMENTS: 4.3 Consume a token |
| char ch = css.charAt(pos); |
| int startOfToken = pos; |
| int startOfOutputToken = sb.length(); |
| final TokenType type; |
| switch (ch) { |
| case '\t': case '\n': case '\f': case '\r': case ' ': case '\ufeff': |
| consumeIgnorable(); |
| type = TokenType.WHITESPACE; |
| break; |
| case '/': { |
| char lookahead = pos + 1 < cssLimit ? css.charAt(pos + 1) : 0; |
| if (lookahead == '/' || lookahead == '*') { |
| consumeIgnorable(); |
| type = TokenType.WHITESPACE; |
| } else { |
| consumeDelim(ch); |
| type = TokenType.DELIM; |
| } |
| break; |
| } |
| case '<': |
| if (consumeIgnorable()) { // <!-- |
| type = TokenType.WHITESPACE; |
| } else { |
| consumeDelim('<'); |
| type = TokenType.DELIM; |
| } |
| break; |
| case '>': |
| breakOutput(); |
| sb.append('>'); |
| type = TokenType.DELIM; |
| ++pos; |
| break; |
| case '@': |
| if (consumeAtKeyword()) { |
| type = TokenType.AT; |
| } else { |
| consumeDelim(ch); |
| type = TokenType.DELIM; |
| } |
| break; |
| case '#': { |
| sb.append('#'); |
| TokenType hashType = consumeHash(); |
| if (hashType != null) { |
| type = hashType; |
| } else { |
| ++pos; |
| sb.append(' '); |
| type = TokenType.DELIM; |
| } |
| break; |
| } |
| case '"': |
| case '\'': |
| type = consumeString(); |
| break; |
| case 'U': case 'u': |
| // SPEC handle URL under "ident like token". |
| if (consumeUnicodeRange()) { |
| type = TokenType.UNICODE_RANGE; |
| } else { |
| type = consumeIdentOrUrlOrFunction(); |
| } |
| break; |
| case '0': case '1': case '2': case '3': case '4': |
| case '5': case '6': case '7': case '8': case '9': |
| type = consumeNumberOrPercentageOrDimension(); |
| break; |
| case '+': case '-': case '.': { |
| char lookahead = pos + 1 < cssLimit ? css.charAt(pos + 1) : 0; |
| if (isDecimal(lookahead) |
| || (lookahead == '.' && pos + 2 < cssLimit |
| && isDecimal(css.charAt(pos + 2)))) { |
| type = consumeNumberOrPercentageOrDimension(); |
| } else if (ch == '+') { |
| consumeDelim(ch); |
| type = TokenType.DELIM; |
| } else if (ch == '-') { |
| if (consumeIgnorable()) { // --> |
| type = TokenType.WHITESPACE; |
| } else { |
| type = consumeIdentOrUrlOrFunction(); |
| } |
| } else if (isIdentPart(lookahead)) { |
| // treat ".<IDENT>" as one token. |
| sb.append('.'); |
| ++pos; |
| consumeIdent(false); |
| if (pos != startOfToken + 1) { |
| type = TokenType.DOT_IDENT; |
| if (pos < cssLimit) { |
| char next = css.charAt(pos); |
| if ('(' == next) { |
| // A dotted identifier followed by a parenthesis is |
| // ambiguously a function. |
| sb.append(' '); |
| } |
| } |
| } else { |
| type = TokenType.DELIM; |
| sb.append(' '); |
| } |
| } else { |
| consumeDelim('.'); |
| type = TokenType.DELIM; |
| } |
| break; |
| } |
| case ':': consumeDelim(ch); type = TokenType.COLON; break; |
| case ';': consumeDelim(ch); type = TokenType.SEMICOLON; break; |
| case ',': consumeDelim(ch); type = TokenType.COMMA; break; |
| case '[': case '(': case '{': |
| type = openBracket(ch); |
| ++pos; |
| break; |
| case '}': case ')': case ']': |
| closeBracket(ch); |
| ++pos; |
| // Use DELIM so that a later loop will split output into multiple |
| // tokens since we may have inserted missing close brackets for |
| // unclosed open brackets already on the stack. |
| type = TokenType.DELIM; |
| break; |
| case '~': case '|': case '^': case '$': case '*': { |
| char lookahead = pos + 1 < cssLimit ? css.charAt(pos + 1) : 0; |
| if (lookahead == '=') { |
| consumeMatch(ch); |
| type = TokenType.MATCH; |
| } else if (ch == '|' && lookahead == '|') { |
| consumeColumn(); |
| type = TokenType.COLUMN; |
| } else { |
| consumeDelim(ch); |
| type = TokenType.DELIM; |
| } |
| break; |
| } |
| case '_': |
| type = consumeIdentOrUrlOrFunction(); |
| break; |
| case '\\': { |
| // Optimistically parse as an ident. |
| TokenType identType = consumeIdentOrUrlOrFunction(); |
| if (identType == null) { |
| ++pos; // drop |
| breakOutput(); |
| type = TokenType.WHITESPACE; |
| } else { |
| type = identType; |
| } |
| // TODO: handle case where "url" is encoded. |
| break; |
| } |
| default: |
| int chlower = ch | 32; |
| if ('a' <= chlower && chlower <= 'z' || ch >= 0x80) { |
| TokenType identType = consumeIdentOrUrlOrFunction(); |
| if (identType != null) { |
| type = identType; |
| } else { // Occurs on undefined-codepoints. |
| ++pos; |
| breakOutput(); |
| type = TokenType.WHITESPACE; |
| } |
| } else if (ch > 0x20) { |
| consumeDelim(ch); |
| type = TokenType.DELIM; |
| } else { // Ignore. |
| consumeIgnorable(); |
| type = TokenType.WHITESPACE; |
| } |
| } |
| assert pos > startOfToken |
| : "empty token at " + pos + ", ch0=" + css.charAt(startOfToken) |
| + ":U+" + Integer.toHexString(css.charAt(startOfToken)); |
| int endOfOutputToken = sb.length(); |
| if (endOfOutputToken > startOfOutputToken) { |
| if (type == TokenType.DELIM) { |
| emitMergedTokens(startOfOutputToken, endOfOutputToken); |
| } else { |
| if (type != TokenType.WHITESPACE |
| && sb.charAt(startOfOutputToken) == ' ') { |
| emitToken(TokenType.WHITESPACE, startOfOutputToken); |
| ++startOfOutputToken; |
| assert startOfOutputToken != endOfOutputToken; |
| } |
| emitToken(type, startOfOutputToken); |
| // Token emitters can emit a space after a token to avoid possible |
| // merges with following tokens |
| if (type != TokenType.WHITESPACE) { |
| int sbLen = sb.length(); |
| if (startOfOutputToken + 1 < sbLen |
| && sb.charAt(sbLen - 1) == ' ') { |
| emitToken(TokenType.WHITESPACE, sbLen - 1); |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| private void emitMergedTokens(int start, int end) { |
| // Handle breakOutput and merging of output tokens. |
| for (int e = start; e < end; ++e) { |
| TokenType delimType; |
| switch (sb.charAt(e)) { |
| case ' ': delimType = TokenType.WHITESPACE; break; |
| case '}': delimType = TokenType.RIGHT_CURLY; break; |
| case ')': delimType = TokenType.RIGHT_PAREN; break; |
| case ']': delimType = TokenType.RIGHT_SQUARE; break; |
| default : delimType = TokenType.DELIM; break; |
| } |
| emitToken(delimType, e); |
| } |
| } |
| |
| private void emitToken(TokenType type, int startOfOutputToken) { |
| if (tokenBreaksLimit == 0 |
| || tokenBreaks[tokenBreaksLimit - 1] != startOfOutputToken) { |
| tokenBreaks = expandIfNecessary(tokenBreaks, tokenBreaksLimit, 1); |
| tokenBreaks[tokenBreaksLimit++] = startOfOutputToken; |
| tokenTypes.add(type); |
| } |
| } |
| |
| private void consumeDelim(char ch) { |
| sb.append(ch); |
| switch (ch) { |
| // Prevent token merging. |
| case '~': case '|': case '^': case '$': case '\\': |
| case '.': case '+': case '-': case '@': case '/': case '<': |
| sb.append(' '); |
| break; |
| default: |
| break; |
| } |
| ++pos; |
| } |
| |
| private boolean consumeIgnorable() { |
| String css = this.css; |
| int cssLimit = this.cssLimit; |
| int posBefore = pos; |
| while (pos < cssLimit) { |
| char ch = css.charAt(pos); |
| if (ch <= 0x20 |
| // Treat a BOM as white-space so that it is ignored at the beginning |
| // of a file. |
| || ch == '\ufeff') { |
| ++pos; |
| } else if (pos + 1 == cssLimit) { |
| break; |
| } else if (ch == '/') { |
| char next = css.charAt(pos + 1); |
| if (next == '*') { |
| pos += 2; |
| while (pos < cssLimit) { |
| int ast = css.indexOf('*', pos); |
| if (ast < 0) { |
| pos = cssLimit; // Unclosed /* comment */ |
| break; |
| } else { |
| // Advance over a run of '*'s. |
| pos = ast + 1; |
| while (pos < cssLimit && css.charAt(pos) == '*') { |
| ++pos; |
| } |
| if (pos < cssLimit && css.charAt(pos) == '/') { |
| ++pos; |
| break; |
| } |
| } |
| } |
| } else if (next == '/') { // Non-standard but widely supported |
| while (++pos < cssLimit) { |
| if (isLineTerminator(css.charAt(pos))) { break; } |
| } |
| } else { |
| break; |
| } |
| } else if (ch == '<') { |
| if (pos + 3 < cssLimit |
| && '!' == css.charAt(pos + 1) |
| && '-' == css.charAt(pos + 2) |
| && '-' == css.charAt(pos + 3)) { |
| pos += 4; |
| } else { |
| break; |
| } |
| } else if (ch == '-') { |
| if (pos + 2 < cssLimit |
| && '-' == css.charAt(pos + 1) |
| && '>' == css.charAt(pos + 2)) { |
| pos += 3; |
| } else { |
| break; |
| } |
| } else { |
| break; |
| } |
| } |
| if (pos == posBefore) { |
| return false; |
| } else { |
| breakOutput(); |
| return true; |
| } |
| } |
| |
| private void breakOutput() { |
| int last = sb.length() - 1; |
| if (last >= 0 && sb.charAt(last) != ' ') { sb.append(' '); } |
| } |
| |
| private void consumeColumn() { |
| pos += 2; |
| sb.append("||"); |
| } |
| |
| private void consumeMatch(char ch) { |
| pos += 2; |
| sb.append(ch).append('='); |
| } |
| |
| private void consumeIdent(boolean allowFirstDigit) { |
| int cssLimit = this.cssLimit; |
| int last = -1, nCodepoints = 0; |
| int sbAtStart = sb.length(); |
| int posAtStart = pos; |
| while (pos < cssLimit) { |
| int posBefore = pos; |
| |
| int decoded = readCodepoint(); |
| if (decoded == '\\') { |
| decoded = consumeAndDecodeEscapeSequence(); |
| } else { |
| ++pos; |
| } |
| |
| if (decoded >= 0 && isIdentPart(decoded)) { |
| if (!allowFirstDigit && nCodepoints < 2 |
| && '0' <= decoded && decoded <= '9') { |
| // Don't allow encoded identifiers that look like numeric tokens |
| // like \-1 or ones that start with an encoded decimal digit. |
| if (last == '-' || last == -1) { |
| pos = posAtStart; |
| sb.setLength(sbAtStart); |
| return; |
| } |
| } |
| sb.appendCodePoint(decoded); |
| last = decoded; |
| ++nCodepoints; |
| } else { |
| pos = posBefore; |
| return; |
| } |
| } |
| } |
| |
| private boolean consumeAtKeyword() { |
| assert css.charAt(pos) == '@'; |
| int bufferLengthBeforeWrite = sb.length(); |
| sb.append('@'); |
| int posBeforeKeyword = ++pos; |
| consumeIdent(false); |
| if (pos == posBeforeKeyword) { |
| --pos; // back up over '@' |
| sb.setLength(bufferLengthBeforeWrite); // Unwrite the '@' |
| return false; |
| } else { |
| return true; |
| } |
| } |
| |
| |
| private int consumeAndDecodeEscapeSequence() { |
| String css = this.css; |
| int cssLimit = this.cssLimit; |
| assert css.charAt(pos) == '\\'; |
| if (pos + 1 >= cssLimit) { return -1; } |
| char esc = css.charAt(pos + 1); |
| if (isLineTerminator(esc)) { return -1; } |
| int escLower = esc | 32; |
| if (('0' <= esc && esc <= '9') |
| || ('a' <= escLower && escLower <= 'f')) { |
| int hexValue = 0; |
| int hexStart = pos + 1; |
| int hexLimit = Math.min(pos + 7, cssLimit); |
| int hexEnd = hexStart; |
| do { |
| hexValue = (hexValue << 4) |
| | (esc <= '9' ? esc - '0' : escLower - ('a' - 10)); |
| ++hexEnd; |
| if (hexEnd == hexLimit) { break; } |
| esc = css.charAt(hexEnd); |
| escLower = esc | 32; |
| } while (('0' <= esc && esc <= '9') |
| || ('a' <= escLower && escLower <= 'f')); |
| if (!Character.isDefined(hexValue)) { |
| hexValue = 0xfffd; |
| } |
| pos = hexEnd; |
| if (pos < cssLimit) { |
| // A sequence of hex digits can be followed by a space that allows |
| // so that code-point U+A followed by the letter 'b' can be rendered |
| // as "\a b" since "\ab" specifies the single code-point U+AB. |
| char next = css.charAt(pos); |
| if (next == ' ' || next == '\t' || isLineTerminator(next)) { |
| ++pos; |
| } |
| } |
| return hexValue; |
| } |
| pos += 2; |
| return esc; |
| } |
| |
| private static final long HEX_ENCODED_BITMASK = |
| (1L << 0) | LINE_TERMINATOR_BITMASK |
| | (1L << '"') | (1L << '\'') | (1L << '&') | (1L << '<') | (1L << '>'); |
| private static boolean isHexEncoded(int codepoint) { |
| return (0 <= codepoint && codepoint < 63 |
| && 0 != ((1L << codepoint) & HEX_ENCODED_BITMASK)); |
| } |
| |
| private void encodeCharOntoOutput(int codepoint, int last) { |
| switch (codepoint) { |
| case '\\': sb.append("\\\\"); break; |
| case '\0': sb.append("\\0"); break; |
| case '\n': sb.append("\\a"); break; |
| case '\f': sb.append("\\c"); break; |
| case '\r': sb.append("\\d"); break; |
| case '\"': sb.append("\\22"); break; |
| case '&': sb.append("\\26"); break; |
| case '\'': sb.append("\\27"); break; |
| case '<': sb.append("\\3c"); break; |
| case '>': sb.append("\\3e"); break; |
| // The set of escapes above that end with a hex digit must appear in |
| // HEX_ENCODED_BITMASK. |
| case '-': |
| sb.append('-'); |
| break; |
| default: |
| if (isHexEncoded(last) |
| // We need to put a space after a trailing hex digit if the |
| // next encoded character on the output would be another hex |
| // digit or a space character. The other space characters |
| // are handled above. |
| && (codepoint == ' ' || codepoint == '\t' |
| || ('0' <= codepoint && codepoint <= '9') |
| || ('a' <= (codepoint | 32) && (codepoint | 32) <= 'f'))) { |
| sb.append(' '); |
| } |
| sb.appendCodePoint(codepoint); |
| break; |
| } |
| } |
| |
| private TokenType consumeNumberOrPercentageOrDimension() { |
| String css = this.css; |
| int cssLimit = this.cssLimit; |
| boolean isZero = true; |
| int intStart = pos; |
| if (intStart < cssLimit) { |
| char ch = css.charAt(intStart); |
| if (ch == '-' || ch == '+') { |
| ++intStart; |
| } |
| } |
| // Find the integer part after any sign. |
| int intEnd = intStart; |
| for (; intEnd < cssLimit; ++intEnd) { |
| char ch = css.charAt(intEnd); |
| if (!('0' <= ch && ch <= '9')) { break; } |
| if (ch != '0') { isZero = false; } |
| } |
| // Find a fraction like ".5" or ".". |
| int fractionStart = intEnd; |
| int fractionEnd = fractionStart; |
| if (fractionEnd < cssLimit && '.' == css.charAt(fractionEnd)) { |
| ++fractionEnd; |
| for (; fractionEnd < cssLimit; ++fractionEnd) { |
| char ch = css.charAt(fractionEnd); |
| if (!('0' <= ch && ch <= '9')) { break; } |
| if (ch != '0') { isZero = false; } |
| } |
| } |
| int exponentStart = fractionEnd; |
| int exponentIntStart = exponentStart; |
| int exponentEnd = exponentStart; |
| boolean isExponentZero = true; |
| if (exponentStart < cssLimit && 'e' == (css.charAt(exponentStart) | 32)) { |
| // 'e' and 'e' in "5e-f" for a |
| exponentEnd = exponentStart + 1; |
| if (exponentEnd < cssLimit) { |
| char ch = css.charAt(exponentEnd); |
| if (ch == '+' || ch == '-') { ++exponentEnd; } |
| } |
| exponentIntStart = exponentEnd; |
| for (; exponentEnd < cssLimit; ++exponentEnd) { |
| char ch = css.charAt(exponentEnd); |
| if (!('0' <= ch && ch <= '9')) { break; } |
| if (ch != '0') { isExponentZero = false; } |
| } |
| // Since |
| // dimension := <number> <ident> |
| // the below are technically valid dimensions even though they appear |
| // to have incomplete exponents: |
| // 5e |
| // 5ex |
| // 5e- |
| if (exponentEnd == exponentIntStart) { // Incomplete exponent. |
| exponentIntStart = exponentEnd = exponentStart; |
| isExponentZero = true; |
| } |
| } |
| |
| int unitStart = exponentEnd; |
| // Skip over space between number and unit. |
| // Many user-agents allow "5 ex" instead of "5ex". |
| while (unitStart < cssLimit) { |
| char ch = css.charAt(unitStart); |
| if (ch == ' ' || isLineTerminator(ch)) { |
| ++unitStart; |
| } else { |
| break; |
| } |
| } |
| |
| if (sb.length() != 0 && isIdentPart(sb.charAt(sb.length() - 1))) { |
| sb.append(' '); |
| } |
| // Normalize the number onto the buffer. |
| // We will normalize and unit later. |
| // Skip the sign if it is positive. |
| if (intStart != pos && '-' == css.charAt(pos) && !isZero) { |
| sb.append('-'); |
| } |
| if (isZero) { |
| sb.append('0'); |
| } else { |
| // Strip leading zeroes from the integer and exponent and trailing |
| // zeroes from the fraction. |
| while (intStart < intEnd && css.charAt(intStart) == '0') { ++intStart; } |
| while (fractionEnd > fractionStart |
| && css.charAt(fractionEnd - 1) == '0') { |
| --fractionEnd; |
| } |
| if (intStart == intEnd) { |
| sb.append('0'); // .5 -> 0.5 |
| } else { |
| sb.append(css, intStart, intEnd); |
| } |
| if (fractionEnd > fractionStart + 1) { // 5. -> 5; 5.0 -> 5 |
| sb.append(css, fractionStart, fractionEnd); |
| } |
| if (!isExponentZero) { |
| sb.append('e'); |
| // 1e+1 -> 1e1 |
| if ('-' == css.charAt(exponentIntStart - 1)) { sb.append('-'); } |
| while (exponentIntStart < exponentEnd |
| && css.charAt(exponentIntStart) == '0') { |
| ++exponentIntStart; |
| } |
| sb.append(css, exponentIntStart, exponentEnd); |
| } |
| } |
| |
| int unitEnd; |
| TokenType type; |
| if (unitStart < cssLimit && '%' == css.charAt(unitStart)) { |
| unitEnd = unitStart + 1; |
| type = TokenType.PERCENTAGE; |
| sb.append('%'); |
| } else { |
| // The grammar says that any identifier following a number is a unit. |
| int bufferBeforeUnit = sb.length(); |
| pos = unitStart; |
| consumeIdent(false); |
| int bufferAfterUnit = sb.length(); |
| boolean knownUnit = isWellKnownUnit( |
| sb, bufferBeforeUnit, bufferAfterUnit); |
| if (unitStart == exponentEnd // No intervening space |
| || knownUnit) { |
| unitEnd = pos; |
| // 3IN -> 3in |
| for (int i = bufferBeforeUnit; i < bufferAfterUnit; ++i) { |
| char ch = sb.charAt(i); |
| if ('A' <= ch && ch <= 'Z') { sb.setCharAt(i, (char) (ch | 32)); } |
| } |
| } else { |
| unitEnd = unitStart = exponentEnd; |
| sb.setLength(bufferBeforeUnit); |
| } |
| type = unitStart == unitEnd |
| ? TokenType.NUMBER |
| : knownUnit |
| ? TokenType.DIMENSION |
| : TokenType.BAD_DIMENSION; |
| } |
| pos = unitEnd; |
| if (type != TokenType.PERCENTAGE |
| && pos < cssLimit && css.charAt(pos) == '.') { |
| sb.append(' '); |
| } |
| return type; |
| } |
| |
| private TokenType consumeString() { |
| String css = this.css; |
| int cssLimit = this.cssLimit; |
| |
| char delim = css.charAt(pos); |
| assert delim == '"' || delim == '\''; |
| ++pos; |
| int startOfStringOnOutput = sb.length(); |
| sb.append('\''); |
| int last = -1; |
| boolean closed = false; |
| while (pos < cssLimit) { |
| char ch = css.charAt(pos); |
| if (ch == delim) { |
| ++pos; |
| closed = true; |
| break; |
| } |
| if (isLineTerminator(ch)) { break; } |
| int decoded = ch; |
| if (ch == '\\') { |
| if (pos + 1 < cssLimit && isLineTerminator(css.charAt(pos+1))) { |
| // consume it but generate no tokens. |
| // Lookahead to treat a \r\n sequence as one line-terminator. |
| if (pos + 2 < cssLimit |
| && css.charAt(pos+1) == '\r' && css.charAt(pos+2) == '\n') { |
| pos += 3; |
| } else { |
| pos += 2; |
| } |
| continue; |
| } else { |
| decoded = consumeAndDecodeEscapeSequence(); |
| if (decoded < 0) { |
| break; |
| } |
| } |
| } else { |
| ++pos; |
| } |
| encodeCharOntoOutput(decoded, last); |
| last = decoded; |
| } |
| if (closed) { |
| sb.append('\''); |
| return TokenType.STRING; |
| } else { // Drop <bad-string>s |
| sb.setLength(startOfStringOnOutput); |
| breakOutput(); |
| return TokenType.WHITESPACE; |
| } |
| } |
| |
| private @Nullable TokenType consumeHash() { |
| assert css.charAt(pos) == '#'; |
| ++pos; |
| int beforeIdent = pos; |
| consumeIdent(true); |
| if (pos == beforeIdent) { |
| pos = beforeIdent - 1; |
| return null; |
| } |
| for (int i = beforeIdent; i < pos; ++i) { |
| char chLower = (char) (css.charAt(i) | 32); |
| if (!(('0' <= chLower && chLower <= '9') |
| || ('a' <= chLower && chLower <= 'f'))) { |
| return TokenType.HASH_ID; |
| } |
| } |
| return TokenType.HASH_UNRESTRICTED; |
| } |
| |
| private boolean consumeUnicodeRange() { |
| final String css = this.css; |
| final int cssLimit = this.cssLimit; |
| |
| assert pos < cssLimit && (css.charAt(pos) | 32) == 'u'; |
| |
| final int start = pos; |
| final int startOfOutput = sb.length(); |
| ++pos; |
| boolean ok = false; |
| parse: |
| try { |
| if (pos == cssLimit || css.charAt(pos) != '+') { |
| break parse; |
| } |
| ++pos; |
| sb.append("U+"); |
| int numStartDigits = 0; |
| while (pos < cssLimit && numStartDigits < 6) { |
| char chLower = (char) (css.charAt(pos) | 32); |
| if (('0' <= chLower && chLower <= '9') |
| || ('a' <= chLower && chLower <= 'f')) { |
| sb.append(chLower); |
| ++numStartDigits; |
| ++pos; |
| } else { |
| break; |
| } |
| } |
| if (numStartDigits == 0) { |
| break parse; |
| } |
| boolean hasQmark = false; |
| while (pos < cssLimit && numStartDigits < 6 && css.charAt(pos) == '?') { |
| hasQmark = true; |
| sb.append('?'); |
| ++numStartDigits; |
| ++pos; |
| } |
| if (numStartDigits == 0) { |
| break parse; |
| } |
| if (pos < cssLimit && css.charAt(pos) == '-') { |
| if (!hasQmark) { |
| // Look for end of range. |
| ++pos; |
| sb.append('-'); |
| int numEndDigits = 0; |
| while (pos < cssLimit && numEndDigits < 6) { |
| char chLower = (char) (css.charAt(pos) | 32); |
| if (('0' <= chLower && chLower <= '9') |
| || ('a' <= chLower && chLower <= 'f')) { |
| ++numEndDigits; |
| ++pos; |
| sb.append(chLower); |
| } else { |
| break; |
| } |
| } |
| if (numEndDigits == 0) { |
| // Back up over '-' |
| --pos; |
| sb.append(' '); |
| } |
| } else { |
| sb.append(' '); |
| } |
| } |
| ok = true; |
| } finally { |
| if (!ok) { |
| pos = start; |
| sb.setLength(startOfOutput); |
| } |
| } |
| return ok; |
| } |
| |
| private @Nullable TokenType consumeIdentOrUrlOrFunction() { |
| int bufferStart = sb.length(); |
| int posBefore = pos; |
| consumeIdent(false); |
| if (pos == posBefore) { return null; } |
| boolean parenAfter = pos < cssLimit && css.charAt(pos) == '('; |
| if (sb.length() - bufferStart == 3 |
| && 'u' == (sb.charAt(bufferStart) | 32) |
| && 'r' == (sb.charAt(bufferStart + 1) | 32) |
| && 'l' == (sb.charAt(bufferStart + 2) | 32)) { |
| if (parenAfter && consumeUrlValue()) { |
| sb.setCharAt(bufferStart, 'u'); |
| sb.setCharAt(bufferStart + 1, 'r'); |
| sb.setCharAt(bufferStart + 2, 'l'); |
| return TokenType.URL; |
| } else { |
| sb.setLength(bufferStart); |
| breakOutput(); |
| return TokenType.WHITESPACE; |
| } |
| } else if (parenAfter) { |
| openBracket('('); |
| ++pos; |
| return TokenType.FUNCTION; |
| } else { |
| if (pos + 1 < cssLimit && '.' == css.charAt(pos)) { |
| // Prevent merging of ident and number as in |
| // border:solid.1cm black |
| // when .1 is rewritten to 0.1 becoming |
| // border:solid0.1cm black |
| char next = css.charAt(pos + 1); |
| if ('0' <= next && next <= '9') { |
| sb.append(' '); |
| } |
| } |
| return TokenType.IDENT; |
| } |
| } |
| |
| private boolean consumeUrlValue() { |
| String css = this.css; |
| int cssLimit = this.cssLimit; |
| if (pos == cssLimit || css.charAt(pos) != '(') { return false; } |
| ++pos; |
| // skip space. |
| for (; pos < cssLimit; ++pos) { |
| char ch = css.charAt(pos); |
| if (ch != ' ' && !isLineTerminator(ch)) { break; } |
| } |
| // Find the value. |
| int delim; |
| if (pos < cssLimit) { |
| char ch = pos < cssLimit ? css.charAt(pos) : '\0'; |
| if (ch == '"' || ch == '\'') { |
| delim = ch; |
| ++pos; |
| } else { |
| delim = '\0'; |
| } |
| } else { |
| return false; |
| } |
| sb.append("('"); |
| while (pos < cssLimit) { |
| int decoded = readCodepoint(); |
| if (delim != 0) { |
| if (decoded == delim) { |
| ++pos; |
| break; |
| } |
| } else if (decoded <= ' ' || decoded == ')') { |
| break; |
| } |
| if (decoded == '\\') { |
| decoded = consumeAndDecodeEscapeSequence(); |
| if (decoded < 0) { |
| return false; |
| } |
| } else { |
| ++pos; |
| } |
| // Any character not in the RFC 3986 safe set is %-encoded. |
| if (decoded < URL_SAFE.length && URL_SAFE[decoded]) { |
| sb.appendCodePoint(decoded); |
| } else if (decoded < 0x80) { |
| sb.append('%') |
| .append(HEX_DIGITS[(decoded >>> 4) & 0xf]) |
| .append(HEX_DIGITS[(decoded >>> 0) & 0xf]); |
| } else if (decoded < 0x800) { |
| int octet0 = 0xc0 | ((decoded >>> 6) & 0x1f), |
| octet1 = 0x80 | (decoded & 0x3f); |
| sb.append('%') |
| .append(HEX_DIGITS[(octet0 >>> 4) & 0xf]) |
| .append(HEX_DIGITS[(octet0 >>> 0) & 0xf]) |
| .append('%') |
| .append(HEX_DIGITS[(octet1 >>> 4) & 0xf]) |
| .append(HEX_DIGITS[(octet1 >>> 0) & 0xf]); |
| } else if (decoded < 0x10000) { |
| int octet0 = 0xe0 | ((decoded >>> 12) & 0xf), |
| octet1 = 0x80 | ((decoded >>> 6) & 0x3f), |
| octet2 = 0x80 | (decoded & 0x3f); |
| sb.append('%') |
| .append(HEX_DIGITS[(octet0 >>> 4) & 0xf]) |
| .append(HEX_DIGITS[(octet0 >>> 0) & 0xf]) |
| .append('%') |
| .append(HEX_DIGITS[(octet1 >>> 4) & 0xf]) |
| .append(HEX_DIGITS[(octet1 >>> 0) & 0xf]) |
| .append('%') |
| .append(HEX_DIGITS[(octet2 >>> 4) & 0xf]) |
| .append(HEX_DIGITS[(octet2 >>> 0) & 0xf]); |
| } else { |
| int octet0 = 0xf0 | ((decoded >>> 18) & 0x7), |
| octet1 = 0x80 | ((decoded >>> 12) & 0x3f), |
| octet2 = 0x80 | ((decoded >>> 6) & 0x3f), |
| octet3 = 0x80 | (decoded & 0x3f); |
| sb.append('%') |
| .append(HEX_DIGITS[(octet0 >>> 4) & 0xf]) |
| .append(HEX_DIGITS[(octet0 >>> 0) & 0xf]) |
| .append('%') |
| .append(HEX_DIGITS[(octet1 >>> 4) & 0xf]) |
| .append(HEX_DIGITS[(octet1 >>> 0) & 0xf]) |
| .append('%') |
| .append(HEX_DIGITS[(octet2 >>> 4) & 0xf]) |
| .append(HEX_DIGITS[(octet2 >>> 0) & 0xf]) |
| .append('%') |
| .append(HEX_DIGITS[(octet3 >>> 4) & 0xf]) |
| .append(HEX_DIGITS[(octet3 >>> 0) & 0xf]); |
| } |
| } |
| |
| // skip space. |
| for (; pos < cssLimit; ++pos) { |
| char ch = css.charAt(pos); |
| if (ch != ' ' && !isLineTerminator(ch)) { break; } |
| } |
| if (pos < cssLimit && css.charAt(pos) == ')') { |
| ++pos; |
| } else { |
| // broken-url |
| } |
| sb.append("')"); |
| return true; |
| } |
| |
| /** |
| * Reads the codepoint at pos, leaving pos at the index of the last code |
| * unit. |
| */ |
| private int readCodepoint() { |
| String css = this.css; |
| char ch = css.charAt(pos); |
| if (Character.isHighSurrogate(ch) && pos + 1 < cssLimit) { |
| char next = css.charAt(pos + 1); |
| if (Character.isLowSurrogate(next)) { |
| ++pos; |
| return 0x10000 + (((ch - 0xd800) << 10) | (next - 0xdc00)); |
| } |
| } |
| return ch; |
| } |
| } |
| |
| private static final boolean isIdentPart(int cp) { |
| return cp >= 0x80 |
| ? Character.isDefined(cp) && cp != '\ufeff' |
| : IDENT_PART_ASCII[cp]; |
| } |
| |
| private static final boolean isDecimal(char ch) { |
| return '0' <= ch && ch <= '9'; |
| } |
| |
| private static final boolean[] IDENT_PART_ASCII = new boolean[128]; |
| static { |
| for (int i = '0'; i <= '9'; ++i) { IDENT_PART_ASCII[i] = true; } |
| for (int i = 'A'; i <= 'Z'; ++i) { IDENT_PART_ASCII[i] = true; } |
| for (int i = 'a'; i <= 'z'; ++i) { IDENT_PART_ASCII[i] = true; } |
| IDENT_PART_ASCII['_'] = true; |
| IDENT_PART_ASCII['-'] = true; |
| } |
| |
| private static final int LINE_TERMINATOR_BITMASK = |
| (1 << '\n') | (1 << '\r') | (1 << '\f'); |
| |
| private static boolean isLineTerminator(char ch) { |
| return ch < 0x20 && 0 != (LINE_TERMINATOR_BITMASK & (1 << ch)); |
| } |
| |
| private static int[] expandIfNecessary(int[] arr, int limit, int needed) { |
| int neededLength = limit + needed; |
| int length = arr.length; |
| if (length >= neededLength) { return arr; } |
| int[] newArr = new int[Math.max(16, Math.max(neededLength, length * 2))]; |
| System.arraycopy(arr, 0, newArr, 0, limit); |
| return newArr; |
| } |
| |
| private static int[] truncateOrShare(int[] arr, int limit) { |
| if (limit == 0) { return ZERO_INTS; } |
| if (limit == arr.length) { |
| return arr; |
| } |
| int[] trunc = new int[limit]; |
| System.arraycopy(arr, 0, trunc, 0, limit); |
| return trunc; |
| } |
| |
| private static final int LENGTH_UNIT_TYPE = 0; |
| private static final int ANGLE_UNIT_TYPE = 1; |
| private static final int TIME_UNIT_TYPE = 2; |
| private static final int FREQUENCY_UNIT_TYPE = 3; |
| private static final int RESOLUTION_UNIT_TYPE = 4; |
| |
| /** |
| * See http://dev.w3.org/csswg/css-values/#lengths and |
| * http://dev.w3.org/csswg/css-values/#other-units |
| */ |
| private static final Trie UNIT_TRIE = new Trie( |
| ImmutableMap.<String, Integer>builder() |
| .put("em", LENGTH_UNIT_TYPE) |
| .put("ex", LENGTH_UNIT_TYPE) |
| .put("ch", LENGTH_UNIT_TYPE) // Width of zero character |
| .put("rem", LENGTH_UNIT_TYPE) // Root element font-size |
| .put("vh", LENGTH_UNIT_TYPE) |
| .put("vw", LENGTH_UNIT_TYPE) |
| .put("vmin", LENGTH_UNIT_TYPE) |
| .put("vmax", LENGTH_UNIT_TYPE) |
| .put("px", LENGTH_UNIT_TYPE) |
| .put("mm", LENGTH_UNIT_TYPE) |
| .put("cm", LENGTH_UNIT_TYPE) |
| .put("in", LENGTH_UNIT_TYPE) |
| .put("pt", LENGTH_UNIT_TYPE) |
| .put("pc", LENGTH_UNIT_TYPE) |
| .put("deg", ANGLE_UNIT_TYPE) |
| .put("rad", ANGLE_UNIT_TYPE) |
| .put("grad", ANGLE_UNIT_TYPE) |
| .put("turn", ANGLE_UNIT_TYPE) |
| .put("s", TIME_UNIT_TYPE) |
| .put("ms", TIME_UNIT_TYPE) |
| .put("hz", FREQUENCY_UNIT_TYPE) |
| .put("khz", FREQUENCY_UNIT_TYPE) |
| .put("dpi", RESOLUTION_UNIT_TYPE) |
| .put("dpcm", RESOLUTION_UNIT_TYPE) |
| .put("dppx", RESOLUTION_UNIT_TYPE) |
| .build()); |
| |
| static boolean isWellKnownUnit(CharSequence s, int start, int end) { |
| if (start == end) { return false; } |
| Trie t = UNIT_TRIE; |
| for (int i = start; i < end; ++i) { |
| char ch = s.charAt(i); |
| t = t.lookup('A' <= ch && ch <= 'Z' ? (char) (ch | 32) : ch); |
| if (t == null) { return false; } |
| } |
| return t.isTerminal(); |
| } |
| |
| static boolean isWellKnownUnit(CharSequence s) { |
| return isWellKnownUnit(s, 0, s.length()); |
| } |
| |
| private static final boolean[] URL_SAFE = new boolean[128]; |
| static { |
| // From RFC 3986 |
| // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" |
| for (int i = 'A'; i <= 'Z'; ++i) { URL_SAFE[i] = true; } |
| for (int i = 'a'; i <= 'z'; ++i) { URL_SAFE[i] = true; } |
| for (int i = '0'; i <= '9'; ++i) { URL_SAFE[i] = true; } |
| URL_SAFE['-'] = true; |
| URL_SAFE['.'] = true; |
| URL_SAFE['_'] = true; |
| URL_SAFE['~'] = true; |
| // gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" |
| URL_SAFE[':'] = true; |
| URL_SAFE['/'] = true; |
| URL_SAFE['?'] = true; |
| URL_SAFE['#'] = true; |
| URL_SAFE['['] = true; |
| URL_SAFE[']'] = true; |
| URL_SAFE['@'] = true; |
| // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" |
| // / "*" / "+" / "," / ";" / "=" |
| URL_SAFE['!'] = true; |
| URL_SAFE['$'] = true; |
| URL_SAFE['&'] = true; |
| // Only used in obsolete mark rule and special in unquoted URLs or comment |
| // delimiters. |
| // URL_SAFE['\''] = true; |
| // URL_SAFE['('] = true; |
| // URL_SAFE[')'] = true; |
| // URL_SAFE['*'] = true; |
| URL_SAFE['+'] = true; |
| URL_SAFE[','] = true; |
| URL_SAFE[';'] = true; |
| URL_SAFE['='] = true; |
| // % is used to encode unsafe octets. |
| URL_SAFE['%'] = true; |
| } |
| |
| private static final char[] HEX_DIGITS = { |
| '0', '1', '2', '3', |
| '4', '5', '6', '7', |
| '8', '9', 'a', 'b', |
| 'c', 'd', 'e', 'f' |
| }; |
| } |