| /* |
| * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. Oracle designates this |
| * particular file as subject to the "Classpath" exception as provided |
| * by Oracle in the LICENSE file that accompanied this code. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| */ |
| |
| package jdk.nashorn.internal.runtime.regexp; |
| |
| import java.util.HashMap; |
| import java.util.Iterator; |
| import java.util.LinkedList; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.regex.PatternSyntaxException; |
| import jdk.nashorn.internal.parser.Lexer; |
| import jdk.nashorn.internal.parser.Scanner; |
| import jdk.nashorn.internal.runtime.BitVector; |
| |
| /** |
| * Scan a JavaScript regexp, converting to Java regex if necessary. |
| * |
| */ |
| final class RegExpScanner extends Scanner { |
| |
| /** |
| * String builder used to rewrite the pattern for the currently used regexp factory. |
| */ |
| private final StringBuilder sb; |
| |
| /** Expected token table */ |
| private final Map<Character, Integer> expected = new HashMap<>(); |
| |
| /** Capturing parenthesis that have been found so far. */ |
| private final List<Capture> caps = new LinkedList<>(); |
| |
| /** Forward references to capturing parenthesis to be resolved later.*/ |
| private final LinkedList<Integer> forwardReferences = new LinkedList<>(); |
| |
| /** Current level of zero-width negative lookahead assertions. */ |
| private int negLookaheadLevel; |
| |
| /** Sequential id of current top-level zero-width negative lookahead assertion. */ |
| private int negLookaheadGroup; |
| |
| /** Are we currently inside a character class? */ |
| private boolean inCharClass = false; |
| |
| /** Are we currently inside a negated character class? */ |
| private boolean inNegativeClass = false; |
| |
| private static final String NON_IDENT_ESCAPES = "$^*+(){}[]|\\.?-"; |
| |
| private static class Capture { |
| /** Zero-width negative lookaheads enclosing the capture. */ |
| private final int negLookaheadLevel; |
| /** Sequential id of top-level negative lookaheads containing the capture. */ |
| private final int negLookaheadGroup; |
| |
| Capture(final int negLookaheadGroup, final int negLookaheadLevel) { |
| this.negLookaheadGroup = negLookaheadGroup; |
| this.negLookaheadLevel = negLookaheadLevel; |
| } |
| |
| /** |
| * Returns true if this Capture can be referenced from the position specified by the |
| * group and level parameters. This is the case if either the group is not within |
| * a negative lookahead, or the position of the referrer is in the same negative lookahead. |
| * |
| * @param group current negative lookahead group |
| * @param level current negative lokahead level |
| * @return true if this capture group can be referenced from the given position |
| */ |
| boolean canBeReferencedFrom(final int group, final int level) { |
| return this.negLookaheadLevel == 0 || (group == this.negLookaheadGroup && level >= this.negLookaheadLevel); |
| } |
| |
| } |
| |
| /** |
| * Constructor |
| * @param string the JavaScript regexp to parse |
| */ |
| private RegExpScanner(final String string) { |
| super(string); |
| sb = new StringBuilder(limit); |
| reset(0); |
| expected.put(']', 0); |
| expected.put('}', 0); |
| } |
| |
| private void processForwardReferences() { |
| |
| final Iterator<Integer> iterator = forwardReferences.descendingIterator(); |
| while (iterator.hasNext()) { |
| final int pos = iterator.next(); |
| final int num = iterator.next(); |
| if (num > caps.size()) { |
| // Non-existing backreference. If the number begins with a valid octal convert it to |
| // Unicode escape and append the rest to a literal character sequence. |
| final StringBuilder buffer = new StringBuilder(); |
| octalOrLiteral(Integer.toString(num), buffer); |
| sb.insert(pos, buffer); |
| } |
| } |
| |
| forwardReferences.clear(); |
| } |
| |
| /** |
| * Scan a JavaScript regexp string returning a Java safe regex string. |
| * |
| * @param string |
| * JavaScript regexp string. |
| * @return Java safe regex string. |
| */ |
| public static RegExpScanner scan(final String string) { |
| final RegExpScanner scanner = new RegExpScanner(string); |
| |
| try { |
| scanner.disjunction(); |
| } catch (final Exception e) { |
| throw new PatternSyntaxException(e.getMessage(), string, scanner.position); |
| } |
| |
| scanner.processForwardReferences(); |
| |
| // Throw syntax error unless we parsed the entire JavaScript regexp without syntax errors |
| if (scanner.position != string.length()) { |
| final String p = scanner.getStringBuilder().toString(); |
| throw new PatternSyntaxException(string, p, p.length() + 1); |
| } |
| |
| return scanner; |
| } |
| |
| final StringBuilder getStringBuilder() { |
| return sb; |
| } |
| |
| String getJavaPattern() { |
| return sb.toString(); |
| } |
| |
| BitVector getGroupsInNegativeLookahead() { |
| BitVector vec = null; |
| for (int i = 0; i < caps.size(); i++) { |
| final Capture cap = caps.get(i); |
| if (cap.negLookaheadLevel > 0) { |
| if (vec == null) { |
| vec = new BitVector(caps.size() + 1); |
| } |
| vec.set(i + 1); |
| } |
| } |
| return vec; |
| } |
| |
| /** |
| * Commit n characters to the builder and to a given token |
| * @param n Number of characters. |
| * @return Committed token |
| */ |
| private boolean commit(final int n) { |
| switch (n) { |
| case 1: |
| sb.append(ch0); |
| skip(1); |
| break; |
| case 2: |
| sb.append(ch0); |
| sb.append(ch1); |
| skip(2); |
| break; |
| case 3: |
| sb.append(ch0); |
| sb.append(ch1); |
| sb.append(ch2); |
| skip(3); |
| break; |
| default: |
| assert false : "Should not reach here"; |
| } |
| |
| return true; |
| } |
| |
| /** |
| * Restart the buffers back at an earlier position. |
| * |
| * @param startIn |
| * Position in the input stream. |
| * @param startOut |
| * Position in the output stream. |
| */ |
| private void restart(final int startIn, final int startOut) { |
| reset(startIn); |
| sb.setLength(startOut); |
| } |
| |
| private void push(final char ch) { |
| expected.put(ch, expected.get(ch) + 1); |
| } |
| |
| private void pop(final char ch) { |
| expected.put(ch, Math.min(0, expected.get(ch) - 1)); |
| } |
| |
| /* |
| * Recursive descent tokenizer starts below. |
| */ |
| |
| /* |
| * Disjunction :: |
| * Alternative |
| * Alternative | Disjunction |
| */ |
| private void disjunction() { |
| while (true) { |
| alternative(); |
| |
| if (ch0 == '|') { |
| commit(1); |
| } else { |
| break; |
| } |
| } |
| } |
| |
| /* |
| * Alternative :: |
| * [empty] |
| * Alternative Term |
| */ |
| private void alternative() { |
| while (term()) { |
| // do nothing |
| } |
| } |
| |
| /* |
| * Term :: |
| * Assertion |
| * Atom |
| * Atom Quantifier |
| */ |
| private boolean term() { |
| final int startIn = position; |
| final int startOut = sb.length(); |
| |
| if (assertion()) { |
| return true; |
| } |
| |
| if (atom()) { |
| quantifier(); |
| return true; |
| } |
| |
| restart(startIn, startOut); |
| return false; |
| } |
| |
| /* |
| * Assertion :: |
| * ^ |
| * $ |
| * \b |
| * \B |
| * ( ? = Disjunction ) |
| * ( ? ! Disjunction ) |
| */ |
| private boolean assertion() { |
| final int startIn = position; |
| final int startOut = sb.length(); |
| |
| switch (ch0) { |
| case '^': |
| case '$': |
| return commit(1); |
| |
| case '\\': |
| if (ch1 == 'b' || ch1 == 'B') { |
| return commit(2); |
| } |
| break; |
| |
| case '(': |
| if (ch1 != '?') { |
| break; |
| } |
| if (ch2 != '=' && ch2 != '!') { |
| break; |
| } |
| final boolean isNegativeLookahead = (ch2 == '!'); |
| commit(3); |
| |
| if (isNegativeLookahead) { |
| if (negLookaheadLevel == 0) { |
| negLookaheadGroup++; |
| } |
| negLookaheadLevel++; |
| } |
| disjunction(); |
| if (isNegativeLookahead) { |
| negLookaheadLevel--; |
| } |
| |
| if (ch0 == ')') { |
| return commit(1); |
| } |
| break; |
| |
| default: |
| break; |
| } |
| |
| restart(startIn, startOut); |
| return false; |
| } |
| |
| /* |
| * Quantifier :: |
| * QuantifierPrefix |
| * QuantifierPrefix ? |
| */ |
| private boolean quantifier() { |
| if (quantifierPrefix()) { |
| if (ch0 == '?') { |
| commit(1); |
| } |
| return true; |
| } |
| return false; |
| } |
| |
| /* |
| * QuantifierPrefix :: |
| * * |
| * + |
| * ? |
| * { DecimalDigits } |
| * { DecimalDigits , } |
| * { DecimalDigits , DecimalDigits } |
| */ |
| private boolean quantifierPrefix() { |
| final int startIn = position; |
| final int startOut = sb.length(); |
| |
| switch (ch0) { |
| case '*': |
| case '+': |
| case '?': |
| return commit(1); |
| |
| case '{': |
| commit(1); |
| |
| if (!decimalDigits()) { |
| break; // not a quantifier - back out |
| } |
| push('}'); |
| |
| if (ch0 == ',') { |
| commit(1); |
| decimalDigits(); |
| } |
| |
| if (ch0 == '}') { |
| pop('}'); |
| commit(1); |
| } else { |
| // Bad quantifier should be rejected but is accepted by all major engines |
| restart(startIn, startOut); |
| return false; |
| } |
| |
| return true; |
| |
| default: |
| break; |
| } |
| |
| restart(startIn, startOut); |
| return false; |
| } |
| |
| /* |
| * Atom :: |
| * PatternCharacter |
| * . |
| * \ AtomEscape |
| * CharacterClass |
| * ( Disjunction ) |
| * ( ? : Disjunction ) |
| * |
| */ |
| private boolean atom() { |
| final int startIn = position; |
| final int startOut = sb.length(); |
| |
| if (patternCharacter()) { |
| return true; |
| } |
| |
| if (ch0 == '.') { |
| return commit(1); |
| } |
| |
| if (ch0 == '\\') { |
| commit(1); |
| |
| if (atomEscape()) { |
| return true; |
| } |
| } |
| |
| if (characterClass()) { |
| return true; |
| } |
| |
| if (ch0 == '(') { |
| commit(1); |
| if (ch0 == '?' && ch1 == ':') { |
| commit(2); |
| } else { |
| caps.add(new Capture(negLookaheadGroup, negLookaheadLevel)); |
| } |
| |
| disjunction(); |
| |
| if (ch0 == ')') { |
| commit(1); |
| return true; |
| } |
| } |
| |
| restart(startIn, startOut); |
| return false; |
| } |
| |
| /* |
| * PatternCharacter :: |
| * SourceCharacter but not any of: ^$\.*+?()[]{}| |
| */ |
| @SuppressWarnings("fallthrough") |
| private boolean patternCharacter() { |
| if (atEOF()) { |
| return false; |
| } |
| |
| switch (ch0) { |
| case '^': |
| case '$': |
| case '\\': |
| case '.': |
| case '*': |
| case '+': |
| case '?': |
| case '(': |
| case ')': |
| case '[': |
| case '|': |
| return false; |
| |
| case '}': |
| case ']': |
| final int n = expected.get(ch0); |
| if (n != 0) { |
| return false; |
| } |
| |
| case '{': |
| // if not a valid quantifier escape curly brace to match itself |
| // this ensures compatibility with other JS implementations |
| if (!quantifierPrefix()) { |
| sb.append('\\'); |
| return commit(1); |
| } |
| return false; |
| |
| default: |
| return commit(1); // SOURCECHARACTER |
| } |
| } |
| |
| /* |
| * AtomEscape :: |
| * DecimalEscape |
| * CharacterEscape |
| * CharacterClassEscape |
| */ |
| private boolean atomEscape() { |
| // Note that contrary to ES 5.1 spec we put identityEscape() last because it acts as a catch-all |
| return decimalEscape() || characterClassEscape() || characterEscape() || identityEscape(); |
| } |
| |
| /* |
| * CharacterEscape :: |
| * ControlEscape |
| * c ControlLetter |
| * HexEscapeSequence |
| * UnicodeEscapeSequence |
| * IdentityEscape |
| */ |
| private boolean characterEscape() { |
| final int startIn = position; |
| final int startOut = sb.length(); |
| |
| if (controlEscape()) { |
| return true; |
| } |
| |
| if (ch0 == 'c') { |
| commit(1); |
| if (controlLetter()) { |
| return true; |
| } |
| restart(startIn, startOut); |
| } |
| |
| if (hexEscapeSequence() || unicodeEscapeSequence()) { |
| return true; |
| } |
| |
| restart(startIn, startOut); |
| return false; |
| } |
| |
| private boolean scanEscapeSequence(final char leader, final int length) { |
| final int startIn = position; |
| final int startOut = sb.length(); |
| |
| if (ch0 != leader) { |
| return false; |
| } |
| |
| commit(1); |
| for (int i = 0; i < length; i++) { |
| final char ch0l = Character.toLowerCase(ch0); |
| if ((ch0l >= 'a' && ch0l <= 'f') || isDecimalDigit(ch0)) { |
| commit(1); |
| } else { |
| restart(startIn, startOut); |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| private boolean hexEscapeSequence() { |
| return scanEscapeSequence('x', 2); |
| } |
| |
| private boolean unicodeEscapeSequence() { |
| return scanEscapeSequence('u', 4); |
| } |
| |
| /* |
| * ControlEscape :: |
| * one of fnrtv |
| */ |
| private boolean controlEscape() { |
| switch (ch0) { |
| case 'f': |
| case 'n': |
| case 'r': |
| case 't': |
| case 'v': |
| return commit(1); |
| |
| default: |
| return false; |
| } |
| } |
| |
| /* |
| * ControlLetter :: |
| * one of abcdefghijklmnopqrstuvwxyz |
| * ABCDEFGHIJKLMNOPQRSTUVWXYZ |
| */ |
| private boolean controlLetter() { |
| // To match other engines we also accept '0'..'9' and '_' as control letters inside a character class. |
| if ((ch0 >= 'A' && ch0 <= 'Z') || (ch0 >= 'a' && ch0 <= 'z') |
| || (inCharClass && (isDecimalDigit(ch0) || ch0 == '_'))) { |
| // for some reason java regexps don't like control characters on the |
| // form "\\ca".match([string with ascii 1 at char0]). Translating |
| // them to unicode does it though. |
| sb.setLength(sb.length() - 1); |
| unicode(ch0 % 32, sb); |
| skip(1); |
| return true; |
| } |
| return false; |
| } |
| |
| /* |
| * IdentityEscape :: |
| * SourceCharacter but not IdentifierPart |
| * <ZWJ> (200c) |
| * <ZWNJ> (200d) |
| */ |
| private boolean identityEscape() { |
| if (atEOF()) { |
| throw new RuntimeException("\\ at end of pattern"); // will be converted to PatternSyntaxException |
| } |
| // ES 5.1 A.7 requires "not IdentifierPart" here but all major engines accept any character here. |
| if (ch0 == 'c') { |
| sb.append('\\'); // Treat invalid \c control sequence as \\c |
| } else if (NON_IDENT_ESCAPES.indexOf(ch0) == -1) { |
| sb.setLength(sb.length() - 1); |
| } |
| return commit(1); |
| } |
| |
| /* |
| * DecimalEscape :: |
| * DecimalIntegerLiteral [lookahead DecimalDigit] |
| */ |
| private boolean decimalEscape() { |
| final int startIn = position; |
| final int startOut = sb.length(); |
| |
| if (ch0 == '0' && !isOctalDigit(ch1)) { |
| skip(1); |
| // DecimalEscape :: 0. If i is zero, return the EscapeValue consisting of a <NUL> character (Unicodevalue0000); |
| sb.append("\u0000"); |
| return true; |
| } |
| |
| if (isDecimalDigit(ch0)) { |
| |
| if (ch0 == '0') { |
| // We know this is an octal escape. |
| if (inCharClass) { |
| // Convert octal escape to unicode escape if inside character class. |
| int octalValue = 0; |
| while (isOctalDigit(ch0)) { |
| octalValue = octalValue * 8 + ch0 - '0'; |
| skip(1); |
| } |
| |
| unicode(octalValue, sb); |
| |
| } else { |
| // Copy decimal escape as-is |
| decimalDigits(); |
| } |
| } else { |
| // This should be a backreference, but could also be an octal escape or even a literal string. |
| int decimalValue = 0; |
| while (isDecimalDigit(ch0)) { |
| decimalValue = decimalValue * 10 + ch0 - '0'; |
| skip(1); |
| } |
| |
| if (inCharClass) { |
| // No backreferences in character classes. Encode as unicode escape or literal char sequence |
| sb.setLength(sb.length() - 1); |
| octalOrLiteral(Integer.toString(decimalValue), sb); |
| |
| } else if (decimalValue <= caps.size()) { |
| // Captures inside a negative lookahead are undefined when referenced from the outside. |
| final Capture capture = caps.get(decimalValue - 1); |
| if (!capture.canBeReferencedFrom(negLookaheadGroup, negLookaheadLevel)) { |
| // Outside reference to capture in negative lookahead, omit from output buffer. |
| sb.setLength(sb.length() - 1); |
| } else { |
| // Append backreference to output buffer. |
| sb.append(decimalValue); |
| } |
| } else { |
| // Forward references to a capture group are always undefined so we can omit it from the output buffer. |
| // However, if the target capture does not exist, we need to rewrite the reference as hex escape |
| // or literal string, so register the reference for later processing. |
| sb.setLength(sb.length() - 1); |
| forwardReferences.add(decimalValue); |
| forwardReferences.add(sb.length()); |
| } |
| |
| } |
| return true; |
| } |
| |
| restart(startIn, startOut); |
| return false; |
| } |
| |
| /* |
| * CharacterClassEscape :: |
| * one of dDsSwW |
| */ |
| private boolean characterClassEscape() { |
| switch (ch0) { |
| // java.util.regex requires translation of \s and \S to explicit character list |
| case 's': |
| if (RegExpFactory.usesJavaUtilRegex()) { |
| sb.setLength(sb.length() - 1); |
| // No nested class required if we already are inside a character class |
| if (inCharClass) { |
| sb.append(Lexer.getWhitespaceRegExp()); |
| } else { |
| sb.append('[').append(Lexer.getWhitespaceRegExp()).append(']'); |
| } |
| skip(1); |
| return true; |
| } |
| return commit(1); |
| case 'S': |
| if (RegExpFactory.usesJavaUtilRegex()) { |
| sb.setLength(sb.length() - 1); |
| // In negative class we must use intersection to get double negation ("not anything else than space") |
| sb.append(inNegativeClass ? "&&[" : "[^").append(Lexer.getWhitespaceRegExp()).append(']'); |
| skip(1); |
| return true; |
| } |
| return commit(1); |
| case 'd': |
| case 'D': |
| case 'w': |
| case 'W': |
| return commit(1); |
| |
| default: |
| return false; |
| } |
| } |
| |
| /* |
| * CharacterClass :: |
| * [ [lookahead {^}] ClassRanges ] |
| * [ ^ ClassRanges ] |
| */ |
| private boolean characterClass() { |
| final int startIn = position; |
| final int startOut = sb.length(); |
| |
| if (ch0 == '[') { |
| try { |
| inCharClass = true; |
| push(']'); |
| commit(1); |
| |
| if (ch0 == '^') { |
| inNegativeClass = true; |
| commit(1); |
| } |
| |
| if (classRanges() && ch0 == ']') { |
| pop(']'); |
| commit(1); |
| |
| // Substitute empty character classes [] and [^] that never or always match |
| if (position == startIn + 2) { |
| sb.setLength(sb.length() - 1); |
| sb.append("^\\s\\S]"); |
| } else if (position == startIn + 3 && inNegativeClass) { |
| sb.setLength(sb.length() - 2); |
| sb.append("\\s\\S]"); |
| } |
| |
| return true; |
| } |
| } finally { |
| inCharClass = false; // no nested character classes in JavaScript |
| inNegativeClass = false; |
| } |
| } |
| |
| restart(startIn, startOut); |
| return false; |
| } |
| |
| /* |
| * ClassRanges :: |
| * [empty] |
| * NonemptyClassRanges |
| */ |
| private boolean classRanges() { |
| nonemptyClassRanges(); |
| return true; |
| } |
| |
| /* |
| * NonemptyClassRanges :: |
| * ClassAtom |
| * ClassAtom NonemptyClassRangesNoDash |
| * ClassAtom - ClassAtom ClassRanges |
| */ |
| private boolean nonemptyClassRanges() { |
| final int startIn = position; |
| final int startOut = sb.length(); |
| |
| if (classAtom()) { |
| |
| if (ch0 == '-') { |
| commit(1); |
| |
| if (classAtom() && classRanges()) { |
| return true; |
| } |
| } |
| |
| nonemptyClassRangesNoDash(); |
| |
| return true; |
| } |
| |
| restart(startIn, startOut); |
| return false; |
| } |
| |
| /* |
| * NonemptyClassRangesNoDash :: |
| * ClassAtom |
| * ClassAtomNoDash NonemptyClassRangesNoDash |
| * ClassAtomNoDash - ClassAtom ClassRanges |
| */ |
| private boolean nonemptyClassRangesNoDash() { |
| final int startIn = position; |
| final int startOut = sb.length(); |
| |
| if (classAtomNoDash()) { |
| |
| // need to check dash first, as for e.g. [a-b|c-d] will otherwise parse - as an atom |
| if (ch0 == '-') { |
| commit(1); |
| |
| if (classAtom() && classRanges()) { |
| return true; |
| } |
| //fallthru |
| } |
| |
| nonemptyClassRangesNoDash(); |
| return true; // still a class atom |
| } |
| |
| if (classAtom()) { |
| return true; |
| } |
| |
| restart(startIn, startOut); |
| return false; |
| } |
| |
| /* |
| * ClassAtom : - ClassAtomNoDash |
| */ |
| private boolean classAtom() { |
| |
| if (ch0 == '-') { |
| return commit(1); |
| } |
| |
| return classAtomNoDash(); |
| } |
| |
| /* |
| * ClassAtomNoDash :: |
| * SourceCharacter but not one of \ or ] or - |
| * \ ClassEscape |
| */ |
| private boolean classAtomNoDash() { |
| if (atEOF()) { |
| return false; |
| } |
| final int startIn = position; |
| final int startOut = sb.length(); |
| |
| switch (ch0) { |
| case ']': |
| case '-': |
| return false; |
| |
| case '[': |
| // unescaped left square bracket - add escape |
| sb.append('\\'); |
| return commit(1); |
| |
| case '\\': |
| commit(1); |
| if (classEscape()) { |
| return true; |
| } |
| |
| restart(startIn, startOut); |
| return false; |
| |
| default: |
| return commit(1); |
| } |
| } |
| |
| /* |
| * ClassEscape :: |
| * DecimalEscape |
| * b |
| * CharacterEscape |
| * CharacterClassEscape |
| */ |
| private boolean classEscape() { |
| |
| if (decimalEscape()) { |
| return true; |
| } |
| |
| if (ch0 == 'b') { |
| sb.setLength(sb.length() - 1); |
| sb.append('\b'); |
| skip(1); |
| return true; |
| } |
| |
| // Note that contrary to ES 5.1 spec we put identityEscape() last because it acts as a catch-all |
| return characterEscape() || characterClassEscape() || identityEscape(); |
| } |
| |
| /* |
| * DecimalDigits |
| */ |
| private boolean decimalDigits() { |
| if (!isDecimalDigit(ch0)) { |
| return false; |
| } |
| |
| while (isDecimalDigit(ch0)) { |
| commit(1); |
| } |
| |
| return true; |
| } |
| |
| private static void unicode(final int value, final StringBuilder buffer) { |
| final String hex = Integer.toHexString(value); |
| buffer.append('u'); |
| for (int i = 0; i < 4 - hex.length(); i++) { |
| buffer.append('0'); |
| } |
| buffer.append(hex); |
| } |
| |
| // Convert what would have been a backreference into a unicode escape, or a number literal, or both. |
| private static void octalOrLiteral(final String numberLiteral, final StringBuilder buffer) { |
| final int length = numberLiteral.length(); |
| int octalValue = 0; |
| int pos = 0; |
| // Maximum value for octal escape is 0377 (255) so we stop the loop at 32 |
| while (pos < length && octalValue < 0x20) { |
| final char ch = numberLiteral.charAt(pos); |
| if (isOctalDigit(ch)) { |
| octalValue = octalValue * 8 + ch - '0'; |
| } else { |
| break; |
| } |
| pos++; |
| } |
| if (octalValue > 0) { |
| buffer.append('\\'); |
| unicode(octalValue, buffer); |
| buffer.append(numberLiteral.substring(pos)); |
| } else { |
| buffer.append(numberLiteral); |
| } |
| } |
| |
| private static boolean isOctalDigit(final char ch) { |
| return ch >= '0' && ch <= '7'; |
| } |
| |
| private static boolean isDecimalDigit(final char ch) { |
| return ch >= '0' && ch <= '9'; |
| } |
| } |