| /* |
| * Permission is hereby granted, free of charge, to any person obtaining a copy of |
| * this software and associated documentation files (the "Software"), to deal in |
| * the Software without restriction, including without limitation the rights to |
| * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies |
| * of the Software, and to permit persons to whom the Software is furnished to do |
| * so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included in all |
| * copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| package jdk.nashorn.internal.runtime.regexp.joni; |
| |
| import static jdk.nashorn.internal.runtime.regexp.joni.Option.isSingleline; |
| import static jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode.isRepeatInfinite; |
| import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode; |
| import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; |
| import jdk.nashorn.internal.runtime.regexp.joni.constants.MetaChar; |
| import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType; |
| import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType; |
| import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; |
| import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException; |
| import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException; |
| |
| class Lexer extends ScannerSupport { |
/** Scan environment; read here for the syntax, options and back-reference bookkeeping. */
protected final ScanEnvironment env;
/** Cached copy of {@code env.syntax} for fast access. */
protected final Syntax syntax;
/** The current token; each fetch call refills this single instance in place. */
protected final Token token = new Token();

/**
 * Creates a lexer over a pattern buffer.
 *
 * @param env   scan environment; its {@code syntax} is cached in {@link #syntax}
 * @param chars pattern characters, passed through to the superclass constructor
 * @param p     position argument, passed through to the superclass constructor
 * @param end   end argument, passed through to the superclass constructor
 */
protected Lexer(final ScanEnvironment env, final char[] chars, final int p, final int end) {
    super(chars, p, end);
    this.syntax = env.syntax;
    this.env = env;
}
| |
| /** |
| * @return 0: normal {n,m}, 2: fixed {n} |
| * !introduce returnCode here |
| */ |
| private int fetchRangeQuantifier() { |
| mark(); |
| final boolean synAllow = syntax.allowInvalidInterval(); |
| |
| if (!left()) { |
| if (synAllow) { |
| return 1; /* "....{" : OK! */ |
| } |
| throw new SyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE); |
| } |
| |
| if (!synAllow) { |
| c = peek(); |
| if (c == ')' || c == '(' || c == '|') { |
| throw new SyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE); |
| } |
| } |
| |
| int low = scanUnsignedNumber(); |
| if (low < 0) { |
| throw new SyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); |
| } |
| if (low > Config.MAX_REPEAT_NUM) { |
| throw new SyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); |
| } |
| |
| boolean nonLow = false; |
| if (p == _p) { /* can't read low */ |
| if (syntax.allowIntervalLowAbbrev()) { |
| low = 0; |
| nonLow = true; |
| } else { |
| return invalidRangeQuantifier(synAllow); |
| } |
| } |
| |
| if (!left()) { |
| return invalidRangeQuantifier(synAllow); |
| } |
| |
| fetch(); |
| int up; |
| int ret = 0; |
| if (c == ',') { |
| final int prev = p; // ??? last |
| up = scanUnsignedNumber(); |
| if (up < 0) { |
| throw new ValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); |
| } |
| if (up > Config.MAX_REPEAT_NUM) { |
| throw new ValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE); |
| } |
| |
| if (p == prev) { |
| if (nonLow) { |
| return invalidRangeQuantifier(synAllow); |
| } |
| up = QuantifierNode.REPEAT_INFINITE; /* {n,} : {n,infinite} */ |
| } |
| } else { |
| if (nonLow) { |
| return invalidRangeQuantifier(synAllow); |
| } |
| unfetch(); |
| up = low; /* {n} : exact n times */ |
| ret = 2; /* fixed */ |
| } |
| |
| if (!left()) { |
| return invalidRangeQuantifier(synAllow); |
| } |
| fetch(); |
| |
| if (syntax.opEscBraceInterval()) { |
| if (c != syntax.metaCharTable.esc) { |
| return invalidRangeQuantifier(synAllow); |
| } |
| fetch(); |
| } |
| |
| if (c != '}') { |
| return invalidRangeQuantifier(synAllow); |
| } |
| |
| if (!isRepeatInfinite(up) && low > up) { |
| throw new ValueException(ERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE); |
| } |
| |
| token.type = TokenType.INTERVAL; |
| token.setRepeatLower(low); |
| token.setRepeatUpper(up); |
| |
| return ret; /* 0: normal {n,m}, 2: fixed {n} */ |
| } |
| |
| private int invalidRangeQuantifier(final boolean synAllow) { |
| if (synAllow) { |
| restore(); |
| return 1; |
| } |
| throw new SyntaxException(ERR_INVALID_REPEAT_RANGE_PATTERN); |
| } |
| |
| @SuppressWarnings("fallthrough") |
| /* \M-, \C-, \c, or \... */ |
| private int fetchEscapedValue() { |
| if (!left()) { |
| throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE); |
| } |
| fetch(); |
| |
| switch(c) { |
| |
| case 'M': |
| if (syntax.op2EscCapitalMBarMeta()) { |
| if (!left()) { |
| throw new SyntaxException(ERR_END_PATTERN_AT_META); |
| } |
| fetch(); |
| if (c != '-') { |
| throw new SyntaxException(ERR_META_CODE_SYNTAX); |
| } |
| if (!left()) { |
| throw new SyntaxException(ERR_END_PATTERN_AT_META); |
| } |
| fetch(); |
| if (c == syntax.metaCharTable.esc) { |
| c = fetchEscapedValue(); |
| } |
| c = ((c & 0xff) | 0x80); |
| } else { |
| fetchEscapedValueBackSlash(); |
| } |
| break; |
| |
| case 'C': |
| if (syntax.op2EscCapitalCBarControl()) { |
| if (!left()) { |
| throw new SyntaxException(ERR_END_PATTERN_AT_CONTROL); |
| } |
| fetch(); |
| if (c != '-') { |
| throw new SyntaxException(ERR_CONTROL_CODE_SYNTAX); |
| } |
| fetchEscapedValueControl(); |
| } else { |
| fetchEscapedValueBackSlash(); |
| } |
| break; |
| |
| case 'c': |
| if (syntax.opEscCControl()) { |
| fetchEscapedValueControl(); |
| } |
| /* fall through */ |
| |
| default: |
| fetchEscapedValueBackSlash(); |
| } // switch |
| |
| return c; // ??? |
| } |
| |
| private void fetchEscapedValueBackSlash() { |
| c = env.convertBackslashValue(c); |
| } |
| |
| private void fetchEscapedValueControl() { |
| if (!left()) { |
| throw new SyntaxException(ERR_END_PATTERN_AT_CONTROL); |
| } |
| fetch(); |
| if (c == '?') { |
| c = 0177; |
| } else { |
| if (c == syntax.metaCharTable.esc) { |
| c = fetchEscapedValue(); |
| } |
| c &= 0x9f; |
| } |
| } |
| |
| private void fetchTokenInCCFor_charType(final boolean flag, final int type) { |
| token.type = TokenType.CHAR_TYPE; |
| token.setPropCType(type); |
| token.setPropNot(flag); |
| } |
| |
| private void fetchTokenInCCFor_x() { |
| if (!left()) { |
| return; |
| } |
| final int last = p; |
| |
| if (peekIs('{') && syntax.opEscXBraceHex8()) { |
| inc(); |
| final int num = scanUnsignedHexadecimalNumber(8); |
| if (num < 0) { |
| throw new ValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE); |
| } |
| if (left()) { |
| final int c2 = peek(); |
| if (EncodingHelper.isXDigit(c2)) { |
| throw new ValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE); |
| } |
| } |
| |
| if (p > last + 1 && left() && peekIs('}')) { |
| inc(); |
| token.type = TokenType.CODE_POINT; |
| token.setCode(num); |
| } else { |
| /* can't read nothing or invalid format */ |
| p = last; |
| } |
| } else if (syntax.opEscXHex2()) { |
| int num = scanUnsignedHexadecimalNumber(2); |
| if (num < 0) { |
| throw new ValueException(ERR_TOO_BIG_NUMBER); |
| } |
| if (p == last) { /* can't read nothing. */ |
| num = 0; /* but, it's not error */ |
| } |
| token.type = TokenType.RAW_BYTE; |
| token.setC(num); |
| } |
| } |
| |
| private void fetchTokenInCCFor_u() { |
| if (!left()) { |
| return; |
| } |
| final int last = p; |
| |
| if (syntax.op2EscUHex4()) { |
| int num = scanUnsignedHexadecimalNumber(4); |
| if (num < 0) { |
| throw new ValueException(ERR_TOO_BIG_NUMBER); |
| } |
| if (p == last) { /* can't read nothing. */ |
| num = 0; /* but, it's not error */ |
| } |
| token.type = TokenType.CODE_POINT; |
| token.setCode(num); |
| } |
| } |
| |
| private void fetchTokenInCCFor_digit() { |
| if (syntax.opEscOctal3()) { |
| unfetch(); |
| final int last = p; |
| int num = scanUnsignedOctalNumber(3); |
| if (num < 0) { |
| throw new ValueException(ERR_TOO_BIG_NUMBER); |
| } |
| if (p == last) { /* can't read nothing. */ |
| num = 0; /* but, it's not error */ |
| } |
| token.type = TokenType.RAW_BYTE; |
| token.setC(num); |
| } |
| } |
| |
| private void fetchTokenInCCFor_and() { |
| if (syntax.op2CClassSetOp() && left() && peekIs('&')) { |
| inc(); |
| token.type = TokenType.CC_AND; |
| } |
| } |
| |
| protected final TokenType fetchTokenInCC() { |
| if (!left()) { |
| token.type = TokenType.EOT; |
| return token.type; |
| } |
| |
| fetch(); |
| token.type = TokenType.CHAR; |
| token.setC(c); |
| token.escaped = false; |
| |
| if (c == ']') { |
| token.type = TokenType.CC_CLOSE; |
| } else if (c == '-') { |
| token.type = TokenType.CC_RANGE; |
| } else if (c == syntax.metaCharTable.esc) { |
| if (!syntax.backSlashEscapeInCC()) { |
| return token.type; |
| } |
| if (!left()) { |
| throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE); |
| } |
| fetch(); |
| token.escaped = true; |
| token.setC(c); |
| |
| switch (c) { |
| case 'w': |
| fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD); |
| break; |
| case 'W': |
| fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD); |
| break; |
| case 'd': |
| fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT); |
| break; |
| case 'D': |
| fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT); |
| break; |
| case 's': |
| fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE); |
| break; |
| case 'S': |
| fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE); |
| break; |
| case 'h': |
| if (syntax.op2EscHXDigit()) { |
| fetchTokenInCCFor_charType(false, CharacterType.XDIGIT); |
| } |
| break; |
| case 'H': |
| if (syntax.op2EscHXDigit()) { |
| fetchTokenInCCFor_charType(true, CharacterType.XDIGIT); |
| } |
| break; |
| case 'x': |
| fetchTokenInCCFor_x(); |
| break; |
| case 'u': |
| fetchTokenInCCFor_u(); |
| break; |
| case '0': |
| case '1': |
| case '2': |
| case '3': |
| case '4': |
| case '5': |
| case '6': |
| case '7': |
| fetchTokenInCCFor_digit(); |
| break; |
| |
| default: |
| unfetch(); |
| final int num = fetchEscapedValue(); |
| if (token.getC() != num) { |
| token.setCode(num); |
| token.type = TokenType.CODE_POINT; |
| } |
| break; |
| } // switch |
| |
| } else if (c == '&') { |
| fetchTokenInCCFor_and(); |
| } |
| return token.type; |
| } |
| |
| private void fetchTokenFor_repeat(final int lower, final int upper) { |
| token.type = TokenType.OP_REPEAT; |
| token.setRepeatLower(lower); |
| token.setRepeatUpper(upper); |
| greedyCheck(); |
| } |
| |
| private void fetchTokenFor_openBrace() { |
| switch (fetchRangeQuantifier()) { |
| case 0: |
| greedyCheck(); |
| break; |
| case 2: |
| if (syntax.fixedIntervalIsGreedyOnly()) { |
| possessiveCheck(); |
| } else { |
| greedyCheck(); |
| } |
| break; |
| default: /* 1 : normal char */ |
| } // inner switch |
| } |
| |
| private void fetchTokenFor_anchor(final int subType) { |
| token.type = TokenType.ANCHOR; |
| token.setAnchor(subType); |
| } |
| |
| private void fetchTokenFor_xBrace() { |
| if (!left()) { |
| return; |
| } |
| |
| final int last = p; |
| if (peekIs('{') && syntax.opEscXBraceHex8()) { |
| inc(); |
| final int num = scanUnsignedHexadecimalNumber(8); |
| if (num < 0) { |
| throw new ValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE); |
| } |
| if (left()) { |
| if (EncodingHelper.isXDigit(peek())) { |
| throw new ValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE); |
| } |
| } |
| |
| if (p > last + 1 && left() && peekIs('}')) { |
| inc(); |
| token.type = TokenType.CODE_POINT; |
| token.setCode(num); |
| } else { |
| /* can't read nothing or invalid format */ |
| p = last; |
| } |
| } else if (syntax.opEscXHex2()) { |
| int num = scanUnsignedHexadecimalNumber(2); |
| if (num < 0) { |
| throw new ValueException(ERR_TOO_BIG_NUMBER); |
| } |
| if (p == last) { /* can't read nothing. */ |
| num = 0; /* but, it's not error */ |
| } |
| token.type = TokenType.RAW_BYTE; |
| token.setC(num); |
| } |
| } |
| |
| private void fetchTokenFor_uHex() { |
| if (!left()) { |
| return; |
| } |
| final int last = p; |
| |
| if (syntax.op2EscUHex4()) { |
| int num = scanUnsignedHexadecimalNumber(4); |
| if (num < 0) { |
| throw new ValueException(ERR_TOO_BIG_NUMBER); |
| } |
| if (p == last) { /* can't read nothing. */ |
| num = 0; /* but, it's not error */ |
| } |
| token.type = TokenType.CODE_POINT; |
| token.setCode(num); |
| } |
| } |
| |
| private void fetchTokenFor_digit() { |
| unfetch(); |
| final int last = p; |
| final int num = scanUnsignedNumber(); |
| if (num < 0 || num > Config.MAX_BACKREF_NUM) { // goto skip_backref |
| } else if (syntax.opDecimalBackref() && (num <= env.numMem || num <= 9)) { /* This spec. from GNU regex */ |
| if (syntax.strictCheckBackref()) { |
| if (num > env.numMem || env.memNodes == null || env.memNodes[num] == null) { |
| throw new ValueException(ERR_INVALID_BACKREF); |
| } |
| } |
| token.type = TokenType.BACKREF; |
| token.setBackrefRef(num); |
| return; |
| } |
| |
| if (c == '8' || c == '9') { /* normal char */ // skip_backref: |
| p = last; |
| inc(); |
| return; |
| } |
| p = last; |
| |
| fetchTokenFor_zero(); /* fall through */ |
| } |
| |
| private void fetchTokenFor_zero() { |
| if (syntax.opEscOctal3()) { |
| final int last = p; |
| int num = scanUnsignedOctalNumber(c == '0' ? 2 : 3); |
| if (num < 0) { |
| throw new ValueException(ERR_TOO_BIG_NUMBER); |
| } |
| if (p == last) { /* can't read nothing. */ |
| num = 0; /* but, it's not error */ |
| } |
| token.type = TokenType.RAW_BYTE; |
| token.setC(num); |
| } else if (c != '0') { |
| inc(); |
| } |
| } |
| |
| private void fetchTokenFor_metaChars() { |
| if (c == syntax.metaCharTable.anyChar) { |
| token.type = TokenType.ANYCHAR; |
| } else if (c == syntax.metaCharTable.anyTime) { |
| fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE); |
| } else if (c == syntax.metaCharTable.zeroOrOneTime) { |
| fetchTokenFor_repeat(0, 1); |
| } else if (c == syntax.metaCharTable.oneOrMoreTime) { |
| fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE); |
| } else if (c == syntax.metaCharTable.anyCharAnyTime) { |
| token.type = TokenType.ANYCHAR_ANYTIME; |
| // goto out |
| } |
| } |
| |
| protected final TokenType fetchToken() { |
| // mark(); // out |
| start: |
| while(true) { |
| if (!left()) { |
| token.type = TokenType.EOT; |
| return token.type; |
| } |
| |
| token.type = TokenType.STRING; |
| token.backP = p; |
| |
| fetch(); |
| |
| if (c == syntax.metaCharTable.esc && !syntax.op2IneffectiveEscape()) { // IS_MC_ESC_CODE(code, syn) |
| if (!left()) { |
| throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE); |
| } |
| |
| token.backP = p; |
| fetch(); |
| |
| token.setC(c); |
| token.escaped = true; |
| switch(c) { |
| |
| case '*': |
| if (syntax.opEscAsteriskZeroInf()) { |
| fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE); |
| } |
| break; |
| case '+': |
| if (syntax.opEscPlusOneInf()) { |
| fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE); |
| } |
| break; |
| case '?': |
| if (syntax.opEscQMarkZeroOne()) { |
| fetchTokenFor_repeat(0, 1); |
| } |
| break; |
| case '{': |
| if (syntax.opEscBraceInterval()) { |
| fetchTokenFor_openBrace(); |
| } |
| break; |
| case '|': |
| if (syntax.opEscVBarAlt()) { |
| token.type = TokenType.ALT; |
| } |
| break; |
| case '(': |
| if (syntax.opEscLParenSubexp()) { |
| token.type = TokenType.SUBEXP_OPEN; |
| } |
| break; |
| case ')': |
| if (syntax.opEscLParenSubexp()) { |
| token.type = TokenType.SUBEXP_CLOSE; |
| } |
| break; |
| case 'w': |
| if (syntax.opEscWWord()) { |
| fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD); |
| } |
| break; |
| case 'W': |
| if (syntax.opEscWWord()) { |
| fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD); |
| } |
| break; |
| case 'b': |
| if (syntax.opEscBWordBound()) { |
| fetchTokenFor_anchor(AnchorType.WORD_BOUND); |
| } |
| break; |
| case 'B': |
| if (syntax.opEscBWordBound()) { |
| fetchTokenFor_anchor(AnchorType.NOT_WORD_BOUND); |
| } |
| break; |
| case '<': |
| if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) { |
| fetchTokenFor_anchor(AnchorType.WORD_BEGIN); |
| } |
| break; |
| case '>': |
| if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) { |
| fetchTokenFor_anchor(AnchorType.WORD_END); |
| } |
| break; |
| case 's': |
| if (syntax.opEscSWhiteSpace()) { |
| fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE); |
| } |
| break; |
| case 'S': |
| if (syntax.opEscSWhiteSpace()) { |
| fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE); |
| } |
| break; |
| case 'd': |
| if (syntax.opEscDDigit()) { |
| fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT); |
| } |
| break; |
| case 'D': |
| if (syntax.opEscDDigit()) { |
| fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT); |
| } |
| break; |
| case 'h': |
| if (syntax.op2EscHXDigit()) { |
| fetchTokenInCCFor_charType(false, CharacterType.XDIGIT); |
| } |
| break; |
| case 'H': |
| if (syntax.op2EscHXDigit()) { |
| fetchTokenInCCFor_charType(true, CharacterType.XDIGIT); |
| } |
| break; |
| case 'A': |
| if (syntax.opEscAZBufAnchor()) { |
| fetchTokenFor_anchor(AnchorType.BEGIN_BUF); |
| } |
| break; |
| case 'Z': |
| if (syntax.opEscAZBufAnchor()) { |
| fetchTokenFor_anchor(AnchorType.SEMI_END_BUF); |
| } |
| break; |
| case 'z': |
| if (syntax.opEscAZBufAnchor()) { |
| fetchTokenFor_anchor(AnchorType.END_BUF); |
| } |
| break; |
| case 'G': |
| if (syntax.opEscCapitalGBeginAnchor()) { |
| fetchTokenFor_anchor(AnchorType.BEGIN_POSITION); |
| } |
| break; |
| case '`': |
| if (syntax.op2EscGnuBufAnchor()) { |
| fetchTokenFor_anchor(AnchorType.BEGIN_BUF); |
| } |
| break; |
| case '\'': |
| if (syntax.op2EscGnuBufAnchor()) { |
| fetchTokenFor_anchor(AnchorType.END_BUF); |
| } |
| break; |
| case 'x': |
| fetchTokenFor_xBrace(); |
| break; |
| case 'u': |
| fetchTokenFor_uHex(); |
| break; |
| case '1': |
| case '2': |
| case '3': |
| case '4': |
| case '5': |
| case '6': |
| case '7': |
| case '8': |
| case '9': |
| fetchTokenFor_digit(); |
| break; |
| case '0': |
| fetchTokenFor_zero(); |
| break; |
| |
| default: |
| unfetch(); |
| final int num = fetchEscapedValue(); |
| |
| /* set_raw: */ |
| if (token.getC() != num) { |
| token.type = TokenType.CODE_POINT; |
| token.setCode(num); |
| } else { /* string */ |
| p = token.backP + 1; |
| } |
| break; |
| |
| } // switch (c) |
| |
| } else { |
| token.setC(c); |
| token.escaped = false; |
| |
| if (Config.USE_VARIABLE_META_CHARS && (c != MetaChar.INEFFECTIVE_META_CHAR && syntax.opVariableMetaCharacters())) { |
| fetchTokenFor_metaChars(); |
| break; |
| } |
| |
| { |
| switch(c) { |
| case '.': |
| if (syntax.opDotAnyChar()) { |
| token.type = TokenType.ANYCHAR; |
| } |
| break; |
| case '*': |
| if (syntax.opAsteriskZeroInf()) { |
| fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE); |
| } |
| break; |
| case '+': |
| if (syntax.opPlusOneInf()) { |
| fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE); |
| } |
| break; |
| case '?': |
| if (syntax.opQMarkZeroOne()) { |
| fetchTokenFor_repeat(0, 1); |
| } |
| break; |
| case '{': |
| if (syntax.opBraceInterval()) { |
| fetchTokenFor_openBrace(); |
| } |
| break; |
| case '|': |
| if (syntax.opVBarAlt()) { |
| token.type = TokenType.ALT; |
| } |
| break; |
| |
| case '(': |
| if (peekIs('?') && syntax.op2QMarkGroupEffect()) { |
| inc(); |
| if (peekIs('#')) { |
| fetch(); |
| while (true) { |
| if (!left()) { |
| throw new SyntaxException(ERR_END_PATTERN_IN_GROUP); |
| } |
| fetch(); |
| if (c == syntax.metaCharTable.esc) { |
| if (left()) { |
| fetch(); |
| } |
| } else { |
| if (c == ')') { |
| break; |
| } |
| } |
| } |
| continue start; // goto start |
| } |
| unfetch(); |
| } |
| |
| if (syntax.opLParenSubexp()) { |
| token.type = TokenType.SUBEXP_OPEN; |
| } |
| break; |
| case ')': |
| if (syntax.opLParenSubexp()) { |
| token.type = TokenType.SUBEXP_CLOSE; |
| } |
| break; |
| case '^': |
| if (syntax.opLineAnchor()) { |
| fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.BEGIN_BUF : AnchorType.BEGIN_LINE); |
| } |
| break; |
| case '$': |
| if (syntax.opLineAnchor()) { |
| fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.END_BUF : AnchorType.END_LINE); |
| } |
| break; |
| case '[': |
| if (syntax.opBracketCC()) { |
| token.type = TokenType.CC_CC_OPEN; |
| } |
| break; |
| case ']': |
| //if (*src > env->pattern) /* /].../ is allowed. */ |
| //CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]"); |
| break; |
| case '#': |
| if (Option.isExtend(env.option)) { |
| while (left()) { |
| fetch(); |
| if (EncodingHelper.isNewLine(c)) { |
| break; |
| } |
| } |
| continue start; // goto start |
| } |
| break; |
| |
| case ' ': |
| case '\t': |
| case '\n': |
| case '\r': |
| case '\f': |
| if (Option.isExtend(env.option)) |
| { |
| continue start; // goto start |
| } |
| break; |
| |
| default: // string |
| break; |
| |
| } // switch |
| } |
| } |
| |
| break; |
| } // while |
| return token.type; |
| } |
| |
| private void greedyCheck() { |
| if (left() && peekIs('?') && syntax.opQMarkNonGreedy()) { |
| |
| fetch(); |
| |
| token.setRepeatGreedy(false); |
| token.setRepeatPossessive(false); |
| } else { |
| possessiveCheck(); |
| } |
| } |
| |
| private void possessiveCheck() { |
| if (left() && peekIs('+') && |
| (syntax.op2PlusPossessiveRepeat() && token.type != TokenType.INTERVAL || |
| syntax.op2PlusPossessiveInterval() && token.type == TokenType.INTERVAL)) { |
| |
| fetch(); |
| |
| token.setRepeatGreedy(true); |
| token.setRepeatPossessive(true); |
| } else { |
| token.setRepeatGreedy(true); |
| token.setRepeatPossessive(false); |
| } |
| } |
| |
| protected final void syntaxWarn(final String message, final char ch) { |
| syntaxWarn(message.replace("<%n>", Character.toString(ch))); |
| } |
| |
| protected final void syntaxWarn(final String message) { |
| if (Config.USE_WARN) { |
| env.reg.warnings.warn(message + ": /" + new String(chars, getBegin(), getEnd()) + "/"); |
| } |
| } |
| } |