| /* |
| * Permission is hereby granted, free of charge, to any person obtaining a copy of |
| * this software and associated documentation files (the "Software"), to deal in |
| * the Software without restriction, including without limitation the rights to |
| * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies |
| * of the Software, and to permit persons to whom the Software is furnished to do |
| * so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included in all |
| * copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| package jdk.nashorn.internal.runtime.regexp.joni; |
| |
| import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsOnOff; |
| import static jdk.nashorn.internal.runtime.regexp.joni.Option.isDontCaptureGroup; |
| import static jdk.nashorn.internal.runtime.regexp.joni.Option.isIgnoreCase; |
| |
| import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode; |
| import jdk.nashorn.internal.runtime.regexp.joni.ast.AnyCharNode; |
| import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode; |
| import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; |
| import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode.CCStateArg; |
| import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode; |
| import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode; |
| import jdk.nashorn.internal.runtime.regexp.joni.ast.Node; |
| import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode; |
| import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode; |
| import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; |
| import jdk.nashorn.internal.runtime.regexp.joni.constants.CCSTATE; |
| import jdk.nashorn.internal.runtime.regexp.joni.constants.CCVALTYPE; |
| import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType; |
| import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType; |
| import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType; |
| import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType; |
| import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException; |
| import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException; |
| import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException; |
| |
| class Parser extends Lexer { |
| |
| protected final Regex regex; |
| protected Node root; |
| |
| protected int returnCode; // return code used by parser methods (they itself return parsed nodes) |
| // this approach will not affect recursive calls |
| |
| protected Parser(final ScanEnvironment env, final char[] chars, final int p, final int end) { |
| super(env, chars, p, end); |
| regex = env.reg; |
| } |
| |
| // onig_parse_make_tree |
| protected final Node parse() { |
| root = parseRegexp(); |
| regex.numMem = env.numMem; |
| return root; |
| } |
| |
| private boolean codeExistCheck(final int code, final boolean ignoreEscaped) { |
| mark(); |
| |
| boolean inEsc = false; |
| while (left()) { |
| if (ignoreEscaped && inEsc) { |
| inEsc = false; |
| } else { |
| fetch(); |
| if (c == code) { |
| restore(); |
| return true; |
| } |
| if (c == syntax.metaCharTable.esc) inEsc = true; |
| } |
| } |
| |
| restore(); |
| return false; |
| } |
| |
| private CClassNode parseCharClass() { |
| fetchTokenInCC(); |
| |
| final boolean neg; |
| if (token.type == TokenType.CHAR && token.getC() == '^' && !token.escaped) { |
| neg = true; |
| fetchTokenInCC(); |
| } else { |
| neg = false; |
| } |
| |
| if (token.type == TokenType.CC_CLOSE) { |
| if (!codeExistCheck(']', true)) { |
| throw new SyntaxException(ERR_EMPTY_CHAR_CLASS); |
| } |
| env.ccEscWarn("]"); |
| token.type = TokenType.CHAR; /* allow []...] */ |
| } |
| |
| CClassNode cc = new CClassNode(); |
| CClassNode prevCC = null; |
| CClassNode workCC = null; |
| |
| final CCStateArg arg = new CCStateArg(); |
| |
| boolean andStart = false; |
| arg.state = CCSTATE.START; |
| |
| while (token.type != TokenType.CC_CLOSE) { |
| boolean fetched = false; |
| |
| switch (token.type) { |
| |
| case CHAR: |
| if (token.getC() > 0xff) { |
| arg.inType = CCVALTYPE.CODE_POINT; |
| } else { |
| arg.inType = CCVALTYPE.SB; // sb_char: |
| } |
| arg.v = token.getC(); |
| arg.vIsRaw = false; |
| parseCharClassValEntry2(cc, arg); // goto val_entry2 |
| break; |
| |
| case RAW_BYTE: |
| arg.v = token.getC(); |
| arg.inType = CCVALTYPE.SB; // raw_single: |
| arg.vIsRaw = true; |
| parseCharClassValEntry2(cc, arg); // goto val_entry2 |
| break; |
| |
| case CODE_POINT: |
| arg.v = token.getCode(); |
| arg.vIsRaw = true; |
| parseCharClassValEntry(cc, arg); // val_entry:, val_entry2 |
| break; |
| |
| case CHAR_TYPE: |
| cc.addCType(token.getPropCType(), token.getPropNot(), env, this); |
| cc.nextStateClass(arg, env); // next_class: |
| break; |
| |
| case CC_RANGE: |
| if (arg.state == CCSTATE.VALUE) { |
| fetchTokenInCC(); |
| fetched = true; |
| if (token.type == TokenType.CC_CLOSE) { /* allow [x-] */ |
| parseCharClassRangeEndVal(cc, arg); // range_end_val:, goto val_entry; |
| break; |
| } else if (token.type == TokenType.CC_AND) { |
| env.ccEscWarn("-"); |
| parseCharClassRangeEndVal(cc, arg); // goto range_end_val |
| break; |
| } |
| arg.state = CCSTATE.RANGE; |
| } else if (arg.state == CCSTATE.START) { |
| arg.v = token.getC(); /* [-xa] is allowed */ |
| arg.vIsRaw = false; |
| fetchTokenInCC(); |
| fetched = true; |
| if (token.type == TokenType.CC_RANGE || andStart) env.ccEscWarn("-"); /* [--x] or [a&&-x] is warned. */ |
| parseCharClassValEntry(cc, arg); // goto val_entry |
| break; |
| } else if (arg.state == CCSTATE.RANGE) { |
| env.ccEscWarn("-"); |
| parseCharClassSbChar(cc, arg); // goto sb_char /* [!--x] is allowed */ |
| break; |
| } else { /* CCS_COMPLETE */ |
| fetchTokenInCC(); |
| fetched = true; |
| if (token.type == TokenType.CC_CLOSE) { /* allow [a-b-] */ |
| parseCharClassRangeEndVal(cc, arg); // goto range_end_val |
| break; |
| } else if (token.type == TokenType.CC_AND) { |
| env.ccEscWarn("-"); |
| parseCharClassRangeEndVal(cc, arg); // goto range_end_val |
| break; |
| } |
| |
| if (syntax.allowDoubleRangeOpInCC()) { |
| env.ccEscWarn("-"); |
| arg.inType = CCVALTYPE.SB; |
| arg.v = '-'; |
| arg.vIsRaw = false; |
| parseCharClassValEntry2(cc, arg); // goto val_entry2 /* [0-9-a] is allowed as [0-9\-a] */ |
| break; |
| } |
| throw new SyntaxException(ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS); |
| } |
| break; |
| |
| case CC_CC_OPEN: /* [ */ |
| final CClassNode acc = parseCharClass(); |
| cc.or(acc); |
| break; |
| |
| case CC_AND: /* && */ |
| if (arg.state == CCSTATE.VALUE) { |
| arg.v = 0; // ??? safe v ? |
| arg.vIsRaw = false; |
| cc.nextStateValue(arg, env); |
| } |
| /* initialize local variables */ |
| andStart = true; |
| arg.state = CCSTATE.START; |
| if (prevCC != null) { |
| prevCC.and(cc); |
| } else { |
| prevCC = cc; |
| if (workCC == null) workCC = new CClassNode(); |
| cc = workCC; |
| } |
| cc.clear(); |
| break; |
| |
| case EOT: |
| throw new SyntaxException(ERR_PREMATURE_END_OF_CHAR_CLASS); |
| |
| default: |
| throw new InternalException(ERR_PARSER_BUG); |
| } // switch |
| |
| if (!fetched) fetchTokenInCC(); |
| |
| } // while |
| |
| if (arg.state == CCSTATE.VALUE) { |
| arg.v = 0; // ??? safe v ? |
| arg.vIsRaw = false; |
| cc.nextStateValue(arg, env); |
| } |
| |
| if (prevCC != null) { |
| prevCC.and(cc); |
| cc = prevCC; |
| } |
| |
| if (neg) { |
| cc.setNot(); |
| } else { |
| cc.clearNot(); |
| } |
| |
| if (cc.isNot() && syntax.notNewlineInNegativeCC()) { |
| if (!cc.isEmpty()) { |
| final int NEW_LINE = 0x0a; |
| if (EncodingHelper.isNewLine(NEW_LINE)) { |
| cc.bs.set(NEW_LINE); |
| } |
| } |
| } |
| |
| return cc; |
| } |
| |
| private void parseCharClassSbChar(final CClassNode cc, final CCStateArg arg) { |
| arg.inType = CCVALTYPE.SB; |
| arg.v = token.getC(); |
| arg.vIsRaw = false; |
| parseCharClassValEntry2(cc, arg); // goto val_entry2 |
| } |
| |
| private void parseCharClassRangeEndVal(final CClassNode cc, final CCStateArg arg) { |
| arg.v = '-'; |
| arg.vIsRaw = false; |
| parseCharClassValEntry(cc, arg); // goto val_entry |
| } |
| |
| private void parseCharClassValEntry(final CClassNode cc, final CCStateArg arg) { |
| arg.inType = arg.v <= 0xff ? CCVALTYPE.SB : CCVALTYPE.CODE_POINT; |
| parseCharClassValEntry2(cc, arg); // val_entry2: |
| } |
| |
| private void parseCharClassValEntry2(final CClassNode cc, final CCStateArg arg) { |
| cc.nextStateValue(arg, env); |
| } |
| |
| private Node parseEnclose(final TokenType term) { |
| Node node = null; |
| |
| if (!left()) { |
| throw new SyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS); |
| } |
| |
| int option = env.option; |
| |
| if (peekIs('?') && syntax.op2QMarkGroupEffect()) { |
| inc(); |
| if (!left()) { |
| throw new SyntaxException(ERR_END_PATTERN_IN_GROUP); |
| } |
| |
| fetch(); |
| switch(c) { |
| case ':': /* (?:...) grouping only */ |
| fetchToken(); // group: |
| node = parseSubExp(term); |
| returnCode = 1; /* group */ |
| return node; |
| case '=': |
| node = new AnchorNode(AnchorType.PREC_READ); |
| break; |
| case '!': /* preceding read */ |
| node = new AnchorNode(AnchorType.PREC_READ_NOT); |
| break; |
| case '>': /* (?>...) stop backtrack */ |
| node = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose |
| break; |
| case '\'': |
| break; |
| case '<': /* look behind (?<=...), (?<!...) */ |
| fetch(); |
| if (c == '=') { |
| node = new AnchorNode(AnchorType.LOOK_BEHIND); |
| } else if (c == '!') { |
| node = new AnchorNode(AnchorType.LOOK_BEHIND_NOT); |
| } else { |
| throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION); |
| } |
| break; |
| case '@': |
| if (syntax.op2AtMarkCaptureHistory()) { |
| final EncloseNode en = new EncloseNode(); // node_new_enclose_memory |
| final int num = env.addMemEntry(); |
| if (num >= BitStatus.BIT_STATUS_BITS_NUM) { |
| throw new ValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY); |
| } |
| en.regNum = num; |
| node = en; |
| } else { |
| throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION); |
| } |
| break; |
| |
| // case 'p': #ifdef USE_POSIXLINE_OPTION |
| case '-': |
| case 'i': |
| case 'm': |
| case 's': |
| case 'x': |
| boolean neg = false; |
| while (true) { |
| switch(c) { |
| case ':': |
| case ')': |
| break; |
| case '-': |
| neg = true; |
| break; |
| case 'x': |
| option = bsOnOff(option, Option.EXTEND, neg); |
| break; |
| case 'i': |
| option = bsOnOff(option, Option.IGNORECASE, neg); |
| break; |
| case 's': |
| if (syntax.op2OptionPerl()) { |
| option = bsOnOff(option, Option.MULTILINE, neg); |
| } else { |
| throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION); |
| } |
| break; |
| case 'm': |
| if (syntax.op2OptionPerl()) { |
| option = bsOnOff(option, Option.SINGLELINE, !neg); |
| } else if (syntax.op2OptionRuby()) { |
| option = bsOnOff(option, Option.MULTILINE, neg); |
| } else { |
| throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION); |
| } |
| break; |
| // case 'p': #ifdef USE_POSIXLINE_OPTION // not defined |
| // option = bsOnOff(option, Option.MULTILINE|Option.SINGLELINE, neg); |
| // break; |
| |
| default: |
| throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION); |
| } // switch |
| |
| if (c == ')') { |
| final EncloseNode en = new EncloseNode(option, 0); // node_new_option |
| node = en; |
| returnCode = 2; /* option only */ |
| return node; |
| } else if (c == ':') { |
| final int prev = env.option; |
| env.option = option; |
| fetchToken(); |
| final Node target = parseSubExp(term); |
| env.option = prev; |
| final EncloseNode en = new EncloseNode(option, 0); // node_new_option |
| en.setTarget(target); |
| node = en; |
| returnCode = 0; |
| return node; |
| } |
| if (!left()) { |
| throw new SyntaxException(ERR_END_PATTERN_IN_GROUP); |
| } |
| fetch(); |
| } // while |
| |
| default: |
| throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION); |
| } // switch |
| |
| } else { |
| if (isDontCaptureGroup(env.option)) { |
| fetchToken(); // goto group |
| node = parseSubExp(term); |
| returnCode = 1; /* group */ |
| return node; |
| } |
| final EncloseNode en = new EncloseNode(); // node_new_enclose_memory |
| final int num = env.addMemEntry(); |
| en.regNum = num; |
| node = en; |
| } |
| |
| fetchToken(); |
| final Node target = parseSubExp(term); |
| |
| if (node.getType() == NodeType.ANCHOR) { |
| final AnchorNode an = (AnchorNode) node; |
| an.setTarget(target); |
| } else { |
| final EncloseNode en = (EncloseNode)node; |
| en.setTarget(target); |
| if (en.type == EncloseType.MEMORY) { |
| /* Don't move this to previous of parse_subexp() */ |
| env.setMemNode(en.regNum, node); |
| } |
| } |
| returnCode = 0; |
| return node; // ?? |
| } |
| |
| private Node parseExp(final TokenType term) { |
| if (token.type == term) return StringNode.EMPTY; // goto end_of_token |
| |
| Node node = null; |
| boolean group = false; |
| |
| switch(token.type) { |
| case ALT: |
| case EOT: |
| return StringNode.EMPTY; // end_of_token:, node_new_empty |
| |
| case SUBEXP_OPEN: |
| node = parseEnclose(TokenType.SUBEXP_CLOSE); |
| if (returnCode == 1) { |
| group = true; |
| } else if (returnCode == 2) { /* option only */ |
| final int prev = env.option; |
| final EncloseNode en = (EncloseNode)node; |
| env.option = en.option; |
| fetchToken(); |
| final Node target = parseSubExp(term); |
| env.option = prev; |
| en.setTarget(target); |
| return node; |
| } |
| break; |
| case SUBEXP_CLOSE: |
| if (!syntax.allowUnmatchedCloseSubexp()) { |
| throw new SyntaxException(ERR_UNMATCHED_CLOSE_PARENTHESIS); |
| } |
| if (token.escaped) { |
| return parseExpTkRawByte(group); // goto tk_raw_byte |
| } else { |
| return parseExpTkByte(group); // goto tk_byte |
| } |
| case STRING: |
| return parseExpTkByte(group); // tk_byte: |
| |
| case RAW_BYTE: |
| return parseExpTkRawByte(group); // tk_raw_byte: |
| case CODE_POINT: |
| final char[] buf = new char[] {(char)token.getCode()}; |
| // #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG ... // setRaw() #else |
| node = new StringNode(buf, 0, 1); |
| break; |
| |
| case CHAR_TYPE: |
| switch(token.getPropCType()) { |
| case CharacterType.D: |
| case CharacterType.S: |
| case CharacterType.W: |
| if (Config.NON_UNICODE_SDW) { |
| final CClassNode cc = new CClassNode(); |
| cc.addCType(token.getPropCType(), false, env, this); |
| if (token.getPropNot()) cc.setNot(); |
| node = cc; |
| } |
| break; |
| |
| case CharacterType.SPACE: |
| case CharacterType.DIGIT: |
| case CharacterType.XDIGIT: |
| // #ifdef USE_SHARED_CCLASS_TABLE ... #endif |
| final CClassNode ccn = new CClassNode(); |
| ccn.addCType(token.getPropCType(), false, env, this); |
| if (token.getPropNot()) ccn.setNot(); |
| node = ccn; |
| break; |
| |
| default: |
| throw new InternalException(ERR_PARSER_BUG); |
| |
| } // inner switch |
| break; |
| |
| case CC_CC_OPEN: |
| final CClassNode cc = parseCharClass(); |
| node = cc; |
| if (isIgnoreCase(env.option)) { |
| final ApplyCaseFoldArg arg = new ApplyCaseFoldArg(env, cc); |
| EncodingHelper.applyAllCaseFold(env.caseFoldFlag, ApplyCaseFold.INSTANCE, arg); |
| |
| if (arg.altRoot != null) { |
| node = ConsAltNode.newAltNode(node, arg.altRoot); |
| } |
| } |
| break; |
| |
| case ANYCHAR: |
| node = new AnyCharNode(); |
| break; |
| |
| case ANYCHAR_ANYTIME: |
| node = new AnyCharNode(); |
| final QuantifierNode qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false); |
| qn.setTarget(node); |
| node = qn; |
| break; |
| |
| case BACKREF: |
| final int backRef = token.getBackrefRef(); |
| node = new BackRefNode(backRef, env); |
| break; |
| |
| case ANCHOR: |
| node = new AnchorNode(token.getAnchor()); // possible bug in oniguruma |
| break; |
| |
| case OP_REPEAT: |
| case INTERVAL: |
| if (syntax.contextIndepRepeatOps()) { |
| if (syntax.contextInvalidRepeatOps()) { |
| throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); |
| } else { |
| node = StringNode.EMPTY; // node_new_empty |
| } |
| } else { |
| return parseExpTkByte(group); // goto tk_byte |
| } |
| break; |
| |
| default: |
| throw new InternalException(ERR_PARSER_BUG); |
| } //switch |
| |
| //targetp = node; |
| |
| fetchToken(); // re_entry: |
| |
| return parseExpRepeat(node, group); // repeat: |
| } |
| |
| private Node parseExpTkByte(final boolean group) { |
| final StringNode node = new StringNode(chars, token.backP, p); // tk_byte: |
| while (true) { |
| fetchToken(); |
| if (token.type != TokenType.STRING) break; |
| |
| if (token.backP == node.end) { |
| node.end = p; // non escaped character, remain shared, just increase shared range |
| } else { |
| node.cat(chars, token.backP, p); // non continuous string stream, need to COW |
| } |
| } |
| // targetp = node; |
| return parseExpRepeat(node, group); // string_end:, goto repeat |
| } |
| |
| private Node parseExpTkRawByte(final boolean group) { |
| // tk_raw_byte: |
| |
| // important: we don't use 0xff mask here neither in the compiler |
| // (in the template string) so we won't have to mask target |
| // strings when comparing against them in the matcher |
| final StringNode node = new StringNode((char)token.getC()); |
| node.setRaw(); |
| |
| fetchToken(); |
| node.clearRaw(); |
| // !goto string_end;! |
| return parseExpRepeat(node, group); |
| } |
| |
| private Node parseExpRepeat(Node target, final boolean group) { |
| while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat: |
| if (target.isInvalidQuantifier()) { |
| throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID); |
| } |
| |
| final QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(), |
| token.getRepeatUpper(), |
| token.type == TokenType.INTERVAL); |
| |
| qtfr.greedy = token.getRepeatGreedy(); |
| final int ret = qtfr.setQuantifier(target, group, env, chars, getBegin(), getEnd()); |
| Node qn = qtfr; |
| |
| if (token.getRepeatPossessive()) { |
| final EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose |
| en.setTarget(qn); |
| qn = en; |
| } |
| |
| if (ret == 0) { |
| target = qn; |
| } else if (ret == 2) { /* split case: /abc+/ */ |
| target = ConsAltNode.newListNode(target, null); |
| final ConsAltNode tmp = ((ConsAltNode)target).setCdr(ConsAltNode.newListNode(qn, null)); |
| |
| fetchToken(); |
| return parseExpRepeatForCar(target, tmp, group); |
| } |
| fetchToken(); // goto re_entry |
| } |
| return target; |
| } |
| |
| private Node parseExpRepeatForCar(final Node top, final ConsAltNode target, final boolean group) { |
| while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat: |
| if (target.car.isInvalidQuantifier()) { |
| throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID); |
| } |
| |
| final QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(), |
| token.getRepeatUpper(), |
| token.type == TokenType.INTERVAL); |
| |
| qtfr.greedy = token.getRepeatGreedy(); |
| final int ret = qtfr.setQuantifier(target.car, group, env, chars, getBegin(), getEnd()); |
| Node qn = qtfr; |
| |
| if (token.getRepeatPossessive()) { |
| final EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose |
| en.setTarget(qn); |
| qn = en; |
| } |
| |
| if (ret == 0) { |
| target.setCar(qn); |
| } else if (ret == 2) { /* split case: /abc+/ */ |
| assert false; |
| } |
| fetchToken(); // goto re_entry |
| } |
| return top; |
| } |
| |
| private Node parseBranch(final TokenType term) { |
| Node node = parseExp(term); |
| |
| if (token.type == TokenType.EOT || token.type == term || token.type == TokenType.ALT) { |
| return node; |
| } else { |
| final ConsAltNode top = ConsAltNode.newListNode(node, null); |
| ConsAltNode t = top; |
| |
| while (token.type != TokenType.EOT && token.type != term && token.type != TokenType.ALT) { |
| node = parseExp(term); |
| if (node.getType() == NodeType.LIST) { |
| t.setCdr((ConsAltNode)node); |
| while (((ConsAltNode)node).cdr != null ) node = ((ConsAltNode)node).cdr; |
| |
| t = ((ConsAltNode)node); |
| } else { |
| t.setCdr(ConsAltNode.newListNode(node, null)); |
| t = t.cdr; |
| } |
| } |
| return top; |
| } |
| } |
| |
| /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */ |
| private Node parseSubExp(final TokenType term) { |
| Node node = parseBranch(term); |
| |
| if (token.type == term) { |
| return node; |
| } else if (token.type == TokenType.ALT) { |
| final ConsAltNode top = ConsAltNode.newAltNode(node, null); |
| ConsAltNode t = top; |
| while (token.type == TokenType.ALT) { |
| fetchToken(); |
| node = parseBranch(term); |
| |
| t.setCdr(ConsAltNode.newAltNode(node, null)); |
| t = t.cdr; |
| } |
| |
| if (token.type != term) parseSubExpError(term); |
| return top; |
| } else { |
| parseSubExpError(term); |
| return null; //not reached |
| } |
| } |
| |
| private void parseSubExpError(final TokenType term) { |
| if (term == TokenType.SUBEXP_CLOSE) { |
| throw new SyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS); |
| } else { |
| throw new InternalException(ERR_PARSER_BUG); |
| } |
| } |
| |
| private Node parseRegexp() { |
| fetchToken(); |
| return parseSubExp(TokenType.EOT); |
| } |
| } |