| /* |
| * Permission is hereby granted, free of charge, to any person obtaining a copy of |
| * this software and associated documentation files (the "Software"), to deal in |
| * the Software without restriction, including without limitation the rights to |
| * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies |
| * of the Software, and to permit persons to whom the Software is furnished to do |
| * so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included in all |
| * copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| package jdk.nashorn.internal.runtime.regexp.joni; |
| |
| import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAll; |
| import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt; |
| import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsOnAt; |
| import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindCondition; |
| import static jdk.nashorn.internal.runtime.regexp.joni.Option.isIgnoreCase; |
| import static jdk.nashorn.internal.runtime.regexp.joni.Option.isMultiline; |
| import static jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode.newAltNode; |
| import static jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode.isRepeatInfinite; |
| import java.util.HashSet; |
| import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode; |
| import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode; |
| import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; |
| import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode; |
| import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode; |
| import jdk.nashorn.internal.runtime.regexp.joni.ast.Node; |
| import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode; |
| import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode; |
| import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; |
| import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType; |
| import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType; |
| import jdk.nashorn.internal.runtime.regexp.joni.constants.StackPopLevel; |
| import jdk.nashorn.internal.runtime.regexp.joni.constants.TargetInfo; |
| import jdk.nashorn.internal.runtime.regexp.joni.encoding.ObjPtr; |
| import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException; |
| import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException; |
| import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException; |
| |
| final class Analyser extends Parser { |
| |
| protected Analyser(final ScanEnvironment env, final char[] chars, final int p, final int end) { |
| super(env, chars, p, end); |
| } |
| |
| @SuppressWarnings("unused") |
| protected final void compile() { |
| if (Config.DEBUG) { |
| Config.log.println(new String(chars, getBegin(), getEnd())); |
| } |
| |
| reset(); |
| |
| regex.numMem = 0; |
| regex.numRepeat = 0; |
| regex.numNullCheck = 0; |
| //regex.repeatRangeAlloc = 0; |
| regex.repeatRangeLo = null; |
| regex.repeatRangeHi = null; |
| |
| parse(); |
| |
| if (Config.DEBUG_PARSE_TREE_RAW && Config.DEBUG_PARSE_TREE) { |
| Config.log.println("<RAW TREE>"); |
| Config.log.println(root + "\n"); |
| } |
| |
| root = setupTree(root, 0); |
| if (Config.DEBUG_PARSE_TREE) { |
| if (Config.DEBUG_PARSE_TREE_RAW) { |
| Config.log.println("<TREE>"); |
| } |
| root.verifyTree(new HashSet<Node>(), env.reg.warnings); |
| Config.log.println(root + "\n"); |
| } |
| |
| regex.captureHistory = env.captureHistory; |
| regex.btMemStart = env.btMemStart; |
| regex.btMemEnd = env.btMemEnd; |
| |
| if (isFindCondition(regex.options)) { |
| regex.btMemEnd = bsAll(); |
| } else { |
| regex.btMemEnd = env.btMemEnd; |
| regex.btMemEnd |= regex.captureHistory; |
| } |
| |
| regex.clearOptimizeInfo(); |
| |
| if (!Config.DONT_OPTIMIZE) { |
| setOptimizedInfoFromTree(root); |
| } |
| |
| env.memNodes = null; |
| |
| if (regex.numRepeat != 0 || regex.btMemEnd != 0) { |
| regex.stackPopLevel = StackPopLevel.ALL; |
| } else { |
| if (regex.btMemStart != 0) { |
| regex.stackPopLevel = StackPopLevel.MEM_START; |
| } else { |
| regex.stackPopLevel = StackPopLevel.FREE; |
| } |
| } |
| |
| if (Config.DEBUG_COMPILE) { |
| Config.log.println("stack used: " + regex.stackNeeded); |
| if (Config.USE_STRING_TEMPLATES) { |
| Config.log.print("templates: " + regex.templateNum + "\n"); |
| } |
| Config.log.println(new ByteCodePrinter(regex).byteCodeListToString()); |
| |
| } // DEBUG_COMPILE |
| } |
| |
| private void swap(final Node a, final Node b) { |
| a.swap(b); |
| |
| if (root == b) { |
| root = a; |
| } else if (root == a) { |
| root = b; |
| } |
| } |
| |
| // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK |
| private int quantifiersMemoryInfo(final Node node) { |
| int info = 0; |
| |
| switch(node.getType()) { |
| case NodeType.LIST: |
| case NodeType.ALT: |
| ConsAltNode can = (ConsAltNode)node; |
| do { |
| final int v = quantifiersMemoryInfo(can.car); |
| if (v > info) { |
| info = v; |
| } |
| } while ((can = can.cdr) != null); |
| break; |
| |
| case NodeType.QTFR: |
| final QuantifierNode qn = (QuantifierNode)node; |
| if (qn.upper != 0) { |
| info = quantifiersMemoryInfo(qn.target); |
| } |
| break; |
| |
| case NodeType.ENCLOSE: |
| final EncloseNode en = (EncloseNode)node; |
| switch (en.type) { |
| case EncloseType.MEMORY: |
| return TargetInfo.IS_EMPTY_MEM; |
| |
| case EncloseType.OPTION: |
| case EncloseNode.STOP_BACKTRACK: |
| info = quantifiersMemoryInfo(en.target); |
| break; |
| |
| default: |
| break; |
| } // inner switch |
| break; |
| |
| case NodeType.BREF: |
| case NodeType.STR: |
| case NodeType.CTYPE: |
| case NodeType.CCLASS: |
| case NodeType.CANY: |
| case NodeType.ANCHOR: |
| default: |
| break; |
| } // switch |
| |
| return info; |
| } |
| |
| private int getMinMatchLength(final Node node) { |
| int min = 0; |
| |
| switch (node.getType()) { |
| case NodeType.BREF: |
| final BackRefNode br = (BackRefNode)node; |
| if (br.isRecursion()) { |
| break; |
| } |
| |
| if (br.backRef > env.numMem) { |
| throw new ValueException(ERR_INVALID_BACKREF); |
| } |
| min = getMinMatchLength(env.memNodes[br.backRef]); |
| |
| break; |
| |
| case NodeType.LIST: |
| ConsAltNode can = (ConsAltNode)node; |
| do { |
| min += getMinMatchLength(can.car); |
| } while ((can = can.cdr) != null); |
| break; |
| |
| case NodeType.ALT: |
| ConsAltNode y = (ConsAltNode)node; |
| do { |
| final Node x = y.car; |
| final int tmin = getMinMatchLength(x); |
| if (y == node) { |
| min = tmin; |
| } else if (min > tmin) { |
| min = tmin; |
| } |
| } while ((y = y.cdr) != null); |
| break; |
| |
| case NodeType.STR: |
| min = ((StringNode)node).length(); |
| break; |
| |
| case NodeType.CTYPE: |
| min = 1; |
| break; |
| |
| case NodeType.CCLASS: |
| case NodeType.CANY: |
| min = 1; |
| break; |
| |
| case NodeType.QTFR: |
| final QuantifierNode qn = (QuantifierNode)node; |
| if (qn.lower > 0) { |
| min = getMinMatchLength(qn.target); |
| min = MinMaxLen.distanceMultiply(min, qn.lower); |
| } |
| break; |
| |
| case NodeType.ENCLOSE: |
| final EncloseNode en = (EncloseNode)node; |
| switch (en.type) { |
| case EncloseType.MEMORY: |
| if (en.isMinFixed()) { |
| min = en.minLength; |
| } else { |
| min = getMinMatchLength(en.target); |
| en.minLength = min; |
| en.setMinFixed(); |
| } |
| break; |
| |
| case EncloseType.OPTION: |
| case EncloseType.STOP_BACKTRACK: |
| min = getMinMatchLength(en.target); |
| break; |
| |
| default: |
| break; |
| } // inner switch |
| break; |
| |
| case NodeType.ANCHOR: |
| default: |
| break; |
| } // switch |
| |
| return min; |
| } |
| |
| private int getMaxMatchLength(final Node node) { |
| int max = 0; |
| |
| switch (node.getType()) { |
| case NodeType.LIST: |
| ConsAltNode ln = (ConsAltNode)node; |
| do { |
| final int tmax = getMaxMatchLength(ln.car); |
| max = MinMaxLen.distanceAdd(max, tmax); |
| } while ((ln = ln.cdr) != null); |
| break; |
| |
| case NodeType.ALT: |
| ConsAltNode an = (ConsAltNode)node; |
| do { |
| final int tmax = getMaxMatchLength(an.car); |
| if (max < tmax) { |
| max = tmax; |
| } |
| } while ((an = an.cdr) != null); |
| break; |
| |
| case NodeType.STR: |
| max = ((StringNode)node).length(); |
| break; |
| |
| case NodeType.CTYPE: |
| max = 1; |
| break; |
| |
| case NodeType.CCLASS: |
| case NodeType.CANY: |
| max = 1; |
| break; |
| |
| case NodeType.BREF: |
| final BackRefNode br = (BackRefNode)node; |
| if (br.isRecursion()) { |
| max = MinMaxLen.INFINITE_DISTANCE; |
| break; |
| } |
| |
| if (br.backRef > env.numMem) { |
| throw new ValueException(ERR_INVALID_BACKREF); |
| } |
| final int tmax = getMaxMatchLength(env.memNodes[br.backRef]); |
| if (max < tmax) { |
| max = tmax; |
| } |
| break; |
| |
| case NodeType.QTFR: |
| final QuantifierNode qn = (QuantifierNode)node; |
| if (qn.upper != 0) { |
| max = getMaxMatchLength(qn.target); |
| if (max != 0) { |
| if (!isRepeatInfinite(qn.upper)) { |
| max = MinMaxLen.distanceMultiply(max, qn.upper); |
| } else { |
| max = MinMaxLen.INFINITE_DISTANCE; |
| } |
| } |
| } |
| break; |
| |
| case NodeType.ENCLOSE: |
| final EncloseNode en = (EncloseNode)node; |
| switch (en.type) { |
| case EncloseType.MEMORY: |
| if (en.isMaxFixed()) { |
| max = en.maxLength; |
| } else { |
| max = getMaxMatchLength(en.target); |
| en.maxLength = max; |
| en.setMaxFixed(); |
| } |
| break; |
| |
| case EncloseType.OPTION: |
| case EncloseType.STOP_BACKTRACK: |
| max = getMaxMatchLength(en.target); |
| break; |
| |
| default: |
| break; |
| } // inner switch |
| break; |
| |
| case NodeType.ANCHOR: |
| default: |
| break; |
| } // switch |
| |
| return max; |
| } |
| |
| private static final int GET_CHAR_LEN_VARLEN = -1; |
| private static final int GET_CHAR_LEN_TOP_ALT_VARLEN = -2; |
| protected final int getCharLengthTree(final Node node) { |
| return getCharLengthTree(node, 0); |
| } |
| |
| private int getCharLengthTree(final Node node, final int levelp) { |
| final int level = levelp + 1; |
| |
| int len = 0; |
| returnCode = 0; |
| |
| switch(node.getType()) { |
| case NodeType.LIST: |
| ConsAltNode ln = (ConsAltNode)node; |
| do { |
| final int tlen = getCharLengthTree(ln.car, level); |
| if (returnCode == 0) { |
| len = MinMaxLen.distanceAdd(len, tlen); |
| } |
| } while (returnCode == 0 && (ln = ln.cdr) != null); |
| break; |
| |
| case NodeType.ALT: |
| ConsAltNode an = (ConsAltNode)node; |
| boolean varLen = false; |
| |
| int tlen = getCharLengthTree(an.car, level); |
| while (returnCode == 0 && (an = an.cdr) != null) { |
| final int tlen2 = getCharLengthTree(an.car, level); |
| if (returnCode == 0) { |
| if (tlen != tlen2) { |
| varLen = true; |
| } |
| } |
| } |
| |
| if (returnCode == 0) { |
| if (varLen) { |
| if (level == 1) { |
| returnCode = GET_CHAR_LEN_TOP_ALT_VARLEN; |
| } else { |
| returnCode = GET_CHAR_LEN_VARLEN; |
| } |
| } else { |
| len = tlen; |
| } |
| } |
| break; |
| |
| case NodeType.STR: |
| final StringNode sn = (StringNode)node; |
| len = sn.length(); |
| break; |
| |
| case NodeType.QTFR: |
| final QuantifierNode qn = (QuantifierNode)node; |
| if (qn.lower == qn.upper) { |
| tlen = getCharLengthTree(qn.target, level); |
| if (returnCode == 0) { |
| len = MinMaxLen.distanceMultiply(tlen, qn.lower); |
| } |
| } else { |
| returnCode = GET_CHAR_LEN_VARLEN; |
| } |
| break; |
| |
| case NodeType.CTYPE: |
| case NodeType.CCLASS: |
| case NodeType.CANY: |
| len = 1; |
| break; |
| |
| case NodeType.ENCLOSE: |
| final EncloseNode en = (EncloseNode)node; |
| switch(en.type) { |
| case EncloseType.MEMORY: |
| if (en.isCLenFixed()) { |
| len = en.charLength; |
| } else { |
| len = getCharLengthTree(en.target, level); |
| if (returnCode == 0) { |
| en.charLength = len; |
| en.setCLenFixed(); |
| } |
| } |
| break; |
| |
| case EncloseType.OPTION: |
| case EncloseType.STOP_BACKTRACK: |
| len = getCharLengthTree(en.target, level); |
| break; |
| |
| default: |
| break; |
| } // inner switch |
| break; |
| |
| case NodeType.ANCHOR: |
| break; |
| |
| default: |
| returnCode = GET_CHAR_LEN_VARLEN; |
| } // switch |
| return len; |
| } |
| |
| /* x is not included y ==> 1 : 0 */ |
| private static boolean isNotIncluded(final Node xn, final Node yn) { |
| Node x = xn; |
| Node y = yn; |
| Node tmp; |
| |
| // !retry:! |
| retry: while(true) { |
| |
| final int yType = y.getType(); |
| |
| switch(x.getType()) { |
| case NodeType.CTYPE: |
| switch(yType) { |
| |
| case NodeType.CCLASS: |
| // !swap:! |
| tmp = x; |
| x = y; |
| y = tmp; |
| // !goto retry;! |
| continue retry; |
| |
| case NodeType.STR: |
| // !goto swap;! |
| tmp = x; |
| x = y; |
| y = tmp; |
| continue retry; |
| |
| default: |
| break; |
| } // inner switch |
| break; |
| |
| case NodeType.CCLASS: |
| final CClassNode xc = (CClassNode)x; |
| |
| switch(yType) { |
| |
| case NodeType.CCLASS: |
| final CClassNode yc = (CClassNode)y; |
| |
| for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) { |
| boolean v = xc.bs.at(i); |
| if ((v && !xc.isNot()) || (!v && xc.isNot())) { |
| v = yc.bs.at(i); |
| if ((v && !yc.isNot()) || (!v && yc.isNot())) { |
| return false; |
| } |
| } |
| } |
| if ((xc.mbuf == null && !xc.isNot()) || yc.mbuf == null && !yc.isNot()) { |
| return true; |
| } |
| return false; |
| // break; not reached |
| |
| case NodeType.STR: |
| // !goto swap;! |
| tmp = x; |
| x = y; |
| y = tmp; |
| continue retry; |
| |
| default: |
| break; |
| |
| } // inner switch |
| break; // case NodeType.CCLASS |
| |
| case NodeType.STR: |
| final StringNode xs = (StringNode)x; |
| if (xs.length() == 0) { |
| break; |
| } |
| |
| switch (yType) { |
| |
| case NodeType.CCLASS: |
| final CClassNode cc = (CClassNode)y; |
| final int code = xs.chars[xs.p]; |
| return !cc.isCodeInCC(code); |
| |
| case NodeType.STR: |
| final StringNode ys = (StringNode)y; |
| int len = xs.length(); |
| if (len > ys.length()) { |
| len = ys.length(); |
| } |
| if (xs.isAmbig() || ys.isAmbig()) { |
| /* tiny version */ |
| return false; |
| } |
| for (int i=0, pt=ys.p, q=xs.p; i<len; i++, pt++, q++) { |
| if (ys.chars[pt] != xs.chars[q]) { |
| return true; |
| } |
| } |
| break; |
| |
| default: |
| break; |
| } // inner switch |
| |
| break; // case NodeType.STR |
| default: |
| break; |
| |
| } // switch |
| |
| break; |
| } // retry: while |
| return false; |
| } |
| |
| private Node getHeadValueNode(final Node node, final boolean exact) { |
| Node n = null; |
| |
| switch(node.getType()) { |
| case NodeType.BREF: |
| case NodeType.ALT: |
| case NodeType.CANY: |
| break; |
| |
| case NodeType.CTYPE: |
| case NodeType.CCLASS: |
| if (!exact) { |
| n = node; |
| } |
| break; |
| |
| case NodeType.LIST: |
| n = getHeadValueNode(((ConsAltNode)node).car, exact); |
| break; |
| |
| case NodeType.STR: |
| final StringNode sn = (StringNode)node; |
| if (sn.end <= sn.p) |
| { |
| break; // ??? |
| } |
| |
| if (exact && !sn.isRaw() && isIgnoreCase(regex.options)){ |
| // nothing |
| } else { |
| n = node; |
| } |
| break; |
| |
| case NodeType.QTFR: |
| final QuantifierNode qn = (QuantifierNode)node; |
| if (qn.lower > 0) { |
| if (qn.headExact != null) { |
| n = qn.headExact; |
| } else { |
| n = getHeadValueNode(qn.target, exact); |
| } |
| } |
| break; |
| |
| case NodeType.ENCLOSE: |
| final EncloseNode en = (EncloseNode)node; |
| |
| switch (en.type) { |
| case EncloseType.OPTION: |
| final int options = regex.options; |
| regex.options = en.option; |
| n = getHeadValueNode(en.target, exact); |
| regex.options = options; |
| break; |
| |
| case EncloseType.MEMORY: |
| case EncloseType.STOP_BACKTRACK: |
| n = getHeadValueNode(en.target, exact); |
| break; |
| |
| default: |
| break; |
| } // inner switch |
| break; |
| |
| case NodeType.ANCHOR: |
| final AnchorNode an = (AnchorNode)node; |
| if (an.type == AnchorType.PREC_READ) { |
| n = getHeadValueNode(an.target, exact); |
| } |
| break; |
| |
| default: |
| break; |
| } // switch |
| |
| return n; |
| } |
| |
| // true: invalid |
| private boolean checkTypeTree(final Node node, final int typeMask, final int encloseMask, final int anchorMask) { |
| if ((node.getType2Bit() & typeMask) == 0) { |
| return true; |
| } |
| |
| boolean invalid = false; |
| |
| switch(node.getType()) { |
| case NodeType.LIST: |
| case NodeType.ALT: |
| ConsAltNode can = (ConsAltNode)node; |
| do { |
| invalid = checkTypeTree(can.car, typeMask, encloseMask, anchorMask); |
| } while (!invalid && (can = can.cdr) != null); |
| break; |
| |
| case NodeType.QTFR: |
| invalid = checkTypeTree(((QuantifierNode)node).target, typeMask, encloseMask, anchorMask); |
| break; |
| |
| case NodeType.ENCLOSE: |
| final EncloseNode en = (EncloseNode)node; |
| if ((en.type & encloseMask) == 0) { |
| return true; |
| } |
| invalid = checkTypeTree(en.target, typeMask, encloseMask, anchorMask); |
| break; |
| |
| case NodeType.ANCHOR: |
| final AnchorNode an = (AnchorNode)node; |
| if ((an.type & anchorMask) == 0) { |
| return true; |
| } |
| |
| if (an.target != null) { |
| invalid = checkTypeTree(an.target, typeMask, encloseMask, anchorMask); |
| } |
| break; |
| |
| default: |
| break; |
| |
| } // switch |
| |
| return invalid; |
| } |
| |
| /* divide different length alternatives in look-behind. |
| (?<=A|B) ==> (?<=A)|(?<=B) |
| (?<!A|B) ==> (?<!A)(?<!B) |
| */ |
| private Node divideLookBehindAlternatives(final Node nodep) { |
| Node node = nodep; |
| final AnchorNode an = (AnchorNode)node; |
| final int anchorType = an.type; |
| Node head = an.target; |
| Node np = ((ConsAltNode)head).car; |
| |
| swap(node, head); |
| |
| final Node tmp = node; |
| node = head; |
| head = tmp; |
| |
| ((ConsAltNode)node).setCar(head); |
| ((AnchorNode)head).setTarget(np); |
| np = node; |
| |
| while ((np = ((ConsAltNode)np).cdr) != null) { |
| final AnchorNode insert = new AnchorNode(anchorType); |
| insert.setTarget(((ConsAltNode)np).car); |
| ((ConsAltNode)np).setCar(insert); |
| } |
| |
| if (anchorType == AnchorType.LOOK_BEHIND_NOT) { |
| np = node; |
| do { |
| ((ConsAltNode)np).toListNode(); /* alt -> list */ |
| } while ((np = ((ConsAltNode)np).cdr) != null); |
| } |
| |
| return node; |
| } |
| |
| private Node setupLookBehind(final Node node) { |
| final AnchorNode an = (AnchorNode)node; |
| final int len = getCharLengthTree(an.target); |
| switch (returnCode) { |
| case 0: |
| an.charLength = len; |
| break; |
| case GET_CHAR_LEN_VARLEN: |
| throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); |
| case GET_CHAR_LEN_TOP_ALT_VARLEN: |
| if (syntax.differentLengthAltLookBehind()) { |
| return divideLookBehindAlternatives(node); |
| } |
| throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); |
| default: |
| break; |
| } |
| return node; |
| } |
| |
| private void nextSetup(final Node nodep, final Node nextNode) { |
| Node node = nodep; |
| |
| // retry: |
| retry: while(true) { |
| |
| final int type = node.getType(); |
| if (type == NodeType.QTFR) { |
| final QuantifierNode qn = (QuantifierNode)node; |
| if (qn.greedy && isRepeatInfinite(qn.upper)) { |
| if (Config.USE_QTFR_PEEK_NEXT) { |
| final StringNode n = (StringNode)getHeadValueNode(nextNode, true); |
| /* '\0': for UTF-16BE etc... */ |
| if (n != null && n.chars[n.p] != 0) { // ????????? |
| qn.nextHeadExact = n; |
| } |
| } // USE_QTFR_PEEK_NEXT |
| /* automatic posseivation a*b ==> (?>a*)b */ |
| if (qn.lower <= 1) { |
| if (qn.target.isSimple()) { |
| final Node x = getHeadValueNode(qn.target, false); |
| if (x != null) { |
| final Node y = getHeadValueNode(nextNode, false); |
| if (y != null && isNotIncluded(x, y)) { |
| final EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); //onig_node_new_enclose |
| en.setStopBtSimpleRepeat(); |
| //en.setTarget(qn.target); // optimize it ?? |
| swap(node, en); |
| |
| en.setTarget(node); |
| } |
| } |
| } |
| } |
| } |
| } else if (type == NodeType.ENCLOSE) { |
| final EncloseNode en = (EncloseNode)node; |
| if (en.isMemory()) { |
| node = en.target; |
| // !goto retry;! |
| continue retry; |
| } |
| } |
| |
| break; |
| } // while |
| } |
| |
| private void updateStringNodeCaseFoldMultiByte(final StringNode sn) { |
| final char[] ch = sn.chars; |
| final int end = sn.end; |
| value = sn.p; |
| int sp = 0; |
| char buf; |
| |
| while (value < end) { |
| final int ovalue = value; |
| buf = EncodingHelper.toLowerCase(ch[value++]); |
| |
| if (ch[ovalue] != buf) { |
| |
| char[] sbuf = new char[sn.length() << 1]; |
| System.arraycopy(ch, sn.p, sbuf, 0, ovalue - sn.p); |
| value = ovalue; |
| while (value < end) { |
| buf = EncodingHelper.toLowerCase(ch[value++]); |
| if (sp >= sbuf.length) { |
| final char[]tmp = new char[sbuf.length << 1]; |
| System.arraycopy(sbuf, 0, tmp, 0, sbuf.length); |
| sbuf = tmp; |
| } |
| sbuf[sp++] = buf; |
| } |
| sn.set(sbuf, 0, sp); |
| return; |
| } |
| sp++; |
| } |
| } |
| |
| private void updateStringNodeCaseFold(final Node node) { |
| final StringNode sn = (StringNode)node; |
| updateStringNodeCaseFoldMultiByte(sn); |
| } |
| |
| private Node expandCaseFoldMakeRemString(final char[] ch, final int pp, final int end) { |
| final StringNode node = new StringNode(ch, pp, end); |
| |
| updateStringNodeCaseFold(node); |
| node.setAmbig(); |
| node.setDontGetOptInfo(); |
| return node; |
| } |
| |
| private static boolean expandCaseFoldStringAlt(final int itemNum, final char[] items, |
| final char[] chars, final int p, final int slen, final int end, final ObjPtr<Node> node) { |
| |
| ConsAltNode altNode; |
| node.p = altNode = newAltNode(null, null); |
| |
| StringNode snode = new StringNode(chars, p, p + slen); |
| altNode.setCar(snode); |
| |
| for (int i=0; i<itemNum; i++) { |
| snode = new StringNode(); |
| |
| snode.catCode(items[i]); |
| |
| final ConsAltNode an = newAltNode(null, null); |
| an.setCar(snode); |
| altNode.setCdr(an); |
| altNode = an; |
| } |
| return false; |
| } |
| |
| private static final int THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION = 8; |
| private Node expandCaseFoldString(final Node node) { |
| final StringNode sn = (StringNode)node; |
| |
| if (sn.isAmbig() || sn.length() <= 0) { |
| return node; |
| } |
| |
| final char[] chars1 = sn.chars; |
| int pt = sn.p; |
| final int end = sn.end; |
| int altNum = 1; |
| |
| ConsAltNode topRoot = null, r = null; |
| @SuppressWarnings("unused") |
| final ObjPtr<Node> prevNode = new ObjPtr<Node>(); |
| StringNode stringNode = null; |
| |
| while (pt < end) { |
| final char[] items = EncodingHelper.caseFoldCodesByString(regex.caseFoldFlag, chars1[pt]); |
| |
| if (items.length == 0) { |
| if (stringNode == null) { |
| if (r == null && prevNode.p != null) { |
| topRoot = r = ConsAltNode.listAdd(null, prevNode.p); |
| } |
| |
| prevNode.p = stringNode = new StringNode(); // onig_node_new_str(NULL, NULL); |
| |
| if (r != null) { |
| ConsAltNode.listAdd(r, stringNode); |
| } |
| |
| } |
| |
| stringNode.cat(chars1, pt, pt + 1); |
| } else { |
| altNum *= (items.length + 1); |
| if (altNum > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) { |
| break; |
| } |
| |
| if (r == null && prevNode.p != null) { |
| topRoot = r = ConsAltNode.listAdd(null, prevNode.p); |
| } |
| |
| expandCaseFoldStringAlt(items.length, items, chars1, pt, 1, end, prevNode); |
| if (r != null) { |
| ConsAltNode.listAdd(r, prevNode.p); |
| } |
| stringNode = null; |
| } |
| pt++; |
| } |
| |
| if (pt < end) { |
| final Node srem = expandCaseFoldMakeRemString(chars1, pt, end); |
| |
| if (prevNode.p != null && r == null) { |
| topRoot = r = ConsAltNode.listAdd(null, prevNode.p); |
| } |
| |
| if (r == null) { |
| prevNode.p = srem; |
| } else { |
| ConsAltNode.listAdd(r, srem); |
| } |
| } |
| /* ending */ |
| final Node xnode = topRoot != null ? topRoot : prevNode.p; |
| |
| swap(node, xnode); |
| return xnode; |
| } |
| |
| private static final int IN_ALT = (1<<0); |
| private static final int IN_NOT = (1<<1); |
| private static final int IN_REPEAT = (1<<2); |
| private static final int IN_VAR_REPEAT = (1<<3); |
| private static final int EXPAND_STRING_MAX_LENGTH = 100; |
| |
| /* setup_tree does the following work. |
| 1. check empty loop. (set qn->target_empty_info) |
| 2. expand ignore-case in char class. |
| 3. set memory status bit flags. (reg->mem_stats) |
| 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact]. |
| 5. find invalid patterns in look-behind. |
| 6. expand repeated string. |
| */ |
| protected final Node setupTree(final Node nodep, final int statep) { |
| Node node = nodep; |
| int state = statep; |
| |
| restart: while (true) { |
| switch (node.getType()) { |
| case NodeType.LIST: |
| ConsAltNode lin = (ConsAltNode)node; |
| Node prev = null; |
| do { |
| setupTree(lin.car, state); |
| if (prev != null) { |
| nextSetup(prev, lin.car); |
| } |
| prev = lin.car; |
| } while ((lin = lin.cdr) != null); |
| break; |
| |
| case NodeType.ALT: |
| ConsAltNode aln = (ConsAltNode)node; |
| do { |
| setupTree(aln.car, (state | IN_ALT)); |
| } while ((aln = aln.cdr) != null); |
| break; |
| |
| case NodeType.CCLASS: |
| break; |
| |
| case NodeType.STR: |
| if (isIgnoreCase(regex.options) && !((StringNode)node).isRaw()) { |
| node = expandCaseFoldString(node); |
| } |
| break; |
| |
| case NodeType.CTYPE: |
| case NodeType.CANY: |
| break; |
| |
| case NodeType.BREF: |
| final BackRefNode br = (BackRefNode)node; |
| if (br.backRef > env.numMem) { |
| throw new ValueException(ERR_INVALID_BACKREF); |
| } |
| env.backrefedMem = bsOnAt(env.backrefedMem, br.backRef); |
| env.btMemStart = bsOnAt(env.btMemStart, br.backRef); |
| ((EncloseNode)env.memNodes[br.backRef]).setMemBackrefed(); |
| break; |
| |
| case NodeType.QTFR: |
| final QuantifierNode qn = (QuantifierNode)node; |
| Node target = qn.target; |
| |
| if ((state & IN_REPEAT) != 0) { |
| qn.setInRepeat(); |
| } |
| |
| if (isRepeatInfinite(qn.upper) || qn.lower >= 1) { |
| final int d = getMinMatchLength(target); |
| if (d == 0) { |
| qn.targetEmptyInfo = TargetInfo.IS_EMPTY; |
| if (Config.USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT) { |
| final int info = quantifiersMemoryInfo(target); |
| if (info > 0) { |
| qn.targetEmptyInfo = info; |
| } |
| } // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK |
| // strange stuff here (turned off) |
| } |
| } |
| |
| state |= IN_REPEAT; |
| if (qn.lower != qn.upper) { |
| state |= IN_VAR_REPEAT; |
| } |
| |
| target = setupTree(target, state); |
| |
| /* expand string */ |
| if (target.getType() == NodeType.STR) { |
| if (!isRepeatInfinite(qn.lower) && qn.lower == qn.upper && |
| qn.lower > 1 && qn.lower <= EXPAND_STRING_MAX_LENGTH) { |
| final StringNode sn = (StringNode)target; |
| final int len = sn.length(); |
| |
| if (len * qn.lower <= EXPAND_STRING_MAX_LENGTH) { |
| final StringNode str = qn.convertToString(sn.flag); |
| final int n = qn.lower; |
| for (int i = 0; i < n; i++) { |
| str.cat(sn.chars, sn.p, sn.end); |
| } |
| break; /* break case NT_QTFR: */ |
| } |
| |
| } |
| } |
| if (Config.USE_OP_PUSH_OR_JUMP_EXACT) { |
| if (qn.greedy && qn.targetEmptyInfo != 0) { |
| if (target.getType() == NodeType.QTFR) { |
| final QuantifierNode tqn = (QuantifierNode)target; |
| if (tqn.headExact != null) { |
| qn.headExact = tqn.headExact; |
| tqn.headExact = null; |
| } |
| } else { |
| qn.headExact = getHeadValueNode(qn.target, true); |
| } |
| } |
| } // USE_OP_PUSH_OR_JUMP_EXACT |
| break; |
| |
| case NodeType.ENCLOSE: |
| final EncloseNode en = (EncloseNode)node; |
| switch (en.type) { |
| case EncloseType.OPTION: |
| final int options = regex.options; |
| regex.options = en.option; |
| setupTree(en.target, state); |
| regex.options = options; |
| break; |
| |
| case EncloseType.MEMORY: |
| if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { |
| env.btMemStart = bsOnAt(env.btMemStart, en.regNum); |
| /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ |
| |
| } |
| setupTree(en.target, state); |
| break; |
| |
| case EncloseType.STOP_BACKTRACK: |
| setupTree(en.target, state); |
| if (en.target.getType() == NodeType.QTFR) { |
| final QuantifierNode tqn = (QuantifierNode)en.target; |
| if (isRepeatInfinite(tqn.upper) && tqn.lower <= 1 && tqn.greedy) { |
| /* (?>a*), a*+ etc... */ |
| if (tqn.target.isSimple()) { |
| en.setStopBtSimpleRepeat(); |
| } |
| } |
| } |
| break; |
| |
| default: |
| break; |
| |
| } // inner switch |
| break; |
| |
| case NodeType.ANCHOR: |
| final AnchorNode an = (AnchorNode)node; |
| switch (an.type) { |
| case AnchorType.PREC_READ: |
| setupTree(an.target, state); |
| break; |
| |
| case AnchorType.PREC_READ_NOT: |
| setupTree(an.target, (state | IN_NOT)); |
| break; |
| |
| case AnchorType.LOOK_BEHIND: |
| if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) { |
| throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); |
| } |
| node = setupLookBehind(node); |
| if (node.getType() != NodeType.ANCHOR) { |
| continue restart; |
| } |
| setupTree(((AnchorNode)node).target, state); |
| break; |
| |
| case AnchorType.LOOK_BEHIND_NOT: |
| if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) { |
| throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN); |
| } |
| node = setupLookBehind(node); |
| if (node.getType() != NodeType.ANCHOR) { |
| continue restart; |
| } |
| setupTree(((AnchorNode)node).target, (state | IN_NOT)); |
| break; |
| |
| default: |
| break; |
| |
| } // inner switch |
| break; |
| default: |
| break; |
| } // switch |
| return node; |
| } // restart: while |
| } |
| |
| private static final int MAX_NODE_OPT_INFO_REF_COUNT = 5; |
| private void optimizeNodeLeft(final Node node, final NodeOptInfo opt, final OptEnvironment oenv) { // oenv remove, pass mmd |
| opt.clear(); |
| opt.setBoundNode(oenv.mmd); |
| |
| switch (node.getType()) { |
| case NodeType.LIST: { |
| final OptEnvironment nenv = new OptEnvironment(); |
| final NodeOptInfo nopt = new NodeOptInfo(); |
| nenv.copy(oenv); |
| ConsAltNode lin = (ConsAltNode)node; |
| do { |
| optimizeNodeLeft(lin.car, nopt, nenv); |
| nenv.mmd.add(nopt.length); |
| opt.concatLeftNode(nopt); |
| } while ((lin = lin.cdr) != null); |
| break; |
| } |
| |
| case NodeType.ALT: { |
| final NodeOptInfo nopt = new NodeOptInfo(); |
| ConsAltNode aln = (ConsAltNode)node; |
| do { |
| optimizeNodeLeft(aln.car, nopt, oenv); |
| if (aln == node) { |
| opt.copy(nopt); |
| } else { |
| opt.altMerge(nopt, oenv); |
| } |
| } while ((aln = aln.cdr) != null); |
| break; |
| } |
| |
| case NodeType.STR: { |
| final StringNode sn = (StringNode)node; |
| |
| final int slen = sn.length(); |
| |
| if (!sn.isAmbig()) { |
| opt.exb.concatStr(sn.chars, sn.p, sn.end, sn.isRaw()); |
| |
| if (slen > 0) { |
| opt.map.addChar(sn.chars[sn.p]); |
| } |
| |
| opt.length.set(slen, slen); |
| } else { |
| int max; |
| if (sn.isDontGetOptInfo()) { |
| max = sn.length(); |
| } else { |
| opt.exb.concatStr(sn.chars, sn.p, sn.end, sn.isRaw()); |
| opt.exb.ignoreCase = true; |
| |
| if (slen > 0) { |
| opt.map.addCharAmb(sn.chars, sn.p, sn.end, oenv.caseFoldFlag); |
| } |
| |
| max = slen; |
| } |
| opt.length.set(slen, max); |
| } |
| |
| if (opt.exb.length == slen) { |
| opt.exb.reachEnd = true; |
| } |
| break; |
| } |
| |
| case NodeType.CCLASS: { |
| final CClassNode cc = (CClassNode)node; |
| /* no need to check ignore case. (setted in setup_tree()) */ |
| if (cc.mbuf != null || cc.isNot()) { |
| opt.length.set(1, 1); |
| } else { |
| for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) { |
| final boolean z = cc.bs.at(i); |
| if ((z && !cc.isNot()) || (!z && cc.isNot())) { |
| opt.map.addChar(i); |
| } |
| } |
| opt.length.set(1, 1); |
| } |
| break; |
| } |
| |
| case NodeType.CANY: { |
| opt.length.set(1, 1); |
| break; |
| } |
| |
| case NodeType.ANCHOR: { |
| final AnchorNode an = (AnchorNode)node; |
| switch (an.type) { |
| case AnchorType.BEGIN_BUF: |
| case AnchorType.BEGIN_POSITION: |
| case AnchorType.BEGIN_LINE: |
| case AnchorType.END_BUF: |
| case AnchorType.SEMI_END_BUF: |
| case AnchorType.END_LINE: |
| opt.anchor.add(an.type); |
| break; |
| |
| case AnchorType.PREC_READ: |
| final NodeOptInfo nopt = new NodeOptInfo(); |
| optimizeNodeLeft(an.target, nopt, oenv); |
| if (nopt.exb.length > 0) { |
| opt.expr.copy(nopt.exb); |
| } else if (nopt.exm.length > 0) { |
| opt.expr.copy(nopt.exm); |
| } |
| opt.expr.reachEnd = false; |
| if (nopt.map.value > 0) { |
| opt.map.copy(nopt.map); |
| } |
| break; |
| |
| case AnchorType.PREC_READ_NOT: |
| case AnchorType.LOOK_BEHIND: /* Sorry, I can't make use of it. */ |
| case AnchorType.LOOK_BEHIND_NOT: |
| break; |
| |
| default: |
| break; |
| |
| } // inner switch |
| break; |
| } |
| |
| case NodeType.BREF: { |
| final BackRefNode br = (BackRefNode)node; |
| |
| if (br.isRecursion()) { |
| opt.length.set(0, MinMaxLen.INFINITE_DISTANCE); |
| break; |
| } |
| |
| final Node[]nodes = oenv.scanEnv.memNodes; |
| |
| final int min = getMinMatchLength(nodes[br.backRef]); |
| final int max = getMaxMatchLength(nodes[br.backRef]); |
| |
| opt.length.set(min, max); |
| break; |
| } |
| |
| |
| case NodeType.QTFR: { |
| final NodeOptInfo nopt = new NodeOptInfo(); |
| final QuantifierNode qn = (QuantifierNode)node; |
| optimizeNodeLeft(qn.target, nopt, oenv); |
| if (qn.lower == 0 && isRepeatInfinite(qn.upper)) { |
| if (oenv.mmd.max == 0 && qn.target.getType() == NodeType.CANY && qn.greedy) { |
| if (isMultiline(oenv.options)) { |
| opt.anchor.add(AnchorType.ANYCHAR_STAR_ML); |
| } else { |
| opt.anchor.add(AnchorType.ANYCHAR_STAR); |
| } |
| } |
| } else { |
| if (qn.lower > 0) { |
| opt.copy(nopt); |
| if (nopt.exb.length > 0) { |
| if (nopt.exb.reachEnd) { |
| int i; |
| for (i = 2; i <= qn.lower && !opt.exb.isFull(); i++) { |
| opt.exb.concat(nopt.exb); |
| } |
| if (i < qn.lower) { |
| opt.exb.reachEnd = false; |
| } |
| } |
| } |
| if (qn.lower != qn.upper) { |
| opt.exb.reachEnd = false; |
| opt.exm.reachEnd = false; |
| } |
| if (qn.lower > 1) { |
| opt.exm.reachEnd = false; |
| } |
| |
| } |
| } |
| final int min = MinMaxLen.distanceMultiply(nopt.length.min, qn.lower); |
| int max; |
| if (isRepeatInfinite(qn.upper)) { |
| max = nopt.length.max > 0 ? MinMaxLen.INFINITE_DISTANCE : 0; |
| } else { |
| max = MinMaxLen.distanceMultiply(nopt.length.max, qn.upper); |
| } |
| opt.length.set(min, max); |
| break; |
| } |
| |
| case NodeType.ENCLOSE: { |
| final EncloseNode en = (EncloseNode)node; |
| switch (en.type) { |
| case EncloseType.OPTION: |
| final int save = oenv.options; |
| oenv.options = en.option; |
| optimizeNodeLeft(en.target, opt, oenv); |
| oenv.options = save; |
| break; |
| |
| case EncloseType.MEMORY: |
| if (++en.optCount > MAX_NODE_OPT_INFO_REF_COUNT) { |
| int min = 0; |
| int max = MinMaxLen.INFINITE_DISTANCE; |
| if (en.isMinFixed()) { |
| min = en.minLength; |
| } |
| if (en.isMaxFixed()) { |
| max = en.maxLength; |
| } |
| opt.length.set(min, max); |
| } else { // USE_SUBEXP_CALL |
| optimizeNodeLeft(en.target, opt, oenv); |
| if (opt.anchor.isSet(AnchorType.ANYCHAR_STAR_MASK)) { |
| if (bsAt(oenv.scanEnv.backrefedMem, en.regNum)) { |
| opt.anchor.remove(AnchorType.ANYCHAR_STAR_MASK); |
| } |
| } |
| } |
| break; |
| |
| case EncloseType.STOP_BACKTRACK: |
| optimizeNodeLeft(en.target, opt, oenv); |
| break; |
| |
| default: |
| break; |
| } // inner switch |
| break; |
| } |
| |
| default: |
| throw new InternalException(ERR_PARSER_BUG); |
| } // switch |
| } |
| |
| @SuppressWarnings("unused") |
| protected final void setOptimizedInfoFromTree(final Node node) { |
| final NodeOptInfo opt = new NodeOptInfo(); |
| final OptEnvironment oenv = new OptEnvironment(); |
| |
| oenv.options = regex.options; |
| oenv.caseFoldFlag = regex.caseFoldFlag; |
| oenv.scanEnv = env; |
| oenv.mmd.clear(); // ?? |
| |
| optimizeNodeLeft(node, opt, oenv); |
| |
| regex.anchor = opt.anchor.leftAnchor & (AnchorType.BEGIN_BUF | |
| AnchorType.BEGIN_POSITION | |
| AnchorType.ANYCHAR_STAR | |
| AnchorType.ANYCHAR_STAR_ML); |
| |
| regex.anchor |= opt.anchor.rightAnchor & (AnchorType.END_BUF | |
| AnchorType.SEMI_END_BUF); |
| |
| if ((regex.anchor & (AnchorType.END_BUF | AnchorType.SEMI_END_BUF)) != 0) { |
| regex.anchorDmin = opt.length.min; |
| regex.anchorDmax = opt.length.max; |
| } |
| |
| if (opt.exb.length > 0 || opt.exm.length > 0) { |
| opt.exb.select(opt.exm); |
| if (opt.map.value > 0 && opt.exb.compare(opt.map) > 0) { |
| // !goto set_map;! |
| regex.setOptimizeMapInfo(opt.map); |
| regex.setSubAnchor(opt.map.anchor); |
| } else { |
| regex.setExactInfo(opt.exb); |
| regex.setSubAnchor(opt.exb.anchor); |
| } |
| } else if (opt.map.value > 0) { |
| // !set_map:! |
| regex.setOptimizeMapInfo(opt.map); |
| regex.setSubAnchor(opt.map.anchor); |
| } else { |
| regex.subAnchor |= opt.anchor.leftAnchor & AnchorType.BEGIN_LINE; |
| if (opt.length.max == 0) { |
| regex.subAnchor |= opt.anchor.rightAnchor & AnchorType.END_LINE; |
| } |
| } |
| |
| if (Config.DEBUG_COMPILE || Config.DEBUG_MATCH) { |
| Config.log.println(regex.optimizeInfoToString()); |
| } |
| } |
| } |