blob: 64d90c03c8ff98b25bf9f139c94acd790af37de9 [file] [log] [blame]
/*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is furnished to do
* so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package jdk.nashorn.internal.runtime.regexp.joni;
import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAll;
import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt;
import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsOnAt;
import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindCondition;
import static jdk.nashorn.internal.runtime.regexp.joni.Option.isIgnoreCase;
import static jdk.nashorn.internal.runtime.regexp.joni.Option.isMultiline;
import static jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode.newAltNode;
import static jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode.isRepeatInfinite;
import java.util.HashSet;
import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode;
import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode;
import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode;
import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode;
import jdk.nashorn.internal.runtime.regexp.joni.ast.Node;
import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode;
import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode;
import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType;
import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
import jdk.nashorn.internal.runtime.regexp.joni.constants.StackPopLevel;
import jdk.nashorn.internal.runtime.regexp.joni.constants.TargetInfo;
import jdk.nashorn.internal.runtime.regexp.joni.encoding.ObjPtr;
import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
final class Analyser extends Parser {
protected Analyser(final ScanEnvironment env, final char[] chars, final int p, final int end) {
super(env, chars, p, end);
}
@SuppressWarnings("unused")
protected final void compile() {
if (Config.DEBUG) {
Config.log.println(new String(chars, getBegin(), getEnd()));
}
reset();
regex.numMem = 0;
regex.numRepeat = 0;
regex.numNullCheck = 0;
//regex.repeatRangeAlloc = 0;
regex.repeatRangeLo = null;
regex.repeatRangeHi = null;
parse();
if (Config.DEBUG_PARSE_TREE_RAW && Config.DEBUG_PARSE_TREE) {
Config.log.println("<RAW TREE>");
Config.log.println(root + "\n");
}
root = setupTree(root, 0);
if (Config.DEBUG_PARSE_TREE) {
if (Config.DEBUG_PARSE_TREE_RAW) {
Config.log.println("<TREE>");
}
root.verifyTree(new HashSet<Node>(), env.reg.warnings);
Config.log.println(root + "\n");
}
regex.captureHistory = env.captureHistory;
regex.btMemStart = env.btMemStart;
regex.btMemEnd = env.btMemEnd;
if (isFindCondition(regex.options)) {
regex.btMemEnd = bsAll();
} else {
regex.btMemEnd = env.btMemEnd;
regex.btMemEnd |= regex.captureHistory;
}
regex.clearOptimizeInfo();
if (!Config.DONT_OPTIMIZE) {
setOptimizedInfoFromTree(root);
}
env.memNodes = null;
if (regex.numRepeat != 0 || regex.btMemEnd != 0) {
regex.stackPopLevel = StackPopLevel.ALL;
} else {
if (regex.btMemStart != 0) {
regex.stackPopLevel = StackPopLevel.MEM_START;
} else {
regex.stackPopLevel = StackPopLevel.FREE;
}
}
if (Config.DEBUG_COMPILE) {
Config.log.println("stack used: " + regex.stackNeeded);
if (Config.USE_STRING_TEMPLATES) {
Config.log.print("templates: " + regex.templateNum + "\n");
}
Config.log.println(new ByteCodePrinter(regex).byteCodeListToString());
} // DEBUG_COMPILE
}
private void swap(final Node a, final Node b) {
a.swap(b);
if (root == b) {
root = a;
} else if (root == a) {
root = b;
}
}
// USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
private int quantifiersMemoryInfo(final Node node) {
int info = 0;
switch(node.getType()) {
case NodeType.LIST:
case NodeType.ALT:
ConsAltNode can = (ConsAltNode)node;
do {
final int v = quantifiersMemoryInfo(can.car);
if (v > info) {
info = v;
}
} while ((can = can.cdr) != null);
break;
case NodeType.QTFR:
final QuantifierNode qn = (QuantifierNode)node;
if (qn.upper != 0) {
info = quantifiersMemoryInfo(qn.target);
}
break;
case NodeType.ENCLOSE:
final EncloseNode en = (EncloseNode)node;
switch (en.type) {
case EncloseType.MEMORY:
return TargetInfo.IS_EMPTY_MEM;
case EncloseType.OPTION:
case EncloseNode.STOP_BACKTRACK:
info = quantifiersMemoryInfo(en.target);
break;
default:
break;
} // inner switch
break;
case NodeType.BREF:
case NodeType.STR:
case NodeType.CTYPE:
case NodeType.CCLASS:
case NodeType.CANY:
case NodeType.ANCHOR:
default:
break;
} // switch
return info;
}
private int getMinMatchLength(final Node node) {
int min = 0;
switch (node.getType()) {
case NodeType.BREF:
final BackRefNode br = (BackRefNode)node;
if (br.isRecursion()) {
break;
}
if (br.backRef > env.numMem) {
throw new ValueException(ERR_INVALID_BACKREF);
}
min = getMinMatchLength(env.memNodes[br.backRef]);
break;
case NodeType.LIST:
ConsAltNode can = (ConsAltNode)node;
do {
min += getMinMatchLength(can.car);
} while ((can = can.cdr) != null);
break;
case NodeType.ALT:
ConsAltNode y = (ConsAltNode)node;
do {
final Node x = y.car;
final int tmin = getMinMatchLength(x);
if (y == node) {
min = tmin;
} else if (min > tmin) {
min = tmin;
}
} while ((y = y.cdr) != null);
break;
case NodeType.STR:
min = ((StringNode)node).length();
break;
case NodeType.CTYPE:
min = 1;
break;
case NodeType.CCLASS:
case NodeType.CANY:
min = 1;
break;
case NodeType.QTFR:
final QuantifierNode qn = (QuantifierNode)node;
if (qn.lower > 0) {
min = getMinMatchLength(qn.target);
min = MinMaxLen.distanceMultiply(min, qn.lower);
}
break;
case NodeType.ENCLOSE:
final EncloseNode en = (EncloseNode)node;
switch (en.type) {
case EncloseType.MEMORY:
if (en.isMinFixed()) {
min = en.minLength;
} else {
min = getMinMatchLength(en.target);
en.minLength = min;
en.setMinFixed();
}
break;
case EncloseType.OPTION:
case EncloseType.STOP_BACKTRACK:
min = getMinMatchLength(en.target);
break;
default:
break;
} // inner switch
break;
case NodeType.ANCHOR:
default:
break;
} // switch
return min;
}
private int getMaxMatchLength(final Node node) {
int max = 0;
switch (node.getType()) {
case NodeType.LIST:
ConsAltNode ln = (ConsAltNode)node;
do {
final int tmax = getMaxMatchLength(ln.car);
max = MinMaxLen.distanceAdd(max, tmax);
} while ((ln = ln.cdr) != null);
break;
case NodeType.ALT:
ConsAltNode an = (ConsAltNode)node;
do {
final int tmax = getMaxMatchLength(an.car);
if (max < tmax) {
max = tmax;
}
} while ((an = an.cdr) != null);
break;
case NodeType.STR:
max = ((StringNode)node).length();
break;
case NodeType.CTYPE:
max = 1;
break;
case NodeType.CCLASS:
case NodeType.CANY:
max = 1;
break;
case NodeType.BREF:
final BackRefNode br = (BackRefNode)node;
if (br.isRecursion()) {
max = MinMaxLen.INFINITE_DISTANCE;
break;
}
if (br.backRef > env.numMem) {
throw new ValueException(ERR_INVALID_BACKREF);
}
final int tmax = getMaxMatchLength(env.memNodes[br.backRef]);
if (max < tmax) {
max = tmax;
}
break;
case NodeType.QTFR:
final QuantifierNode qn = (QuantifierNode)node;
if (qn.upper != 0) {
max = getMaxMatchLength(qn.target);
if (max != 0) {
if (!isRepeatInfinite(qn.upper)) {
max = MinMaxLen.distanceMultiply(max, qn.upper);
} else {
max = MinMaxLen.INFINITE_DISTANCE;
}
}
}
break;
case NodeType.ENCLOSE:
final EncloseNode en = (EncloseNode)node;
switch (en.type) {
case EncloseType.MEMORY:
if (en.isMaxFixed()) {
max = en.maxLength;
} else {
max = getMaxMatchLength(en.target);
en.maxLength = max;
en.setMaxFixed();
}
break;
case EncloseType.OPTION:
case EncloseType.STOP_BACKTRACK:
max = getMaxMatchLength(en.target);
break;
default:
break;
} // inner switch
break;
case NodeType.ANCHOR:
default:
break;
} // switch
return max;
}
private static final int GET_CHAR_LEN_VARLEN = -1;
private static final int GET_CHAR_LEN_TOP_ALT_VARLEN = -2;
protected final int getCharLengthTree(final Node node) {
return getCharLengthTree(node, 0);
}
private int getCharLengthTree(final Node node, final int levelp) {
final int level = levelp + 1;
int len = 0;
returnCode = 0;
switch(node.getType()) {
case NodeType.LIST:
ConsAltNode ln = (ConsAltNode)node;
do {
final int tlen = getCharLengthTree(ln.car, level);
if (returnCode == 0) {
len = MinMaxLen.distanceAdd(len, tlen);
}
} while (returnCode == 0 && (ln = ln.cdr) != null);
break;
case NodeType.ALT:
ConsAltNode an = (ConsAltNode)node;
boolean varLen = false;
int tlen = getCharLengthTree(an.car, level);
while (returnCode == 0 && (an = an.cdr) != null) {
final int tlen2 = getCharLengthTree(an.car, level);
if (returnCode == 0) {
if (tlen != tlen2) {
varLen = true;
}
}
}
if (returnCode == 0) {
if (varLen) {
if (level == 1) {
returnCode = GET_CHAR_LEN_TOP_ALT_VARLEN;
} else {
returnCode = GET_CHAR_LEN_VARLEN;
}
} else {
len = tlen;
}
}
break;
case NodeType.STR:
final StringNode sn = (StringNode)node;
len = sn.length();
break;
case NodeType.QTFR:
final QuantifierNode qn = (QuantifierNode)node;
if (qn.lower == qn.upper) {
tlen = getCharLengthTree(qn.target, level);
if (returnCode == 0) {
len = MinMaxLen.distanceMultiply(tlen, qn.lower);
}
} else {
returnCode = GET_CHAR_LEN_VARLEN;
}
break;
case NodeType.CTYPE:
case NodeType.CCLASS:
case NodeType.CANY:
len = 1;
break;
case NodeType.ENCLOSE:
final EncloseNode en = (EncloseNode)node;
switch(en.type) {
case EncloseType.MEMORY:
if (en.isCLenFixed()) {
len = en.charLength;
} else {
len = getCharLengthTree(en.target, level);
if (returnCode == 0) {
en.charLength = len;
en.setCLenFixed();
}
}
break;
case EncloseType.OPTION:
case EncloseType.STOP_BACKTRACK:
len = getCharLengthTree(en.target, level);
break;
default:
break;
} // inner switch
break;
case NodeType.ANCHOR:
break;
default:
returnCode = GET_CHAR_LEN_VARLEN;
} // switch
return len;
}
/* x is not included y ==> 1 : 0 */
private static boolean isNotIncluded(final Node xn, final Node yn) {
Node x = xn;
Node y = yn;
Node tmp;
// !retry:!
retry: while(true) {
final int yType = y.getType();
switch(x.getType()) {
case NodeType.CTYPE:
switch(yType) {
case NodeType.CCLASS:
// !swap:!
tmp = x;
x = y;
y = tmp;
// !goto retry;!
continue retry;
case NodeType.STR:
// !goto swap;!
tmp = x;
x = y;
y = tmp;
continue retry;
default:
break;
} // inner switch
break;
case NodeType.CCLASS:
final CClassNode xc = (CClassNode)x;
switch(yType) {
case NodeType.CCLASS:
final CClassNode yc = (CClassNode)y;
for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
boolean v = xc.bs.at(i);
if ((v && !xc.isNot()) || (!v && xc.isNot())) {
v = yc.bs.at(i);
if ((v && !yc.isNot()) || (!v && yc.isNot())) {
return false;
}
}
}
if ((xc.mbuf == null && !xc.isNot()) || yc.mbuf == null && !yc.isNot()) {
return true;
}
return false;
// break; not reached
case NodeType.STR:
// !goto swap;!
tmp = x;
x = y;
y = tmp;
continue retry;
default:
break;
} // inner switch
break; // case NodeType.CCLASS
case NodeType.STR:
final StringNode xs = (StringNode)x;
if (xs.length() == 0) {
break;
}
switch (yType) {
case NodeType.CCLASS:
final CClassNode cc = (CClassNode)y;
final int code = xs.chars[xs.p];
return !cc.isCodeInCC(code);
case NodeType.STR:
final StringNode ys = (StringNode)y;
int len = xs.length();
if (len > ys.length()) {
len = ys.length();
}
if (xs.isAmbig() || ys.isAmbig()) {
/* tiny version */
return false;
}
for (int i=0, pt=ys.p, q=xs.p; i<len; i++, pt++, q++) {
if (ys.chars[pt] != xs.chars[q]) {
return true;
}
}
break;
default:
break;
} // inner switch
break; // case NodeType.STR
default:
break;
} // switch
break;
} // retry: while
return false;
}
private Node getHeadValueNode(final Node node, final boolean exact) {
Node n = null;
switch(node.getType()) {
case NodeType.BREF:
case NodeType.ALT:
case NodeType.CANY:
break;
case NodeType.CTYPE:
case NodeType.CCLASS:
if (!exact) {
n = node;
}
break;
case NodeType.LIST:
n = getHeadValueNode(((ConsAltNode)node).car, exact);
break;
case NodeType.STR:
final StringNode sn = (StringNode)node;
if (sn.end <= sn.p)
{
break; // ???
}
if (exact && !sn.isRaw() && isIgnoreCase(regex.options)){
// nothing
} else {
n = node;
}
break;
case NodeType.QTFR:
final QuantifierNode qn = (QuantifierNode)node;
if (qn.lower > 0) {
if (qn.headExact != null) {
n = qn.headExact;
} else {
n = getHeadValueNode(qn.target, exact);
}
}
break;
case NodeType.ENCLOSE:
final EncloseNode en = (EncloseNode)node;
switch (en.type) {
case EncloseType.OPTION:
final int options = regex.options;
regex.options = en.option;
n = getHeadValueNode(en.target, exact);
regex.options = options;
break;
case EncloseType.MEMORY:
case EncloseType.STOP_BACKTRACK:
n = getHeadValueNode(en.target, exact);
break;
default:
break;
} // inner switch
break;
case NodeType.ANCHOR:
final AnchorNode an = (AnchorNode)node;
if (an.type == AnchorType.PREC_READ) {
n = getHeadValueNode(an.target, exact);
}
break;
default:
break;
} // switch
return n;
}
// true: invalid
private boolean checkTypeTree(final Node node, final int typeMask, final int encloseMask, final int anchorMask) {
if ((node.getType2Bit() & typeMask) == 0) {
return true;
}
boolean invalid = false;
switch(node.getType()) {
case NodeType.LIST:
case NodeType.ALT:
ConsAltNode can = (ConsAltNode)node;
do {
invalid = checkTypeTree(can.car, typeMask, encloseMask, anchorMask);
} while (!invalid && (can = can.cdr) != null);
break;
case NodeType.QTFR:
invalid = checkTypeTree(((QuantifierNode)node).target, typeMask, encloseMask, anchorMask);
break;
case NodeType.ENCLOSE:
final EncloseNode en = (EncloseNode)node;
if ((en.type & encloseMask) == 0) {
return true;
}
invalid = checkTypeTree(en.target, typeMask, encloseMask, anchorMask);
break;
case NodeType.ANCHOR:
final AnchorNode an = (AnchorNode)node;
if ((an.type & anchorMask) == 0) {
return true;
}
if (an.target != null) {
invalid = checkTypeTree(an.target, typeMask, encloseMask, anchorMask);
}
break;
default:
break;
} // switch
return invalid;
}
/* divide different length alternatives in look-behind.
(?<=A|B) ==> (?<=A)|(?<=B)
(?<!A|B) ==> (?<!A)(?<!B)
*/
private Node divideLookBehindAlternatives(final Node nodep) {
Node node = nodep;
final AnchorNode an = (AnchorNode)node;
final int anchorType = an.type;
Node head = an.target;
Node np = ((ConsAltNode)head).car;
swap(node, head);
final Node tmp = node;
node = head;
head = tmp;
((ConsAltNode)node).setCar(head);
((AnchorNode)head).setTarget(np);
np = node;
while ((np = ((ConsAltNode)np).cdr) != null) {
final AnchorNode insert = new AnchorNode(anchorType);
insert.setTarget(((ConsAltNode)np).car);
((ConsAltNode)np).setCar(insert);
}
if (anchorType == AnchorType.LOOK_BEHIND_NOT) {
np = node;
do {
((ConsAltNode)np).toListNode(); /* alt -> list */
} while ((np = ((ConsAltNode)np).cdr) != null);
}
return node;
}
private Node setupLookBehind(final Node node) {
final AnchorNode an = (AnchorNode)node;
final int len = getCharLengthTree(an.target);
switch (returnCode) {
case 0:
an.charLength = len;
break;
case GET_CHAR_LEN_VARLEN:
throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
case GET_CHAR_LEN_TOP_ALT_VARLEN:
if (syntax.differentLengthAltLookBehind()) {
return divideLookBehindAlternatives(node);
}
throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
default:
break;
}
return node;
}
private void nextSetup(final Node nodep, final Node nextNode) {
Node node = nodep;
// retry:
retry: while(true) {
final int type = node.getType();
if (type == NodeType.QTFR) {
final QuantifierNode qn = (QuantifierNode)node;
if (qn.greedy && isRepeatInfinite(qn.upper)) {
if (Config.USE_QTFR_PEEK_NEXT) {
final StringNode n = (StringNode)getHeadValueNode(nextNode, true);
/* '\0': for UTF-16BE etc... */
if (n != null && n.chars[n.p] != 0) { // ?????????
qn.nextHeadExact = n;
}
} // USE_QTFR_PEEK_NEXT
/* automatic posseivation a*b ==> (?>a*)b */
if (qn.lower <= 1) {
if (qn.target.isSimple()) {
final Node x = getHeadValueNode(qn.target, false);
if (x != null) {
final Node y = getHeadValueNode(nextNode, false);
if (y != null && isNotIncluded(x, y)) {
final EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); //onig_node_new_enclose
en.setStopBtSimpleRepeat();
//en.setTarget(qn.target); // optimize it ??
swap(node, en);
en.setTarget(node);
}
}
}
}
}
} else if (type == NodeType.ENCLOSE) {
final EncloseNode en = (EncloseNode)node;
if (en.isMemory()) {
node = en.target;
// !goto retry;!
continue retry;
}
}
break;
} // while
}
private void updateStringNodeCaseFoldMultiByte(final StringNode sn) {
final char[] ch = sn.chars;
final int end = sn.end;
value = sn.p;
int sp = 0;
char buf;
while (value < end) {
final int ovalue = value;
buf = EncodingHelper.toLowerCase(ch[value++]);
if (ch[ovalue] != buf) {
char[] sbuf = new char[sn.length() << 1];
System.arraycopy(ch, sn.p, sbuf, 0, ovalue - sn.p);
value = ovalue;
while (value < end) {
buf = EncodingHelper.toLowerCase(ch[value++]);
if (sp >= sbuf.length) {
final char[]tmp = new char[sbuf.length << 1];
System.arraycopy(sbuf, 0, tmp, 0, sbuf.length);
sbuf = tmp;
}
sbuf[sp++] = buf;
}
sn.set(sbuf, 0, sp);
return;
}
sp++;
}
}
private void updateStringNodeCaseFold(final Node node) {
final StringNode sn = (StringNode)node;
updateStringNodeCaseFoldMultiByte(sn);
}
private Node expandCaseFoldMakeRemString(final char[] ch, final int pp, final int end) {
final StringNode node = new StringNode(ch, pp, end);
updateStringNodeCaseFold(node);
node.setAmbig();
node.setDontGetOptInfo();
return node;
}
private static boolean expandCaseFoldStringAlt(final int itemNum, final char[] items,
final char[] chars, final int p, final int slen, final int end, final ObjPtr<Node> node) {
ConsAltNode altNode;
node.p = altNode = newAltNode(null, null);
StringNode snode = new StringNode(chars, p, p + slen);
altNode.setCar(snode);
for (int i=0; i<itemNum; i++) {
snode = new StringNode();
snode.catCode(items[i]);
final ConsAltNode an = newAltNode(null, null);
an.setCar(snode);
altNode.setCdr(an);
altNode = an;
}
return false;
}
private static final int THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION = 8;
private Node expandCaseFoldString(final Node node) {
final StringNode sn = (StringNode)node;
if (sn.isAmbig() || sn.length() <= 0) {
return node;
}
final char[] chars1 = sn.chars;
int pt = sn.p;
final int end = sn.end;
int altNum = 1;
ConsAltNode topRoot = null, r = null;
@SuppressWarnings("unused")
final ObjPtr<Node> prevNode = new ObjPtr<Node>();
StringNode stringNode = null;
while (pt < end) {
final char[] items = EncodingHelper.caseFoldCodesByString(regex.caseFoldFlag, chars1[pt]);
if (items.length == 0) {
if (stringNode == null) {
if (r == null && prevNode.p != null) {
topRoot = r = ConsAltNode.listAdd(null, prevNode.p);
}
prevNode.p = stringNode = new StringNode(); // onig_node_new_str(NULL, NULL);
if (r != null) {
ConsAltNode.listAdd(r, stringNode);
}
}
stringNode.cat(chars1, pt, pt + 1);
} else {
altNum *= (items.length + 1);
if (altNum > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) {
break;
}
if (r == null && prevNode.p != null) {
topRoot = r = ConsAltNode.listAdd(null, prevNode.p);
}
expandCaseFoldStringAlt(items.length, items, chars1, pt, 1, end, prevNode);
if (r != null) {
ConsAltNode.listAdd(r, prevNode.p);
}
stringNode = null;
}
pt++;
}
if (pt < end) {
final Node srem = expandCaseFoldMakeRemString(chars1, pt, end);
if (prevNode.p != null && r == null) {
topRoot = r = ConsAltNode.listAdd(null, prevNode.p);
}
if (r == null) {
prevNode.p = srem;
} else {
ConsAltNode.listAdd(r, srem);
}
}
/* ending */
final Node xnode = topRoot != null ? topRoot : prevNode.p;
swap(node, xnode);
return xnode;
}
private static final int IN_ALT = (1<<0);
private static final int IN_NOT = (1<<1);
private static final int IN_REPEAT = (1<<2);
private static final int IN_VAR_REPEAT = (1<<3);
private static final int EXPAND_STRING_MAX_LENGTH = 100;
/* setup_tree does the following work.
1. check empty loop. (set qn->target_empty_info)
2. expand ignore-case in char class.
3. set memory status bit flags. (reg->mem_stats)
4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
5. find invalid patterns in look-behind.
6. expand repeated string.
*/
protected final Node setupTree(final Node nodep, final int statep) {
Node node = nodep;
int state = statep;
restart: while (true) {
switch (node.getType()) {
case NodeType.LIST:
ConsAltNode lin = (ConsAltNode)node;
Node prev = null;
do {
setupTree(lin.car, state);
if (prev != null) {
nextSetup(prev, lin.car);
}
prev = lin.car;
} while ((lin = lin.cdr) != null);
break;
case NodeType.ALT:
ConsAltNode aln = (ConsAltNode)node;
do {
setupTree(aln.car, (state | IN_ALT));
} while ((aln = aln.cdr) != null);
break;
case NodeType.CCLASS:
break;
case NodeType.STR:
if (isIgnoreCase(regex.options) && !((StringNode)node).isRaw()) {
node = expandCaseFoldString(node);
}
break;
case NodeType.CTYPE:
case NodeType.CANY:
break;
case NodeType.BREF:
final BackRefNode br = (BackRefNode)node;
if (br.backRef > env.numMem) {
throw new ValueException(ERR_INVALID_BACKREF);
}
env.backrefedMem = bsOnAt(env.backrefedMem, br.backRef);
env.btMemStart = bsOnAt(env.btMemStart, br.backRef);
((EncloseNode)env.memNodes[br.backRef]).setMemBackrefed();
break;
case NodeType.QTFR:
final QuantifierNode qn = (QuantifierNode)node;
Node target = qn.target;
if ((state & IN_REPEAT) != 0) {
qn.setInRepeat();
}
if (isRepeatInfinite(qn.upper) || qn.lower >= 1) {
final int d = getMinMatchLength(target);
if (d == 0) {
qn.targetEmptyInfo = TargetInfo.IS_EMPTY;
if (Config.USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT) {
final int info = quantifiersMemoryInfo(target);
if (info > 0) {
qn.targetEmptyInfo = info;
}
} // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
// strange stuff here (turned off)
}
}
state |= IN_REPEAT;
if (qn.lower != qn.upper) {
state |= IN_VAR_REPEAT;
}
target = setupTree(target, state);
/* expand string */
if (target.getType() == NodeType.STR) {
if (!isRepeatInfinite(qn.lower) && qn.lower == qn.upper &&
qn.lower > 1 && qn.lower <= EXPAND_STRING_MAX_LENGTH) {
final StringNode sn = (StringNode)target;
final int len = sn.length();
if (len * qn.lower <= EXPAND_STRING_MAX_LENGTH) {
final StringNode str = qn.convertToString(sn.flag);
final int n = qn.lower;
for (int i = 0; i < n; i++) {
str.cat(sn.chars, sn.p, sn.end);
}
break; /* break case NT_QTFR: */
}
}
}
if (Config.USE_OP_PUSH_OR_JUMP_EXACT) {
if (qn.greedy && qn.targetEmptyInfo != 0) {
if (target.getType() == NodeType.QTFR) {
final QuantifierNode tqn = (QuantifierNode)target;
if (tqn.headExact != null) {
qn.headExact = tqn.headExact;
tqn.headExact = null;
}
} else {
qn.headExact = getHeadValueNode(qn.target, true);
}
}
} // USE_OP_PUSH_OR_JUMP_EXACT
break;
case NodeType.ENCLOSE:
final EncloseNode en = (EncloseNode)node;
switch (en.type) {
case EncloseType.OPTION:
final int options = regex.options;
regex.options = en.option;
setupTree(en.target, state);
regex.options = options;
break;
case EncloseType.MEMORY:
if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
env.btMemStart = bsOnAt(env.btMemStart, en.regNum);
/* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
}
setupTree(en.target, state);
break;
case EncloseType.STOP_BACKTRACK:
setupTree(en.target, state);
if (en.target.getType() == NodeType.QTFR) {
final QuantifierNode tqn = (QuantifierNode)en.target;
if (isRepeatInfinite(tqn.upper) && tqn.lower <= 1 && tqn.greedy) {
/* (?>a*), a*+ etc... */
if (tqn.target.isSimple()) {
en.setStopBtSimpleRepeat();
}
}
}
break;
default:
break;
} // inner switch
break;
case NodeType.ANCHOR:
final AnchorNode an = (AnchorNode)node;
switch (an.type) {
case AnchorType.PREC_READ:
setupTree(an.target, state);
break;
case AnchorType.PREC_READ_NOT:
setupTree(an.target, (state | IN_NOT));
break;
case AnchorType.LOOK_BEHIND:
if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) {
throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
}
node = setupLookBehind(node);
if (node.getType() != NodeType.ANCHOR) {
continue restart;
}
setupTree(((AnchorNode)node).target, state);
break;
case AnchorType.LOOK_BEHIND_NOT:
if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) {
throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
}
node = setupLookBehind(node);
if (node.getType() != NodeType.ANCHOR) {
continue restart;
}
setupTree(((AnchorNode)node).target, (state | IN_NOT));
break;
default:
break;
} // inner switch
break;
default:
break;
} // switch
return node;
} // restart: while
}
private static final int MAX_NODE_OPT_INFO_REF_COUNT = 5;
private void optimizeNodeLeft(final Node node, final NodeOptInfo opt, final OptEnvironment oenv) { // oenv remove, pass mmd
opt.clear();
opt.setBoundNode(oenv.mmd);
switch (node.getType()) {
case NodeType.LIST: {
final OptEnvironment nenv = new OptEnvironment();
final NodeOptInfo nopt = new NodeOptInfo();
nenv.copy(oenv);
ConsAltNode lin = (ConsAltNode)node;
do {
optimizeNodeLeft(lin.car, nopt, nenv);
nenv.mmd.add(nopt.length);
opt.concatLeftNode(nopt);
} while ((lin = lin.cdr) != null);
break;
}
case NodeType.ALT: {
final NodeOptInfo nopt = new NodeOptInfo();
ConsAltNode aln = (ConsAltNode)node;
do {
optimizeNodeLeft(aln.car, nopt, oenv);
if (aln == node) {
opt.copy(nopt);
} else {
opt.altMerge(nopt, oenv);
}
} while ((aln = aln.cdr) != null);
break;
}
case NodeType.STR: {
final StringNode sn = (StringNode)node;
final int slen = sn.length();
if (!sn.isAmbig()) {
opt.exb.concatStr(sn.chars, sn.p, sn.end, sn.isRaw());
if (slen > 0) {
opt.map.addChar(sn.chars[sn.p]);
}
opt.length.set(slen, slen);
} else {
int max;
if (sn.isDontGetOptInfo()) {
max = sn.length();
} else {
opt.exb.concatStr(sn.chars, sn.p, sn.end, sn.isRaw());
opt.exb.ignoreCase = true;
if (slen > 0) {
opt.map.addCharAmb(sn.chars, sn.p, sn.end, oenv.caseFoldFlag);
}
max = slen;
}
opt.length.set(slen, max);
}
if (opt.exb.length == slen) {
opt.exb.reachEnd = true;
}
break;
}
case NodeType.CCLASS: {
final CClassNode cc = (CClassNode)node;
/* no need to check ignore case. (setted in setup_tree()) */
if (cc.mbuf != null || cc.isNot()) {
opt.length.set(1, 1);
} else {
for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
final boolean z = cc.bs.at(i);
if ((z && !cc.isNot()) || (!z && cc.isNot())) {
opt.map.addChar(i);
}
}
opt.length.set(1, 1);
}
break;
}
case NodeType.CANY: {
opt.length.set(1, 1);
break;
}
case NodeType.ANCHOR: {
final AnchorNode an = (AnchorNode)node;
switch (an.type) {
case AnchorType.BEGIN_BUF:
case AnchorType.BEGIN_POSITION:
case AnchorType.BEGIN_LINE:
case AnchorType.END_BUF:
case AnchorType.SEMI_END_BUF:
case AnchorType.END_LINE:
opt.anchor.add(an.type);
break;
case AnchorType.PREC_READ:
final NodeOptInfo nopt = new NodeOptInfo();
optimizeNodeLeft(an.target, nopt, oenv);
if (nopt.exb.length > 0) {
opt.expr.copy(nopt.exb);
} else if (nopt.exm.length > 0) {
opt.expr.copy(nopt.exm);
}
opt.expr.reachEnd = false;
if (nopt.map.value > 0) {
opt.map.copy(nopt.map);
}
break;
case AnchorType.PREC_READ_NOT:
case AnchorType.LOOK_BEHIND: /* Sorry, I can't make use of it. */
case AnchorType.LOOK_BEHIND_NOT:
break;
default:
break;
} // inner switch
break;
}
case NodeType.BREF: {
final BackRefNode br = (BackRefNode)node;
if (br.isRecursion()) {
opt.length.set(0, MinMaxLen.INFINITE_DISTANCE);
break;
}
final Node[]nodes = oenv.scanEnv.memNodes;
final int min = getMinMatchLength(nodes[br.backRef]);
final int max = getMaxMatchLength(nodes[br.backRef]);
opt.length.set(min, max);
break;
}
case NodeType.QTFR: {
final NodeOptInfo nopt = new NodeOptInfo();
final QuantifierNode qn = (QuantifierNode)node;
optimizeNodeLeft(qn.target, nopt, oenv);
if (qn.lower == 0 && isRepeatInfinite(qn.upper)) {
if (oenv.mmd.max == 0 && qn.target.getType() == NodeType.CANY && qn.greedy) {
if (isMultiline(oenv.options)) {
opt.anchor.add(AnchorType.ANYCHAR_STAR_ML);
} else {
opt.anchor.add(AnchorType.ANYCHAR_STAR);
}
}
} else {
if (qn.lower > 0) {
opt.copy(nopt);
if (nopt.exb.length > 0) {
if (nopt.exb.reachEnd) {
int i;
for (i = 2; i <= qn.lower && !opt.exb.isFull(); i++) {
opt.exb.concat(nopt.exb);
}
if (i < qn.lower) {
opt.exb.reachEnd = false;
}
}
}
if (qn.lower != qn.upper) {
opt.exb.reachEnd = false;
opt.exm.reachEnd = false;
}
if (qn.lower > 1) {
opt.exm.reachEnd = false;
}
}
}
final int min = MinMaxLen.distanceMultiply(nopt.length.min, qn.lower);
int max;
if (isRepeatInfinite(qn.upper)) {
max = nopt.length.max > 0 ? MinMaxLen.INFINITE_DISTANCE : 0;
} else {
max = MinMaxLen.distanceMultiply(nopt.length.max, qn.upper);
}
opt.length.set(min, max);
break;
}
case NodeType.ENCLOSE: {
final EncloseNode en = (EncloseNode)node;
switch (en.type) {
case EncloseType.OPTION:
final int save = oenv.options;
oenv.options = en.option;
optimizeNodeLeft(en.target, opt, oenv);
oenv.options = save;
break;
case EncloseType.MEMORY:
if (++en.optCount > MAX_NODE_OPT_INFO_REF_COUNT) {
int min = 0;
int max = MinMaxLen.INFINITE_DISTANCE;
if (en.isMinFixed()) {
min = en.minLength;
}
if (en.isMaxFixed()) {
max = en.maxLength;
}
opt.length.set(min, max);
} else { // USE_SUBEXP_CALL
optimizeNodeLeft(en.target, opt, oenv);
if (opt.anchor.isSet(AnchorType.ANYCHAR_STAR_MASK)) {
if (bsAt(oenv.scanEnv.backrefedMem, en.regNum)) {
opt.anchor.remove(AnchorType.ANYCHAR_STAR_MASK);
}
}
}
break;
case EncloseType.STOP_BACKTRACK:
optimizeNodeLeft(en.target, opt, oenv);
break;
default:
break;
} // inner switch
break;
}
default:
throw new InternalException(ERR_PARSER_BUG);
} // switch
}
@SuppressWarnings("unused")
protected final void setOptimizedInfoFromTree(final Node node) {
final NodeOptInfo opt = new NodeOptInfo();
final OptEnvironment oenv = new OptEnvironment();
oenv.options = regex.options;
oenv.caseFoldFlag = regex.caseFoldFlag;
oenv.scanEnv = env;
oenv.mmd.clear(); // ??
optimizeNodeLeft(node, opt, oenv);
regex.anchor = opt.anchor.leftAnchor & (AnchorType.BEGIN_BUF |
AnchorType.BEGIN_POSITION |
AnchorType.ANYCHAR_STAR |
AnchorType.ANYCHAR_STAR_ML);
regex.anchor |= opt.anchor.rightAnchor & (AnchorType.END_BUF |
AnchorType.SEMI_END_BUF);
if ((regex.anchor & (AnchorType.END_BUF | AnchorType.SEMI_END_BUF)) != 0) {
regex.anchorDmin = opt.length.min;
regex.anchorDmax = opt.length.max;
}
if (opt.exb.length > 0 || opt.exm.length > 0) {
opt.exb.select(opt.exm);
if (opt.map.value > 0 && opt.exb.compare(opt.map) > 0) {
// !goto set_map;!
regex.setOptimizeMapInfo(opt.map);
regex.setSubAnchor(opt.map.anchor);
} else {
regex.setExactInfo(opt.exb);
regex.setSubAnchor(opt.exb.anchor);
}
} else if (opt.map.value > 0) {
// !set_map:!
regex.setOptimizeMapInfo(opt.map);
regex.setSubAnchor(opt.map.anchor);
} else {
regex.subAnchor |= opt.anchor.leftAnchor & AnchorType.BEGIN_LINE;
if (opt.length.max == 0) {
regex.subAnchor |= opt.anchor.rightAnchor & AnchorType.END_LINE;
}
}
if (Config.DEBUG_COMPILE || Config.DEBUG_MATCH) {
Config.log.println(regex.optimizeInfoToString());
}
}
}