blob: 4aea4acb787bffdbfcf5a1315c7eaaa808e824c9 [file] [log] [blame]
/*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is furnished to do
* so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package jdk.nashorn.internal.runtime.regexp.joni;
import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindLongest;
import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder;
public abstract class Matcher extends IntHolder {
protected final Regex regex;
protected final char[] chars;
protected final int str;
protected final int end;
protected int msaStart;
protected int msaOptions;
protected final Region msaRegion;
protected int msaBestLen;
protected int msaBestS;
protected int msaBegin;
protected int msaEnd;
public Matcher(Regex regex, char[] chars) {
this(regex, chars, 0, chars.length);
}
public Matcher(Regex regex, char[] chars, int p, int end) {
this.regex = regex;
this.chars = chars;
this.str = p;
this.end = end;
this.msaRegion = regex.numMem == 0 ? null : new Region(regex.numMem + 1);
}
// main matching method
protected abstract int matchAt(int range, int sstart, int sprev);
protected abstract void stateCheckBuffInit(int strLength, int offset, int stateNum);
protected abstract void stateCheckBuffClear();
public final Region getRegion() {
return msaRegion;
}
public final Region getEagerRegion() {
return msaRegion != null ? msaRegion : new Region(msaBegin, msaEnd);
}
public final int getBegin() {
return msaBegin;
}
public final int getEnd() {
return msaEnd;
}
protected final void msaInit(int option, int start) {
msaOptions = option;
msaStart = start;
if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) msaBestLen = -1;
}
public final int match(int at, int range, int option) {
msaInit(option, at);
if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
int offset = at = str;
stateCheckBuffInit(end - str, offset, regex.numCombExpCheck); // move it to construction?
} // USE_COMBINATION_EXPLOSION_CHECK
int prev = EncodingHelper.prevCharHead(str, at);
if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
return matchAt(end /*range*/, at, prev);
} else {
return matchAt(range /*range*/, at, prev);
}
}
int low, high; // these are the return values
private boolean forwardSearchRange(char[] chars, int str, int end, int s, int range, IntHolder lowPrev) {
int pprev = -1;
int p = s;
if (Config.DEBUG_SEARCH) {
Config.log.println("forward_search_range: "+
"str: " + str +
", end: " + end +
", s: " + s +
", range: " + range);
}
if (regex.dMin > 0) {
p += regex.dMin;
}
retry:while (true) {
p = regex.searchAlgorithm.search(regex, chars, p, end, range);
if (p != -1 && p < range) {
if (p - regex.dMin < s) {
// retry_gate:
pprev = p;
p++;
continue retry;
}
if (regex.subAnchor != 0) {
switch (regex.subAnchor) {
case AnchorType.BEGIN_LINE:
if (p != str) {
int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, p);
if (!EncodingHelper.isNewLine(chars, prev, end)) {
// goto retry_gate;
pprev = p;
p++;
continue retry;
}
}
break;
case AnchorType.END_LINE:
if (p == end) {
if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, p);
if (prev != -1 && EncodingHelper.isNewLine(chars, prev, end)) {
// goto retry_gate;
pprev = p;
p++;
continue retry;
}
}
} else if (!EncodingHelper.isNewLine(chars, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !EncodingHelper.isCrnl(chars, p, end))) {
//if () break;
// goto retry_gate;
pprev = p;
p++;
continue retry;
}
break;
} // switch
}
if (regex.dMax == 0) {
low = p;
if (lowPrev != null) { // ??? // remove null checks
if (low > s) {
lowPrev.value = EncodingHelper.prevCharHead(s, p);
} else {
lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, p);
}
}
} else {
if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
low = p - regex.dMax;
if (low > s) {
low = EncodingHelper.rightAdjustCharHeadWithPrev(low, lowPrev);
if (lowPrev != null && lowPrev.value == -1) {
lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : s, low);
}
} else {
if (lowPrev != null) {
lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, low);
}
}
}
}
/* no needs to adjust *high, *high is used as range check only */
high = p - regex.dMin;
if (Config.DEBUG_SEARCH) {
Config.log.println("forward_search_range success: "+
"low: " + (low - str) +
", high: " + (high - str) +
", dmin: " + regex.dMin +
", dmax: " + regex.dMax);
}
return true; /* success */
}
return false; /* fail */
} //while
}
// low, high
private boolean backwardSearchRange(char[] chars, int str, int end, int s, int range, int adjrange) {
range += regex.dMin;
int p = s;
retry:while (true) {
p = regex.searchAlgorithm.searchBackward(regex, chars, range, adjrange, end, p, s, range);
if (p != -1) {
if (regex.subAnchor != 0) {
switch (regex.subAnchor) {
case AnchorType.BEGIN_LINE:
if (p != str) {
int prev = EncodingHelper.prevCharHead(str, p);
if (!EncodingHelper.isNewLine(chars, prev, end)) {
p = prev;
continue retry;
}
}
break;
case AnchorType.END_LINE:
if (p == end) {
if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
int prev = EncodingHelper.prevCharHead(adjrange, p);
if (prev == -1) return false;
if (EncodingHelper.isNewLine(chars, prev, end)) {
p = prev;
continue retry;
}
}
} else if (!EncodingHelper.isNewLine(chars, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !EncodingHelper.isCrnl(chars, p, end))) {
p = EncodingHelper.prevCharHead(adjrange, p);
if (p == -1) return false;
continue retry;
}
break;
} // switch
}
/* no needs to adjust *high, *high is used as range check only */
if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
low = p - regex.dMax;
high = p - regex.dMin;
}
if (Config.DEBUG_SEARCH) {
Config.log.println("backward_search_range: "+
"low: " + (low - str) +
", high: " + (high - str));
}
return true;
}
if (Config.DEBUG_SEARCH) Config.log.println("backward_search_range: fail.");
return false;
} // while
}
// MATCH_AND_RETURN_CHECK
private boolean matchCheck(int upperRange, int s, int prev) {
if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
//range = upperRange;
if (matchAt(upperRange, s, prev) != -1) {
if (!isFindLongest(regex.options)) return true;
}
} else {
//range = upperRange;
if (matchAt(upperRange, s, prev) != -1) return true;
}
} else {
if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
if (matchAt(end, s, prev) != -1) {
//range = upperRange;
if (!isFindLongest(regex.options)) return true;
}
} else {
//range = upperRange;
if (matchAt(end, s, prev) != -1) return true;
}
}
return false;
}
public final int search(int start, int range, int option) {
int s, prev;
int origStart = start;
int origRange = range;
if (Config.DEBUG_SEARCH) {
Config.log.println("onig_search (entry point): "+
"str: " + str +
", end: " + (end - str) +
", start: " + (start - str) +
", range " + (range - str));
}
if (start > end || start < str) return -1;
/* anchor optimize: resume search range */
if (regex.anchor != 0 && str < end) {
int minSemiEnd, maxSemiEnd;
if ((regex.anchor & AnchorType.BEGIN_POSITION) != 0) {
/* search start-position only */
// !begin_position:!
if (range > start) {
range = start + 1;
} else {
range = start;
}
} else if ((regex.anchor & AnchorType.BEGIN_BUF) != 0) {
/* search str-position only */
if (range > start) {
if (start != str) return -1; // mismatch_no_msa;
range = str + 1;
} else {
if (range <= str) {
start = str;
range = str;
} else {
return -1; // mismatch_no_msa;
}
}
} else if ((regex.anchor & AnchorType.END_BUF) != 0) {
minSemiEnd = maxSemiEnd = end;
// !end_buf:!
if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa;
} else if ((regex.anchor & AnchorType.SEMI_END_BUF) != 0) {
int preEnd = EncodingHelper.stepBack(str, end, 1);
maxSemiEnd = end;
if (EncodingHelper.isNewLine(chars, preEnd, end)) {
minSemiEnd = preEnd;
if (Config.USE_CRNL_AS_LINE_TERMINATOR) {
preEnd = EncodingHelper.stepBack(str, preEnd, 1);
if (preEnd != -1 && EncodingHelper.isCrnl(chars, preEnd, end)) {
minSemiEnd = preEnd;
}
}
if (minSemiEnd > str && start <= minSemiEnd) {
// !goto end_buf;!
if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa;
}
} else {
minSemiEnd = end;
// !goto end_buf;!
if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa;
}
} else if ((regex.anchor & AnchorType.ANYCHAR_STAR_ML) != 0) {
// goto !begin_position;!
if (range > start) {
range = start + 1;
} else {
range = start;
}
}
} else if (str == end) { /* empty string */
// empty address ?
if (Config.DEBUG_SEARCH) {
Config.log.println("onig_search: empty string.");
}
if (regex.thresholdLength == 0) {
s = start = str;
prev = -1;
msaInit(option, start);
if (Config.USE_COMBINATION_EXPLOSION_CHECK) stateCheckBuffClear();
if (matchCheck(end, s, prev)) return match(s);
return mismatch();
}
return -1; // goto mismatch_no_msa;
}
if (Config.DEBUG_SEARCH) {
Config.log.println("onig_search(apply anchor): " +
"end: " + (end - str) +
", start " + (start - str) +
", range " + (range - str));
}
msaInit(option, origStart);
if (Config.USE_COMBINATION_EXPLOSION_CHECK) {
int offset = Math.min(start, range) - str;
stateCheckBuffInit(end - str, offset, regex.numCombExpCheck);
}
s = start;
if (range > start) { /* forward search */
if (s > str) {
prev = EncodingHelper.prevCharHead(str, s);
} else {
prev = 0; // -1
}
if (regex.searchAlgorithm != SearchAlgorithm.NONE) {
int schRange = range;
if (regex.dMax != 0) {
if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) {
schRange = end;
} else {
schRange += regex.dMax;
if (schRange > end) schRange = end;
}
}
if ((end - start) < regex.thresholdLength) return mismatch();
if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
do {
if (!forwardSearchRange(chars, str, end, s, schRange, this)) return mismatch(); // low, high, lowPrev
if (s < low) {
s = low;
prev = value;
}
while (s <= high) {
if (matchCheck(origRange, s, prev)) return match(s); // ???
prev = s;
s++;
}
} while (s < range);
return mismatch();
} else { /* check only. */
if (!forwardSearchRange(chars, str, end, s, schRange, null)) return mismatch();
if ((regex.anchor & AnchorType.ANYCHAR_STAR) != 0) {
do {
if (matchCheck(origRange, s, prev)) return match(s);
prev = s;
s++;
} while (s < range);
return mismatch();
}
}
}
do {
if (matchCheck(origRange, s, prev)) return match(s);
prev = s;
s++;
} while (s < range);
if (s == range) { /* because empty match with /$/. */
if (matchCheck(origRange, s, prev)) return match(s);
}
} else { /* backward search */
if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
if (origStart < end) {
origStart++; // /* is upper range */
}
}
if (regex.searchAlgorithm != SearchAlgorithm.NONE) {
int adjrange;
if (range < end) {
adjrange = range;
} else {
adjrange = end;
}
if (regex.dMax != MinMaxLen.INFINITE_DISTANCE && (end - range) >= regex.thresholdLength) {
do {
int schStart = s + regex.dMax;
if (schStart > end) schStart = end;
if (!backwardSearchRange(chars, str, end, schStart, range, adjrange)) return mismatch(); // low, high
if (s > high) s = high;
while (s != -1 && s >= low) {
prev = EncodingHelper.prevCharHead(str, s);
if (matchCheck(origStart, s, prev)) return match(s);
s = prev;
}
} while (s >= range);
return mismatch();
} else { /* check only. */
if ((end - range) < regex.thresholdLength) return mismatch();
int schStart = s;
if (regex.dMax != 0) {
if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) {
schStart = end;
} else {
schStart += regex.dMax;
if (schStart > end) {
schStart = end;
}
}
}
if (!backwardSearchRange(chars, str, end, schStart, range, adjrange)) return mismatch();
}
}
do {
prev = EncodingHelper.prevCharHead(str, s);
if (matchCheck(origStart, s, prev)) return match(s);
s = prev;
} while (s >= range);
}
return mismatch();
}
private boolean endBuf(int start, int range, int minSemiEnd, int maxSemiEnd) {
if ((maxSemiEnd - str) < regex.anchorDmin) return true; // mismatch_no_msa;
if (range > start) {
if ((minSemiEnd - start) > regex.anchorDmax) {
start = minSemiEnd - regex.anchorDmax;
if (start >= end) {
/* match with empty at end */
start = EncodingHelper.prevCharHead(str, end);
}
}
if ((maxSemiEnd - (range - 1)) < regex.anchorDmin) {
range = maxSemiEnd - regex.anchorDmin + 1;
}
if (start >= range) return true; // mismatch_no_msa;
} else {
if ((minSemiEnd - range) > regex.anchorDmax) {
range = minSemiEnd - regex.anchorDmax;
}
if ((maxSemiEnd - start) < regex.anchorDmin) {
start = maxSemiEnd - regex.anchorDmin;
}
if (range > start) return true; // mismatch_no_msa;
}
return false;
}
private int match(int s) {
return s - str; // sstart ???
}
private int mismatch() {
if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
if (msaBestLen >= 0) {
int s = msaBestS;
return match(s);
}
}
// falls through finish:
return -1;
}
}