blob: bd335bb74b79d6f61c52535774dd1b55724dab96 [file] [log] [blame]
/*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is furnished to do
* so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package jdk.nashorn.internal.runtime.regexp.joni;
import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
import jdk.nashorn.internal.runtime.regexp.joni.constants.RegexState;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
@SuppressWarnings("javadoc")
public final class Regex implements RegexState {
int[] code; /* compiled pattern */
int codeLength;
boolean stackNeeded;
Object[] operands; /* e.g. shared CClassNode */
int operandLength;
int numMem; /* used memory(...) num counted from 1 */
int numRepeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
int numNullCheck; /* OP_NULL_CHECK_START/END id counter */
int captureHistory; /* (?@...) flag (1-31) */
int btMemStart; /* need backtrack flag */
int btMemEnd; /* need backtrack flag */
int stackPopLevel;
int[] repeatRangeLo;
int[] repeatRangeHi;
WarnCallback warnings;
MatcherFactory factory;
protected Analyser analyser;
int options;
final int caseFoldFlag;
/* optimization info (string search, char-map and anchors) */
SearchAlgorithm searchAlgorithm; /* optimize flag */
int thresholdLength; /* search str-length for apply optimize */
int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
int anchorDmin; /* (SEMI_)END_BUF anchor distance */
int anchorDmax; /* (SEMI_)END_BUF anchor distance */
int subAnchor; /* start-anchor for exact or map */
char[] exact;
int exactP;
int exactEnd;
byte[] map; /* used as BM skip or char-map */
int[] intMap; /* BM skip for exact_len > 255 */
int[] intMapBackward; /* BM skip for backward search */
int dMin; /* min-distance of exact or map */
int dMax; /* max-distance of exact or map */
char[][] templates;
int templateNum;
public Regex(final CharSequence cs) {
this(cs.toString());
}
public Regex(final String str) {
this(str.toCharArray(), 0, str.length(), 0);
}
public Regex(final char[] chars) {
this(chars, 0, chars.length, 0);
}
public Regex(final char[] chars, final int p, final int end) {
this(chars, p, end, 0);
}
public Regex(final char[] chars, final int p, final int end, final int option) {
this(chars, p, end, option, Syntax.RUBY, WarnCallback.DEFAULT);
}
// onig_new
public Regex(final char[] chars, final int p, final int end, final int option, final Syntax syntax) {
this(chars, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, syntax, WarnCallback.DEFAULT);
}
public Regex(final char[]chars, final int p, final int end, final int option, final WarnCallback warnings) {
this(chars, p, end, option, Syntax.RUBY, warnings);
}
// onig_new
public Regex(final char[] chars, final int p, final int end, final int option, final Syntax syntax, final WarnCallback warnings) {
this(chars, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, syntax, warnings);
}
// onig_alloc_init
public Regex(final char[] chars, final int p, final int end, final int optionp, final int caseFoldFlag, final Syntax syntax, final WarnCallback warnings) {
int option = optionp;
if ((option & (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) ==
(Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) {
throw new ValueException(ErrorMessages.ERR_INVALID_COMBINATION_OF_OPTIONS);
}
if ((option & Option.NEGATE_SINGLELINE) != 0) {
option |= syntax.options;
option &= ~Option.SINGLELINE;
} else {
option |= syntax.options;
}
this.options = option;
this.caseFoldFlag = caseFoldFlag;
this.warnings = warnings;
this.analyser = new Analyser(new ScanEnvironment(this, syntax), chars, p, end);
this.analyser.compile();
this.warnings = null;
}
public synchronized MatcherFactory compile() {
if (factory == null && analyser != null) {
new ArrayCompiler(analyser).compile();
analyser = null; // only do this once
}
assert factory != null;
return factory;
}
public Matcher matcher(final char[] chars) {
return matcher(chars, 0, chars.length);
}
public Matcher matcher(final char[] chars, final int p, final int end) {
MatcherFactory matcherFactory = factory;
if (matcherFactory == null) {
matcherFactory = compile();
}
return matcherFactory.create(this, chars, p, end);
}
public WarnCallback getWarnings() {
return warnings;
}
public int numberOfCaptures() {
return numMem;
}
/* set skip map for Boyer-Moor search */
void setupBMSkipMap() {
final char[] chars = exact;
final int p = exactP;
final int end = exactEnd;
final int len = end - p;
if (len < Config.CHAR_TABLE_SIZE) {
// map/skip
if (map == null) {
map = new byte[Config.CHAR_TABLE_SIZE];
}
for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) {
map[i] = (byte)len;
}
for (int i=0; i<len-1; i++)
{
map[chars[p + i] & 0xff] = (byte)(len - 1 -i); // oxff ??
}
} else {
if (intMap == null) {
intMap = new int[Config.CHAR_TABLE_SIZE];
}
for (int i=0; i<len-1; i++)
{
intMap[chars[p + i] & 0xff] = len - 1 - i; // oxff ??
}
}
}
void setExactInfo(final OptExactInfo e) {
if (e.length == 0) {
return;
}
// shall we copy that ?
exact = e.chars;
exactP = 0;
exactEnd = e.length;
if (e.ignoreCase) {
searchAlgorithm = new SearchAlgorithm.SLOW_IC(this);
} else {
if (e.length >= 2) {
setupBMSkipMap();
searchAlgorithm = SearchAlgorithm.BM;
} else {
searchAlgorithm = SearchAlgorithm.SLOW;
}
}
dMin = e.mmd.min;
dMax = e.mmd.max;
if (dMin != MinMaxLen.INFINITE_DISTANCE) {
thresholdLength = dMin + (exactEnd - exactP);
}
}
void setOptimizeMapInfo(final OptMapInfo m) {
map = m.map;
searchAlgorithm = SearchAlgorithm.MAP;
dMin = m.mmd.min;
dMax = m.mmd.max;
if (dMin != MinMaxLen.INFINITE_DISTANCE) {
thresholdLength = dMin + 1;
}
}
void setSubAnchor(final OptAnchorInfo anc) {
subAnchor |= anc.leftAnchor & AnchorType.BEGIN_LINE;
subAnchor |= anc.rightAnchor & AnchorType.END_LINE;
}
void clearOptimizeInfo() {
searchAlgorithm = SearchAlgorithm.NONE;
anchor = 0;
anchorDmax = 0;
anchorDmin = 0;
subAnchor = 0;
exact = null;
exactP = exactEnd = 0;
}
public String optimizeInfoToString() {
final StringBuilder s = new StringBuilder();
s.append("optimize: ").append(searchAlgorithm.getName()).append("\n");
s.append(" anchor: ").append(OptAnchorInfo.anchorToString(anchor));
if ((anchor & AnchorType.END_BUF_MASK) != 0) {
s.append(MinMaxLen.distanceRangeToString(anchorDmin, anchorDmax));
}
s.append("\n");
if (searchAlgorithm != SearchAlgorithm.NONE) {
s.append(" sub anchor: ").append(OptAnchorInfo.anchorToString(subAnchor)).append("\n");
}
s.append("dmin: ").append(dMin).append(" dmax: ").append(dMax).append("\n");
s.append("threshold length: ").append(thresholdLength).append("\n");
if (exact != null) {
s.append("exact: [").append(exact, exactP, exactEnd - exactP).append("]: length: ").append(exactEnd - exactP).append("\n");
} else if (searchAlgorithm == SearchAlgorithm.MAP) {
int n=0;
for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) {
if (map[i] != 0) {
n++;
}
}
s.append("map: n = ").append(n).append("\n");
if (n > 0) {
int c=0;
s.append("[");
for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) {
if (map[i] != 0) {
if (c > 0) {
s.append(", ");
}
c++;
// TODO if (enc.isPrint(i)
s.append((char)i);
}
}
s.append("]\n");
}
}
return s.toString();
}
public int getOptions() {
return options;
}
public String dumpTree() {
return analyser == null ? null : analyser.root.toString();
}
public String dumpByteCode() {
compile();
return new ByteCodePrinter(this).byteCodeListToString();
}
}