// blob: bcf7aa079a995caeebb5ed974d86d348869e8c6b [file] [log] [blame]
/*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is furnished to do
* so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package jdk.nashorn.internal.runtime.regexp.joni.ast;
import jdk.nashorn.internal.runtime.regexp.joni.BitSet;
import jdk.nashorn.internal.runtime.regexp.joni.CodeRangeBuffer;
import jdk.nashorn.internal.runtime.regexp.joni.Config;
import jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper;
import jdk.nashorn.internal.runtime.regexp.joni.ScanEnvironment;
import jdk.nashorn.internal.runtime.regexp.joni.Syntax;
import jdk.nashorn.internal.runtime.regexp.joni.constants.CCSTATE;
import jdk.nashorn.internal.runtime.regexp.joni.constants.CCVALTYPE;
import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
@SuppressWarnings("javadoc")
public final class CClassNode extends Node {
// Bit in 'flags' marking the class as negated; read by isNot().
private static final int FLAG_NCCLASS_NOT = 1<<0;
// Bit in 'flags' carrying the "share" marker; read by isShare().
private static final int FLAG_NCCLASS_SHARE = 1<<1;
// Bitwise OR of the FLAG_NCCLASS_* bits currently set on this node; clear() resets it to 0.
int flags;
// Membership bits; isCodeInCCLength() consults this set for codes up to 0xff.
public final BitSet bs = new BitSet(); // conditional creation ?
// Range buffer consulted by isCodeInCCLength() for codes above 0xff.
// It is null initially and after clear().
public CodeRangeBuffer mbuf; /* multi-byte info or NULL */
// Read by equals() and hashCode(). No assignment to it is visible in this class,
// so as far as this file shows it keeps the default value 0.
private int ctype; // for hashing purposes
// Character-type bit masks for the 256 single-byte codes, indexed by code.
// addCType() tests an entry with (1 << ct): a set bit means the code belongs to
// character type ct. The entries for codes 0x80-0xff are all zero.
private final static short AsciiCtypeTable[] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
// node_new_cclass
/**
 * Creates a character class node with no flags set and no range buffer.
 */
public CClassNode() {
    super();
}
/**
 * Resets this node: drops the range buffer, clears every flag bit and
 * clears the bit set.
 */
public void clear() {
    mbuf = null;
    flags = 0;
    bs.clear();
}
/**
 * Reports the node type of a character class.
 *
 * @return the {@code CCLASS} type constant
 */
@Override
public int getType() {
    final int nodeType = CCLASS;
    return nodeType;
}
/**
 * Gives the human-readable name of this node kind.
 *
 * @return the fixed label {@code "Character Class"}
 */
@Override
public String getName() {
    final String label = "Character Class";
    return label;
}
/**
 * Compares this node with another object, consistently with {@link #hashCode()}.
 *
 * <p>When {@code Config.USE_SHARED_CCLASS_TABLE} is enabled, two character
 * class nodes are equal when their {@code ctype} and NOT flag agree, which is
 * the key that {@code hashCode()} hashes. When it is disabled,
 * {@code hashCode()} falls back to the identity hash, so equality is identity
 * as well. Otherwise distinct nodes would compare equal while hashing
 * differently, which violates the {@code Object} contract and makes hashed
 * collections behave unpredictably.
 *
 * @param other object to compare with, may be {@code null}
 * @return {@code true} when {@code other} is considered the same character class
 */
@Override
public boolean equals(final Object other) {
    if (this == other) {
        return true;
    }
    if (!Config.USE_SHARED_CCLASS_TABLE) {
        // hashCode() is identity-based in this configuration; keep equals in step with it.
        return false;
    }
    if (!(other instanceof CClassNode)) {
        return false;
    }
    final CClassNode cc = (CClassNode) other;
    return ctype == cc.ctype && isNot() == cc.isNot();
}
/**
 * Computes the hash code of this node.
 *
 * <p>With {@code Config.USE_SHARED_CCLASS_TABLE} enabled the hash is derived
 * from {@code ctype} and the NOT flag; otherwise the inherited hash code is
 * returned.
 *
 * @return the hash code
 */
@Override
public int hashCode() {
    if (!Config.USE_SHARED_CCLASS_TABLE) {
        return super.hashCode();
    }
    final int seed = ctype + (isNot() ? 1 : 0);
    return seed + (seed >> 5);
}
/**
 * Renders the flags, the bit set and the range buffer of this node, one per
 * line, with the bit set and buffer padded for nesting level {@code level + 1}.
 *
 * @param level nesting level of this node in the printed tree
 * @return the textual description
 */
@Override
public String toString(final int level) {
    final int childLevel = level + 1;
    return new StringBuilder()
        .append("\n flags: ").append(flagsToString())
        .append("\n bs: ").append(pad(bs, childLevel))
        .append("\n mbuf: ").append(pad(mbuf, childLevel))
        .toString();
}
/**
 * Lists the names of the flags that are set, each followed by a space.
 *
 * @return {@code "NOT "} and/or {@code "SHARE "} concatenated in that order,
 *         or the empty string when neither flag is set
 */
public String flagsToString() {
    String text = "";
    if (isNot()) {
        text += "NOT ";
    }
    if (isShare()) {
        text += "SHARE ";
    }
    return text;
}
/**
 * Tells whether nothing has been recorded in this class.
 *
 * @return {@code true} when there is no range buffer and the bit set is empty
 */
public boolean isEmpty() {
    if (mbuf != null) {
        return false;
    }
    return bs.isEmpty();
}
/**
 * Passes the range {@code from}..{@code to} to
 * {@code CodeRangeBuffer.addCodeRangeToBuff} and stores the buffer it returns.
 *
 * @param from first code of the range
 * @param to   last code of the range
 */
public void addCodeRangeToBuf(final int from, final int to) {
    final CodeRangeBuffer updated = CodeRangeBuffer.addCodeRangeToBuff(mbuf, from, to);
    mbuf = updated;
}
/**
 * Passes the range {@code from}..{@code to} together with the scan
 * environment to {@code CodeRangeBuffer.addCodeRange} and stores the buffer
 * it returns.
 *
 * @param env  scan environment handed through to the buffer helper
 * @param from first code of the range
 * @param to   last code of the range
 */
public void addCodeRange(final ScanEnvironment env, final int from, final int to) {
    final CodeRangeBuffer updated = CodeRangeBuffer.addCodeRange(mbuf, env, from, to);
    mbuf = updated;
}
/**
 * Replaces the range buffer with the result of
 * {@code CodeRangeBuffer.addAllMultiByteRange} applied to the current buffer.
 */
public void addAllMultiByteRange() {
    final CodeRangeBuffer updated = CodeRangeBuffer.addAllMultiByteRange(mbuf);
    mbuf = updated;
}
/**
 * Removes the NOT flag by folding the negation into the stored data: the bit
 * set is inverted, the range buffer is replaced by the result of
 * {@code CodeRangeBuffer.notCodeRangeBuff}, and the flag is cleared.
 * Does nothing when the flag is not set.
 */
public void clearNotFlag() {
    if (!isNot()) {
        return;
    }
    bs.invert();
    mbuf = CodeRangeBuffer.notCodeRangeBuff(mbuf);
    clearNot();
}
// and_cclass
/**
 * Intersects this class with {@code other}, updating the bit set and range
 * buffer of this node.
 *
 * <p>The flags of this node are not touched. When this node carries the NOT
 * flag, the operands are first taken in their inverted form and the result
 * is inverted back before it is stored, so the stored data stays relative
 * to the flag.
 *
 * @param other the class to intersect with
 */
public void and(final CClassNode other) {
    final boolean thisNegated = isNot();
    final boolean thatNegated = other.isNot();
    final CodeRangeBuffer thisRanges = mbuf;
    final CodeRangeBuffer thatRanges = other.mbuf;

    // Work on inverted copies for negated operands.
    BitSet left = bs;
    if (thisNegated) {
        left = new BitSet();
        bs.invertTo(left);
    }
    BitSet right = other.bs;
    if (thatNegated) {
        right = new BitSet();
        other.bs.invertTo(right);
    }

    left.and(right);
    if (thisNegated) {
        // 'left' was a scratch copy: store it and restore the negated form.
        bs.copy(left);
        bs.invert();
    }

    final CodeRangeBuffer merged;
    if (thisNegated && thatNegated) {
        merged = CodeRangeBuffer.orCodeRangeBuff(thisRanges, false, thatRanges, false);
    } else {
        final CodeRangeBuffer common =
            CodeRangeBuffer.andCodeRangeBuff(thisRanges, thisNegated, thatRanges, thatNegated);
        merged = thisNegated ? CodeRangeBuffer.notCodeRangeBuff(common) : common;
    }
    mbuf = merged;
}
// or_cclass
/**
 * Unites this class with {@code other}, updating the bit set and range
 * buffer of this node.
 *
 * <p>The flags of this node are not touched. When this node carries the NOT
 * flag, the operands are first taken in their inverted form and the result
 * is inverted back before it is stored, so the stored data stays relative
 * to the flag.
 *
 * @param other the class to unite with
 */
public void or(final CClassNode other) {
    final boolean thisNegated = isNot();
    final boolean thatNegated = other.isNot();
    final CodeRangeBuffer thisRanges = mbuf;
    final CodeRangeBuffer thatRanges = other.mbuf;

    // Work on inverted copies for negated operands.
    BitSet left = bs;
    if (thisNegated) {
        left = new BitSet();
        bs.invertTo(left);
    }
    BitSet right = other.bs;
    if (thatNegated) {
        right = new BitSet();
        other.bs.invertTo(right);
    }

    left.or(right);
    if (thisNegated) {
        // 'left' was a scratch copy: store it and restore the negated form.
        bs.copy(left);
        bs.invert();
    }

    final CodeRangeBuffer merged;
    if (thisNegated && thatNegated) {
        merged = CodeRangeBuffer.andCodeRangeBuff(thisRanges, false, thatRanges, false);
    } else {
        final CodeRangeBuffer union =
            CodeRangeBuffer.orCodeRangeBuff(thisRanges, thisNegated, thatRanges, thatNegated);
        merged = thisNegated ? CodeRangeBuffer.notCodeRangeBuff(union) : union;
    }
    mbuf = merged;
}
// add_ctype_to_cc_by_range // Encoding out!
/**
 * Adds the code ranges listed in {@code mbr} (or, when {@code not} is set,
 * the gaps between them) to this class.
 *
 * <p>{@code mbr[0]} holds the number of ranges; range {@code i} spans
 * {@code mbr[2 * i + 1]} (first code) to {@code mbr[2 * i + 2]} (last code,
 * inclusive). Codes below {@code sbOut} are recorded as bits in {@code bs};
 * ranges are appended to the range buffer through
 * {@code addCodeRangeToBuf}. The gap computation walks the ranges in array
 * order, so it presumably expects them sorted ascending and non-overlapping
 * - TODO confirm against the tables passed in by the caller.
 *
 * @param ct    character type the ranges belong to; not read by this method
 * @param not   {@code false} to add the ranges themselves, {@code true} to
 *              add their complement
 * @param sbOut first code that is no longer recorded in the bit set
 * @param mbr   range table in the layout described above
 */
public void addCTypeByRange(final int ct, final boolean not, final int sbOut, final int mbr[]) {
final int n = mbr[0];
if (!not) {
for (int i=0; i<n; i++) {
// Walk every code of range i; codes below sbOut become bits.
for (int j=mbr[i * 2 + 1]; j<=mbr[i * 2 + 2]; j++) {
if (j >= sbOut) {
// First code at or above sbOut: the rest goes to the range buffer.
if (Config.VANILLA) {
if (j == mbr[i * 2 + 2]) {
// j is the last code of range i: move on to the next range without adding it.
i++;
} else if (j > mbr[i * 2 + 1]) {
// Range i straddles sbOut: add its remaining part.
addCodeRangeToBuf(j, mbr[i * 2 + 2]);
i++;
}
// Otherwise range i starts at j and is added whole by the loop below.
} else {
// j starts at the first code of range i, so this test always holds:
// add the remaining part of range i.
if (j >= mbr[i * 2 + 1]) {
addCodeRangeToBuf(j, mbr[i * 2 + 2]);
i++;
}
}
// !goto sb_end!, remove duplication!
// Add every range after the current one, then stop.
for (; i<n; i++) {
addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]);
}
return;
}
bs.set(j);
}
}
// !sb_end:!
// Reached when no code hit sbOut. This loop restarts at i = 0, so every
// range is also added to the range buffer, in addition to the bits set above.
for (int i=0; i<n; i++) {
addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]);
}
} else {
// Complement: 'prev' is the first code after the previously visited range.
// NOTE(review): 'last code + 1' below would overflow to a negative value if a
// range ended at 0x7fffffff - confirm that no table does.
int prev = 0;
for (int i=0; i<n; i++) {
// Codes in the gap before range i become bits while they stay below sbOut.
for (int j=prev; j < mbr[2 * i + 1]; j++) {
if (j >= sbOut) {
// !goto sb_end2!, remove duplication
// Same steps as the sb_end2 section at the end of this branch.
prev = sbOut;
for (i=0; i<n; i++) {
if (prev < mbr[2 * i + 1]) {
addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1);
}
prev = mbr[i * 2 + 2] + 1;
}
if (prev < 0x7fffffff/*!!!*/) {
addCodeRangeToBuf(prev, 0x7fffffff);
}
return;
}
bs.set(j);
}
prev = mbr[2 * i + 2] + 1;
}
// Gap after the last range, up to (excluding) sbOut.
for (int j=prev; j<sbOut; j++) {
bs.set(j);
}
// !sb_end2:!
// Starting from sbOut, add each gap in front of a range to the range buffer,
// then the tail up to 0x7fffffff.
prev = sbOut;
for (int i=0; i<n; i++) {
if (prev < mbr[2 * i + 1]) {
addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1);
}
prev = mbr[i * 2 + 2] + 1;
}
if (prev < 0x7fffffff/*!!!*/) {
addCodeRangeToBuf(prev, 0x7fffffff);
}
}
}
/**
 * Adds all codes of a character type (or, when {@code not} is set, all codes
 * outside it) to this class.
 *
 * <p>Three strategies are tried in order:
 * <ol>
 * <li>With {@code Config.NON_UNICODE_SDW}, the special types {@code D},
 *     {@code S} and {@code W} are mapped through
 *     {@code CharacterType.SPECIAL_MASK} and resolved against the ASCII
 *     table, except for {@code SPACE} under JavaScript syntax, which
 *     continues with the steps below.</li>
 * <li>If {@code EncodingHelper.ctypeCodeRange} supplies a range table, it is
 *     handed to {@link #addCTypeByRange}.</li>
 * <li>Otherwise each single-byte code is tested individually and, depending
 *     on the type and on {@code not}, all multi-byte ranges are added.</li>
 * </ol>
 *
 * @param ctp   character type to add
 * @param not   {@code true} to add the complement of the type
 * @param env   scan environment; only its syntax is consulted here
 * @param sbOut holder passed to {@code EncodingHelper.ctypeCodeRange}; its
 *              value is then forwarded to {@link #addCTypeByRange}
 * @throws InternalException if the character type is not one handled here
 */
public void addCType(final int ctp, final boolean not, final ScanEnvironment env, final IntHolder sbOut) {
    int ct = ctp;

    final boolean special = ct == CharacterType.D || ct == CharacterType.S || ct == CharacterType.W;
    if (Config.NON_UNICODE_SDW && special) {
        ct ^= CharacterType.SPECIAL_MASK;
        // \s in JavaScript includes unicode characters, so it is not resolved from the ASCII table.
        final boolean unicodeSpace = env.syntax == Syntax.JAVASCRIPT && ct == CharacterType.SPACE;
        if (!unicodeSpace) {
            final int typeBit = 1 << ct;
            for (int code = 0; code < BitSet.SINGLE_BYTE_SIZE; code++) {
                final boolean member = (AsciiCtypeTable[code] & typeBit) != 0;
                if (member != not) {
                    bs.set(code);
                }
            }
            if (not) {
                addAllMultiByteRange();
            }
            return;
        }
    }

    final int[] ranges = EncodingHelper.ctypeCodeRange(ct, sbOut);
    if (ranges != null) {
        addCTypeByRange(ct, not, sbOut.value, ranges);
        return;
    }

    switch (ct) {
    case CharacterType.ALPHA:
    case CharacterType.BLANK:
    case CharacterType.CNTRL:
    case CharacterType.DIGIT:
    case CharacterType.LOWER:
    case CharacterType.PUNCT:
    case CharacterType.SPACE:
    case CharacterType.UPPER:
    case CharacterType.XDIGIT:
    case CharacterType.ASCII:
    case CharacterType.ALNUM:
        for (int code = 0; code < BitSet.SINGLE_BYTE_SIZE; code++) {
            if (EncodingHelper.isCodeCType(code, ct) != not) {
                bs.set(code);
            }
        }
        // Multi-byte ranges are added only for the complement of these types.
        if (not) {
            addAllMultiByteRange();
        }
        break;

    case CharacterType.GRAPH:
    case CharacterType.PRINT:
        for (int code = 0; code < BitSet.SINGLE_BYTE_SIZE; code++) {
            if (EncodingHelper.isCodeCType(code, ct) != not) {
                bs.set(code);
            }
        }
        // Multi-byte ranges are added only for the non-negated form of these types.
        if (!not) {
            addAllMultiByteRange();
        }
        break;

    case CharacterType.WORD:
        for (int code = 0; code < BitSet.SINGLE_BYTE_SIZE; code++) {
            if (EncodingHelper.isWord(code) != not) {
                bs.set(code);
            }
        }
        // Multi-byte ranges are added only for the non-negated form.
        if (!not) {
            addAllMultiByteRange();
        }
        break;

    default:
        throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
    }
}
/**
 * Mutable state record read and updated by nextStateClass() and nextStateValue().
 */
public static final class CCStateArg {
// Incoming value; nextStateValue() uses it as the range end and copies it into 'vs' before returning.
public int v;
// Saved value: added as a single value in the VALUE state, used as the range start in the RANGE state.
public int vs;
// Saved raw marker; nextStateValue() overwrites it with 'vIsRaw'. Not otherwise read in this class.
public boolean vsIsRaw;
// Raw marker of the incoming value; only copied into 'vsIsRaw' in this class.
public boolean vIsRaw;
// Type of the incoming value; nextStateValue() copies it into 'type' before returning.
public CCVALTYPE inType;
// Type of the saved value; nextStateClass() sets it to CLASS.
public CCVALTYPE type;
// Current state of the character class state machine.
public CCSTATE state;
}
/**
 * Advances the state machine after a class item has been read.
 *
 * <p>A saved single value (state {@code VALUE} with type {@code SB} or
 * {@code CODE_POINT}) is added to this class first; afterwards the state
 * becomes {@code VALUE} with type {@code CLASS}.
 *
 * @param arg state record to read and update
 * @param env scan environment forwarded to {@link #addCodeRange}
 * @throws SyntaxException if a range is still open (state {@code RANGE})
 */
public void nextStateClass(final CCStateArg arg, final ScanEnvironment env) {
    if (arg.state == CCSTATE.RANGE) {
        throw new SyntaxException(ErrorMessages.ERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE);
    }

    if (arg.state == CCSTATE.VALUE) {
        // Only SB and CODE_POINT values are stored; any other type (CLASS included) adds nothing.
        final CCVALTYPE saved = arg.type;
        if (saved == CCVALTYPE.SB) {
            bs.set(arg.vs);
        } else if (saved == CCVALTYPE.CODE_POINT) {
            addCodeRange(env, arg.vs, arg.vs);
        }
    }

    arg.state = CCSTATE.VALUE;
    arg.type = CCVALTYPE.CLASS;
}
/**
 * Advances the state machine after a value has been read.
 *
 * <ul>
 * <li>{@code VALUE}: the saved value {@code arg.vs} is added to this class;
 *     the state is left unchanged.</li>
 * <li>{@code RANGE}: the range {@code arg.vs}..{@code arg.v} is added and the
 *     state becomes {@code COMPLETE}. An empty range ({@code vs > v}) is
 *     skipped when the syntax allows it and rejected otherwise; when both
 *     ends are code points the range is handed to {@link #addCodeRange}
 *     without that check here.</li>
 * <li>{@code COMPLETE} / {@code START}: the state becomes {@code VALUE}.</li>
 * </ul>
 * In every case the incoming value, raw marker and type are then saved as
 * the new "previous" value.
 *
 * @param arg state record to read and update
 * @param env scan environment; supplies the syntax and is forwarded to
 *            {@link #addCodeRange}
 * @throws ValueException if a single-byte value exceeds 0xff, or if the range
 *                        is empty and the syntax does not allow that
 */
public void nextStateValue(final CCStateArg arg, final ScanEnvironment env) {
    switch (arg.state) {
    case VALUE:
        if (arg.type == CCVALTYPE.SB) {
            if (arg.vs > 0xff) {
                throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
            }
            bs.set(arg.vs);
        } else if (arg.type == CCVALTYPE.CODE_POINT) {
            addCodeRange(env, arg.vs, arg.vs);
        }
        break;

    case RANGE: {
        final boolean sameKind = arg.inType == arg.type;
        final boolean singleByte = sameKind && arg.inType == CCVALTYPE.SB;

        if (singleByte && (arg.vs > 0xff || arg.v > 0xff)) {
            throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
        }

        if (sameKind && !singleByte) {
            // Both ends share a non-single-byte type: the range goes to the buffer as is.
            addCodeRange(env, arg.vs, arg.v);
        } else if (arg.vs > arg.v) {
            // Empty range: silently dropped if the syntax tolerates it.
            if (!env.syntax.allowEmptyRangeInCC()) {
                throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
            }
        } else if (singleByte) {
            bs.setRange(arg.vs, arg.v);
        } else {
            // Mixed types: bits up to 0xff plus the full range in the buffer.
            bs.setRange(arg.vs, Math.min(arg.v, 0xff));
            addCodeRange(env, arg.vs, arg.v);
        }

        arg.state = CCSTATE.COMPLETE;
        break;
    }

    case COMPLETE:
    case START:
        arg.state = CCSTATE.VALUE;
        break;

    default:
        break;
    }

    // The incoming value becomes the saved one for the next step.
    arg.type = arg.inType;
    arg.vs = arg.v;
    arg.vsIsRaw = arg.vIsRaw;
}
// onig_is_code_in_cc_len
/**
 * Tests whether {@code code} is matched by this class, taking the NOT flag
 * into account.
 *
 * <p>Codes above 0xff are looked up in the range buffer (absent buffer means
 * not found); all other codes are looked up in the bit set.
 *
 * @param code the code to test
 * @return {@code true} when the lookup result differs from the NOT flag
 */
public boolean isCodeInCCLength(final int code) {
    final boolean listed = code > 0xff
        ? (mbuf != null && mbuf.isInCodeRange(code))
        : bs.at(code);
    return listed != isNot();
}
// onig_is_code_in_cc
/**
 * Tests whether {@code code} is matched by this class; delegates to
 * {@link #isCodeInCCLength(int)}.
 *
 * @param code the code to test
 * @return {@code true} when the code is matched
 */
public boolean isCodeInCC(final int code) {
    final boolean matched = isCodeInCCLength(code);
    return matched;
}
/** Sets the NOT flag bit, leaving the other flag bits as they are. */
public void setNot() {
    flags = flags | FLAG_NCCLASS_NOT;
}
/** Clears the NOT flag bit, leaving the other flag bits as they are. */
public void clearNot() {
    flags = flags & ~FLAG_NCCLASS_NOT;
}
/**
 * Tells whether the NOT flag bit is set.
 *
 * @return {@code true} when the flag is set
 */
public boolean isNot() {
    return (flags & FLAG_NCCLASS_NOT) == FLAG_NCCLASS_NOT;
}
/** Sets the SHARE flag bit, leaving the other flag bits as they are. */
public void setShare() {
    flags = flags | FLAG_NCCLASS_SHARE;
}
/** Clears the SHARE flag bit, leaving the other flag bits as they are. */
public void clearShare() {
    flags = flags & ~FLAG_NCCLASS_SHARE;
}
/**
 * Tells whether the SHARE flag bit is set.
 *
 * @return {@code true} when the flag is set
 */
public boolean isShare() {
    return (flags & FLAG_NCCLASS_SHARE) == FLAG_NCCLASS_SHARE;
}
}