// blob: d3e54947aeae2e8f6bb44e3c0ebfbd28949f95db [file] [log] [blame]
/*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is furnished to do
* so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package jdk.nashorn.internal.runtime.regexp.joni;
import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder;
import java.util.Arrays;
/**
 * Static helpers for classifying and navigating UTF-16 code units held in
 * {@code char[]} buffers.
 *
 * <p>Digit and hexadecimal-digit recognition ({@link #isDigit(int)},
 * {@link #isXDigit(int)}, {@link #xdigitVal(int)}) is deliberately ASCII-only;
 * the remaining classifications are based on {@link Character#getType(int)}
 * combined with the masks declared in {@code CharacterType}.
 */
public class EncodingHelper {

    /** Line feed, U+000A. */
    public static final char NEW_LINE = 0xa;

    /** Carriage return, U+000D. */
    public static final char RETURN = 0xd;

    /** Shared zero-length result of {@link #caseFoldCodesByString(int, char)} for characters without a case fold. */
    static final char[] EMPTYCHARS = new char[0];

    /** Code range tables indexed by ctype, built on first request by {@link #ctypeCodeRange(int, IntHolder)}. */
    static final int[][] codeRanges = new int[15][];

    /**
     * Returns the numeric value of an ASCII decimal digit.
     *
     * @param code a code point; not validated, callers are expected to pass {@code '0'..'9'}
     * @return {@code code - '0'}
     */
    public static int digitVal(int code) {
        return code - '0';
    }

    /**
     * Returns the numeric value of a digit; simply delegates to {@link #digitVal(int)}.
     *
     * @param code a code point; not validated
     * @return {@code code - '0'}
     */
    public static int odigitVal(int code) {
        return digitVal(code);
    }

    /**
     * Tests whether a code point is an ASCII hexadecimal digit.
     *
     * <p>Only {@code 0-9}, {@code a-f} and {@code A-F} qualify. The ASCII-only
     * {@link #isDigit(int)} is used on purpose: {@link Character#isDigit(int)}
     * also accepts non-ASCII decimal digits (for example U+0660), which are not
     * hexadecimal digits and have no meaningful value in {@link #xdigitVal(int)}.
     *
     * @param code the code point to test
     * @return {@code true} if {@code code} is one of {@code [0-9a-fA-F]}
     */
    public static boolean isXDigit(int code) {
        return isDigit(code) || (code >= 'a' && code <= 'f') || (code >= 'A' && code <= 'F');
    }

    /**
     * Returns the numeric value of an ASCII hexadecimal digit.
     *
     * @param code a code point for which {@link #isXDigit(int)} is {@code true};
     *             the result is meaningless for any other input
     * @return a value in {@code 0..15} for valid input
     */
    public static int xdigitVal(int code) {
        if (isDigit(code)) {
            return code - '0';
        } else if (code >= 'a' && code <= 'f') {
            return code - 'a' + 10;
        } else {
            // Reached for 'A'..'F' when the precondition holds.
            return code - 'A' + 10;
        }
    }

    /**
     * Tests whether a code point is an ASCII decimal digit.
     *
     * @param code the code point to test
     * @return {@code true} if {@code code} is in {@code '0'..'9'}
     */
    public static boolean isDigit(int code) {
        return code >= '0' && code <= '9';
    }

    /**
     * Tests whether the general category of a code point is selected by
     * {@code CharacterType.WORD_MASK}.
     *
     * @param code the code point to test
     * @return {@code true} if the category bit of {@code code} is set in the word mask
     */
    public static boolean isWord(int code) {
        // letter, digit, or '_'
        return ((1 << Character.getType(code)) & CharacterType.WORD_MASK) != 0;
    }

    /**
     * Tests whether a code point is the line feed character.
     *
     * @param code the code point to test
     * @return {@code true} if {@code code == NEW_LINE}
     */
    public static boolean isNewLine(int code) {
        return code == NEW_LINE;
    }

    /**
     * Tests whether the character at {@code p} is a line feed.
     *
     * @param chars the buffer
     * @param p     index to inspect
     * @param end   exclusive upper bound for valid indices
     * @return {@code true} if {@code p < end} and {@code chars[p]} is {@link #NEW_LINE}
     */
    public static boolean isNewLine(char[] chars, int p, int end) {
        return p < end && chars[p] == NEW_LINE;
    }

    /**
     * Tests whether a carriage return / line feed pair starts at {@code p}.
     *
     * @param chars the buffer
     * @param p     index of the candidate carriage return
     * @param end   exclusive upper bound for valid indices
     * @return {@code true} if both characters lie before {@code end} and form CR LF
     */
    public static boolean isCrnl(char[] chars, int p, int end) {
        return p + 1 < end && chars[p] == RETURN && chars[p + 1] == NEW_LINE;
    }

    // Encoding.prevCharHead
    /**
     * Returns the index preceding {@code s}, bounded below by {@code p}.
     *
     * @param p lower bound
     * @param s current index
     * @return {@code s - 1}, or {@code -1} if {@code s <= p}
     */
    public static int prevCharHead(int p, int s) {
        return s <= p ? -1 : s - 1;
    }

    /* onigenc_get_right_adjust_char_head_with_prev */
    /**
     * Returns {@code s} unchanged and reports no previous position.
     *
     * @param s    the index to adjust
     * @param prev if non-null, its {@code value} is set to {@code -1}
     * @return {@code s}
     */
    public static int rightAdjustCharHeadWithPrev(int s, IntHolder prev) {
        if (prev != null) {
            prev.value = -1; /* Sorry */
        }
        return s;
    }

    // Encoding.stepBack
    /**
     * Moves {@code s} back by {@code n} positions without going below {@code p}.
     *
     * @param p lower bound
     * @param s starting index; {@code -1} is returned as is
     * @param n number of positions to step back; values {@code <= 0} leave {@code s} unchanged
     * @return the resulting index, or {@code -1} if a step was attempted while {@code s <= p}
     */
    public static int stepBack(int p, int s, int n) {
        while (s != -1 && n-- > 0) {
            if (s <= p) {
                return -1;
            }
            s--;
        }
        return s;
    }

    /* onigenc_with_ascii_strncmp */
    /**
     * Compares up to {@code n} characters of two buffers.
     *
     * <p>Note the orientation of the result: it is computed as
     * {@code chars2[...] - chars1[...]}.
     *
     * @param chars1 first buffer
     * @param p1     start index in {@code chars1}
     * @param end    exclusive end index in {@code chars1}
     * @param chars2 second buffer; no bound is checked for it
     * @param p2     start index in {@code chars2}
     * @param n      maximum number of characters to compare
     * @return {@code 0} if all {@code n} characters match; the difference at the first
     *         mismatch; or the current character of {@code chars2} if {@code chars1}
     *         is exhausted first
     */
    public static int strNCmp(char[] chars1, int p1, int end, char[] chars2, int p2, int n) {
        while (n-- > 0) {
            if (p1 >= end) {
                return chars2[p2];
            }
            int c = chars1[p1];
            int x = chars2[p2] - c;
            if (x != 0) {
                return x;
            }
            p2++;
            p1++;
        }
        return 0;
    }

    /**
     * Folds the bytes in {@code [p, end)} into an int, most significant byte first.
     *
     * @param bytes the buffer
     * @param p     inclusive start index
     * @param end   exclusive end index
     * @return the accumulated value; {@code 0} for an empty range
     */
    public static int mbcToCode(byte[] bytes, int p, int end) {
        int code = 0;
        for (int i = p; i < end; i++) {
            code = (code << 8) | (bytes[i] & 0xff);
        }
        return code;
    }

    /**
     * @return the constant {@code 0x80}
     */
    public static int mbcodeStartPosition() {
        return 0x80;
    }

    /**
     * Returns the opposite-case counterpart of a character.
     *
     * @param flag not used by this implementation
     * @param c    the character to fold
     * @return a one-element array holding the lower-case form of an upper-case
     *         {@code c} or the upper-case form of a lower-case {@code c};
     *         {@link #EMPTYCHARS} if {@code c} is neither
     */
    public static char[] caseFoldCodesByString(int flag, char c) {
        if (Character.isUpperCase(c)) {
            return new char[] {Character.toLowerCase(c)};
        } else if (Character.isLowerCase(c)) {
            return new char[] {Character.toUpperCase(c)};
        } else {
            return EMPTYCHARS;
        }
    }

    /**
     * Reports lower/upper case pairs to a callback.
     *
     * <p>For every code point in {@code [0, 0xffff)} whose general category is
     * {@link Character#LOWERCASE_LETTER}, {@code fun} is invoked twice: once
     * mapping the lower-case code to its upper-case form and once for the
     * reverse direction. The same one-element array instance is reused for all
     * invocations.
     *
     * @param flag not used by this implementation
     * @param fun  callback receiving each mapping
     * @param arg  opaque argument passed through to {@code fun}
     */
    public static void applyAllCaseFold(int flag, ApplyCaseFold fun, Object arg) {
        int[] code = new int[1];
        for (int c = 0; c < 0xffff; c++) {
            if (Character.getType(c) == Character.LOWERCASE_LETTER) {
                int upper = code[0] = Character.toUpperCase(c);
                fun.apply(c, code, 1, arg);
                code[0] = c;
                fun.apply(upper, code, 1, arg);
            }
        }
    }

    // CodeRange.isInCodeRange
    /**
     * Tests whether {@code code} falls into one of the ranges of a range table
     * that starts at index 0.
     *
     * @param p    table in the form {@code [numberOfRanges, start0, end0, start1, end1, ...]}
     * @param code the code point to look up
     * @return {@code true} if some range contains {@code code}
     */
    public static boolean isInCodeRange(int[] p, int code) {
        return isInCodeRange(p, 0, code);
    }

    /**
     * Returns the table of code ranges for a character type, building and
     * caching it in {@link #codeRanges} on first use.
     *
     * @param ctype character type constant
     * @param sbOut receives {@code 0x100}
     * @return the table in the form {@code [numberOfRanges, start0, end0, ...]},
     *         or {@code null} if {@code ctype} is not below the cache size
     */
    public static int[] ctypeCodeRange(int ctype, IntHolder sbOut) {
        sbOut.value = 0x100; // use bitset for codes smaller than 256
        int[] range = null;

        if (ctype < codeRanges.length) {
            range = codeRanges[ctype];

            if (range == null) {
                // format: [numberOfRanges, rangeStart, rangeEnd, ...]
                range = new int[16];
                int rangeCount = 0;
                int lastCode = -2;

                for (int code = 0; code <= 0xffff; code++) {
                    if (isCodeCType(code, ctype)) {
                        if (lastCode < code - 1) {
                            // Gap since the previous match: open a new range,
                            // growing the table if its start/end slots do not fit.
                            if (rangeCount * 2 + 2 >= range.length) {
                                range = Arrays.copyOf(range, range.length * 2);
                            }
                            range[rangeCount * 2 + 1] = code;
                            rangeCount++;
                        }
                        // Extend the end of the current range to this code.
                        range[rangeCount * 2] = lastCode = code;
                    }
                }

                // Trim unused capacity.
                if (rangeCount * 2 + 1 < range.length) {
                    range = Arrays.copyOf(range, rangeCount * 2 + 1);
                }

                range[0] = rangeCount;
                codeRanges[ctype] = range;
            }
        }

        return range;
    }

    // CodeRange.isInCodeRange
    /**
     * Tests whether {@code code} falls into one of the ranges of a range table
     * located at {@code offset} inside {@code p}.
     *
     * <p>A binary search finds the first range whose end is not below
     * {@code code}; the code is contained if that range also starts at or
     * before it.
     *
     * @param p      array holding {@code [numberOfRanges, start0, end0, ...]} from {@code offset} on
     * @param offset index of the {@code numberOfRanges} entry
     * @param code   the code point to look up
     * @return {@code true} if some range contains {@code code}
     */
    public static boolean isInCodeRange(int[] p, int offset, int code) {
        int low = 0;
        int n = p[offset];
        int high = n;

        while (low < high) {
            int x = (low + high) >>> 1;
            if (code > p[(x << 1) + 2 + offset]) {
                low = x + 1;
            } else {
                high = x;
            }
        }
        return low < n && code >= p[(low << 1) + 1 + offset];
    }

    /**
     * Tests whether a code point belongs to a character type.
     *
     * @param code  the code point to test
     * @param ctype one of the {@code CharacterType} constants handled below
     * @return {@code true} if {@code code} is a member of {@code ctype}
     * @throws RuntimeException if {@code ctype} is not a handled character type
     * @see <a href="http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt">http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt</a>
     */
    public static boolean isCodeCType(int code, int ctype) {
        int type;
        switch (ctype) {
            case CharacterType.NEWLINE:
                return code == EncodingHelper.NEW_LINE;
            case CharacterType.ALPHA:
                return ((1 << Character.getType(code)) & CharacterType.ALPHA_MASK) != 0;
            case CharacterType.BLANK:
                return code == 0x09 || Character.getType(code) == Character.SPACE_SEPARATOR;
            case CharacterType.CNTRL:
                type = Character.getType(code);
                return ((1 << type) & CharacterType.CNTRL_MASK) != 0 || type == Character.UNASSIGNED;
            case CharacterType.DIGIT:
                return EncodingHelper.isDigit(code);
            case CharacterType.GRAPH:
                switch (code) {
                    case 0x09:
                    case 0x0a:
                    case 0x0b:
                    case 0x0c:
                    case 0x0d:
                        return false;
                    default:
                        // GRAPH_MASK selects the categories that are excluded.
                        type = Character.getType(code);
                        return ((1 << type) & CharacterType.GRAPH_MASK) == 0 && type != Character.UNASSIGNED;
                }
            case CharacterType.LOWER:
                return Character.isLowerCase(code);
            case CharacterType.PRINT:
                // PRINT_MASK selects the categories that are excluded.
                type = Character.getType(code);
                return ((1 << type) & CharacterType.PRINT_MASK) == 0 && type != Character.UNASSIGNED;
            case CharacterType.PUNCT:
                return ((1 << Character.getType(code)) & CharacterType.PUNCT_MASK) != 0;
            case CharacterType.SPACE:
                // ECMA 7.2 and 7.3
                switch (code) {
                    case 0x09:
                    case 0x0a:
                    case 0x0b:
                    case 0x0c:
                    case 0x0d:
                        return true;
                    default:
                        // true if Unicode separator or BOM
                        return ((1 << Character.getType(code)) & CharacterType.SPACE_MASK) != 0 || code == 0xfeff;
                }
            case CharacterType.UPPER:
                return Character.isUpperCase(code);
            case CharacterType.XDIGIT:
                return EncodingHelper.isXDigit(code);
            case CharacterType.WORD:
                return ((1 << Character.getType(code)) & CharacterType.WORD_MASK) != 0;
            case CharacterType.ALNUM:
                return ((1 << Character.getType(code)) & CharacterType.ALNUM_MASK) != 0;
            case CharacterType.ASCII:
                return code < 0x80;
            default:
                throw new RuntimeException("illegal character type: " + ctype);
        }
    }
}