blob: fe454db75fba99b5cc55a45db46fecabb4e97577 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.harmony.regex.tests.java.util.regex;
import java.io.Serializable;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import libcore.junit.junit3.TestCaseWithRules;
import libcore.junit.util.SwitchTargetSdkVersionRule;
import libcore.junit.util.SwitchTargetSdkVersionRule.TargetSdkVersion;
import org.apache.harmony.testframework.serialization.SerializationTest;
import org.apache.harmony.testframework.serialization.SerializationTest.SerializableAssert;
import org.junit.Rule;
import org.junit.rules.TestRule;
import static java.util.Arrays.asList;
public class PatternTest extends TestCaseWithRules {
// NOTE(review): presumably applies the SDK level requested by @TargetSdkVersion on individual
// test methods (see the *_api28 tests) - confirm against SwitchTargetSdkVersionRule.
@Rule
public TestRule switchTargetSdkVersionRule = SwitchTargetSdkVersionRule.getInstance();
// Patterns that are expected to compile. testMatches matches entry i against its i-th group
// of positive input sequences, so the order of the entries matters.
String[] testPatterns = {
"(a|b)*abb",
"(1*2*3*4*)*567",
"(a|b|c|d)*aab",
"(1|2|3|4|5|6|7|8|9|0)(1|2|3|4|5|6|7|8|9|0)*",
"(abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ)*",
"(a|b)*(a|b)*A(a|b)*lice.*",
"(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)(a|b|c|d|e|f|g|h|"
+ "i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)*(1|2|3|4|5|6|7|8|9|0)*|while|for|struct|if|do",
// BEGIN Android-changed
// We don't have canonical equivalence.
// "x(?c)y", "x(?cc)y"
// "x(?:c)y"
// END Android-changed
};
// Additional patterns (escapes, POSIX/java/Unicode character classes) that are expected to
// compile; also used by the flags() tests.
String[] testPatternsAlt = {
/*
* According to the Javadoc, 2- and 3-digit octal sequences like \\o70\\o347
* should be OK, but the test fails for them.
*/
"[ab]\\b\\\\o5\\xF9\\u1E7B\\t\\n\\f\\r\\a\\e[yz]",
"^\\p{Lower}*\\p{Upper}*\\p{ASCII}?\\p{Alpha}?\\p{Digit}*\\p{Alnum}\\p{Punct}\\p{Graph}\\p{Print}\\p{Blank}\\p{Cntrl}\\p{XDigit}\\p{Space}",
"$\\p{javaLowerCase}\\p{javaUpperCase}\\p{javaWhitespace}\\p{javaMirrored}",
"\\p{InGreek}\\p{Lu}\\p{Sc}\\P{InGreek}[\\p{L}&&[^\\p{Lu}]]" };
// Patterns for which the tests expect compilation to throw PatternSyntaxException.
String[] wrongTestPatterns = { "\\o9A", "\\p{Lawer}", "\\xG0" };
// Flags that testFlagsCompileValid passes to compile() one at a time (no combinations).
final static int[] flagsSet = { Pattern.CASE_INSENSITIVE,
Pattern.MULTILINE, Pattern.DOTALL, Pattern.UNICODE_CASE
/* , Pattern.CANON_EQ */ };
/*
* Based on the RI implementation documents. This set needs to be checked against the
* actual implementation.
*/
final static int[] wrongFlagsSet = { 256, 512, 1024 };
// Value flags() is expected to return when no flags were passed to compile().
final static int DEFAULT_FLAGS = 0;
/** Checks that matcher() returns a non-null Matcher and a distinct instance on every call. */
public void testMatcher() {
    // some very simple test
    Pattern single = Pattern.compile("a");
    Matcher onOtherInput = single.matcher("bcde");
    assertNotNull(onOtherInput);
    Matcher first = single.matcher("a");
    Matcher second = single.matcher("a");
    assertNotSame(first, second);
}
/**
 * Splits a sequence that ends with the delimiter, using positive limits at and above the
 * number of pieces, and checks the number of pieces returned.
 */
public void testSplitCharSequenceBug6193() {
    // splitting CharSequence which ends with pattern
    // assertEquals takes (expected, actual); keeping that order makes failure messages truthful.
    assertEquals(3, ",,".split(",", 3).length);
    assertEquals(3, ",,".split(",", 4).length);
}
/** Checks the piece count of Pattern.split for a positive limit and for a negative limit. */
public void testSplitCharSequenceBug5391() {
    // assertEquals takes (expected, actual); keeping that order makes failure messages truthful.
    assertEquals(5, Pattern.compile("o").split("boo:and:foo", 5).length);
    assertEquals(2, Pattern.compile("b").split("ab", -1).length);
}
/**
 * Exercises Pattern.split with and without an explicit limit for a single-character
 * pattern and for the empty pattern. Only the number of returned pieces is checked.
 */
public void testSplitCharSequence() {
    // assertEquals takes (expected, actual); keeping that order makes failure messages truthful.
    Pattern pat = Pattern.compile("x");
    assertEquals(5, pat.split("zxx:zzz:zxx", 10).length);
    assertEquals(3, pat.split("zxx:zzz:zxx", 3).length);
    assertEquals(5, pat.split("zxx:zzz:zxx", -1).length);
    assertEquals(3, pat.split("zxx:zzz:zxx", 0).length);

    pat = Pattern.compile("b");
    assertEquals(3, pat.split("abccbadfebb").length);
    assertEquals(1, pat.split("").length);

    // Empty pattern.
    pat = Pattern.compile("");
    assertEquals(1, pat.split("").length);
    assertEquals(9, pat.split("abccbadfe").length);
}
/** Splitting the empty string is expected to yield exactly one piece. */
public void testBug6544() {
    String[] arr = "".split(":");
    // assertEquals takes (expected, actual); keeping that order makes failure messages truthful.
    assertEquals(1, arr.length);
}
/** Checks the piece count of Pattern.split when the limit is negative. */
public void testSplitCharSequenceNegativeLimit() {
    // negative limit
    // assertEquals takes (expected, actual); keeping that order makes failure messages truthful.
    Pattern pat = Pattern.compile("b");
    assertEquals(5, pat.split("abccbadfebb", -1).length);
    assertEquals(1, pat.split("", -1).length);

    // Empty pattern.
    pat = Pattern.compile("");
    assertEquals(1, pat.split("", -1).length);
    assertEquals(10, pat.split("abccbadfe", -1).length);
}
/** Checks the piece count of Pattern.split when the limit is zero. */
public void testSplitCharSequenceZeroLimit() {
    // assertEquals takes (expected, actual); keeping that order makes failure messages truthful.
    Pattern pat = Pattern.compile("b");
    assertEquals(3, pat.split("abccbadfebb", 0).length);
    assertEquals(1, pat.split("", 0).length);

    // Empty pattern.
    pat = Pattern.compile("");
    assertEquals(1, pat.split("", 0).length);
    assertEquals(9, pat.split("abccbadfe", 0).length);
}
/** Checks the piece count of Pattern.split when the positive limit exceeds the piece count. */
public void testSplitCharSequencePositiveLimitCase1() {
    // assertEquals takes (expected, actual); keeping that order makes failure messages truthful.
    Pattern pat = Pattern.compile("b");
    assertEquals(5, pat.split("abccbadfebb", 12).length);
    assertEquals(1, pat.split("", 6).length);

    // Empty pattern.
    pat = Pattern.compile("");
    assertEquals(1, pat.split("", 11).length);
    assertEquals(10, pat.split("abccbadfe", 15).length);
}
/** Checks the piece count of Pattern.split when the positive limit equals the piece count. */
public void testSplitCharSequencePositiveLimitCase2() {
    // assertEquals takes (expected, actual); keeping that order makes failure messages truthful.
    Pattern pat = Pattern.compile("b");
    assertEquals(5, pat.split("abccbadfebb", 5).length);
    assertEquals(1, pat.split("", 1).length);

    // Empty pattern.
    pat = Pattern.compile("");
    assertEquals(1, pat.split("", 1).length);
    assertEquals(10, pat.split("abccbadfe", 10).length);
}
/** Checks that a positive limit below the natural piece count caps the number of pieces. */
public void testSplitCharSequencePositiveLimitCase3() {
    // assertEquals takes (expected, actual); keeping that order makes failure messages truthful.
    Pattern pat = Pattern.compile("b");
    assertEquals(3, pat.split("abccbadfebb", 3).length);

    // Empty pattern.
    pat = Pattern.compile("");
    assertEquals(5, pat.split("abccbadfe", 5).length);
}
/**
 * Splitting on the empty pattern: "test" is expected to yield one piece per character with
 * no leading empty string, and the empty input a single empty string.
 */
public void testSplitOnEmptyPattern_apiCurrent() {
    String[] letters = "test".split("");
    assertEquals(asList("t", "e", "s", "t"), asList(letters));

    String[] viaString = "".split("");
    assertEquals(asList(""), asList(viaString));

    String[] viaPattern = Pattern.compile("").split("");
    assertEquals(asList(""), asList(viaPattern));

    String[] negativeLimit = "".split("", -1);
    assertEquals(asList(""), asList(negativeLimit));
}
/**
 * Same inputs as the apiCurrent variant, but under target SDK 28 "test" is expected to
 * yield a leading empty string.
 */
@TargetSdkVersion(28)
public void testSplitOnEmptyPattern_api28() {
    String[] letters = "test".split("");
    assertEquals(asList("", "t", "e", "s", "t"), asList(letters));

    String[] viaString = "".split("");
    assertEquals(asList(""), asList(viaString));

    String[] viaPattern = Pattern.compile("").split("");
    assertEquals(asList(""), asList(viaPattern));

    String[] negativeLimit = "".split("", -1);
    assertEquals(asList(""), asList(negativeLimit));
}
/**
 * Verifies that a delimiter match at the very start of the input contributes a leading ""
 * only when that match has positive width; a zero-width match there adds nothing.
 */
public void testMatchBeginningOfInputSequence_apiCurrent() {
    String aardvark = "aardvark";
    String banana = "banana";

    // Positive-width match at the beginning of the input.
    assertEquals(asList("", "", "rdv", "rk"), asList(aardvark.split("a")));
    assertEquals(asList("", "anana"), asList(banana.split("b")));

    // Zero-width match at the beginning of the input
    assertEquals(asList("a", "ardv", "ark"), asList(aardvark.split("(?=a)")));
    assertEquals(asList("banana"), asList(banana.split("(?=b)")));

    // For comparison, matches in the middle of the input never yield an empty substring:
    assertEquals(asList("aar", "vark"), asList(aardvark.split("d")));
    assertEquals(asList("aar", "dvark"), asList(aardvark.split("(?=d)")));
}
/**
 * Target-SDK-28 variant: the expectations equal the apiCurrent ones except that
 * "aardvark".split("(?=a)") is expected to start with an empty string.
 */
@TargetSdkVersion(28)
public void testMatchBeginningOfInputSequence_api28() {
    String aardvark = "aardvark";
    String banana = "banana";

    // Positive-width match at the beginning of the input.
    assertEquals(asList("", "", "rdv", "rk"), asList(aardvark.split("a")));
    assertEquals(asList("", "anana"), asList(banana.split("b")));

    // Zero-width match at the beginning of the input
    assertEquals(asList("", "a", "ardv", "ark"), asList(aardvark.split("(?=a)")));
    assertEquals(asList("banana"), asList(banana.split("(?=b)")));

    // For comparison, matches in the middle of the input never yield an empty substring:
    assertEquals(asList("aar", "vark"), asList(aardvark.split("d")));
    assertEquals(asList("aar", "dvark"), asList(aardvark.split("(?=d)")));
}
/** Checks that pattern() returns exactly the string the Pattern was compiled from. */
public void testPattern() {
    /* Positive assertion test. */
    for (String aPattern : testPatterns) {
        Pattern p = Pattern.compile(aPattern);
        // assertEquals reports both values on failure. Unexpected exceptions are left to
        // propagate so the test runner records them with their full stack trace.
        assertEquals(aPattern, p.pattern());
    }
}
/**
 * Compiles every entry of testPatterns and testPatternsAlt. The test passes when no
 * compilation throws.
 */
public void testCompile_Valid() {
    // Exceptions are left to propagate: the runner then reports the original exception
    // with its stack trace instead of a flattened fail() message.
    for (String aPattern : testPatterns) {
        Pattern.compile(aPattern);
    }
    for (String aPattern : testPatternsAlt) {
        Pattern.compile(aPattern);
    }
}
/** Expects Pattern.compile to throw PatternSyntaxException for every wrongTestPatterns entry. */
public void testCompile_WrongPatternsFail() {
    for (String aPattern : wrongTestPatterns) {
        try {
            Pattern.compile(aPattern);
            fail("PatternSyntaxException is expected");
        } catch (PatternSyntaxException expected) {
            /* OKAY */
        }
        // Any other exception propagates and is reported by the runner with its stack trace.
    }
}
/** Expects "((?i)|b)a" not to match "A": an inline (?i) inside a group, then a lower-case 'a'. */
public void testFlagsCase1() {
    boolean matched = Pattern.compile("((?i)|b)a").matcher("A").matches();
    assertFalse(matched);
}
/** Expects "(?i)a|b" to match "A". */
public void testFlagsCase2() {
    boolean matched = Pattern.compile("(?i)a|b").matcher("A").matches();
    assertTrue(matched);
}
/** Expects "(?i)a|b" to match "B": the leading (?i) also covers the second alternative. */
public void testFlagsCase3() {
    boolean matched = Pattern.compile("(?i)a|b").matcher("B").matches();
    assertTrue(matched);
}
/** Expects "c|(?i)a|b" to match "B": (?i) set in the middle alternative covers the last one. */
public void testFlagsCase4() {
    boolean matched = Pattern.compile("c|(?i)a|b").matcher("B").matches();
    assertTrue(matched);
}
/** Expects "(?i)a|(?s)b" to match "B": turning on (?s) later does not turn (?i) off. */
public void testFlagsCase5() {
    boolean matched = Pattern.compile("(?i)a|(?s)b").matcher("B").matches();
    assertTrue(matched);
}
/** Expects "(?i)a|(?-i)b" not to match "B": (?-i) precedes the 'b' alternative. */
public void testFlagsCase6() {
    boolean matched = Pattern.compile("(?i)a|(?-i)b").matcher("B").matches();
    assertFalse(matched);
}
/** Expects "(?i)a|(?-i)c|b" not to match "B": (?-i) stays in effect for the last alternative. */
public void testFlagsCase7() {
    boolean matched = Pattern.compile("(?i)a|(?-i)c|b").matcher("B").matches();
    assertFalse(matched);
}
/** Expects "(?i)a|(?-i)c|(?i)b" to match "B": (?i) is switched back on before 'b'. */
public void testFlagsCase8() {
    boolean matched = Pattern.compile("(?i)a|(?-i)c|(?i)b").matcher("B").matches();
    assertTrue(matched);
}
/** Expects "(?i)a|(?-i)b" to match "A": the first alternative is still case-insensitive. */
public void testFlagsCase9() {
    boolean matched = Pattern.compile("(?i)a|(?-i)b").matcher("A").matches();
    assertTrue(matched);
}
/** Expects "((?i))a" not to match "A": the (?i) sits alone inside a capturing group. */
public void testFlagsCase10() {
    boolean matched = Pattern.compile("((?i))a").matcher("A").matches();
    assertFalse(matched);
}
/** Expects "|(?i)|a" to match "A": (?i) set in an empty alternative covers the next one. */
public void testFlagsCase11() {
    boolean matched = Pattern.compile("|(?i)|a").matcher("A").matches();
    assertTrue(matched);
}
/** Expects "(?i)((?s)a.)" to match "A\n": outer (?i) plus (?s) set inside the group. */
public void testFlagsCase12() {
    boolean matched = Pattern.compile("(?i)((?s)a.)").matcher("A\n").matches();
    assertTrue(matched);
}
/** Expects "(?i)((?-i)a)" not to match "A": (?-i) inside the group cancels the outer (?i). */
public void testFlagsCase13() {
    boolean matched = Pattern.compile("(?i)((?-i)a)").matcher("A").matches();
    assertFalse(matched);
}
/** Expects "(?i)(?s:a.)" to match "A\n": outer (?i) applies inside the (?s:...) group. */
public void testFlagsCase14() {
    boolean matched = Pattern.compile("(?i)(?s:a.)").matcher("A\n").matches();
    assertTrue(matched);
}
/** Expects "(?i)fgh(?s:aa)" to match "fghAA". */
public void testFlagsCase15() {
    boolean matched = Pattern.compile("(?i)fgh(?s:aa)").matcher("fghAA").matches();
    assertTrue(matched);
}
/** Expects "(?i)((?-i))a" to match "A": the (?-i) is confined to its own group. */
public void testFlagsCase16() {
    boolean matched = Pattern.compile("(?i)((?-i))a").matcher("A").matches();
    assertTrue(matched);
}
/** Expects "abc(?i)d" not to match "ABCD": the text before (?i) stays case-sensitive. */
public void testFlagsCase17() {
    boolean matched = Pattern.compile("abc(?i)d").matcher("ABCD").matches();
    assertFalse(matched);
}
/** Expects "abc(?i)d" to match "abcD": only the part after (?i) is case-insensitive. */
public void testFlagsCase18() {
    boolean matched = Pattern.compile("abc(?i)d").matcher("abcD").matches();
    assertTrue(matched);
}
/** Expects the alternating (?i)/(?-i) pattern to match "aAaAa". */
public void testFlagsCase19() {
    boolean matched = Pattern.compile("a(?i)a(?-i)a(?i)a(?-i)a").matcher("aAaAa").matches();
    assertTrue(matched);
}
/** Expects the alternating (?i)/(?-i) pattern not to match "aAAAa" (upper-case third char). */
public void testFlagsCase20() {
    boolean matched = Pattern.compile("a(?i)a(?-i)a(?i)a(?-i)a").matcher("aAAAa").matches();
    assertFalse(matched);
}
// BEGIN Android-removed
// The flags() method should only return those flags that were explicitly
// passed during the compilation. The JDK also accepts the ones implicitly
// contained in the pattern, but ICU doesn't do this.
//
// public void testFlagsMethod() {
// String baseString;
// Pattern pat;
//
// /*
// * These tests are for compatibility with RI only. Logically we have to
// * return only flags specified during the compilation. For example
// * pat.flags() == 0 when we compile Pattern pat =
// * Pattern.compile("(?i)abc(?-i)"); but the whole expression is compiled
// * in a case insensitive manner. So there is little sense to do calls to
// * flags() now.
// */
// baseString = "(?-i)";
// pat = Pattern.compile(baseString);
//
// baseString = "(?idmsux)abc(?-i)vg(?-dmu)";
// pat = Pattern.compile(baseString);
// assertEquals(pat.flags(), Pattern.DOTALL | Pattern.COMMENTS);
//
// baseString = "(?idmsux)abc|(?-i)vg|(?-dmu)";
// pat = Pattern.compile(baseString);
// assertEquals(pat.flags(), Pattern.DOTALL | Pattern.COMMENTS);
//
// baseString = "(?is)a((?x)b.)";
// pat = Pattern.compile(baseString);
// assertEquals(pat.flags(), Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
//
// baseString = "(?i)a((?-i))";
// pat = Pattern.compile(baseString);
// assertEquals(pat.flags(), Pattern.CASE_INSENSITIVE);
//
// baseString = "((?i)a)";
// pat = Pattern.compile(baseString);
// assertEquals(pat.flags(), 0);
//
// pat = Pattern.compile("(?is)abc");
// assertEquals(pat.flags(), Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
// }
//END Android-removed
/*
 * Check default flags when they are not specified in pattern. Based on RI
 * since could not find that info
 */
public void testFlagsCompileDefault() {
    for (String pat : testPatternsAlt) {
        Pattern p = Pattern.compile(pat);
        // (expected, actual) order keeps the failure message truthful. Unexpected
        // exceptions propagate so the runner reports them with their stack trace.
        assertEquals(DEFAULT_FLAGS, p.flags());
    }
}
/*
 * Check that flags specified during compile are set properly. This is a
 * simple implementation that does not use flags combinations. Need to
 * improve.
 */
public void testFlagsCompileValid() {
    for (String pat : testPatternsAlt) {
        for (int flags : flagsSet) {
            Pattern p = Pattern.compile(pat, flags);
            // (expected, actual) order keeps the failure message truthful. Unexpected
            // exceptions propagate so the runner reports them with their stack trace.
            assertEquals(flags, p.flags());
        }
    }
}
/**
 * Verifies that patterns containing an unmatched closing parenthesis are rejected with
 * PatternSyntaxException, and that a back-reference inside its own group still compiles.
 */
public void testCompileStringInt() {
    /*
     * these tests are needed to verify that appropriate exceptions are
     * thrown
     */
    String[] unbalanced = { "b)a", "bcde)a", "bbg())a", "cdb(?i))a" };
    for (String pattern : unbalanced) {
        try {
            Pattern.compile(pattern);
            fail("Expected a PatternSyntaxException when compiling pattern: "
                    + pattern);
        } catch (PatternSyntaxException e) {
            // pass
        }
    }
    /*
     * This pattern should compile - HARMONY-2127
     * icu4c doesn't support canonical equivalence.
     */
    // pattern = "x(?c)y";
    // Pattern.compile(pattern);
    /*
     * this pattern doesn't match any string, but should be compiled anyway
     */
    String selfReferencing = "(b\\1)a";
    Pattern.compile(selfReferencing);
}
/*
 * Class under test for Pattern compile(String): malformed brace quantifiers must be
 * rejected, and the HARMONY-1365 regression pattern must compile.
 */
public void testQuantCompileNeg() {
    String[] malformed = { "5{,2}", "{5asd", "{hgdhg", "{5,hjkh", "{,5hdsh",
            "{5,3shdfkjh}" };
    for (int idx = 0; idx < malformed.length; idx++) {
        try {
            Pattern.compile(malformed[idx]);
            fail("PatternSyntaxException was expected, but compilation succeeds");
        } catch (PatternSyntaxException pse) {
            // expected; move on to the next pattern
        }
    }
    // Regression for HARMONY-1365
    // BEGIN Android-changed
    // Original regex contained some illegal stuff. Changed it slightly,
    // while maintaining the wicked character of this "mother of all
    // regexes".
    // String pattern = "(?![^\\<C\\f\\0146\\0270\\}&&[|\\02-\\x3E\\}|X-\\|]]{7,}+)[|\\\\\\x98\\<\\?\\u4FCFr\\,\\0025\\}\\004|\\0025-\\052\061]|(?<![|\\01-\\u829E])|(?<!\\p{Alpha})|^|(?-s:[^\\x15\\\\\\x24F\\a\\,\\a\\u97D8[\\x38\\a[\\0224-\\0306[^\\0020-\\u6A57]]]]??)(?uxix:[^|\\{\\[\\0367\\t\\e\\x8C\\{\\[\\074c\\]V[|b\\fu\\r\\0175\\<\\07f\\066s[^D-\\x5D]]])(?xx:^{5,}+)(?uuu)(?=^\\D)|(?!\\G)(?>\\G*?)(?![^|\\]\\070\\ne\\{\\t\\[\\053\\?\\\\\\x51\\a\\075\\0023-\\[&&[|\\022-\\xEA\\00-\\u41C2&&[^|a-\\xCC&&[^\\037\\uECB3\\u3D9A\\x31\\|\\<b\\0206\\uF2EC\\01m\\,\\ak\\a\\03&&\\p{Punct}]]]])(?-dxs:[|\\06-\\07|\\e-\\x63&&[|Tp\\u18A3\\00\\|\\xE4\\05\\061\\015\\0116C|\\r\\{\\}\\006\\xEA\\0367\\xC4\\01\\0042\\0267\\xBB\\01T\\}\\0100\\?[|\\[-\\u459B|\\x23\\x91\\rF\\0376[|\\?-\\x94\\0113-\\\\\\s]]]]{6}?)(?<=[^\\t-\\x42H\\04\\f\\03\\0172\\?i\\u97B6\\e\\f\\uDAC2])(?=\\B*+)(?>[^\\016\\r\\{\\,\\uA29D\\034\\02[\\02-\\[|\\t\\056\\uF599\\x62\\e\\<\\032\\uF0AC\\0026\\0205Q\\|\\\\\\06\\0164[|\\057-\\u7A98&&[\\061-g|\\|\\0276\\n\\042\\011\\e\\xE8\\x64B\\04\\u6D0EDW^\\p{Lower}]]]]?)(?<=[^\\n\\\\\\t\\u8E13\\,\\0114\\u656E\\xA5\\]&&[\\03-\\026|\\uF39D\\01\\{i\\u3BC2\\u14FE]])(?<=[^|\\uAE62\\054H\\|\\}&&^\\p{Space}])(?sxx)(?<=[\\f\\006\\a\\r\\xB4]*+)|(?x-xd:^{5}+)()";
    String wicked = "(?![^\\<C\\f\\0146\\0270\\}&&[|\\02-\\x3E\\}|X-\\|]]{7,}+)[|\\\\\\x98\\<\\?\\u4FCFr\\,\\0025\\}\\004|\\0025-\\052\061]|(?<![|\\01-\\u829E])|(?<!\\p{Alpha})|^|(?-s:[^\\x15\\\\\\x24F\\a\\,\\a\\u97D8[\\x38\\a[\\0224-\\0306[^\\0020-\\u6A57]]]]??)(?uxix:[^|\\{\\[\\0367\\t\\e\\x8C\\{\\[\\074c\\]V[|b\\fu\\r\\0175\\<\\07f\\066s[^D-\\x5D]]])(?xx:^{5,}+)(?uuu)(?=^\\D)|(?!\\G)(?>\\.*?)(?![^|\\]\\070\\ne\\{\\t\\[\\053\\?\\\\\\x51\\a\\075\\0023-\\[&&[|\\022-\\xEA\\00-\\u41C2&&[^|a-\\xCC&&[^\\037\\uECB3\\u3D9A\\x31\\|\\<b\\0206\\uF2EC\\01m\\,\\ak\\a\\03&&\\p{Punct}]]]])(?-dxs:[|\\06-\\07|\\e-\\x63&&[|Tp\\u18A3\\00\\|\\xE4\\05\\061\\015\\0116C|\\r\\{\\}\\006\\xEA\\0367\\xC4\\01\\0042\\0267\\xBB\\01T\\}\\0100\\?[|\\[-\\u459B|\\x23\\x91\\rF\\0376[|\\?-\\x94\\0113-\\\\\\s]]]]{6}?)(?<=[^\\t-\\x42H\\04\\f\\03\\0172\\?i\\u97B6\\e\\f\\uDAC2])(?=\\.*+)(?>[^\\016\\r\\{\\,\\uA29D\\034\\02[\\02-\\[|\\t\\056\\uF599\\x62\\e\\<\\032\\uF0AC\\0026\\0205Q\\|\\\\\\06\\0164[|\\057-\\u7A98&&[\\061-g|\\|\\0276\\n\\042\\011\\e\\xE8\\x64B\\04\\u6D0EDW^\\p{Lower}]]]]?)(?<=[^\\n\\\\\\t\\u8E13\\,\\0114\\u656E\\xA5\\]&&[\\03-\\026|\\uF39D\\01\\{i\\u3BC2\\u14FE]])(?<=[^|\\uAE62\\054H\\|\\}&&^\\p{Space}])(?sxx)(?<=[\\f\\006\\a\\r\\xB4]{1,5})|(?x-xd:^{5}+)()";
    // END Android-changed
    Pattern compiled = Pattern.compile(wicked);
    assertNotNull(compiled);
}
/** Compiles well-formed brace quantifiers; the test passes when no compilation throws. */
public void testQuantCompilePos() {
    String[] wellFormed = {/* "(abc){1,3}", */"abc{2,}", "abc{5}" };
    for (int idx = 0; idx < wellFormed.length; idx++) {
        Pattern.compile(wellFormed[idx]);
    }
}
/**
 * Runs a bounded quantifier followed by literal text against "aaab" and queries group 1.
 * Nothing is asserted; the test passes when none of the calls throws.
 */
public void testQuantComposition() {
    Pattern bounded = Pattern.compile("(a{1,3})aab");
    Matcher onInput = bounded.matcher("aaab");
    onInput.matches();
    onInput.start(1);
    onInput.group(1);
}
/**
 * Checks Pattern.matches for every entry of testPatterns against its group of positive
 * inputs. Row i of the table belongs to testPatterns[i]; rows beyond testPatterns.length
 * are never read.
 */
public void testMatches() {
    String[][] matchingInputs = {
            { "abb", "ababb", "abababbababb", "abababbababbabababbbbbabb" },
            { "213567", "12324567", "1234567", "213213567",
                    "21312312312567", "444444567" },
            { "abcdaab", "aab", "abaab", "cdaab", "acbdadcbaab" },
            { "213234567", "3458", "0987654", "7689546432", "0398576",
                    "98432", "5" },
            {
                    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
                    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
                            + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" },
            { "ababbaAabababblice", "ababbaAliceababab", "ababbAabliceaaa",
                    "abbbAbbbliceaaa", "Alice" },
            { "a123", "bnxnvgds156", "for", "while", "if", "struct" },
            { "xy" }, { "xy" }, { "xcy" }
    };
    for (int row = 0; row < testPatterns.length; row++) {
        String regex = testPatterns[row];
        for (String input : matchingInputs[row]) {
            String message = "Incorrect match: " + regex + " vs " + input;
            assertTrue(message, Pattern.matches(regex, input));
        }
    }
}
/** Expects Pattern.matches to throw PatternSyntaxException for every wrongTestPatterns entry. */
public void testMatchesException() {
    /* Negative assertion test. */
    for (String aPattern : wrongTestPatterns) {
        try {
            Pattern.matches(aPattern, "Foo");
            fail("PatternSyntaxException is expected");
        } catch (PatternSyntaxException expected) {
            /* OKAY */
        }
        // Any other exception propagates and is reported by the runner with its stack trace.
    }
}
/** Matches a "GMT-9:45" style offset and checks the contents of capture groups 1 to 4. */
public void testTimeZoneIssue() {
    Matcher offset = Pattern.compile("GMT(\\+|\\-)(\\d+)(:(\\d+))?").matcher("GMT-9:45");
    assertTrue(offset.matches());
    // Expected text of groups 1..4, in order.
    String[] expectedGroups = { "-", "9", ":45", "45" };
    for (int group = 1; group <= expectedGroups.length; group++) {
        assertEquals(expectedGroups[group - 1], offset.group(group));
    }
}
// BEGIN Android-changed
// Removed one pattern that is buggy on the JDK. We don't want to duplicate that.
/**
 * Checks character-class patterns against one input each that must match and one input
 * each that must not; the three tables are aligned by index.
 */
public void testCompileRanges() {
    String[] classPatterns = { "[^]*abb]*", /* "[^a-d[^m-p]]*abb", */
            "[a-d\\d]*abb", "[abc]*abb", "[a-e&&[de]]*abb", "[^abc]*abb",
            "[a-e&&[^de]]*abb", "[a-z&&[^m-p]]*abb", "[a-d[m-p]]*abb",
            "[a-zA-Z]*abb", "[+*?]*abb", "[^+*?]*abb" };
    String[] accepted = { "kkkk", /* "admpabb", */ "abcabcd124654abb",
            "abcabccbacababb", "dededededededeedabb", "gfdhfghgdfghabb",
            "accabacbcbaabb", "acbvfgtyabb", "adbcacdbmopabcoabb",
            "jhfkjhaSDFGHJkdfhHNJMjkhfabb", "+*??+*abb", "sdfghjkabb" };
    String[] rejected = { "]", /* "admpkk", */ "abcabcd124k654abb",
            "abwcabccbacababb", "abababdeababdeabb", "abcabcacbacbabb",
            "acdcbecbaabb", "acbotyabb", "adbcaecdbmopabcoabb",
            "jhfkjhaSDFGHJk;dfhHNJMjkhfabb", "+*?a?+*abb", "sdf+ghjkabb" };
    // First pass: every pattern must match its accepted input.
    for (int n = 0; n < classPatterns.length; n++) {
        String regex = classPatterns[n];
        String input = accepted[n];
        assertTrue("pattern: " + regex + " input: " + input,
                Pattern.matches(regex, input));
    }
    // Second pass: every pattern must reject its counter-example.
    for (int n = 0; n < classPatterns.length; n++) {
        String regex = classPatterns[n];
        String input = rejected[n];
        assertFalse("pattern: " + regex + " input: " + input,
                Pattern.matches(regex, input));
    }
}
/**
 * Checks malformed character ranges, which must be rejected, and classes containing a
 * literal '-' or ']', which must match their sample input.
 */
public void testRangesSpecialCases() {
    String[] rejected = { "[a-&&[b-c]]", "[a-\\w]", "[b-a]", "[]" };
    for (int n = 0; n < rejected.length; n++) {
        String regex = rejected[n];
        try {
            Pattern.compile(regex);
            fail("PatternSyntaxException was expected: " + regex);
        } catch (PatternSyntaxException pse) {
            // expected
        }
    }
    // Flattened pairs: pattern at even index, the input it must match at the next index.
    String[] pairs = { "[-]+", "----", "[a-]+", "a-a-a-a-aa--",
            "[\\w-a]+", "123-2312--aaa-213", "[a-]]+", "-]]]]]]]]]]]]]]]" };
    for (int at = 0; at < pairs.length; at += 2) {
        String regex = pairs[at];
        String input = pairs[at + 1];
        assertTrue("pattern: " + regex + " input: " + input,
                Pattern.matches(regex, input));
    }
}
// END Android-changed
/** Checks that a character class containing the NUL character matches a run of NULs. */
public void testZeroSymbols() {
    String regex = "[\0]*abb";
    String input = "\0\0\0\0\0\0abb";
    assertTrue(Pattern.matches(regex, input));
}
/** Checks that everything after an unterminated \Q is matched literally. */
public void testEscapes() {
    boolean matched = Pattern.compile("\\Q{]()*?").matcher("{]()*?").matches();
    assertTrue(matched);
}
/** Compile-only check: a character range between two escaped control characters. */
public void test_bug_181() {
    String tabToCarriageReturn = "[\\t-\\r]";
    Pattern.compile(tabToCarriageReturn);
}
// https://code.google.com/p/android/issues/detail?id=40103
// Compile-only regression test: it passes as long as neither Pattern.compile call throws.
public void test_bug_40103() {
// Negative look-behind whose alternatives each contain a bounded {1,100} repetition.
Pattern.compile("(?<!abc {1,100}|def {1,100}|ghi {1,100})jkl");
// Looks like harmony had a similar "Bug187"...
Pattern.compile("|(?idmsux-idmsux)|(?idmsux-idmsux)|[^|\\[-\\0274|\\,-\\\\[^|W\\}\\nq\\x65\\002\\xFE\\05\\06\\00\\x66\\x47i\\,\\xF2\\=\\06\\u0EA4\\x9B\\x3C\\f\\|\\{\\xE5\\05\\r\\u944A\\xCA\\e|\\x19\\04\\x07\\04\\u607B\\023\\0073\\x91Tr\\0150\\x83]]?(?idmsux-idmsux:\\p{Alpha}{7}?)||(?<=[^\\uEC47\\01\\02\\u3421\\a\\f\\a\\013q\\035w\\e])(?<=\\p{Punct}{0,}?)(?=^\\p{Lower})(?!\\b{8,14})(?<![|\\00-\\0146[^|\\04\\01\\04\\060\\f\\u224DO\\x1A\\xC4\\00\\02\\0315\\0351\\u84A8\\xCBt\\xCC\\06|\\0141\\00\\=\\e\\f\\x6B\\0026Tb\\040\\x76xJ&&[\\\\-\\]\\05\\07\\02\\u2DAF\\t\\x9C\\e\\0023\\02\\,X\\e|\\u6058flY\\u954C]]]{5}?)(?<=\\p{Sc}{8}+)[^|\\026-\\u89BA|o\\u6277\\t\\07\\x50&&\\p{Punct}]{8,14}+((?<=^\\p{Punct})|(?idmsux-idmsux)||(?>[\\x3E-\\]])|(?idmsux-idmsux:\\p{Punct})|(?<![\\0111\\0371\\xDF\\u6A49\\07\\u2A4D\\00\\0212\\02Xd-\\xED[^\\a-\\0061|\\0257\\04\\f\\[\\0266\\043\\03\\x2D\\042&&[^\\f-\\]&&\\s]]])|(?>[|\\n\\042\\uB09F\\06\\u0F2B\\uC96D\\x89\\uC166\\xAA|\\04-\\][^|\\a\\|\\rx\\04\\uA770\\n\\02\\t\\052\\056\\0274\\|\\=\\07\\e|\\00-\\x1D&&[^\\005\\uB15B\\uCDAC\\n\\x74\\0103\\0147\\uD91B\\n\\062G\\u9B4B\\077\\}\\0324&&[^\\0302\\,\\0221\\04\\u6D16\\04xy\\uD193\\[\\061\\06\\045\\x0F|\\e\\xBB\\f\\u1B52\\023\\u3AD2\\033\\007\\022\\}\\x66\\uA63FJ-\\0304]]]]{0,0})||(?<![^|\\0154U\\u0877\\03\\fy\\n\\|\\0147\\07-\\=[|q\\u69BE\\0243\\rp\\053\\02\\x33I\\u5E39\\u9C40\\052-\\xBC[|\\0064-\\?|\\uFC0C\\x30\\0060\\x45\\\\\\02\\?p\\xD8\\0155\\07\\0367\\04\\uF07B\\000J[^|\\0051-\\{|\\u9E4E\\u7328\\]\\u6AB8\\06\\x71\\a\\]\\e\\|KN\\u06AA\\0000\\063\\u2523&&[\\005\\0277\\x41U\\034\\}R\\u14C7\\u4767\\x09\\n\\054Ev\\0144\\<\\f\\,Q-\\xE4]]]]]{3}+)|(?>^+)|(?![^|\\|\\nJ\\t\\<\\04E\\\\\\t\\01\\\\\\02\\|\\=\\}\\xF3\\uBEC2\\032K\\014\\uCC5F\\072q\\|\\0153\\xD9\\0322\\uC6C8[^\\t\\0342\\x34\\x91\\06\\{\\xF1\\a\\u1710\\?\\xE7\\uC106\\02pF\\<&&[^|\\]\\064\\u381D\\u50CF\\eO&&[^|\\06\\x2F\\04\\045\\032\\u8536W\\0377\\0017|\\x06\\uE5FA\\05\\xD4\\020\\04c\\xFC\\02H\\x0A\\r]]]]+?)(?idmsux-idmsux)|(?<![|\\r-\\,&&[I\\
t\\r\\0201\\xDB\\e&&[^|\\02\\06\\00\\<\\a\\u7952\\064\\051\\073\\x41\\?n\\040\\0053\\031&&[\\x15-\\|]]]]{8,11}?)(?![^|\\<-\\uA74B\\xFA\\u7CD2\\024\\07n\\<\\x6A\\0042\\uE4FF\\r\\u896B\\[\\=\\042Y&&^\\p{ASCII}]++)|(?<![R-\\|&&[\\a\\0120A\\u6145\\<\\050-d[|\\e-\\uA07C|\\016-\\u80D9]]]{1,}+)|(?idmsux-idmsux)|(?idmsux-idmsux)|(?idmsux-idmsux:\\B{6,}?)|(?<=\\D{5,8}?)|(?>[\\{-\\0207|\\06-\\0276\\p{XDigit}])(?idmsux-idmsux:[^|\\x52\\0012\\]u\\xAD\\0051f\\0142\\\\l\\|\\050\\05\\f\\t\\u7B91\\r\\u7763\\{|h\\0104\\a\\f\\0234\\u2D4F&&^\\P{InGreek}]))");
}
/** Compile-only regression check for HARMONY-4472. */
public void test_bug_4472() {
    // HARMONY-4472
    String starThenDotPlus = "a*.+";
    Pattern.compile(starThenDotPlus);
}
/** Compile-only regression check for HARMONY-5858: an escape sequence under LITERAL. */
public void test_bug_5858() {
    // HARMONY-5858
    int flags = Pattern.LITERAL;
    Pattern.compile("\\u6211", flags);
}
/** A pattern consisting only of '+' quantifiers must be rejected. */
public void testOrphanQuantifiers() {
    String onlyQuantifiers = "+++++";
    try {
        Pattern.compile(onlyQuantifiers);
        fail("PatternSyntaxException expected");
    } catch (PatternSyntaxException expected) {
        // expected
    }
}
/** A '*' stacked directly on a '+' quantifier must be rejected. */
public void testOrphanQuantifiers2() {
    String stackedQuantifiers = "\\d+*";
    try {
        Pattern.compile(stackedQuantifiers);
        fail("PatternSyntaxException expected");
    } catch (PatternSyntaxException expected) {
        // expected
    }
}
/**
 * Checks Pattern.split(CharSequence, int) on "boo:and:foo" for a table of
 * (regex, limit, expected pieces) triples.
 */
public void testBug197() {
    // Flattened triples: regex, limit, expected result of splitting "boo:and:foo".
    Object[] vals = { ":", Integer.valueOf(2),
            new String[] { "boo", "and:foo" }, ":", Integer.valueOf(5),
            new String[] { "boo", "and", "foo" }, ":", Integer.valueOf(-2),
            new String[] { "boo", "and", "foo" }, ":", Integer.valueOf(3),
            new String[] { "boo", "and", "foo" }, ":", Integer.valueOf(1),
            new String[] { "boo:and:foo" }, "o", Integer.valueOf(5),
            new String[] { "b", "", ":and:f", "", "" }, "o",
            Integer.valueOf(4), new String[] { "b", "", ":and:f", "o" }, "o",
            Integer.valueOf(-2), new String[] { "b", "", ":and:f", "", "" },
            "o", Integer.valueOf(0), new String[] { "b", "", ":and:f" } };
    // Walk the WHOLE table one triple at a time. The previous bound (vals.length / 3)
    // combined with three increments per pass stopped after the first three triples,
    // so the remaining six cases were never executed.
    for (int i = 0; i < vals.length; i += 3) {
        String regex = (String) vals[i];
        int limit = ((Integer) vals[i + 1]).intValue();
        String[] expectedRes = (String[]) vals[i + 2];
        String[] res = Pattern.compile(regex).split("boo:and:foo", limit);
        // Comparing as lists checks both length and contents and prints both on failure.
        assertEquals("regex: " + regex + " limit: " + limit,
                asList(expectedRes), asList(res));
    }
}
/** Compiles a set of URI-related expressions; the test passes when none of them throws. */
public void testURIPatterns() {
    String uri = "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?";
    String relativeUri = "^(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?";
    String scheme = "^[a-zA-Z]{1}[\\w+-.]+$";
    String ipv4 = "^[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}$";
    String ipv6 = "^[0-9a-fA-F\\:\\.]+(\\%\\w+)?$";
    String bracketedIpv6 = "^\\[[0-9a-fA-F\\:\\.]+(\\%\\w+)?\\]$";
    String hostname = "\\w+[\\w\\-\\.]*";
    // Compiled in this order.
    String[] all = { uri, relativeUri, scheme, ipv4, ipv6, bracketedIpv6, hostname };
    for (String regex : all) {
        Pattern.compile(regex);
    }
}
/** find() with ".*\n" on "a\n" must succeed and return the whole input as the match. */
public void testFindBoundaryCases1() {
    Pattern pat = Pattern.compile(".*\n");
    Matcher mat = pat.matcher("a\n");
    // Assert the search result so a miss is reported as a test failure rather than as an
    // IllegalStateException thrown by group().
    assertTrue(mat.find());
    assertEquals("a\n", mat.group());
}
/** find() with ".*A" on "aAa" must succeed and stop the match right after the 'A'. */
public void testFindBoundaryCases2() {
    Pattern pat = Pattern.compile(".*A");
    Matcher mat = pat.matcher("aAa");
    // Assert the search result so a miss is reported as a test failure rather than as an
    // IllegalStateException thrown by group().
    assertTrue(mat.find());
    assertEquals("aA", mat.group());
}
/** find() with ".*A" on multi-line input must match only within the line containing 'A'. */
public void testFindBoundaryCases3() {
    Pattern pat = Pattern.compile(".*A");
    Matcher mat = pat.matcher("a\naA\n");
    // Assert the search result so a miss is reported as a test failure rather than as an
    // IllegalStateException thrown by group().
    assertTrue(mat.find());
    assertEquals("aA", mat.group());
}
/** find() with "A.*" on "A\n" must succeed and exclude the trailing line terminator. */
public void testFindBoundaryCases4() {
    Pattern pat = Pattern.compile("A.*");
    Matcher mat = pat.matcher("A\n");
    // Assert the search result so a miss is reported as a test failure rather than as an
    // IllegalStateException thrown by group().
    assertTrue(mat.find());
    assertEquals("A", mat.group());
}
/**
 * Repeated find() with ".*A.*" over multi-line input must return exactly the lines that
 * contain an 'A', in order.
 */
public void testFindBoundaryCases5() {
    Pattern pat = Pattern.compile(".*A.*");
    Matcher mat = pat.matcher("\nA\naaa\nA\naaAaa\naaaA\n");
    String[] res = { "A", "A", "aaAaa", "aaaA" };
    int k = 0;
    for (; mat.find(); k++) {
        // Report a surplus match as a failure instead of an ArrayIndexOutOfBoundsException.
        assertTrue("unexpected extra match: " + mat.group(), k < res.length);
        assertEquals(res[k], mat.group());
    }
    // Without this check the test would pass even if find() never matched at all.
    assertEquals(res.length, k);
}
/** Repeated find() with ".*" on "\na\n" must yield four matches: "", "a", "" and "". */
public void testFindBoundaryCases6() {
    Matcher lines = Pattern.compile(".*").matcher("\na\n");
    String[] expected = { "", "a", "", "" };
    int seen = 0;
    while (lines.find()) {
        assertEquals(expected[seen], lines.group());
        seen++;
    }
    assertEquals(4, seen);
}
/**
 * Checks a back-reference to an earlier group across two successive find() calls, then a
 * back-reference that directly follows a greedy prefix.
 */
public void testBackReferences() {
    Matcher tagged = Pattern.compile("(\\((\\w*):(.*):(\\2)\\))")
            .matcher("(start1: word :start1)(start2: word :start2)");
    // ordinal is the 1-based number of the match being inspected; it ends one past the count.
    int ordinal = 1;
    while (tagged.find()) {
        String label = "start" + ordinal;
        assertEquals(label, tagged.group(2));
        assertEquals(" word ", tagged.group(3));
        assertEquals(label, tagged.group(4));
        ordinal++;
    }
    assertEquals(3, ordinal);

    Matcher doubled = Pattern.compile(".*(.)\\1").matcher("saa");
    assertTrue(doubled.matches());
}
/** Counts the matches of "(^$)*\n" in MULTILINE mode on "\r\n\n"; two are expected. */
public void testNewLine() {
    Matcher newlines = Pattern.compile("(^$)*\n", Pattern.MULTILINE).matcher("\r\n\n");
    int hits;
    for (hits = 0; newlines.find(); hits++) {
        // counting only
    }
    assertEquals(2, hits);
}
/** With DOTALL, ".*aaa" must match the entire 15-character multi-line input. */
public void testFindGreedy() {
    Pattern pat = Pattern.compile(".*aaa", Pattern.DOTALL);
    Matcher mat = pat.matcher("aaaa\naaa\naaaaaa");
    // Assert the match result so a miss is reported as a test failure rather than as an
    // IllegalStateException thrown by end().
    assertTrue(mat.matches());
    assertEquals(15, mat.end());
}
/**
 * Runs the golden-file and self round-trip serialization checks on a Pattern, comparing
 * the original and the deserialized instance by their toString() value.
 */
public void testSerialization() throws Exception {
    SerializableAssert sameToString = new SerializableAssert() {
        public void assertDeserialized(Serializable initial,
                Serializable deserialized) {
            String before = ((Pattern) initial).toString();
            String after = ((Pattern) deserialized).toString();
            assertEquals(before, after);
        }
    };
    Pattern compiled = Pattern.compile("a*bc");
    SerializationTest.verifyGolden(this, compiled, sameToString);
    SerializationTest.verifySelf(compiled, sameToString);
}
/** Counts the matches of "$*" in MULTILINE mode on "\n\n"; three are expected. */
public void testSOLQuant() {
    Matcher ends = Pattern.compile("$*", Pattern.MULTILINE).matcher("\n\n");
    int hits;
    for (hits = 0; ends.find(); hits++) {
        // counting only
    }
    assertEquals(3, hits);
}
/** The escape "\y" must be rejected with PatternSyntaxException. */
public void testIllegalEscape() {
    String unknownEscape = "\\y";
    try {
        Pattern.compile(unknownEscape);
        fail("PatternSyntaxException expected");
    } catch (PatternSyntaxException expected) {
        // expected
    }
}
/** Compile-only check: a lone \p{Lower} character class. */
public void testEmptyFamily() {
    String lowerClass = "\\p{Lower}";
    Pattern.compile(lowerClass);
}
/** (?i) applies to the b's only; after (?-i) the a's must be lower-case. */
public void testCapture_Flags() {
    Pattern mixedCase = Pattern.compile("(?i)b*(?-i)a*");
    boolean lowerTail = mixedCase.matcher("bBbBaaaa").matches();
    assertTrue(lowerTail);
    boolean mixedTail = mixedCase.matcher("bBbBAaAa").matches();
    assertFalse(mixedTail);
}
/** (?i:...) is scoped to its group; the a's after it must be lower-case. */
public void testCapture_NonCaptGroups() {
    Pattern scoped = Pattern.compile("(?i:b*)a*");
    boolean lowerTail = scoped.matcher("bBbBaaaa").matches();
    assertTrue(lowerTail);
    boolean mixedTail = scoped.matcher("bBbBAaAa").matches();
    assertFalse(mixedTail);
}
/**
 * Matches a date/time-like string against a pattern with eleven capturing groups and
 * checks the text captured by groups 1 to 10.
 */
public void testCapture() {
    // Capturing groups are numbered 1..11 in order of their opening parenthesis.
    String regex =
            "(?:-|(-?\\d+\\d\\d\\d))?(?:-|-(\\d\\d))?(?:-|-(\\d\\d))?(T)?(?:(\\d\\d):(\\d\\d):(\\d\\d)(\\.\\d+)?)?(?:(?:((?:\\+|\\-)\\d\\d):(\\d\\d))|(Z))?";
    Matcher stamp = Pattern.compile(regex).matcher("-1234-21-31T41:51:61.789+71:81");
    assertTrue(stamp.matches());
    // Expected text of groups 1..10, in order.
    String[] expectedGroups = { "-1234", "21", "31", "T", "41", "51", "61", ".789",
            "+71", "81" };
    for (int group = 1; group <= expectedGroups.length; group++) {
        assertEquals(expectedGroups[group - 1], stamp.group(group));
    }
}
/** Positive lookahead: the text after the last consumed dot must be exactly "log". */
public void testCapture_PositiveLookahead() {
    Pattern endsWithLog = Pattern.compile(".*\\.(?=log$).*$");
    boolean plainLog = endsWithLog.matcher("a.b.c.log").matches();
    assertTrue(plainLog);
    boolean trailingDot = endsWithLog.matcher("a.b.c.log.").matches();
    assertFalse(trailingDot);
}
/** Negative lookahead: input ending in ".log" is rejected, ".logg" is accepted. */
public void testCapture_NegativeLookahead() {
    Pattern notLogSuffix = Pattern.compile(".*\\.(?!log$).*$");
    boolean logSuffix = notLogSuffix.matcher("abc.log").matches();
    assertFalse(logSuffix);
    boolean loggSuffix = notLogSuffix.matcher("abc.logg").matches();
    assertTrue(loggSuffix);
}
/** Positive lookbehind: ".log" must be directly preceded by "abc". */
public void testCapture_PositiveLookbehind() {
    Pattern afterAbc = Pattern.compile(".*(?<=abc)\\.log$");
    boolean cdePrefix = afterAbc.matcher("cde.log").matches();
    assertFalse(cdePrefix);
    boolean abcPrefix = afterAbc.matcher("abc.log").matches();
    assertTrue(abcPrefix);
}
/** Negative lookbehind: ".log" must not be directly preceded by "abc". */
public void testCapture_NegativeLookbehind() {
    Pattern notAfterAbc = Pattern.compile(".*(?<!abc)\\.log$");
    boolean cdePrefix = notAfterAbc.matcher("cde.log").matches();
    assertTrue(cdePrefix);
    boolean abcPrefix = notAfterAbc.matcher("abc.log").matches();
    assertFalse(abcPrefix);
}
/** Expects an atomic a-run followed by "abb" not to match "aaabb". */
public void testCapture_AtomicGroupCase1() {
    Matcher mat = Pattern.compile("(?>a*)abb").matcher("aaabb");
    assertFalse(mat.matches());
}
/** Expects an atomic a-run followed by "bb" to match "aaabb". */
public void testCapture_AtomicGroupCase2() {
    Matcher mat = Pattern.compile("(?>a*)bb").matcher("aaabb");
    assertTrue(mat.matches());
}
/** Expects the atomic alternation with the shorter branch first to match "aaabb". */
public void testCapture_AtomicGroupCase3() {
    Matcher mat = Pattern.compile("(?>a|aa)aabb").matcher("aaabb");
    assertTrue(mat.matches());
}
/** Expects the atomic alternation with the longer branch first not to match "aaabb". */
public void testCapture_AtomicGroupCase4() {
    Matcher mat = Pattern.compile("(?>aa|a)aabb").matcher("aaabb");
    assertFalse(mat.matches());
    // BEGIN Android-removed
    // Questionable constructs that ICU doesn't support.
    // // quantifiers over look ahead
    // pat = Pattern.compile(".*(?<=abc)*\\.log$");
    // assertTrue(pat.matcher("cde.log").matches());
    // pat = Pattern.compile(".*(?<=abc)+\\.log$");
    // assertFalse(pat.matcher("cde.log").matches());
    // END Android-removed
}
/**
 * Compiles a series of patterns that combine groups, quantifiers and back-references;
 * the test passes when none of them throws.
 */
public void testCorrectReplacementBackreferencedJointSet() {
    // Compiled in this exact order; the repeated entry is intentional to mirror the
    // original sequence of calls.
    String[] regexes = {
        "ab(a)*\\1",
        "abc(cd)fg",
        "aba*cd",
        "ab(a)*+cd",
        "ab(a)*?cd",
        "ab(a)+cd",
        ".*(.)\\1",
        "ab((a)|c|d)e",
        "abc((a(b))cd)",
        "ab(a)++cd",
        "ab(a)?(c)d",
        "ab(a)?+cd",
        "ab(a)??cd",
        "ab(a)??cd",
        "ab(a){1,3}?(c)d",
    };
    for (String regex : regexes) {
        Pattern.compile(regex);
    }
}
/** Checks that a pattern containing two NUL characters matches the identical input. */
public void testCompilePatternWithTerminatorMark() {
    // The same text serves as both the pattern and the input.
    final String withNuls = "a\u0000\u0000cd";
    boolean matched = Pattern.compile(withNuls).matcher(withNuls).matches();
    assertTrue(matched);
}
/** An empty first alternative must let the pattern match the empty string. */
public void testAlternationsCase1() {
    Pattern emptyFirstBranch = Pattern.compile("|a|bc");
    assertTrue(emptyFirstBranch.matcher("").matches());
}
/** An empty middle alternative must let the pattern match the empty string. */
public void testAlternationsCase2() {
    Pattern emptyMiddleBranch = Pattern.compile("a||bc");
    assertTrue(emptyMiddleBranch.matcher("").matches());
}
/** An empty last alternative must let the pattern match the empty string. */
public void testAlternationsCase3() {
    Pattern emptyLastBranch = Pattern.compile("a|bc|");
    assertTrue(emptyLastBranch.matcher("").matches());
}
/** Same as the trailing-empty-branch case, with single-character alternatives. */
public void testAlternationsCase4() {
    Pattern emptyLastBranch = Pattern.compile("a|b|");
    assertTrue(emptyLastBranch.matcher("").matches());
}
/** A group whose first alternative is empty must allow "ae" to match. */
public void testAlternationsCase5() {
    Pattern groupEmptyFirst = Pattern.compile("a(|b|cd)e");
    assertTrue(groupEmptyFirst.matcher("ae").matches());
}
/** A group whose middle alternative is empty must allow "ae" to match. */
public void testAlternationsCase6() {
    Pattern groupEmptyMiddle = Pattern.compile("a(b||cd)e");
    assertTrue(groupEmptyMiddle.matcher("ae").matches());
}
/** A group whose last alternative is empty must allow "ae" to match. */
public void testAlternationsCase7() {
    Pattern groupEmptyLast = Pattern.compile("a(b|cd|)e");
    assertTrue(groupEmptyLast.matcher("ae").matches());
}
/** Trailing empty alternative inside a group of single-character branches. */
public void testAlternationsCase8() {
    Pattern groupEmptyLast = Pattern.compile("a(b|c|)e");
    assertTrue(groupEmptyLast.matcher("ae").matches());
}
/** A capturing group holding only two empty alternatives must allow "ae" to match. */
public void testAlternationsCase9() {
    Pattern onlyEmptyBranches = Pattern.compile("a(|)e");
    assertTrue(onlyEmptyBranches.matcher("ae").matches());
}
/** A pattern that is just the alternation bar must match the empty string. */
public void testAlternationsCase10() {
    Pattern bareBar = Pattern.compile("|");
    assertTrue(bareBar.matcher("").matches());
}
/** A non-capturing group holding only two empty alternatives must allow "ae" to match. */
public void testAlternationsCase11() {
    Pattern onlyEmptyBranches = Pattern.compile("a(?:|)e");
    assertTrue(onlyEmptyBranches.matcher("ae").matches());
}
/** Several consecutive empty alternatives must let the pattern match the empty string. */
public void testAlternationsCase12() {
    Pattern manyEmptyBranches = Pattern.compile("a||||bc");
    assertTrue(manyEmptyBranches.matcher("").matches());
}
/** An inline flag group as the first alternative must not stop "a" from matching. */
public void testAlternationsCase13() {
    Pattern flagsThenBranch = Pattern.compile("(?i-is)|a");
    assertTrue(flagsThenBranch.matcher("a").matches());
}
/**
 * Expects a long input to match a pattern that requires some two-character sequence to
 * occur twice (via a back-reference to group 1).
 */
public void testMatchWithGroups14() {
    final String regex = ".*(..).*\\1.*";
    final String input = "jwkerhjwehrkwjehrkwjhrwkjehrjwkehrjkwhrkwehrkwhrkwrhwkhrwkjehr";
    Matcher mat = Pattern.compile(regex).matcher(input);
    assertTrue(mat.matches());
}
/**
 * Expects "saa" to satisfy a pattern ending in a doubled character, both as a whole
 * match and through find(), each on a freshly compiled pattern.
 */
public void testMatchWithGroups15() {
    final String regex = ".*(.)\\1";
    final String input = "saa";
    Matcher whole = Pattern.compile(regex).matcher(input);
    assertTrue(whole.matches());
    Matcher partial = Pattern.compile(regex).matcher(input);
    assertTrue(partial.find());
}
/** Splitting the empty string on ":" must yield an array with exactly one element. */
public void testSplitEmptyCharSequence() {
    String s = "";
    String[] arr = s.split(":");
    // JUnit's contract is assertEquals(expected, actual); the reversed order produced a
    // misleading "expected:<actual> but was:<expected>" message on failure.
    assertEquals(1, arr.length);
}
/**
 * Checks the number of pieces returned by split when the input ends with (or consists
 * only of) the delimiter, for positive and negative limits.
 */
public void testSplitEndsWithPattern() {
    // Arguments are given as (expected, actual) so failure messages read correctly.
    assertEquals(3, ",,".split(",", 3).length);
    assertEquals(3, ",,".split(",", 4).length);
    assertEquals(5, Pattern.compile("o").split("boo:and:foo", 5).length);
    assertEquals(2, Pattern.compile("b").split("ab", -1).length);
}
/** Expects a flag group that turns on "i" and turns nothing off to match "ABC". */
public void testCaseInsensitiveFlag() {
    boolean matched = Pattern.matches("(?i-:AbC)", "ABC");
    assertTrue(matched);
}
/** An empty atomic group in the middle of a literal must not affect matching. */
public void testEmptyGroupsCase1() {
    Pattern emptyAtomic = Pattern.compile("ab(?>)cda");
    assertTrue(emptyAtomic.matcher("abcda").matches());
}
/** A trailing empty capturing group must not affect matching. */
public void testEmptyGroupsCase2() {
    Pattern emptyCapture = Pattern.compile("ab()");
    assertTrue(emptyCapture.matcher("ab").matches());
}
/** An empty non-capturing group before a two-character group must not affect matching. */
public void testEmptyGroupsCase3() {
    Pattern emptyNonCapture = Pattern.compile("abc(?:)(..)");
    assertTrue(emptyNonCapture.matcher("abcgf").matches());
}
/** The outer "i" flag must still apply inside a group that sets its own "s" flag. */
public void testEmbeddedFlagsCase1() {
    Matcher mat = Pattern.compile("(?i)((?s)a)").matcher("A");
    assertTrue(mat.matches());
}
/** Four consecutive inline flag groups (x, i, s, d) before "a" must match "A". */
public void testEmbeddedFlagsCase2() {
    Matcher mat = Pattern.compile("(?x)(?i)(?s)(?d)a").matcher("A");
    assertTrue(mat.matches());
}
/** With the inline "s" flag set, the trailing dot must match a line feed. */
public void testEmbeddedFlagsCase3() {
    Matcher mat = Pattern.compile("(?x)(?i)(?s)(?d)a.").matcher("a\n");
    assertTrue(mat.matches());
}
/** Inline flags set inside a flag-scoped group must apply to the rest of that group. */
public void testEmbeddedFlagsCase4() {
    Matcher mat = Pattern.compile("abc(?x:(?i)(?s)(?d)a.)").matcher("abcA\n");
    assertTrue(mat.matches());
}
/** Inline flags set after a group that has its own flag must apply to the trailing "a". */
public void testEmbeddedFlagsCase5() {
    Matcher mat = Pattern.compile("abc((?x)d)(?i)(?s)a").matcher("abcdA");
    assertTrue(mat.matches());
}
/**
 * Compiles an alternation of an empty branch, a flag group and an empty capturing group;
 * the test passes when no exception is thrown.
 */
public void testAltWithFlags() {
    final String regex = "|(?i-xi)|()";
    Pattern.compile(regex);
}
/**
 * The "x" flag set inside a capturing group must not leak out: the literal space that
 * follows the group has to match the space in the input.
 */
public void testRestoreFlagsAfterGroup() {
    Matcher mat = Pattern.compile("abc((?x)d) a").matcher("abcd a");
    assertTrue(mat.matches());
}
/*
 * Verify that Pattern accepts the java* character classes listed below, such as
 * \p{javaLowerCase}, \p{javaUpperCase}, \p{javaWhitespace} and \p{javaMirrored}.
 */
public void testCompileCharacterClass() {
    // Regression for HARMONY-606, 696
    String[] javaClasses = {
        "\\p{javaLowerCase}",
        "\\p{javaUpperCase}",
        "\\p{javaWhitespace}",
        "\\p{javaMirrored}",
        "\\p{javaDefined}",
        "\\p{javaDigit}",
        "\\p{javaIdentifierIgnorable}",
        "\\p{javaISOControl}",
        "\\p{javaJavaIdentifierPart}",
        "\\p{javaJavaIdentifierStart}",
        "\\p{javaLetter}",
        "\\p{javaLetterOrDigit}",
        "\\p{javaSpaceChar}",
        "\\p{javaTitleCase}",
        "\\p{javaUnicodeIdentifierPart}",
        "\\p{javaUnicodeIdentifierStart}",
    };
    for (String javaClass : javaClasses) {
        Pattern pattern = Pattern.compile(javaClass);
        assertNotNull(pattern);
    }
}
/** A class listing a lone high surrogate must match that same lone surrogate. */
public void testRangesWithSurrogatesSupplementaryCase1() {
    Pattern classWithSurrogate = Pattern.compile("[abc\uD8D2]");
    assertTrue(classWithSurrogate.matcher("\uD8D2").matches());
}
/** A class listing a lone high surrogate must still match its ordinary member "a". */
public void testRangesWithSurrogatesSupplementaryCase2() {
    Pattern classWithSurrogate = Pattern.compile("[abc\uD8D2]");
    assertTrue(classWithSurrogate.matcher("a").matches());
}
/**
 * The lone high surrogate in the class must not be found inside an input where it is the
 * first half of a surrogate pair.
 */
public void testRangesWithSurrogatesSupplementaryCase3() {
    Pattern classWithSurrogate = Pattern.compile("[abc\uD8D2]");
    assertFalse(classWithSurrogate.matcher("ef\uD8D2\uDD71gh").find());
}
/** The lone high surrogate in the class must be found when it stands unpaired in the input. */
public void testRangesWithSurrogatesSupplementaryCase4() {
    Pattern classWithSurrogate = Pattern.compile("[abc\uD8D2]");
    assertTrue(classWithSurrogate.matcher("ef\uD8D2gh").find());
}
/** Intersection of two classes that share "c" and a lone high surrogate must match "c". */
public void testRangesWithSurrogatesSupplementaryCase5() {
    Pattern intersection = Pattern.compile("[abc\uD8D3&&[c\uD8D3]]");
    assertTrue(intersection.matcher("c").matches());
}
/**
 * Intersection of two classes that share only "c" and a lone high surrogate must not
 * match "a", which is present in the left-hand class only.
 */
public void testRangesWithSurrogatesSupplementaryCase6() {
    String patString = "[abc\uD8D3&&[c\uD8D3]]";
    // The stray empty statement (a doubled semicolon) that followed this declaration
    // has been removed.
    String testString = "a";
    Pattern pat = Pattern.compile(patString);
    Matcher mat = pat.matcher(testString);
    assertFalse(mat.matches());
}
/**
 * The intersection class must not be found in an input where its surrogate member only
 * appears as the first half of a surrogate pair.
 */
public void testRangesWithSurrogatesSupplementaryCase7() {
    Pattern intersection = Pattern.compile("[abc\uD8D3&&[c\uD8D3]]");
    assertFalse(intersection.matcher("ef\uD8D3\uDD71gh").find());
}
/** The intersection class must be found when its surrogate member stands unpaired. */
public void testRangesWithSurrogatesSupplementaryCase8() {
    Pattern intersection = Pattern.compile("[abc\uD8D3&&[c\uD8D3]]");
    assertTrue(intersection.matcher("ef\uD8D3gh").find());
}
/**
 * Intersection of classes that contain a lone high surrogate followed by a full
 * surrogate pair must still match their shared member "c".
 */
public void testRangesWithSurrogatesSupplementaryCase9() {
    Pattern intersection = Pattern.compile("[abc\uD8D3\uDBEE\uDF0C&&[c\uD8D3\uDBEE\uDF0C]]");
    assertTrue(intersection.matcher("c").matches());
}
/** The intersection must match the surrogate pair that both classes contain. */
public void testRangesWithSurrogatesSupplementaryCase10() {
    Pattern intersection = Pattern.compile("[abc\uD8D3\uDBEE\uDF0C&&[c\uD8D3\uDBEE\uDF0C]]");
    assertTrue(intersection.matcher("\uDBEE\uDF0C").matches());
}
/**
 * The intersection must not be found in an input whose only candidate is a different
 * surrogate pair starting with the same high surrogate.
 */
public void testRangesWithSurrogatesSupplementaryCase11() {
    Pattern intersection = Pattern.compile("[abc\uD8D3\uDBEE\uDF0C&&[c\uD8D3\uDBEE\uDF0C]]");
    assertFalse(intersection.matcher("ef\uD8D3\uDD71gh").find());
}
/** The intersection must be found when its lone high surrogate stands unpaired. */
public void testRangesWithSurrogatesSupplementaryCase12() {
    Pattern intersection = Pattern.compile("[abc\uD8D3\uDBEE\uDF0C&&[c\uD8D3\uDBEE\uDF0C]]");
    assertTrue(intersection.matcher("ef\uD8D3gh").find());
}
/**
 * A class ending in a high surrogate followed by a literal low surrogate must not match
 * an input in which those two code units form one surrogate pair.
 */
public void testRangesWithSurrogatesSupplementaryCase13() {
    Pattern classThenLow = Pattern.compile("[abc\uDBFC]\uDDC2cd");
    assertFalse(classThenLow.matcher("\uDBFC\uDDC2cd").matches());
}
/** The same pattern must match when the class position is taken by "a" instead. */
public void testRangesWithSurrogatesSupplementaryCase14() {
    Pattern classThenLow = Pattern.compile("[abc\uDBFC]\uDDC2cd");
    assertTrue(classThenLow.matcher("a\uDDC2cd").matches());
}
/**
 * Compiles a literal ending in a lone high surrogate and creates a matcher over an input
 * where that surrogate is followed by a low surrogate. No assertion is made (see below).
 */
public void testSequencesWithSurrogatesSupplementaryCase1() {
    Pattern endsWithHigh = Pattern.compile("abcd\uD8D3");
    Matcher mat = endsWithHigh.matcher("abcd\uD8D3\uDFFC");
    // BEGIN Android-changed
    // This one really doesn't make sense, as the above is a corrupt surrogate.
    // Even if it's matched by the JDK, it's more of a bug than of a behavior one
    // might want to duplicate.
    // assertFalse(mat.find());
    // END Android-changed
}
/** A literal ending in a lone high surrogate must be found when ordinary text follows it. */
public void testSequencesWithSurrogatesSupplementaryCase2() {
    Pattern endsWithHigh = Pattern.compile("abcd\uD8D3");
    assertTrue(endsWithHigh.matcher("abcd\uD8D3abc").find());
}
/** A literal with a lone high surrogate in the middle must match the identical input. */
public void testSequencesWithSurrogatesSupplementaryCase3() {
    // The same text serves as both the pattern and the input.
    final String text = "ab\uDBEFcd";
    assertTrue(Pattern.compile(text).matcher(text).matches());
}
/**
 * A literal starting with a lone low surrogate must not be found where that low
 * surrogate is the second half of a surrogate pair in the input.
 */
public void testSequencesWithSurrogatesSupplementaryCase4() {
    Pattern startsWithLow = Pattern.compile("\uDFFCabcd");
    assertFalse(startsWithLow.matcher("\uD8D3\uDFFCabcd").find());
}
/** A literal starting with a lone low surrogate must be found when it stands unpaired. */
public void testSequencesWithSurrogatesSupplementaryCase5() {
    Pattern startsWithLow = Pattern.compile("\uDFFCabcd");
    assertTrue(startsWithLow.matcher("abc\uDFFCabcdecd").find());
}
/**
 * A literal starting with a surrogate pair must be found in an input where that pair is
 * preceded by an extra lone high surrogate.
 */
public void testSequencesWithSurrogatesSupplementaryCase6String() {
    Pattern startsWithPair = Pattern.compile("\uD8D3\uDFFCabcd");
    assertTrue(startsWithPair.matcher("abc\uD8D3\uD8D3\uDFFCabcd").find());
}
/** The class of 1, 2, 3 or any non-digit must be found in "a". */
public void testPredefinedClassesWithSurrogatesSupplementaryCase1() {
    Pattern digitsOrNonDigit = Pattern.compile("[123\\D]");
    assertTrue(digitsOrNonDigit.matcher("a").find());
}
/** The class of 1, 2, 3 or any non-digit must not be found in "5". */
public void testPredefinedClassesWithSurrogatesSupplementaryCase2() {
    Pattern digitsOrNonDigit = Pattern.compile("[123\\D]");
    assertFalse(digitsOrNonDigit.matcher("5").find());
}
/** The class of 1, 2, 3 or any non-digit must be found in "3". */
public void testPredefinedClassesWithSurrogatesSupplementaryCase3() {
    Pattern digitsOrNonDigit = Pattern.compile("[123\\D]");
    assertTrue(digitsOrNonDigit.matcher("3").find());
}
/** The class of 1, 2, 3 or any non-digit must be found in a lone low surrogate. */
public void testPredefinedClassesWithSurrogatesSupplementaryCase4() {
    final String loneLowSurrogate = "\uDFC4";
    Pattern digitsOrNonDigit = Pattern.compile("[123\\D]");
    assertTrue(digitsOrNonDigit.matcher(loneLowSurrogate).find());
}
/** The class of 1, 2, 3 or any non-digit must be found in a lone high surrogate. */
public void testPredefinedClassesWithSurrogatesSupplementaryCase5() {
    final String loneHighSurrogate = "\uDADA";
    Pattern digitsOrNonDigit = Pattern.compile("[123\\D]");
    assertTrue(digitsOrNonDigit.matcher(loneHighSurrogate).find());
}
/** The class of 1, 2, 3 or any non-digit must be found in a full surrogate pair. */
public void testPredefinedClassesWithSurrogatesSupplementaryCase6() {
    final String surrogatePair = "\uDADA\uDFC4";
    Pattern digitsOrNonDigit = Pattern.compile("[123\\D]");
    assertTrue(digitsOrNonDigit.matcher(surrogatePair).find());
}
/** The union of 1, 2, 3 and the negated javaDigit class must be found in "a". */
public void testPredefinedClassesWithSurrogatesSupplementaryCase7() {
    Pattern digitsOrNonJavaDigit = Pattern.compile("[123[^\\p{javaDigit}]]");
    assertTrue(digitsOrNonJavaDigit.matcher("a").find());
}
/** The union of 1, 2, 3 and the negated javaDigit class must not be found in "5". */
public void testPredefinedClassesWithSurrogatesSupplementaryCase8() {
    Pattern digitsOrNonJavaDigit = Pattern.compile("[123[^\\p{javaDigit}]]");
    assertFalse(digitsOrNonJavaDigit.matcher("5").find());
}
/** The union of 1, 2, 3 and the negated javaDigit class must be found in "3". */
public void testPredefinedClassesWithSurrogatesSupplementaryCase9() {
    Pattern digitsOrNonJavaDigit = Pattern.compile("[123[^\\p{javaDigit}]]");
    assertTrue(digitsOrNonJavaDigit.matcher("3").find());
}
/** The negated javaDigit union must be found in a lone low surrogate. */
public void testPredefinedClassesWithSurrogatesSupplementaryCase10() {
    final String loneLowSurrogate = "\uDFC4";
    Pattern digitsOrNonJavaDigit = Pattern.compile("[123[^\\p{javaDigit}]]");
    assertTrue(digitsOrNonJavaDigit.matcher(loneLowSurrogate).find());
}
/** The negated javaDigit union must be found in a lone high surrogate. */
public void testPredefinedClassesWithSurrogatesSupplementaryCase11() {
    final String loneHighSurrogate = "\uDADA";
    Pattern digitsOrNonJavaDigit = Pattern.compile("[123[^\\p{javaDigit}]]");
    assertTrue(digitsOrNonJavaDigit.matcher(loneHighSurrogate).find());
}
/** The negated javaDigit union must be found in a full surrogate pair. */
public void testPredefinedClassesWithSurrogatesSupplementaryCase12() {
    final String surrogatePair = "\uDADA\uDFC4";
    Pattern digitsOrNonJavaDigit = Pattern.compile("[123[^\\p{javaDigit}]]");
    assertTrue(digitsOrNonJavaDigit.matcher(surrogatePair).find());
}
/**
 * Compiles the surrogate category class and creates a matcher over a well-formed
 * surrogate pair. The assertion is commented out, so only compilation and matcher
 * creation are exercised.
 */
public void testPredefinedClassesWithSurrogatesSupplementaryCase13() {
    // surrogate characters
    Pattern surrogateCategory = Pattern.compile("\\p{Cs}");
    Matcher mat = surrogateCategory.matcher("\uD916\uDE27");
    /*
     * see http://www.unicode.org/reports/tr18/#Supplementary_Characters we
     * have to treat text as code points not code units. \\p{Cs} matches any
     * surrogate character but here testString is a one code point
     * consisting of two code units (two surrogate characters) so we find
     * nothing
     */
    // assertFalse(mat.find());
}
/**
 * The surrogate category class must be found when the low and high surrogates appear in
 * swapped order, so that they do not form a pair.
 */
public void testPredefinedClassesWithSurrogatesSupplementaryCase14() {
    final String lowThenHigh = "\uDE27\uD916";
    Pattern surrogateCategory = Pattern.compile("\\p{Cs}");
    assertTrue(surrogateCategory.matcher(lowThenHigh).find());
}
/** Intersecting the class with the non-surrogate set must keep the ordinary member "1". */
public void testPredefinedClassesWithSurrogatesSupplementaryCase15() {
    Pattern withoutSurrogates = Pattern.compile("[\uD916\uDE271\uD91623&&[^\\p{Cs}]]");
    assertTrue(withoutSurrogates.matcher("1").find());
}
/** Intersecting the class with the non-surrogate set must exclude the lone high surrogate. */
public void testPredefinedClassesWithSurrogatesSupplementaryCase16() {
    Pattern withoutSurrogates = Pattern.compile("[\uD916\uDE271\uD91623&&[^\\p{Cs}]]");
    assertFalse(withoutSurrogates.matcher("\uD916").find());
}
/** Intersecting the class with the non-surrogate set must keep the full surrogate pair. */
public void testPredefinedClassesWithSurrogatesSupplementaryCase17() {
    Pattern withoutSurrogates = Pattern.compile("[\uD916\uDE271\uD91623&&[^\\p{Cs}]]");
    assertTrue(withoutSurrogates.matcher("\uD916\uDE27").find());
}
/**
 * A range from "a" up to a supplementary character must contain a smaller supplementary
 * character that shares the same high surrogate.
 */
public void testPredefinedClassesWithSurrogatesSupplementaryCase18() {
    // Range upper bound: U+7828E (surrogates D9A0 DE8E).
    // Input:             U+78281 (surrogates D9A0 DE81).
    Pattern upToSupplementary = Pattern.compile("[a-\uD9A0\uDE8E]");
    assertTrue(upToSupplementary.matcher("\uD9A0\uDE81").matches());
}
/** A single dot must match one surrogate pair in full. */
public void testDotConstructionWithSurrogatesSupplementaryCase1() {
    Pattern anyChar = Pattern.compile(".");
    assertTrue(anyChar.matcher("\uD9A0\uDE81").matches());
}
/** A single dot must match a lone low surrogate. */
public void testDotConstructionWithSurrogatesSupplementaryCase2() {
    Pattern anyChar = Pattern.compile(".");
    assertTrue(anyChar.matcher("\uDE81").matches());
}
/** A single dot must match a lone high surrogate. */
public void testDotConstructionWithSurrogatesSupplementaryCase3() {
    Pattern anyChar = Pattern.compile(".");
    assertTrue(anyChar.matcher("\uD9A0").matches());
}
/** Without DOTALL, a single dot must not match a line feed. */
public void testDotConstructionWithSurrogatesSupplementaryCase4() {
    Pattern anyChar = Pattern.compile(".");
    assertFalse(anyChar.matcher("\n").matches());
}
/**
 * Dot-star followed by a lone low surrogate must not match an input made only of
 * complete surrogate pairs that end in that low surrogate.
 */
public void testDotConstructionWithSurrogatesSupplementaryCase5() {
    Pattern anythingThenLow = Pattern.compile(".*\uDE81");
    assertFalse(anythingThenLow.matcher("\uD9A0\uDE81\uD9A0\uDE81\uD9A0\uDE81").matches());
}
/**
 * Dot-star followed by a lone low surrogate must match when the input ends with an
 * unpaired occurrence of that low surrogate.
 */
public void testDotConstructionWithSurrogatesSupplementaryCase6() {
    Pattern anythingThenLow = Pattern.compile(".*\uDE81");
    assertTrue(anythingThenLow.matcher("\uD9A0\uDE81\uD9A0\uDE81\uDE81").matches());
}
/**
 * In DOTALL mode, dot-star must match an input that mixes surrogate pairs, lone
 * surrogates and line feeds.
 */
public void testDotConstructionWithSurrogatesSupplementaryCase7() {
    Pattern anything = Pattern.compile(".*", Pattern.DOTALL);
    assertTrue(anything.matcher("\uD9A0\uDE81\n\uD9A0\uDE81\uD9A0\n\uDE81").matches());
}
/**
 * Checks {@link Pattern#quote(String)} for plain text, for text that contains the
 * quote-start marker, and for text that contains both quote markers. The checks are
 * repeated once per entry of {@code testPatterns}, compiling that entry each time.
 */
public void test_quoteLjava_lang_String() {
    for (String aPattern : testPatterns) {
        // Still compile every test pattern, as the original loop did.
        Pattern.compile(aPattern);
        // quote() is static, so it is called on the class rather than through an
        // instance. The former catch-all that converted any exception into fail() has
        // been dropped: an unexpected exception now propagates with its full stack
        // trace, which still fails the test.
        assertEquals("quote was wrong for plain text", "\\Qtest\\E",
                Pattern.quote("test"));
        assertEquals("quote was wrong for text with quote sign",
                "\\Q\\Qtest\\E", Pattern.quote("\\Qtest"));
        assertEquals("quote was wrong for quotted text",
                "\\Q\\Qtest\\E\\\\E\\Q\\E", Pattern.quote("\\Qtest\\E"));
    }
}
/**
 * For every entry of {@code testPatterns}, asserts that each input in the row of
 * positive samples with the same index is matched in full by that pattern.
 */
public void test_matcherLjava_lang_StringLjava_lang_CharSequence() {
    // Row i holds inputs that testPatterns[i] is expected to match.
    String[][] posSeq = {
            { "abb", "ababb", "abababbababb", "abababbababbabababbbbbabb" },
            { "213567", "12324567", "1234567", "213213567",
                    "21312312312567", "444444567" },
            { "abcdaab", "aab", "abaab", "cdaab", "acbdadcbaab" },
            { "213234567", "3458", "0987654", "7689546432", "0398576",
                    "98432", "5" },
            {
                    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
                    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
                            + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" },
            { "ababbaAabababblice", "ababbaAliceababab", "ababbAabliceaaa",
                    "abbbAbbbliceaaa", "Alice" },
            { "a123", "bnxnvgds156", "for", "while", "if", "struct" },
            { "xy" }, { "xy" }, { "xcy" }
    };
    for (int row = 0; row < testPatterns.length; row++) {
        String regex = testPatterns[row];
        for (String candidate : posSeq[row]) {
            String message = "Incorrect match: " + regex + " vs " + candidate;
            boolean matched = Pattern.compile(regex).matcher(candidate).matches();
            assertTrue(message, matched);
        }
    }
}
/**
 * A star applied to a surrogate pair must repeat the whole pair: the pattern has to match
 * both two repetitions of the pair and zero repetitions, each followed by "abc".
 */
public void testQuantifiersWithSurrogatesSupplementary() {
    final String regex = "\uD9A0\uDE81*abc";
    Matcher twoRepeats = Pattern.compile(regex).matcher("\uD9A0\uDE81\uD9A0\uDE81abc");
    assertTrue(twoRepeats.matches());
    Matcher zeroRepeats = Pattern.compile(regex).matcher("abc");
    assertTrue(zeroRepeats.matches());
}
/**
 * Alternation between a lone low surrogate, a surrogate pair and a lone high surrogate:
 * each alternative must match on its own, and the reversed (low, high) order must not.
 */
public void testAlternationsWithSurrogatesSupplementary() {
    final String regex = "\uDE81|\uD9A0\uDE81|\uD9A0";
    // The pattern is recompiled for every input, as in the original sequence of checks.
    String[] accepted = { "\uD9A0", "\uDE81", "\uD9A0\uDE81" };
    for (String input : accepted) {
        Matcher mat = Pattern.compile(regex).matcher(input);
        assertTrue(mat.matches());
    }
    Matcher reversed = Pattern.compile(regex).matcher("\uDE81\uD9A0");
    assertFalse(reversed.matches());
}
/**
 * A group boundary placed between the two halves of a surrogate pair must not match the
 * pair, and a group holding only the high surrogate must not be found inside the pair.
 */
public void testGroupsWithSurrogatesSupplementary() {
    final String surrogatePair = "\uD9A0\uDE81";
    // this pattern matches nothing
    Matcher splitPair = Pattern.compile("(\uD9A0)\uDE81").matcher(surrogatePair);
    assertFalse(splitPair.matches());
    Matcher highOnly = Pattern.compile("(\uD9A0)", Pattern.DOTALL).matcher(surrogatePair);
    assertFalse(highOnly.find());
}
/*
 * Regression test for HARMONY-688: the javaLowerCase class must be found in an input
 * consisting of a single surrogate pair.
 */
public void testUnicodeCategoryWithSurrogatesSupplementary() {
    Matcher lowerCase = Pattern.compile("\\p{javaLowerCase}").matcher("\uD801\uDC28");
    assertTrue(lowerCase.find());
}
/**
 * Splitting the empty string with the empty pattern and a negative limit must return a
 * single empty string.
 */
public void testSplitEmpty() {
    String[] pieces = Pattern.compile("").split("", -1);
    assertEquals(1, pieces.length);
    assertEquals("", pieces[0]);
}
/** A compiled pattern's toString() must return the source text it was compiled from. */
public void testToString() {
    for (String source : testPatterns) {
        Pattern compiled = Pattern.compile(source);
        assertEquals(source, compiled.toString());
    }
}
// http://code.google.com/p/android/issues/detail?id=19308
// The input "224.." must not be reported as a partial match: either matches() succeeds
// or hitEnd() is false afterwards.
public void test_hitEnd() {
    Pattern p = Pattern.compile("^2(2[4-9]|3\\d)(\\.(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)){3}$");
    Matcher m = p.matcher("224..");
    boolean isPartialMatch = false;
    // hitEnd() is consulted only after a failed full match.
    if (!m.matches()) {
        isPartialMatch = m.hitEnd();
    }
    assertFalse(isPartialMatch);
}
/**
 * With the COMMENTS flag, text from "#" up to the line feed inside the pattern must be
 * ignored, so the pattern matches "abcd".
 */
public void testCommentsInPattern() {
    Pattern commented = Pattern.compile("ab# this is a comment\ncd", Pattern.COMMENTS);
    Matcher mat = commented.matcher("abcd");
    assertTrue(mat.matches());
}
/**
 * Compiles an empty non-capturing group with several flag combinations; the test passes
 * when none of the calls throws.
 */
public void testCompileNonCaptGroup() {
    // icu4c doesn't support CANON_EQ, so that flag is left out of every call here: the
    // first call originally passed CANON_EQ alone, the other three OR-ed it with the
    // flags that remain.
    final String emptyGroup = "(?:)";
    Pattern.compile(emptyGroup);
    Pattern.compile(emptyGroup, Pattern.DOTALL);
    Pattern.compile(emptyGroup, Pattern.CASE_INSENSITIVE);
    Pattern.compile(emptyGroup, Pattern.COMMENTS | Pattern.UNIX_LINES);
}
/**
 * Compares {@link Pattern#flags()} with the flags implied by inline flag groups.
 *
 * <p>Currently disabled: the method returns immediately, so none of the checks below
 * run. They are kept compilable for reference.
 */
public void testFlagsMethod() {
    // icu4c doesn't count inline flags that span the entire regex as being global flags.
    // Android just returns those flags actually passed to Pattern.compile.
    if (true) {
        return;
    }
    String baseString;
    Pattern pat;
    // These tests are for compatibility with RI only. Logically we have to
    // return only flags specified during the compilation. For example
    // pat.flags() == 0 when we compile Pattern pat =
    // Pattern.compile("(?i)abc(?-i)"); but the whole expression is compiled
    // in a case insensitive manner. So there is little sense to do calls to
    // flags() now.
    // Compile-only check: no assertion follows this pattern.
    baseString = "(?-i)";
    pat = Pattern.compile(baseString);
    // All assertions below use the JUnit order assertEquals(expected, actual).
    baseString = "(?idmsux)abc(?-i)vg(?-dmu)";
    pat = Pattern.compile(baseString);
    assertEquals(Pattern.DOTALL | Pattern.COMMENTS, pat.flags());
    baseString = "(?idmsux)abc|(?-i)vg|(?-dmu)";
    pat = Pattern.compile(baseString);
    assertEquals(Pattern.DOTALL | Pattern.COMMENTS, pat.flags());
    baseString = "(?is)a((?x)b.)";
    pat = Pattern.compile(baseString);
    assertEquals(Pattern.DOTALL | Pattern.CASE_INSENSITIVE, pat.flags());
    baseString = "(?i)a((?-i))";
    pat = Pattern.compile(baseString);
    assertEquals(Pattern.CASE_INSENSITIVE, pat.flags());
    baseString = "((?i)a)";
    pat = Pattern.compile(baseString);
    assertEquals(0, pat.flags());
    pat = Pattern.compile("(?is)abc");
    assertEquals(Pattern.CASE_INSENSITIVE | Pattern.DOTALL, pat.flags());
}
/*
 * Exercises Pattern.CANON_EQ: patterns and inputs are written with different but
 * canonically related spellings of the same characters (precomposed vs. decomposed),
 * first for Latin letters with combining marks and then for Hangul syllables.
 *
 * Currently disabled: the method returns immediately, so none of the checks below run.
 * They are kept compilable for reference.
 */
public void testCanonEqFlag() {
// icu4c doesn't support CANON_EQ.
if (true) {
return;
}
// for decompositions see
// http://www.unicode.org/Public/4.0-Update/UnicodeData-4.0.0.txt
// http://www.unicode.org/reports/tr15/#Decomposition
String baseString;
String testString;
Pattern pat;
Matcher mat;
// Compile-only checks: no matcher is created for the next three patterns.
baseString = "ab(a*)\\1";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
baseString = "a(abcdf)d";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
baseString = "aabcdfd";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
// \u01E0 -> \u0226\u0304 ->\u0041\u0307\u0304
// \u00CC -> \u0049\u0300
baseString = "\u01E0\u00CCcdb(ac)";
testString = "\u0226\u0304\u0049\u0300cdbac";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
mat = pat.matcher(testString);
assertTrue(mat.matches());
baseString = "\u01E0cdb(a\u00CCc)";
testString = "\u0041\u0307\u0304cdba\u0049\u0300c";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
mat = pat.matcher(testString);
assertTrue(mat.matches());
baseString = "a\u00CC";
testString = "a\u0049\u0300";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
mat = pat.matcher(testString);
assertTrue(mat.matches());
baseString = "\u0226\u0304cdb(ac\u0049\u0300)";
testString = "\u01E0cdbac\u00CC";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
mat = pat.matcher(testString);
assertTrue(mat.matches());
baseString = "cdb(?:\u0041\u0307\u0304\u00CC)";
testString = "cdb\u0226\u0304\u0049\u0300";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
mat = pat.matcher(testString);
assertTrue(mat.matches());
baseString = "\u01E0[a-c]\u0049\u0300cdb(ac)";
testString = "\u01E0b\u00CCcdbac";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
mat = pat.matcher(testString);
assertTrue(mat.matches());
baseString = "\u01E0|\u00CCcdb(ac)";
testString = "\u0041\u0307\u0304";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
mat = pat.matcher(testString);
assertTrue(mat.matches());
baseString = "\u00CC?cdb(ac)*(\u01E0)*[a-c]";
testString = "cdb\u0041\u0307\u0304b";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
mat = pat.matcher(testString);
assertTrue(mat.matches());
// From here on some checks use find() on a longer input instead of matches().
baseString = "a\u0300";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
mat = pat.matcher("a\u00E0a");
assertTrue(mat.find());
baseString = "\u7B20\uF9F8abc";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
mat = pat.matcher("\uF9F8\uF9F8abc");
assertTrue(mat.matches());
// \u01F9 -> \u006E\u0300
// \u00C3 -> \u0041\u0303
baseString = "cdb(?:\u00C3\u006E\u0300)";
testString = "cdb\u0041\u0303\u01F9";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
mat = pat.matcher(testString);
assertTrue(mat.matches());
// \u014C -> \u004F\u0304
// \u0163 -> \u0074\u0327
baseString = "cdb(?:\u0163\u004F\u0304)";
testString = "cdb\u0074\u0327\u014C";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
mat = pat.matcher(testString);
assertTrue(mat.matches());
// \u00E1->a\u0301
// canonical ordering takes place \u0301\u0327 -> \u0327\u0301
baseString = "c\u0327\u0301";
testString = "c\u0301\u0327";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
mat = pat.matcher(testString);
assertTrue(mat.matches());
/*
Hangul decompositions
*/
// \uD4DB->\u1111\u1171\u11B6
// \uD21E->\u1110\u116D\u11B5
// \uD264->\u1110\u1170
// not Hangul:\u0453->\u0433\u0301
// Compile-only checks: no matcher is created for the next three patterns.
baseString = "a\uD4DB\u1111\u1171\u11B6\uD264";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
baseString = "\u0453c\uD4DB";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
baseString = "a\u1110\u116D\u11B5b\uD21Ebc";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
baseString = "\uD4DB\uD21E\u1110\u1170cdb(ac)";
testString = "\u1111\u1171\u11B6\u1110\u116D\u11B5\uD264cdbac";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
mat = pat.matcher(testString);
assertTrue(mat.matches());
baseString = "\uD4DB\uD264cdb(a\uD21Ec)";
testString = "\u1111\u1171\u11B6\u1110\u1170cdba\u1110\u116D\u11B5c";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
mat = pat.matcher(testString);
assertTrue(mat.matches());
baseString = "a\uD4DB";
testString = "a\u1111\u1171\u11B6";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
mat = pat.matcher(testString);
assertTrue(mat.matches());
baseString = "a\uD21E";
testString = "a\u1110\u116D\u11B5";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
mat = pat.matcher(testString);
assertTrue(mat.matches());
baseString = "\u1111\u1171\u11B6cdb(ac\u1110\u116D\u11B5)";
testString = "\uD4DBcdbac\uD21E";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
mat = pat.matcher(testString);
assertTrue(mat.matches());
baseString = "cdb(?:\u1111\u1171\u11B6\uD21E)";
testString = "cdb\uD4DB\u1110\u116D\u11B5";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
mat = pat.matcher(testString);
assertTrue(mat.matches());
baseString = "\uD4DB[a-c]\u1110\u116D\u11B5cdb(ac)";
testString = "\uD4DBb\uD21Ecdbac";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
mat = pat.matcher(testString);
assertTrue(mat.matches());
baseString = "\uD4DB|\u00CCcdb(ac)";
testString = "\u1111\u1171\u11B6";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
mat = pat.matcher(testString);
assertTrue(mat.matches());
// Negative case: the input holds only the first two of the three jamo.
baseString = "\uD4DB|\u00CCcdb(ac)";
testString = "\u1111\u1171";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
mat = pat.matcher(testString);
assertFalse(mat.matches());
baseString = "\u00CC?cdb(ac)*(\uD4DB)*[a-c]";
testString = "cdb\u1111\u1171\u11B6b";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
mat = pat.matcher(testString);
assertTrue(mat.matches());
baseString = "\uD4DB";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
mat = pat.matcher("a\u1111\u1171\u11B6a");
assertTrue(mat.find());
// Negative case: a single leading jamo must not be found inside the precomposed syllable.
baseString = "\u1111";
pat = Pattern.compile(baseString, Pattern.CANON_EQ);
mat = pat.matcher("bcda\uD4DBr");
assertFalse(mat.find());
}
/**
 * Checks the start and end indexes reported after {@code find()} when the pattern is
 * compiled with {@link Pattern#CANON_EQ}.
 *
 * <p>The test is currently disabled: it returns before any assertion executes because
 * icu4c doesn't support CANON_EQ.
 */
public void testIndexesCanonicalEq() {
    // icu4c doesn't support CANON_EQ.
    if (true) {
        return;
    }
    String baseString;
    String testString;
    Pattern pat;
    Matcher mat;

    // assertEquals takes (expected, actual); the expected index is passed first so that
    // a failure message labels the two values correctly.
    baseString = "\uD4DB";
    pat = Pattern.compile(baseString, Pattern.CANON_EQ);
    mat = pat.matcher("bcda\u1111\u1171\u11B6awr");
    assertTrue(mat.find());
    assertEquals(4, mat.start());
    assertEquals(7, mat.end());

    baseString = "\uD4DB\u1111\u1171\u11B6";
    pat = Pattern.compile(baseString, Pattern.CANON_EQ);
    mat = pat.matcher("bcda\u1111\u1171\u11B6\uD4DBawr");
    assertTrue(mat.find());
    assertEquals(4, mat.start());
    assertEquals(8, mat.end());

    baseString = "\uD4DB\uD21E\u1110\u1170";
    testString = "abcabc\u1111\u1171\u11B6\u1110\u116D\u11B5\uD264cdbac";
    pat = Pattern.compile(baseString, Pattern.CANON_EQ);
    mat = pat.matcher(testString);
    assertTrue(mat.find());
    assertEquals(6, mat.start());
    assertEquals(13, mat.end());
}
/**
 * Exercises {@link Pattern#CANON_EQ} with supplementary (surrogate-pair) characters by
 * compiling each form of a character sequence as a pattern and matching it against the
 * other forms.
 *
 * <p>The test is currently disabled: it returns before any assertion executes because
 * icu4c doesn't support CANON_EQ.
 */
public void testCanonEqFlagWithSupplementaryCharacters() {
    // icu4c doesn't support CANON_EQ.
    if (true) {
        return;
    }

    // Decomposition chain used below, written as code points:
    //   U+1D1BF -> U+1D1BB U+1D16F -> U+1D1B9 U+1D165 U+1D16F
    // The string literals spell each code point as a UTF-16 surrogate pair.
    final String composed = "abc\uD834\uDDBFef";
    final String partlyDecomposed = "abc\uD834\uDDBB\uD834\uDD6Fef";
    final String fullyDecomposed = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef";

    // Each row: element 0 is the pattern, the remaining elements are inputs that the
    // pattern is asserted to match in full.
    final String[][] cases = {
        { composed, fullyDecomposed, partlyDecomposed },
        { partlyDecomposed, composed, fullyDecomposed },
        { fullyDecomposed, composed, partlyDecomposed },
        // Test supplementary characters with no decomposition
        { "a\uD9A0\uDE8Ebc\uD834\uDDBB\uD834\uDD6Fe\uDE8Ef",
          "a\uD9A0\uDE8Ebc\uD834\uDDBFe\uDE8Ef" },
    };

    for (String[] row : cases) {
        Pattern compiled = Pattern.compile(row[0], Pattern.CANON_EQ);
        for (int k = 1; k < row.length; k++) {
            Matcher matcher = compiled.matcher(row[k]);
            assertTrue(matcher.matches());
        }
    }
}
/**
 * For every entry of {@code testPatterns}, compiles the pattern and asserts that the
 * predicate returned by {@code asPredicate()} accepts each string in the row of
 * {@code acceptedInputs} with the same index.
 */
public void testAsPredicate() {
    // Rows are paired with testPatterns by index; iteration is bounded by
    // testPatterns.length.
    final String[][] acceptedInputs = {
        { "abb", "ababb", "abababbababb", "abababbababbabababbbbbabb" },
        { "213567", "12324567", "1234567", "213213567",
          "21312312312567", "444444567" },
        { "abcdaab", "aab", "abaab", "cdaab", "acbdadcbaab" },
        { "213234567", "3458", "0987654", "7689546432", "0398576",
          "98432", "5" },
        {
            "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
            "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
                    + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" },
        { "ababbaAabababblice", "ababbaAliceababab", "ababbAabliceaaa",
          "abbbAbbbliceaaa", "Alice" },
        { "a123", "bnxnvgds156", "for", "while", "if", "struct" },
        { "xy" }, { "xy" }, { "xcy" }
    };

    for (int patternIndex = 0; patternIndex < testPatterns.length; patternIndex++) {
        final Pattern compiled = Pattern.compile(testPatterns[patternIndex]);
        for (String candidate : acceptedInputs[patternIndex]) {
            boolean accepted = compiled.asPredicate().test(candidate);
            assertTrue(accepted);
        }
    }
}
/**
 * Checks how many elements {@link Pattern#splitAsStream(CharSequence)} produces for a
 * single-character pattern and for the empty pattern, on both empty and non-empty input.
 * Only the element counts are verified, not the element contents.
 */
public void testSplitAsStream() {
    Pattern pat;
    String[] s;

    // assertEquals takes (expected, actual); the expected count is passed first so that
    // a failure message labels the two values correctly.
    pat = Pattern.compile("b");
    s = pat.splitAsStream("abccbadfebb").toArray(String[]::new);
    assertEquals(3, s.length);

    pat = Pattern.compile("b");
    s = pat.splitAsStream("").toArray(String[]::new);
    assertEquals(0, s.length);

    pat = Pattern.compile("");
    s = pat.splitAsStream("").toArray(String[]::new);
    assertEquals(0, s.length);

    pat = Pattern.compile("");
    s = pat.splitAsStream("abccbadfe").toArray(String[]::new);
    assertEquals(9, s.length);
}
}