blob: c440ebe1bae5b7cd3f478250676fdc7e6760c0ca [file] [log] [blame]
/*
* Copyright (C) 2010 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package libcore.java.lang;
import android.icu.lang.UCharacter;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.ReadOnlyBufferException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.ModifiedUtf8;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Locale;
import junit.framework.TestCase;
public class StringTest extends TestCase {
public void testIsEmpty() {
assertTrue("".isEmpty());
assertFalse("x".isEmpty());
}
// The evil decoder keeps hold of the CharBuffer it wrote to.
private static final class EvilCharsetDecoder extends CharsetDecoder {
private static char[] chars;
public EvilCharsetDecoder(Charset cs) {
super(cs, 1.0f, 1.0f);
}
protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
chars = out.array();
int inLength = in.remaining();
for (int i = 0; i < inLength; ++i) {
in.put((byte) 'X');
out.put('Y');
}
return CoderResult.UNDERFLOW;
}
public static void corrupt() {
for (int i = 0; i < chars.length; ++i) {
chars[i] = '$';
}
}
}
// The evil encoder tries to write to the CharBuffer it was given to
// read from.
private static final class EvilCharsetEncoder extends CharsetEncoder {
public EvilCharsetEncoder(Charset cs) {
super(cs, 1.0f, 1.0f);
}
protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
int inLength = in.remaining();
for (int i = 0; i < inLength; ++i) {
in.put('x');
out.put((byte) 'y');
}
return CoderResult.UNDERFLOW;
}
}
private static final Charset EVIL_CHARSET = new Charset("evil", null) {
public boolean contains(Charset charset) { return false; }
public CharsetEncoder newEncoder() { return new EvilCharsetEncoder(this); }
public CharsetDecoder newDecoder() { return new EvilCharsetDecoder(this); }
};
public void testGetBytes_MaliciousCharset() {
try {
String s = "hi";
// Check that our encoder can't write to the input CharBuffer
// it was given.
s.getBytes(EVIL_CHARSET);
fail(); // We shouldn't have got here!
} catch (ReadOnlyBufferException expected) {
// We caught you trying to be naughty!
}
}
public void testString_BII() throws Exception {
byte[] bytes = "xa\u0666bx".getBytes("UTF-8");
assertEquals("a\u0666b", new String(bytes, 1, bytes.length - 2));
}
public void testString_BIIString() throws Exception {
byte[] bytes = "xa\u0666bx".getBytes("UTF-8");
assertEquals("a\u0666b", new String(bytes, 1, bytes.length - 2, "UTF-8"));
}
public void testString_BIICharset() throws Exception {
byte[] bytes = "xa\u0666bx".getBytes("UTF-8");
assertEquals("a\u0666b", new String(bytes, 1, bytes.length - 2, Charset.forName("UTF-8")));
}
public void testString_BCharset() throws Exception {
byte[] bytes = "a\u0666b".getBytes("UTF-8");
assertEquals("a\u0666b", new String(bytes, Charset.forName("UTF-8")));
}
public void testStringFromCharset_MaliciousCharset() {
Charset cs = EVIL_CHARSET;
byte[] bytes = new byte[] {(byte) 'h', (byte) 'i'};
final String result = new String(bytes, cs);
assertEquals("YY", result); // (Our decoder always outputs 'Y's.)
// Check that even if the decoder messes with the output CharBuffer
// after we've created a string from it, it doesn't affect the string.
EvilCharsetDecoder.corrupt();
assertEquals("YY", result);
}
public void test_getBytes_bad() throws Exception {
// Check that we use '?' as the replacement byte for invalid characters.
assertEquals("[97, 63, 98]", Arrays.toString("a\u0666b".getBytes("US-ASCII")));
assertEquals("[97, 63, 98]", Arrays.toString("a\u0666b".getBytes(Charset.forName("US-ASCII"))));
}
public void test_getBytes_UTF_8() {
// We have a fast path implementation of String.getBytes for UTF-8.
Charset cs = Charset.forName("UTF-8");
// Test the empty string.
assertEquals("[]", Arrays.toString("".getBytes(cs)));
// Test one-byte characters.
assertEquals("[0]", Arrays.toString("\u0000".getBytes(cs)));
assertEquals("[127]", Arrays.toString("\u007f".getBytes(cs)));
assertEquals("[104, 105]", Arrays.toString("hi".getBytes(cs)));
// Test two-byte characters.
assertEquals("[-62, -128]", Arrays.toString("\u0080".getBytes(cs)));
assertEquals("[-39, -90]", Arrays.toString("\u0666".getBytes(cs)));
assertEquals("[-33, -65]", Arrays.toString("\u07ff".getBytes(cs)));
assertEquals("[104, -39, -90, 105]", Arrays.toString("h\u0666i".getBytes(cs)));
// Test three-byte characters.
assertEquals("[-32, -96, -128]", Arrays.toString("\u0800".getBytes(cs)));
assertEquals("[-31, -120, -76]", Arrays.toString("\u1234".getBytes(cs)));
assertEquals("[-17, -65, -65]", Arrays.toString("\uffff".getBytes(cs)));
assertEquals("[104, -31, -120, -76, 105]", Arrays.toString("h\u1234i".getBytes(cs)));
// Test supplementary characters.
// Minimum supplementary character: U+10000
assertEquals("[-16, -112, -128, -128]", Arrays.toString("\ud800\udc00".getBytes(cs)));
// Random supplementary character: U+10381 Ugaritic letter beta
assertEquals("[-16, -112, -114, -127]", Arrays.toString("\ud800\udf81".getBytes(cs)));
// Maximum supplementary character: U+10FFFF
assertEquals("[-12, -113, -65, -65]", Arrays.toString("\udbff\udfff".getBytes(cs)));
// A high surrogate at end of string is an error replaced with '?'.
assertEquals("[104, 63]", Arrays.toString("h\ud800".getBytes(cs)));
// A high surrogate not followed by a low surrogate is an error replaced with '?'.
assertEquals("[104, 63, 105]", Arrays.toString("h\ud800i".getBytes(cs)));
}
public void test_new_String_bad() throws Exception {
// Check that we use U+FFFD as the replacement string for invalid bytes.
assertEquals("a\ufffdb", new String(new byte[] { 97, -2, 98 }, "US-ASCII"));
assertEquals("a\ufffdb", new String(new byte[] { 97, -2, 98 }, Charset.forName("US-ASCII")));
}
/**
* Test that strings interned manually and then later loaded as literals
* maintain reference equality. http://b/3098960
*/
public void testInternBeforeLiteralIsLoaded() throws Exception{
String programmatic = Arrays.asList("5058", "9962", "1563", "5744").toString().intern();
String literal = (String) Class.forName("libcore.java.lang.StringTest$HasLiteral")
.getDeclaredField("literal").get(null);
assertEquals(System.identityHashCode(programmatic), System.identityHashCode(literal));
assertSame(programmatic, literal);
}
static class HasLiteral {
static String literal = "[5058, 9962, 1563, 5744]";
}
private static final String COMBINING_DOT_ABOVE = "\u0307";
private static final String LATIN_CAPITAL_I = "I";
private static final String LATIN_CAPITAL_I_WITH_DOT_ABOVE = "\u0130";
private static final String LATIN_SMALL_I = "i";
private static final String LATIN_SMALL_DOTLESS_I = "\u0131";
private static final String[] LATIN_I_VARIANTS = {
LATIN_SMALL_I,
LATIN_SMALL_DOTLESS_I,
LATIN_CAPITAL_I,
LATIN_CAPITAL_I_WITH_DOT_ABOVE,
};
public void testCaseMapping_tr_TR() {
Locale tr_TR = new Locale("tr", "TR");
assertEquals(LATIN_SMALL_I, LATIN_SMALL_I.toLowerCase(tr_TR));
assertEquals(LATIN_SMALL_I, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toLowerCase(tr_TR));
assertEquals(LATIN_SMALL_DOTLESS_I, LATIN_SMALL_DOTLESS_I.toLowerCase(tr_TR));
assertEquals(LATIN_CAPITAL_I, LATIN_CAPITAL_I.toUpperCase(tr_TR));
assertEquals(LATIN_CAPITAL_I_WITH_DOT_ABOVE, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toUpperCase(tr_TR));
assertEquals(LATIN_CAPITAL_I_WITH_DOT_ABOVE, LATIN_SMALL_I.toUpperCase(tr_TR));
assertEquals(LATIN_CAPITAL_I, LATIN_SMALL_DOTLESS_I.toUpperCase(tr_TR));
assertEquals(LATIN_SMALL_DOTLESS_I, LATIN_CAPITAL_I.toLowerCase(tr_TR));
}
public void testCaseMapping_en_US() {
Locale en_US = new Locale("en", "US");
assertEquals(LATIN_CAPITAL_I, LATIN_SMALL_I.toUpperCase(en_US));
assertEquals(LATIN_CAPITAL_I, LATIN_CAPITAL_I.toUpperCase(en_US));
assertEquals(LATIN_CAPITAL_I_WITH_DOT_ABOVE, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toUpperCase(en_US));
assertEquals(LATIN_SMALL_I, LATIN_SMALL_I.toLowerCase(en_US));
assertEquals(LATIN_SMALL_I, LATIN_CAPITAL_I.toLowerCase(en_US));
assertEquals(LATIN_SMALL_DOTLESS_I, LATIN_SMALL_DOTLESS_I.toLowerCase(en_US));
assertEquals(LATIN_CAPITAL_I, LATIN_SMALL_DOTLESS_I.toUpperCase(en_US));
// http://b/3325799: the RI fails this because it's using an obsolete version of the Unicode rules.
// Android correctly preserves canonical equivalence. (See the separate test for tr_TR.)
assertEquals(LATIN_SMALL_I + COMBINING_DOT_ABOVE, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toLowerCase(en_US));
}
public void testCaseMapping_el() {
Locale el_GR = new Locale("el", "GR");
assertEquals("ΟΔΟΣ ΟΔΟΣ ΣΟ ΣΟ OΣ ΟΣ Σ ΕΞ", "ΟΔΌΣ Οδός Σο ΣΟ oΣ ΟΣ σ ἕξ".toUpperCase(el_GR));
assertEquals("ΟΔΟΣ ΟΔΟΣ ΣΟ ΣΟ OΣ ΟΣ Σ ΕΞ", "ΟΔΌΣ Οδός Σο ΣΟ oΣ ΟΣ σ ἕξ".toUpperCase(el_GR));
assertEquals("ΟΔΟΣ ΟΔΟΣ ΣΟ ΣΟ OΣ ΟΣ Σ ΕΞ", "ΟΔΌΣ Οδός Σο ΣΟ oΣ ΟΣ σ ἕξ".toUpperCase(el_GR));
Locale en_US = new Locale("en", "US");
assertEquals("ΟΔΌΣ ΟΔΌΣ ΣΟ ΣΟ OΣ ΟΣ Σ ἝΞ", "ΟΔΌΣ Οδός Σο ΣΟ oΣ ΟΣ σ ἕξ".toUpperCase(en_US));
}
public void testEqualsIgnoreCase_tr_TR() {
testEqualsIgnoreCase(new Locale("tr", "TR"));
}
public void testEqualsIgnoreCase_en_US() {
testEqualsIgnoreCase(new Locale("en", "US"));
}
/**
* String.equalsIgnoreCase should not depend on the locale.
*/
private void testEqualsIgnoreCase(Locale locale) {
Locale defaultLocale = Locale.getDefault();
Locale.setDefault(locale);
try {
for (String a : LATIN_I_VARIANTS) {
for (String b : LATIN_I_VARIANTS) {
if (!a.equalsIgnoreCase(b)) {
fail("Expected " + a + " to equal " + b + " in " + locale);
}
}
}
} finally {
Locale.setDefault(defaultLocale);
}
}
public void testRegionMatches_ignoreCase_en_US() {
testRegionMatches_ignoreCase(new Locale("en", "US"));
}
public void testRegionMatches_ignoreCase_tr_TR() {
testRegionMatches_ignoreCase(new Locale("tr", "TR"));
}
private void testRegionMatches_ignoreCase(Locale locale) {
Locale defaultLocale = Locale.getDefault();
Locale.setDefault(locale);
try {
for (String a : LATIN_I_VARIANTS) {
for (String b : LATIN_I_VARIANTS) {
if (!a.regionMatches(true, 0, b, 0, b.length())) {
fail("Expected " + a + " to equal " + b + " in " + locale);
}
}
}
} finally {
Locale.setDefault(defaultLocale);
}
}
// http://code.google.com/p/android/issues/detail?id=15266
public void test_replaceAll() throws Exception {
assertEquals("project_Id", "projectId".replaceAll("(?!^)(\\p{Upper})(?!$)", "_$1"));
}
// Test that CharsetDecoder and fast-path decoder are consistent when handling ill-formed
// sequence. http://b/69599767
// This test was originally created for the bug
// https://code.google.com/p/android/issues/detail?id=23831
public void test_69599767() throws Exception {
byte[] bytes = { (byte) 0xf5, (byte) 0xa9, (byte) 0xea, (byte) 0x21 };
String expected = "\ufffd\ufffd\ufffd\u0021";
// Since we use ICU4C for CharsetDecoder...
CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder();
decoder.onMalformedInput(CodingErrorAction.REPLACE);
assertEquals(expected, decoder.decode(ByteBuffer.wrap(bytes)).toString());
// Our fast-path code in String should behave the same...
assertEquals(expected, new String(bytes, "UTF-8"));
}
public void testFastPathString_wellFormedUtf8Sequence() throws Exception {
// U+0000 null
assertFastPathUtf8DecodedEquals("\u0000", "00");
// U+0031 ASCII char '1'
assertFastPathUtf8DecodedEquals("1", "31");
// U+007f
assertFastPathUtf8DecodedEquals("\u007f", "7f");
// 2-byte UTF-8 sequence
assertFastPathUtf8DecodedEquals("\u0080", "c2 80");
assertFastPathUtf8DecodedEquals("\u07ff", "df bf");
// 3-byte UTF-8 sequence
assertFastPathUtf8DecodedEquals("\u0800", "e0 a0 80");
assertFastPathUtf8DecodedEquals("\ud7ff", "ed 9f bf"); // last code point before surrogate
assertFastPathUtf8DecodedEquals("\ue000", "ee 80 80"); // first code point after surrogate
assertFastPathUtf8DecodedEquals("\uffff", "ef bf bf");
// U+10000 The minimum value of a Unicode supplementary code point
assertEquals("\ud800\udc00", String.valueOf(Character.toChars(0x10000)));
assertFastPathUtf8DecodedEquals("\ud800\udc00", "f0 90 80 80");
// U+10ffff The maximum value of a Unicode code point
assertEquals("\udbff\udfff", String.valueOf(Character.toChars(0x10ffff)));
assertFastPathUtf8DecodedEquals("\udbff\udfff", "f4 8f bf bf");
// Null in the middle
assertFastPathUtf8DecodedEquals("1\u00002\u07ff", "31 00 32 df bf");
assertFastPathUtf8DecodedEquals("\u0800\udbff\udfff\uffff1\u0080",
"e0 a0 80 f4 8f bf bf ef bf bf 31 c2 80");
// Check UTF8 sequences of all code points is decoded correctly.
// Validate the decoder using byte sequence generated by UTF-8 encoder.
for (int codePoint = Character.MIN_CODE_POINT;
codePoint <= Character.MAX_CODE_POINT;
codePoint++) {
if (codePoint < Character.MIN_SURROGATE || codePoint > Character.MAX_SURROGATE) {
String expected = UCharacter.toString(codePoint);
// Android platform default is always UTF-8.
byte[] utf8Bytes = expected.getBytes();
assertEquals(expected, new String(utf8Bytes));
}
}
}
public void testFastPathString_illFormedUtf8Sequence() throws Exception {
// Overlong Sequence of ASCII char '1'
assertFastPathUtf8DecodedEquals("\ufffd\ufffd", "c0 b1");
assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd", "e0 80 b1");
assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd\ufffd", "f0 80 80 b1");
assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd\ufffd\ufffd", "f8 80 80 80 b1");
assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd", "fc 80 80 80 80 b1");
// Overlong null \u0000
// "c0 80" is a Modified UTF-8 sequence representing \u0000, but illegal in UTF-8.
assertEquals("\u0000", decodeModifiedUTF8("c0 80"));
assertFastPathUtf8DecodedEquals("\ufffd\ufffd", "c0 80");
// Overlong BMP char U+0080. The correct UTF-8 encoded form of U+0080 is 2-byte "c2 80".
// The overlong form can be obtained by filling 0x80 into 1110xxxx 10xxxxxx 10xxxxxx
// == 1110000 10000010 10000000. (hex form e0 82 80)
assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd", "e0 82 80");
// Overlong Supplementary Characters U+10000.
// The correct UTF-8 encoded form of U+10000 is 4-byte "f0 90 80 80".
// The overlong form can be obtained by filling 0x10000 into
// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
// == 1110000 10000000 10010000 10000000 10000000. (hex form f8 80 90 80 80)
assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd\ufffd\ufffd", "f8 80 90 80 80");
// Single surrogate in CESU-8 encoding
// A CESU-8 sequence, but illegal in UTF-8.
assertEquals("\ud800", decodeCESU8("ed a0 80"));
assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd", "ed a0 80");
// Surrogate pair in CESU-8 encoding. The value is bytes U+10000
// Assert the bytes are valid CESU-8 sequence before decoding using UTF-8
String surrogatePair = decodeCESU8("ed a0 80 ed b0 80");
assertEquals("\ud800\udc00", surrogatePair);
assertEquals(0x10000, Character.codePointAt(surrogatePair.toCharArray(), 0));
assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd",
"ed a0 80 ed b0 80");
// Illegal first-byte
assertFastPathUtf8DecodedEquals("\ufffd", "c0");
assertFastPathUtf8DecodedEquals("\ufffd", "80");
// Maximal valid subpart. byte 0x31 should be decoded into ASCII char '1', not part of
// ill-formed byte sequence
assertFastPathUtf8DecodedEquals("\ufffd1", "c2 31");
assertFastPathUtf8DecodedEquals("\ufffd1", "e1 31");
assertFastPathUtf8DecodedEquals("\ufffd1", "e1 80 31");
assertFastPathUtf8DecodedEquals("\ufffd1", "f1 31");
assertFastPathUtf8DecodedEquals("\ufffd1", "f1 80 31");
assertFastPathUtf8DecodedEquals("\ufffd1", "f1 80 80 31");;
// Ill-formed sequence in the end of stream
assertFastPathUtf8DecodedEquals("1\ufffd", "31 c2");
assertFastPathUtf8DecodedEquals("1\ufffd", "31 e1");
assertFastPathUtf8DecodedEquals("1\ufffd", "31 e1 80");
assertFastPathUtf8DecodedEquals("1\ufffd", "31 f1");
assertFastPathUtf8DecodedEquals("1\ufffd", "31 f1 80");
assertFastPathUtf8DecodedEquals("1\ufffd", "31 f1 80 80");
// Test lower and upper bound of first trail byte when leading byte is e0/ed/f0/f4
// Valid range of trail byte is A0..BF.
assertFastPathUtf8DecodedEquals("1\ufffd\ufffd", "31 e0 9f");
assertFastPathUtf8DecodedEquals("1\ufffd\ufffd", "31 e0 c0");
// Valid range of trail byte is 80..9F.
assertFastPathUtf8DecodedEquals("1\ufffd\u007f", "31 ed 7f");
assertFastPathUtf8DecodedEquals("1\ufffd\ufffd", "31 ed a0");
// Valid range of trail byte is 90..BF.
assertFastPathUtf8DecodedEquals("1\ufffd\ufffd", "31 f0 8f");
assertFastPathUtf8DecodedEquals("1\ufffd\ufffd", "31 f0 c0");
// Valid range of trail byte is 80..8F.
assertFastPathUtf8DecodedEquals("1\ufffd\u007f", "31 f4 7f");
assertFastPathUtf8DecodedEquals("1\ufffd\ufffd", "31 f4 90");
// More ill-formed sequences
assertFastPathUtf8DecodedEquals("\ufffd\ufffd1", "f1 80 80 e1 80 31");
assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd1", "f1 80 80 c0 b1 31");
assertFastPathUtf8DecodedEquals("\ufffd\ufffd\ufffd1", "f1 80 80 ed a0 31");
assertFastPathUtf8DecodedEquals("A\ufffd\ufffdA\ufffdA", "41 C0 AF 41 F4 80 80 41");
}
private void assertFastPathUtf8DecodedEquals(String expected, String hexString)
throws Exception {
String actual = new String(hexStringtoBytes(hexString));
assertEquals("Fast-path UTF-8 decoder decodes sequence [" + hexString
+ "] into unexpected String",
expected, actual);
// Since we use ICU4C for CharsetDecoder,
// check UTF-8 CharsetDecoder has the same result as the fast-path decoder
CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
.onMalformedInput(CodingErrorAction.REPLACE);
assertEquals("Fast-path UTF-8 decoder and UTF-8 CharsetDecoder has a different conversion"
+ " result for sequence [" + hexString + "]",
decoder.decode(ByteBuffer.wrap(hexStringtoBytes(hexString))).toString(), actual);
}
private static String decodeCESU8(String hexString) throws IOException {
CharsetDecoder cesu8Decoder = Charset.forName("CESU-8").newDecoder();
return cesu8Decoder.decode(ByteBuffer.wrap(hexStringtoBytes(hexString))).toString();
}
private static String decodeModifiedUTF8(String hexString) throws IOException {
byte[] bytes = hexStringtoBytes(hexString);
// DataInputStream stores length as 2-byte short. Check the length before decoding
if (bytes.length > 0xffff) {
throw new IllegalArgumentException("Modified UTF-8 bytes are too long.");
}
byte[] buf = new byte[bytes.length + 2];
buf[0] = (byte)(bytes.length >>> 8);
buf[1] = (byte) bytes.length;
System.arraycopy(bytes, 0, buf, 2, bytes.length);
DataInputStream dis = new DataInputStream(new ByteArrayInputStream(buf));
return dis.readUTF();
}
private static byte[] hexStringtoBytes(String input) {
String[] parts = input.split(" ");
byte[] bytes = new byte[parts.length];
for (int i = 0; i < parts.length; i++) {
int val = Integer.parseInt(parts[i], 16);
if (val < 0 || val > 255) {
throw new IllegalArgumentException();
}
bytes[i] = (byte) (0xff & val);
}
return bytes;
}
// https://code.google.com/p/android/issues/detail?id=55129
public void test_55129() throws Exception {
assertEquals("-h-e-l-l-o- -w-o-r-l-d-", "hello world".replace("", "-"));
assertEquals("-w-o-r-l-d-", "hello world".substring(6).replace("", "-"));
assertEquals("-*-w-*-o-*-r-*-l-*-d-*-", "hello world".substring(6).replace("", "-*-"));
// Replace on an empty string with an empty target should insert the pattern
// precisely once.
assertEquals("", "".replace("", ""));
assertEquals("food", "".replace("", "food"));
}
public void test_replace() {
// Replace on an empty string is a no-op.
assertEquals("", "".replace("foo", "bar"));
// Replace on a string which doesn't contain the target sequence is a no-op.
assertEquals("baz", "baz".replace("foo", "bar"));
// Test that we iterate forward on the string.
assertEquals("mmmba", "bababa".replace("baba", "mmm"));
// Test replacements at the end of the string.
assertEquals("foodie", "foolish".replace("lish", "die"));
// Test a string that has multiple replacements.
assertEquals("hahahaha", "kkkk".replace("k", "ha"));
}
public void test_String_getBytes() throws Exception {
// http://b/11571917
assertEquals("[-126, -96]", Arrays.toString("あ".getBytes("Shift_JIS")));
assertEquals("[-126, -87]", Arrays.toString("か".getBytes("Shift_JIS")));
assertEquals("[-105, 67]", Arrays.toString("佑".getBytes("Shift_JIS")));
assertEquals("[36]", Arrays.toString("$".getBytes("Shift_JIS")));
assertEquals("[-29, -127, -117]", Arrays.toString("か".getBytes("UTF-8")));
// http://b/11639117
assertEquals("[-79, -72, -70, -48]", Arrays.toString("구분".getBytes("EUC-KR")));
// https://code.google.com/p/android/issues/detail?id=63188
assertEquals("[-77, -10, -64, -76, -63, -53]", Arrays.toString("出来了".getBytes("gbk")));
assertEquals("[-77, -10, -64, -76]", Arrays.toString("出来".getBytes("gbk")));
assertEquals("[-77, -10]", Arrays.toString("出".getBytes("gbk")));
}
public void test_compareTo() throws Exception {
// For strings where a character differs, the result is
// the difference between the characters.
assertEquals(-1, "a".compareTo("b"));
assertEquals(-2, "a".compareTo("c"));
assertEquals(1, "b".compareTo("a"));
assertEquals(2, "c".compareTo("a"));
// For strings where the characters match up to the length of the shorter,
// the result is the difference between the strings' lengths.
assertEquals(0, "a".compareTo("a"));
assertEquals(-1, "a".compareTo("aa"));
assertEquals(-1, "a".compareTo("az"));
assertEquals(-2, "a".compareTo("aaa"));
assertEquals(-2, "a".compareTo("azz"));
assertEquals(-3, "a".compareTo("aaaa"));
assertEquals(-3, "a".compareTo("azzz"));
assertEquals(0, "a".compareTo("a"));
assertEquals(1, "aa".compareTo("a"));
assertEquals(1, "az".compareTo("a"));
assertEquals(2, "aaa".compareTo("a"));
assertEquals(2, "azz".compareTo("a"));
assertEquals(3, "aaaa".compareTo("a"));
assertEquals(3, "azzz".compareTo("a"));
}
public void test_compareToIgnoreCase() throws Exception {
// For strings where a character differs, the result is
// the difference between the characters.
assertEquals(-1, "a".compareToIgnoreCase("b"));
assertEquals(-1, "a".compareToIgnoreCase("B"));
assertEquals(-2, "a".compareToIgnoreCase("c"));
assertEquals(-2, "a".compareToIgnoreCase("C"));
assertEquals(1, "b".compareToIgnoreCase("a"));
assertEquals(1, "B".compareToIgnoreCase("a"));
assertEquals(2, "c".compareToIgnoreCase("a"));
assertEquals(2, "C".compareToIgnoreCase("a"));
// For strings where the characters match up to the length of the shorter,
// the result is the difference between the strings' lengths.
assertEquals(0, "a".compareToIgnoreCase("a"));
assertEquals(0, "a".compareToIgnoreCase("A"));
assertEquals(0, "A".compareToIgnoreCase("a"));
assertEquals(0, "A".compareToIgnoreCase("A"));
assertEquals(-1, "a".compareToIgnoreCase("aa"));
assertEquals(-1, "a".compareToIgnoreCase("aA"));
assertEquals(-1, "a".compareToIgnoreCase("Aa"));
assertEquals(-1, "a".compareToIgnoreCase("az"));
assertEquals(-1, "a".compareToIgnoreCase("aZ"));
assertEquals(-2, "a".compareToIgnoreCase("aaa"));
assertEquals(-2, "a".compareToIgnoreCase("AAA"));
assertEquals(-2, "a".compareToIgnoreCase("azz"));
assertEquals(-2, "a".compareToIgnoreCase("AZZ"));
assertEquals(-3, "a".compareToIgnoreCase("aaaa"));
assertEquals(-3, "a".compareToIgnoreCase("AAAA"));
assertEquals(-3, "a".compareToIgnoreCase("azzz"));
assertEquals(-3, "a".compareToIgnoreCase("AZZZ"));
assertEquals(1, "aa".compareToIgnoreCase("a"));
assertEquals(1, "aA".compareToIgnoreCase("a"));
assertEquals(1, "Aa".compareToIgnoreCase("a"));
assertEquals(1, "az".compareToIgnoreCase("a"));
assertEquals(2, "aaa".compareToIgnoreCase("a"));
assertEquals(2, "azz".compareToIgnoreCase("a"));
assertEquals(3, "aaaa".compareToIgnoreCase("a"));
assertEquals(3, "azzz".compareToIgnoreCase("a"));
}
// http://b/25943996
public void testSplit_trailingSeparators() {
String[] splits = "test\0message\0\0\0\0\0\0".split("\0", -1);
assertEquals("test", splits[0]);
assertEquals("message", splits[1]);
assertEquals("", splits[2]);
assertEquals("", splits[3]);
assertEquals("", splits[4]);
assertEquals("", splits[5]);
assertEquals("", splits[6]);
assertEquals("", splits[7]);
}
// http://b/63745717
// A buffer overflow bug was found in ICU4C. A native crash occurs only when ASAN is enabled.
public void testSplit_lookBehind() {
String string = "a";
String[] words = string.split("(?<!(^|[A-Z]))(?=[A-Z])|(?<!^)(?=[A-Z][a-z])| |_|-");
assertEquals(1, words.length);
assertEquals(string, words[0]);
}
// http://b/26126818
public void testCodePointCount() {
String hello = "Hello, fools";
assertEquals(5, hello.codePointCount(0, 5));
assertEquals(7, hello.codePointCount(5, 12));
assertEquals(2, hello.codePointCount(10, 12));
}
// http://b/26444984
public void testGetCharsOverflow() {
int srcBegin = Integer.MAX_VALUE; //2147483647
int srcEnd = srcBegin + 10; //-2147483639
try {
// The output array size must be larger than |srcEnd - srcBegin|.
"yes".getChars(srcBegin, srcEnd, new char[256], 0);
fail();
} catch (StringIndexOutOfBoundsException expected) {
}
}
// http://b/28998511
public void testGetCharsBoundsChecks() {
// This is the explicit case from the bug: dstBegin == srcEnd - srcBegin
assertGetCharsThrowsAIOOBException("abcd", 0, 4, new char[0], -4);
// Some valid cases.
char[] dst = new char[1];
"abcd".getChars(0, 1, dst, 0);
assertEquals('a', dst[0]);
"abcd".getChars(3, 4, dst, 0);
assertEquals('d', dst[0]);
dst = new char[4];
"abcd".getChars(0, 4, dst, 0);
assertTrue(Arrays.equals("abcd".toCharArray(), dst));
// Zero length src.
"abcd".getChars(0, 0, new char[0], 0); // dstBegin == 0 is ok if copying zero chars
"abcd".getChars(0, 0, new char[1], 1); // dstBegin == 1 is ok if copying zero chars
"".getChars(0, 0, new char[0], 0);
"abcd".getChars(1, 1, new char[1], 0);
"abcd".getChars(1, 1, new char[1], 1);
// Valid src args, invalid dst args.
assertGetCharsThrowsAIOOBException("abcd", 3, 4, new char[1], 1); // Out of range dstBegin
assertGetCharsThrowsAIOOBException("abcd", 0, 4, new char[3], 0); // Small dst
assertGetCharsThrowsAIOOBException("abcd", 0, 4, new char[4], -1); // Negative dstBegin
// dstBegin + (srcEnd - srcBegin) -> integer overflow OR dstBegin >= dst.length
assertGetCharsThrowsAIOOBException("abcd", 0, 4, new char[4], Integer.MAX_VALUE - 1);
// Invalid src args, valid dst args.
assertGetCharsThrowsSIOOBException("abcd", 2, 1, new char[4], 0); // srcBegin > srcEnd
assertGetCharsThrowsSIOOBException("abcd", -1, 3, new char[4], 0); // Negative srcBegin
assertGetCharsThrowsSIOOBException("abcd", 0, 5, new char[4], 0); // Out of range srcEnd
assertGetCharsThrowsSIOOBException("abcd", 0, -1, new char[4], 0); // Negative srcEnd
// Valid src args, invalid dst args.
assertGetCharsThrowsAIOOBException("abcd", 0, 4, new char[4], 1); // Bad dstBegin
// Zero length src copy, invalid dst args.
assertGetCharsThrowsAIOOBException("abcd", 0, 0, new char[4], -1); // Negative dstBegin
assertGetCharsThrowsAIOOBException("abcd", 0, 0, new char[0], 1); // Out of range dstBegin
assertGetCharsThrowsAIOOBException("abcd", 0, 0, new char[1], 2); // Out of range dstBegin
assertGetCharsThrowsAIOOBException("abcd", 0, 0, new char[4], 5); // Out of range dstBegin
}
private static void assertGetCharsThrowsAIOOBException(String s, int srcBegin, int srcEnd,
char[] dst, int dstBegin) {
try {
s.getChars(srcBegin, srcEnd, dst, dstBegin);
fail();
} catch (ArrayIndexOutOfBoundsException expected) {
}
}
private static void assertGetCharsThrowsSIOOBException(String s, int srcBegin, int srcEnd,
char[] dst, int dstBegin) {
try {
s.getChars(srcBegin, srcEnd, dst, dstBegin);
fail();
} catch (StringIndexOutOfBoundsException expected) {
}
}
public void testChars() {
String s = "Hello\n\tworld";
int[] expected = new int[s.length()];
for (int i = 0; i < s.length(); ++i) {
expected[i] = (int) s.charAt(i);
}
assertTrue(Arrays.equals(expected, s.chars().toArray()));
// Surrogate code point
char high = '\uD83D', low = '\uDE02';
String surrogateCP = new String(new char[]{high, low, low});
assertTrue(Arrays.equals(new int[]{high, low, low}, surrogateCP.chars().toArray()));
}
public void testCodePoints() {
String s = "Hello\n\tworld";
int[] expected = new int[s.length()];
for (int i = 0; i < s.length(); ++i) {
expected[i] = (int) s.charAt(i);
}
assertTrue(Arrays.equals(expected, s.codePoints().toArray()));
// Surrogate code point
char high = '\uD83D', low = '\uDE02';
String surrogateCP = new String(new char[]{high, low, low, '0'});
assertEquals(Character.toCodePoint(high, low), surrogateCP.codePoints().toArray()[0]);
assertEquals((int) low, surrogateCP.codePoints().toArray()[1]); // Unmatched surrogate.
assertEquals((int) '0', surrogateCP.codePoints().toArray()[2]);
}
public void testJoin_CharSequenceArray() {
assertEquals("", String.join("-"));
assertEquals("", String.join("-", ""));
assertEquals("foo", String.join("-", "foo"));
assertEquals("foo---bar---boo", String.join("---", "foo", "bar", "boo"));
assertEquals("foobarboo", String.join("", "foo", "bar", "boo"));
assertEquals("null-null", String.join("-", null, null));
assertEquals("¯\\_(ツ)_/¯", String.join("(ツ)", "¯\\_", "_/¯"));
}
public void testJoin_CharSequenceArray_NPE() {
try {
String.join(null, "foo", "bar");
fail();
} catch (NullPointerException expected) {}
}
public void testJoin_Iterable() {
ArrayList<String> iterable = new ArrayList<>();
assertEquals("", String.join("-", iterable));
iterable.add("foo");
assertEquals("foo", String.join("-", iterable));
iterable.add("bar");
assertEquals("foo...bar", String.join("...", iterable));
iterable.add("foo");
assertEquals("foo-bar-foo", String.join("-", iterable));
assertEquals("foobarfoo", String.join("", iterable));
}
public void testJoin_Iterable_NPE() {
try {
String.join(null, new ArrayList<String>());
fail();
} catch (NullPointerException expected) {}
try {
String.join("-", (Iterable<String>)null);
fail();
} catch (NullPointerException expected) {}
}
/**
* Check that String.format() does not throw when the default locale is invalid.
* http://b/129070579
*/
public void testFormat_invalidLocale() {
Locale defaultLocale = Locale.getDefault();
try {
Locale.setDefault(new Locale("invalidLocale"));
String.format("%s", "");
} finally {
Locale.setDefault(defaultLocale);
}
}
}