blob: bd52e0624025e8e74f7140eb20d6c777d7db05c7 [file] [log] [blame]
/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.ReadOnlyBufferException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.util.Arrays;
import java.util.Locale;
import junit.framework.TestCase;
public class StringTest extends TestCase {
public void testIsEmpty() {
// The evil decoder keeps hold of the CharBuffer it wrote to.
private static final class EvilCharsetDecoder extends CharsetDecoder {
private static char[] chars;
public EvilCharsetDecoder(Charset cs) {
super(cs, 1.0f, 1.0f);
protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
chars = out.array();
int inLength = in.remaining();
for (int i = 0; i < inLength; ++i) {
in.put((byte) 'X');
return CoderResult.UNDERFLOW;
public static void corrupt() {
for (int i = 0; i < chars.length; ++i) {
chars[i] = '$';
// The evil encoder tries to write to the CharBuffer it was given to
// read from.
private static final class EvilCharsetEncoder extends CharsetEncoder {
public EvilCharsetEncoder(Charset cs) {
super(cs, 1.0f, 1.0f);
protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
int inLength = in.remaining();
for (int i = 0; i < inLength; ++i) {
out.put((byte) 'y');
return CoderResult.UNDERFLOW;
private static final Charset EVIL_CHARSET = new Charset("evil", null) {
public boolean contains(Charset charset) { return false; }
public CharsetEncoder newEncoder() { return new EvilCharsetEncoder(this); }
public CharsetDecoder newDecoder() { return new EvilCharsetDecoder(this); }
public void testGetBytes_MaliciousCharset() {
try {
String s = "hi";
// Check that our encoder can't write to the input CharBuffer
// it was given.
fail(); // We shouldn't have got here!
} catch (ReadOnlyBufferException expected) {
// We caught you trying to be naughty!
public void testString_BII() throws Exception {
byte[] bytes = "xa\u0666bx".getBytes("UTF-8");
assertEquals("a\u0666b", new String(bytes, 1, bytes.length - 2));
public void testString_BIIString() throws Exception {
byte[] bytes = "xa\u0666bx".getBytes("UTF-8");
assertEquals("a\u0666b", new String(bytes, 1, bytes.length - 2, "UTF-8"));
public void testString_BIICharset() throws Exception {
byte[] bytes = "xa\u0666bx".getBytes("UTF-8");
assertEquals("a\u0666b", new String(bytes, 1, bytes.length - 2, Charset.forName("UTF-8")));
public void testString_BCharset() throws Exception {
byte[] bytes = "a\u0666b".getBytes("UTF-8");
assertEquals("a\u0666b", new String(bytes, Charset.forName("UTF-8")));
public void testStringFromCharset_MaliciousCharset() {
Charset cs = EVIL_CHARSET;
byte[] bytes = new byte[] {(byte) 'h', (byte) 'i'};
final String result = new String(bytes, cs);
assertEquals("YY", result); // (Our decoder always outputs 'Y's.)
// Check that even if the decoder messes with the output CharBuffer
// after we've created a string from it, it doesn't affect the string.
assertEquals("YY", result);
public void test_getBytes_bad() throws Exception {
// Check that we use '?' as the replacement byte for invalid characters.
assertEquals("[97, 63, 98]", Arrays.toString("a\u0666b".getBytes("US-ASCII")));
assertEquals("[97, 63, 98]", Arrays.toString("a\u0666b".getBytes(Charset.forName("US-ASCII"))));
public void test_getBytes_UTF_8() {
// We have a fast path implementation of String.getBytes for UTF-8.
Charset cs = Charset.forName("UTF-8");
// Test the empty string.
assertEquals("[]", Arrays.toString("".getBytes(cs)));
// Test one-byte characters.
assertEquals("[0]", Arrays.toString("\u0000".getBytes(cs)));
assertEquals("[127]", Arrays.toString("\u007f".getBytes(cs)));
assertEquals("[104, 105]", Arrays.toString("hi".getBytes(cs)));
// Test two-byte characters.
assertEquals("[-62, -128]", Arrays.toString("\u0080".getBytes(cs)));
assertEquals("[-39, -90]", Arrays.toString("\u0666".getBytes(cs)));
assertEquals("[-33, -65]", Arrays.toString("\u07ff".getBytes(cs)));
assertEquals("[104, -39, -90, 105]", Arrays.toString("h\u0666i".getBytes(cs)));
// Test three-byte characters.
assertEquals("[-32, -96, -128]", Arrays.toString("\u0800".getBytes(cs)));
assertEquals("[-31, -120, -76]", Arrays.toString("\u1234".getBytes(cs)));
assertEquals("[-17, -65, -65]", Arrays.toString("\uffff".getBytes(cs)));
assertEquals("[104, -31, -120, -76, 105]", Arrays.toString("h\u1234i".getBytes(cs)));
// Test supplementary characters.
// Minimum supplementary character: U+10000
assertEquals("[-16, -112, -128, -128]", Arrays.toString("\ud800\udc00".getBytes(cs)));
// Random supplementary character: U+10381 Ugaritic letter beta
assertEquals("[-16, -112, -114, -127]", Arrays.toString("\ud800\udf81".getBytes(cs)));
// Maximum supplementary character: U+10FFFF
assertEquals("[-12, -113, -65, -65]", Arrays.toString("\udbff\udfff".getBytes(cs)));
// A high surrogate at end of string is an error replaced with '?'.
assertEquals("[104, 63]", Arrays.toString("h\ud800".getBytes(cs)));
// A high surrogate not followed by a low surrogate is an error replaced with '?'.
assertEquals("[104, 63, 105]", Arrays.toString("h\ud800i".getBytes(cs)));
public void test_new_String_bad() throws Exception {
// Check that we use U+FFFD as the replacement string for invalid bytes.
assertEquals("a\ufffdb", new String(new byte[] { 97, -2, 98 }, "US-ASCII"));
assertEquals("a\ufffdb", new String(new byte[] { 97, -2, 98 }, Charset.forName("US-ASCII")));
* Test that strings interned manually and then later loaded as literals
* maintain reference equality. http://b/3098960
public void testInternBeforeLiteralIsLoaded() throws Exception{
String programmatic = Arrays.asList("5058", "9962", "1563", "5744").toString().intern();
String literal = (String) Class.forName("$HasLiteral")
assertEquals(System.identityHashCode(programmatic), System.identityHashCode(literal));
assertSame(programmatic, literal);
static class HasLiteral {
static String literal = "[5058, 9962, 1563, 5744]";
private static final String COMBINING_DOT_ABOVE = "\u0307";
private static final String LATIN_CAPITAL_I = "I";
private static final String LATIN_CAPITAL_I_WITH_DOT_ABOVE = "\u0130";
private static final String LATIN_SMALL_I = "i";
private static final String LATIN_SMALL_DOTLESS_I = "\u0131";
private static final String[] LATIN_I_VARIANTS = {
public void testCaseMapping_tr_TR() {
Locale tr_TR = new Locale("tr", "TR");
assertEquals(LATIN_SMALL_I, LATIN_SMALL_I.toLowerCase(tr_TR));
assertEquals(LATIN_CAPITAL_I, LATIN_CAPITAL_I.toUpperCase(tr_TR));
assertEquals(LATIN_CAPITAL_I, LATIN_SMALL_DOTLESS_I.toUpperCase(tr_TR));
assertEquals(LATIN_SMALL_DOTLESS_I, LATIN_CAPITAL_I.toLowerCase(tr_TR));
public void testCaseMapping_en_US() {
Locale en_US = new Locale("en", "US");
assertEquals(LATIN_CAPITAL_I, LATIN_SMALL_I.toUpperCase(en_US));
assertEquals(LATIN_CAPITAL_I, LATIN_CAPITAL_I.toUpperCase(en_US));
assertEquals(LATIN_SMALL_I, LATIN_SMALL_I.toLowerCase(en_US));
assertEquals(LATIN_SMALL_I, LATIN_CAPITAL_I.toLowerCase(en_US));
assertEquals(LATIN_CAPITAL_I, LATIN_SMALL_DOTLESS_I.toUpperCase(en_US));
// http://b/3325799: the RI fails this because it's using an obsolete version of the Unicode rules.
// Android correctly preserves canonical equivalence. (See the separate test for tr_TR.)
public void testCaseMapping_el() {
Locale el_GR = new Locale("el", "GR");
assertEquals("ΟΔΟΣ ΟΔΟΣ ΣΟ ΣΟ OΣ ΟΣ Σ ΕΞ", "ΟΔΌΣ Οδός Σο ΣΟ oΣ ΟΣ σ ἕξ".toUpperCase(el_GR));
assertEquals("ΟΔΟΣ ΟΔΟΣ ΣΟ ΣΟ OΣ ΟΣ Σ ΕΞ", "ΟΔΌΣ Οδός Σο ΣΟ oΣ ΟΣ σ ἕξ".toUpperCase(el_GR));
assertEquals("ΟΔΟΣ ΟΔΟΣ ΣΟ ΣΟ OΣ ΟΣ Σ ΕΞ", "ΟΔΌΣ Οδός Σο ΣΟ oΣ ΟΣ σ ἕξ".toUpperCase(el_GR));
Locale en_US = new Locale("en", "US");
assertEquals("ΟΔΌΣ ΟΔΌΣ ΣΟ ΣΟ OΣ ΟΣ Σ ἝΞ", "ΟΔΌΣ Οδός Σο ΣΟ oΣ ΟΣ σ ἕξ".toUpperCase(en_US));
public void testEqualsIgnoreCase_tr_TR() {
testEqualsIgnoreCase(new Locale("tr", "TR"));
public void testEqualsIgnoreCase_en_US() {
testEqualsIgnoreCase(new Locale("en", "US"));
* String.equalsIgnoreCase should not depend on the locale.
private void testEqualsIgnoreCase(Locale locale) {
Locale defaultLocale = Locale.getDefault();
try {
for (String a : LATIN_I_VARIANTS) {
for (String b : LATIN_I_VARIANTS) {
if (!a.equalsIgnoreCase(b)) {
fail("Expected " + a + " to equal " + b + " in " + locale);
} finally {
public void testRegionMatches_ignoreCase_en_US() {
testRegionMatches_ignoreCase(new Locale("en", "US"));
public void testRegionMatches_ignoreCase_tr_TR() {
testRegionMatches_ignoreCase(new Locale("tr", "TR"));
private void testRegionMatches_ignoreCase(Locale locale) {
Locale defaultLocale = Locale.getDefault();
try {
for (String a : LATIN_I_VARIANTS) {
for (String b : LATIN_I_VARIANTS) {
if (!a.regionMatches(true, 0, b, 0, b.length())) {
fail("Expected " + a + " to equal " + b + " in " + locale);
} finally {
public void test_replaceAll() throws Exception {
assertEquals("project_Id", "projectId".replaceAll("(?!^)(\\p{Upper})(?!$)", "_$1"));
public void test_23831() throws Exception {
byte[] bytes = { (byte) 0xf5, (byte) 0xa9, (byte) 0xea, (byte) 0x21 };
String expected = "\ufffd\ufffd\u0021";
// Since we use icu4c for CharsetDecoder...
CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder();
assertEquals(expected, decoder.decode(ByteBuffer.wrap(bytes)).toString());
// Our fast-path code in String should behave the same...
assertEquals(expected, new String(bytes, "UTF-8"));
public void test_55129() throws Exception {
assertEquals("-h-e-l-l-o- -w-o-r-l-d-", "hello world".replace("", "-"));
assertEquals("-w-o-r-l-d-", "hello world".substring(6).replace("", "-"));
assertEquals("-*-w-*-o-*-r-*-l-*-d-*-", "hello world".substring(6).replace("", "-*-"));
public void test_String_getBytes() throws Exception {
// http://b/11571917
assertEquals("[-126, -96]", Arrays.toString("あ".getBytes("Shift_JIS")));
assertEquals("[-126, -87]", Arrays.toString("か".getBytes("Shift_JIS")));
assertEquals("[-105, 67]", Arrays.toString("佑".getBytes("Shift_JIS")));
assertEquals("[36]", Arrays.toString("$".getBytes("Shift_JIS")));
assertEquals("[-29, -127, -117]", Arrays.toString("か".getBytes("UTF-8")));
// http://b/11639117
assertEquals("[-79, -72, -70, -48]", Arrays.toString("구분".getBytes("EUC-KR")));
assertEquals("[-77, -10, -64, -76, -63, -53]", Arrays.toString("出来了".getBytes("gbk")));
assertEquals("[-77, -10, -64, -76]", Arrays.toString("出来".getBytes("gbk")));
assertEquals("[-77, -10]", Arrays.toString("出".getBytes("gbk")));
public void test_compareTo() throws Exception {
// For strings where a character differs, the result is
// the difference between the characters.
assertEquals(-1, "a".compareTo("b"));
assertEquals(-2, "a".compareTo("c"));
assertEquals(1, "b".compareTo("a"));
assertEquals(2, "c".compareTo("a"));
// For strings where the characters match up to the length of the shorter,
// the result is the difference between the strings' lengths.
assertEquals(0, "a".compareTo("a"));
assertEquals(-1, "a".compareTo("aa"));
assertEquals(-1, "a".compareTo("az"));
assertEquals(-2, "a".compareTo("aaa"));
assertEquals(-2, "a".compareTo("azz"));
assertEquals(-3, "a".compareTo("aaaa"));
assertEquals(-3, "a".compareTo("azzz"));
assertEquals(0, "a".compareTo("a"));
assertEquals(1, "aa".compareTo("a"));
assertEquals(1, "az".compareTo("a"));
assertEquals(2, "aaa".compareTo("a"));
assertEquals(2, "azz".compareTo("a"));
assertEquals(3, "aaaa".compareTo("a"));
assertEquals(3, "azzz".compareTo("a"));
// Pins the exact integer results of String.compareToIgnoreCase for short
// ASCII strings in mixed case.
public void test_compareToIgnoreCase() throws Exception {
// For strings where a character differs, the result is
// the difference between the characters.
assertEquals(-1, "a".compareToIgnoreCase("b"));
assertEquals(-1, "a".compareToIgnoreCase("B"));
assertEquals(-2, "a".compareToIgnoreCase("c"));
assertEquals(-2, "a".compareToIgnoreCase("C"));
assertEquals(1, "b".compareToIgnoreCase("a"));
assertEquals(1, "B".compareToIgnoreCase("a"));
assertEquals(2, "c".compareToIgnoreCase("a"));
assertEquals(2, "C".compareToIgnoreCase("a"));
// For strings where the characters match up to the length of the shorter,
// the result is the difference between the strings' lengths.
// Equal length, differing only in case: expected result is 0.
assertEquals(0, "a".compareToIgnoreCase("a"));
assertEquals(0, "a".compareToIgnoreCase("A"));
assertEquals(0, "A".compareToIgnoreCase("a"));
assertEquals(0, "A".compareToIgnoreCase("A"));
// Receiver is a prefix of the argument: expected result is negative.
assertEquals(-1, "a".compareToIgnoreCase("aa"));
assertEquals(-1, "a".compareToIgnoreCase("aA"));
assertEquals(-1, "a".compareToIgnoreCase("Aa"));
assertEquals(-1, "a".compareToIgnoreCase("az"));
assertEquals(-1, "a".compareToIgnoreCase("aZ"));
assertEquals(-2, "a".compareToIgnoreCase("aaa"));
assertEquals(-2, "a".compareToIgnoreCase("AAA"));
assertEquals(-2, "a".compareToIgnoreCase("azz"));
assertEquals(-2, "a".compareToIgnoreCase("AZZ"));
assertEquals(-3, "a".compareToIgnoreCase("aaaa"));
assertEquals(-3, "a".compareToIgnoreCase("AAAA"));
assertEquals(-3, "a".compareToIgnoreCase("azzz"));
assertEquals(-3, "a".compareToIgnoreCase("AZZZ"));
// Argument is a prefix of the receiver: expected result is positive.
assertEquals(1, "aa".compareToIgnoreCase("a"));
assertEquals(1, "aA".compareToIgnoreCase("a"));
assertEquals(1, "Aa".compareToIgnoreCase("a"));
assertEquals(1, "az".compareToIgnoreCase("a"));
assertEquals(2, "aaa".compareToIgnoreCase("a"));
assertEquals(2, "azz".compareToIgnoreCase("a"));
assertEquals(3, "aaaa".compareToIgnoreCase("a"));
assertEquals(3, "azzz".compareToIgnoreCase("a"));