Fix String.toLowerCase and toUpperCase.
Rather than try to cope with Lithuanian, let's just hand that one to ICU4C.
I've removed my hand-crafted Azeri/Turkish lowercasing too, in favor of ICU.
Presence of a high surrogate (which implies a supplementary character) is a
good reason to hand over to ICU too.
On the uppercasing side, I've kept our existing hard-coded table and just
added code to defer to ICU for Azeri, Lithuanian, and Turkish (plus
supplementary characters). I don't like the tables, but I don't have proof
that they're incorrect.
Bug: 2340628
Change-Id: I36b556b0444623a5aacc1afc58ebb4d84211d3dc
diff --git a/libcore/icu/src/main/java/com/ibm/icu4jni/lang/UCharacter.java b/libcore/icu/src/main/java/com/ibm/icu4jni/lang/UCharacter.java
index dc351f4..08fe26a 100644
--- a/libcore/icu/src/main/java/com/ibm/icu4jni/lang/UCharacter.java
+++ b/libcore/icu/src/main/java/com/ibm/icu4jni/lang/UCharacter.java
@@ -42,6 +42,8 @@
public static native int toLowerCase(int codePoint);
public static native int toTitleCase(int codePoint);
public static native int toUpperCase(int codePoint);
+ public static native String toLowerCase(String s, String localeName);
+ public static native String toUpperCase(String s, String localeName);
public static UnicodeBlock[] getBlockTable() {
/**
diff --git a/libcore/icu/src/main/native/NativeIDN.cpp b/libcore/icu/src/main/native/NativeIDN.cpp
index 5ce3e94..72afc74 100644
--- a/libcore/icu/src/main/native/NativeIDN.cpp
+++ b/libcore/icu/src/main/native/NativeIDN.cpp
@@ -36,7 +36,7 @@
const size_t srcLength = sus.unicodeString().length();
UChar dst[256];
UErrorCode status = U_ZERO_ERROR;
- int32_t resultLength = toAscii
+ size_t resultLength = toAscii
? uidna_IDNToASCII(src, srcLength, &dst[0], sizeof(dst), flags, NULL, &status)
: uidna_IDNToUnicode(src, srcLength, &dst[0], sizeof(dst), flags, NULL, &status);
if (U_FAILURE(status)) {
diff --git a/libcore/icu/src/main/native/Resources.cpp b/libcore/icu/src/main/native/Resources.cpp
index e4138b1..ba363fe 100644
--- a/libcore/icu/src/main/native/Resources.cpp
+++ b/libcore/icu/src/main/native/Resources.cpp
@@ -17,6 +17,7 @@
#define LOG_TAG "Resources"
#include "JNIHelp.h"
#include "AndroidSystemNatives.h"
+#include "ScopedUtfChars.h"
#include "cutils/log.h"
#include "unicode/numfmt.h"
#include "unicode/locid.h"
@@ -67,11 +68,8 @@
void operator=(const ScopedResourceBundle&);
};
-static Locale getLocale(JNIEnv* env, jstring locale) {
- const char* name = env->GetStringUTFChars(locale, NULL);
- Locale result = Locale::createFromName(name);
- env->ReleaseStringUTFChars(locale, name);
- return result;
+static Locale getLocale(JNIEnv* env, jstring localeName) {
+ return Locale::createFromName(ScopedUtfChars(env, localeName).data());
}
static jint getCurrencyFractionDigitsNative(JNIEnv* env, jclass clazz, jstring currencyCode) {
diff --git a/libcore/icu/src/main/native/ScopedJavaUnicodeString.h b/libcore/icu/src/main/native/ScopedJavaUnicodeString.h
index 69726fb..b108a6b 100644
--- a/libcore/icu/src/main/native/ScopedJavaUnicodeString.h
+++ b/libcore/icu/src/main/native/ScopedJavaUnicodeString.h
@@ -22,9 +22,7 @@
// A smart pointer that provides access to an ICU UnicodeString given a JNI
// jstring. We give ICU a direct pointer to the characters on the Java heap.
-// It's clever enough to copy-on-write if necessary, but we only provide
-// const UnicodeString access anyway because attempted write access seems
-// likely to be an error.
+// It's clever enough to copy-on-write if necessary.
class ScopedJavaUnicodeString {
public:
ScopedJavaUnicodeString(JNIEnv* env, jstring s) : mEnv(env), mString(s) {
@@ -37,7 +35,11 @@
mEnv->ReleaseStringChars(mString, mChars);
}
- const UnicodeString& unicodeString() {
+ const UnicodeString& unicodeString() const {
+ return mUnicodeString;
+ }
+
+ UnicodeString& unicodeString() {
return mUnicodeString;
}
diff --git a/libcore/icu/src/main/native/UCharacter.cpp b/libcore/icu/src/main/native/UCharacter.cpp
index 3fd8151..9856a1a 100644
--- a/libcore/icu/src/main/native/UCharacter.cpp
+++ b/libcore/icu/src/main/native/UCharacter.cpp
@@ -1,12 +1,12 @@
/*
* Copyright (C) 2006 The Android Open Source Project
- *
+ *
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -16,6 +16,9 @@
#include "JNIHelp.h"
#include "AndroidSystemNatives.h"
+#include "ScopedJavaUnicodeString.h"
+#include "ScopedUtfChars.h"
+#include "unicode/locid.h"
#include "unicode/uchar.h"
#include <math.h>
#include <stdlib.h>
@@ -37,12 +40,12 @@
}
static jint getNumericValueImpl(JNIEnv*, jclass, jint codePoint){
- // The letters A-Z in their uppercase ('\u0041' through '\u005A'),
- // lowercase ('\u0061' through '\u007A'),
- // and full width variant ('\uFF21' through '\uFF3A'
- // and '\uFF41' through '\uFF5A') forms
- // have numeric values from 10 through 35. This is independent of the
- // Unicode specification, which does not assign numeric values to these
+ // The letters A-Z in their uppercase ('\u0041' through '\u005A'),
+ // lowercase ('\u0061' through '\u007A'),
+ // and full width variant ('\uFF21' through '\uFF3A'
+ // and '\uFF41' through '\uFF5A') forms
+ // have numeric values from 10 through 35. This is independent of the
+ // Unicode specification, which does not assign numeric values to these
// char values.
if (codePoint >= 0x41 && codePoint <= 0x5A) {
return codePoint - 0x37;
@@ -66,15 +69,15 @@
}
return result;
-}
-
+}
+
static jboolean isDefinedImpl(JNIEnv*, jclass, jint codePoint) {
return u_isdefined(codePoint);
-}
+}
static jboolean isDigitImpl(JNIEnv*, jclass, jint codePoint) {
return u_isdigit(codePoint);
-}
+}
static jboolean isIdentifierIgnorableImpl(JNIEnv*, jclass, jint codePoint) {
// Java also returns TRUE for U+0085 Next Line (it omits U+0085 from whitespace ISO controls)
@@ -82,31 +85,31 @@
return JNI_TRUE;
}
return u_isIDIgnorable(codePoint);
-}
+}
static jboolean isLetterImpl(JNIEnv*, jclass, jint codePoint) {
return u_isalpha(codePoint);
-}
+}
static jboolean isLetterOrDigitImpl(JNIEnv*, jclass, jint codePoint) {
return u_isalnum(codePoint);
-}
+}
static jboolean isSpaceCharImpl(JNIEnv*, jclass, jint codePoint) {
return u_isJavaSpaceChar(codePoint);
-}
+}
static jboolean isTitleCaseImpl(JNIEnv*, jclass, jint codePoint) {
return u_istitle(codePoint);
-}
+}
static jboolean isUnicodeIdentifierPartImpl(JNIEnv*, jclass, jint codePoint) {
return u_isIDPart(codePoint);
-}
+}
static jboolean isUnicodeIdentifierStartImpl(JNIEnv*, jclass, jint codePoint) {
return u_isIDStart(codePoint);
-}
+}
static jboolean isWhitespaceImpl(JNIEnv*, jclass, jint codePoint) {
// Java omits U+0085
@@ -114,27 +117,43 @@
return JNI_FALSE;
}
return u_isWhitespace(codePoint);
-}
+}
static jint toLowerCaseImpl(JNIEnv*, jclass, jint codePoint) {
return u_tolower(codePoint);
-}
+}
static jint toTitleCaseImpl(JNIEnv*, jclass, jint codePoint) {
return u_totitle(codePoint);
-}
+}
static jint toUpperCaseImpl(JNIEnv*, jclass, jint codePoint) {
return u_toupper(codePoint);
-}
+}
+
+static jstring toLowerCaseStringImpl(JNIEnv* env, jclass, jstring javaString, jstring localeName) {
+ ScopedJavaUnicodeString scopedString(env, javaString);
+ UnicodeString& s(scopedString.unicodeString());
+ UnicodeString original(s);
+ s.toLower(Locale::createFromName(ScopedUtfChars(env, localeName).data()));
+ return s == original ? javaString : env->NewString(s.getBuffer(), s.length());
+}
+
+static jstring toUpperCaseStringImpl(JNIEnv* env, jclass, jstring javaString, jstring localeName) {
+ ScopedJavaUnicodeString scopedString(env, javaString);
+ UnicodeString& s(scopedString.unicodeString());
+ UnicodeString original(s);
+ s.toUpper(Locale::createFromName(ScopedUtfChars(env, localeName).data()));
+ return s == original ? javaString : env->NewString(s.getBuffer(), s.length());
+}
static jboolean isUpperCaseImpl(JNIEnv*, jclass, jint codePoint) {
return u_isupper(codePoint);
-}
+}
static jboolean isLowerCaseImpl(JNIEnv*, jclass, jint codePoint) {
return u_islower(codePoint);
-}
+}
static int forNameImpl(JNIEnv* env, jclass, jstring blockName) {
if (blockName == NULL) {
@@ -178,7 +197,9 @@
{ "toLowerCase", "(I)I", (void*) toLowerCaseImpl },
{ "toTitleCase", "(I)I", (void*) toTitleCaseImpl },
{ "toUpperCase", "(I)I", (void*) toUpperCaseImpl },
-};
+ { "toLowerCase", "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;", (void*) toLowerCaseStringImpl },
+ { "toUpperCase", "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;", (void*) toUpperCaseStringImpl },
+};
int register_com_ibm_icu4jni_lang_UCharacter(JNIEnv* env) {
return jniRegisterNativeMethods(env, "com/ibm/icu4jni/lang/UCharacter",
diff --git a/libcore/luni/src/main/java/java/lang/CaseMapper.java b/libcore/luni/src/main/java/java/lang/CaseMapper.java
index c74bda0..f2f5ac8 100644
--- a/libcore/luni/src/main/java/java/lang/CaseMapper.java
+++ b/libcore/luni/src/main/java/java/lang/CaseMapper.java
@@ -16,18 +16,17 @@
package java.lang;
+import com.ibm.icu4jni.lang.UCharacter;
import java.util.Locale;
/**
* Performs case operations as described by http://unicode.org/reports/tr21/tr21-5.html.
*/
class CaseMapper {
- // Intention-revealing constants for various important characters.
- private static final char LATIN_CAPITAL_I = 'I';
- private static final char LATIN_SMALL_I = 'i';
+ private static final char[] upperValues = "SS\u0000\u02bcN\u0000J\u030c\u0000\u0399\u0308\u0301\u03a5\u0308\u0301\u0535\u0552\u0000H\u0331\u0000T\u0308\u0000W\u030a\u0000Y\u030a\u0000A\u02be\u0000\u03a5\u0313\u0000\u03a5\u0313\u0300\u03a5\u0313\u0301\u03a5\u0313\u0342\u1f08\u0399\u0000\u1f09\u0399\u0000\u1f0a\u0399\u0000\u1f0b\u0399\u0000\u1f0c\u0399\u0000\u1f0d\u0399\u0000\u1f0e\u0399\u0000\u1f0f\u0399\u0000\u1f08\u0399\u0000\u1f09\u0399\u0000\u1f0a\u0399\u0000\u1f0b\u0399\u0000\u1f0c\u0399\u0000\u1f0d\u0399\u0000\u1f0e\u0399\u0000\u1f0f\u0399\u0000\u1f28\u0399\u0000\u1f29\u0399\u0000\u1f2a\u0399\u0000\u1f2b\u0399\u0000\u1f2c\u0399\u0000\u1f2d\u0399\u0000\u1f2e\u0399\u0000\u1f2f\u0399\u0000\u1f28\u0399\u0000\u1f29\u0399\u0000\u1f2a\u0399\u0000\u1f2b\u0399\u0000\u1f2c\u0399\u0000\u1f2d\u0399\u0000\u1f2e\u0399\u0000\u1f2f\u0399\u0000\u1f68\u0399\u0000\u1f69\u0399\u0000\u1f6a\u0399\u0000\u1f6b\u0399\u0000\u1f6c\u0399\u0000\u1f6d\u0399\u0000\u1f6e\u0399\u0000\u1f6f\u0399\u0000\u1f68\u0399\u0000\u1f69\u0399\u0000\u1f6a\u0399\u0000\u1f6b\u0399\u0000\u1f6c\u0399\u0000\u1f6d\u0399\u0000\u1f6e\u0399\u0000\u1f6f\u0399\u0000\u1fba\u0399\u0000\u0391\u0399\u0000\u0386\u0399\u0000\u0391\u0342\u0000\u0391\u0342\u0399\u0391\u0399\u0000\u1fca\u0399\u0000\u0397\u0399\u0000\u0389\u0399\u0000\u0397\u0342\u0000\u0397\u0342\u0399\u0397\u0399\u0000\u0399\u0308\u0300\u0399\u0308\u0301\u0399\u0342\u0000\u0399\u0308\u0342\u03a5\u0308\u0300\u03a5\u0308\u0301\u03a1\u0313\u0000\u03a5\u0342\u0000\u03a5\u0308\u0342\u1ffa\u0399\u0000\u03a9\u0399\u0000\u038f\u0399\u0000\u03a9\u0342\u0000\u03a9\u0342\u0399\u03a9\u0399\u0000FF\u0000FI\u0000FL\u0000FFIFFLST\u0000ST\u0000\u0544\u0546\u0000\u0544\u0535\u0000\u0544\u053b\u0000\u054e\u0546\u0000\u0544\u053d\u0000".toCharArray();
+ private static final char[] upperValues2 = "\u000b\u0000\f\u0000\r\u0000\u000e\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>\u0000\u0000?@A\u0000BC\u0000\u0000\u0000\u0000D\u0000\u0000\u0000\u0000\u0000EFG\u0000HI\u0000\u0000\u0000\u0000J\u0000\u0000\u0000\u0000\u0000KL\u0000\u0000MN\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000OPQ\u0000RS\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000TUV\u0000WX\u0000\u0000\u0000\u0000Y".toCharArray();
+
private static final char LATIN_CAPITAL_I_WITH_DOT = '\u0130';
- private static final char LATIN_SMALL_DOTLESS_I = '\u0131';
- private static final char COMBINING_DOT_ABOVE = '\u0307';
private static final char GREEK_CAPITAL_SIGMA = '\u03a3';
private static final char GREEK_SMALL_FINAL_SIGMA = '\u03c2';
@@ -45,20 +44,20 @@
* accessible.
*/
public static String toLowerCase(Locale locale, String s, char[] value, int offset, int count) {
+ // Punt hard cases to ICU4C.
String languageCode = locale.getLanguage();
- boolean turkishOrAzeri = languageCode.equals("tr") || languageCode.equals("az");
+ if (languageCode.equals("tr") || languageCode.equals("az") || languageCode.equals("lt")) {
+ return UCharacter.toLowerCase(s, locale.toString());
+ }
char[] newValue = null;
int newCount = 0;
for (int i = offset, end = offset + count; i < end; ++i) {
char ch = value[i];
char newCh = ch;
- if (turkishOrAzeri && ch == LATIN_CAPITAL_I_WITH_DOT) {
- newCh = LATIN_SMALL_I;
- } else if (turkishOrAzeri && ch == LATIN_CAPITAL_I && !followedBy(value, offset, count, i, COMBINING_DOT_ABOVE)) {
- newCh = LATIN_SMALL_DOTLESS_I;
- } else if (turkishOrAzeri && ch == COMBINING_DOT_ABOVE && precededBy(value, offset, count, i, LATIN_CAPITAL_I)) {
- continue; // (We've already converted the preceding I, so we don't need to create newValue.)
+ if (ch == LATIN_CAPITAL_I_WITH_DOT || Character.isHighSurrogate(ch)) {
+ // Punt these hard cases.
+ return UCharacter.toLowerCase(s, locale.toString());
} else if (ch == GREEK_CAPITAL_SIGMA && isFinalSigma(value, offset, count, i)) {
newCh = GREEK_SMALL_FINAL_SIGMA;
} else {
@@ -108,4 +107,102 @@
}
return true;
}
+
+ /**
+ * Return the index of the specified character into the upperValues table.
+ * The upperValues table contains three entries at each position. These
+ * three characters are the upper case conversion. If only two characters
+ * are used, the third character in the table is \u0000.
+ * @return the index into the upperValues table, or -1
+ */
+ private static int upperIndex(int ch) {
+ int index = -1;
+ if (ch >= 0xdf) {
+ if (ch <= 0x587) {
+ switch (ch) {
+ case 0xdf: return 0;
+ case 0x149: return 1;
+ case 0x1f0: return 2;
+ case 0x390: return 3;
+ case 0x3b0: return 4;
+ case 0x587: return 5;
+ }
+ } else if (ch >= 0x1e96) {
+ if (ch <= 0x1e9a) {
+ index = 6 + ch - 0x1e96;
+ } else if (ch >= 0x1f50 && ch <= 0x1ffc) {
+ index = upperValues2[ch - 0x1f50];
+ if (index == 0) {
+ index = -1;
+ }
+ } else if (ch >= 0xfb00) {
+ if (ch <= 0xfb06) {
+ index = 90 + ch - 0xfb00;
+ } else if (ch >= 0xfb13 && ch <= 0xfb17) {
+ index = 97 + ch - 0xfb13;
+ }
+ }
+ }
+ }
+ return index;
+ }
+
+ public static String toUpperCase(Locale locale, String s, char[] value, int offset, int count) {
+ String languageCode = locale.getLanguage();
+ if (languageCode.equals("tr") || languageCode.equals("az") || languageCode.equals("lt")) {
+ return UCharacter.toUpperCase(s, locale.toString());
+ }
+
+ char[] output = null;
+ int i = 0;
+ for (int o = offset, end = offset + count; o < end; o++) {
+ char ch = value[o];
+ if (Character.isHighSurrogate(ch)) {
+ return UCharacter.toUpperCase(s, locale.toString());
+ }
+ int index = upperIndex(ch);
+ if (index == -1) {
+ if (output != null && i >= output.length) {
+ char[] newoutput = new char[output.length + (count / 6) + 2];
+ System.arraycopy(output, 0, newoutput, 0, output.length);
+ output = newoutput;
+ }
+ char upch = Character.toUpperCase(ch);
+ if (ch != upch) {
+ if (output == null) {
+ output = new char[count];
+ i = o - offset;
+ System.arraycopy(value, offset, output, 0, i);
+ }
+ output[i++] = upch;
+ } else if (output != null) {
+ output[i++] = ch;
+ }
+ } else {
+ int target = index * 3;
+ char val3 = upperValues[target + 2];
+ if (output == null) {
+ output = new char[count + (count / 6) + 2];
+ i = o - offset;
+ System.arraycopy(value, offset, output, 0, i);
+ } else if (i + (val3 == 0 ? 1 : 2) >= output.length) {
+ char[] newoutput = new char[output.length + (count / 6) + 3];
+ System.arraycopy(output, 0, newoutput, 0, output.length);
+ output = newoutput;
+ }
+
+ char val = upperValues[target];
+ output[i++] = val;
+ val = upperValues[target + 1];
+ output[i++] = val;
+ if (val3 != 0) {
+ output[i++] = val3;
+ }
+ }
+ }
+ if (output == null) {
+ return s;
+ }
+ return output.length == i || output.length - i < 8 ? new String(0, i, output) : new String(output, 0, i);
+ }
}
diff --git a/libcore/luni/src/main/java/java/lang/String.java b/libcore/luni/src/main/java/java/lang/String.java
index 7de8094..495cc63 100644
--- a/libcore/luni/src/main/java/java/lang/String.java
+++ b/libcore/luni/src/main/java/java/lang/String.java
@@ -44,18 +44,11 @@
* @see Charset
* @since 1.0
*/
-public final class String implements Serializable, Comparable<String>,
- CharSequence {
+public final class String implements Serializable, Comparable<String>, CharSequence {
private static final long serialVersionUID = -6849794470754667710L;
- // BEGIN android-added
private static final char REPLACEMENT_CHAR = (char) 0xfffd;
- // END android-added
-
- // BEGIN android-removed
- // static class ConsolePrintStream extends java.io.PrintStream ...
- // END android-removed
/**
* CaseInsensitiveComparator compares Strings ignoring the case of the
@@ -394,7 +387,7 @@
} catch (Exception e) {
// do nothing. according to spec:
// behavior is unspecified for invalid array
- cb = CharBuffer.wrap("\u003f".toCharArray()); //$NON-NLS-1$
+ cb = CharBuffer.wrap("\u003f".toCharArray());
}
if ((result = cb.length()) > 0) {
value = cb.array();
@@ -688,7 +681,7 @@
@SuppressWarnings("unused")
private String(String s1, int v1) {
if (s1 == null) {
- s1 = "null"; //$NON-NLS-1$
+ s1 = "null";
}
String s2 = String.valueOf(v1);
int len = s1.count + s2.count;
@@ -856,8 +849,7 @@
private Charset defaultCharset() {
if (DefaultCharset == null) {
String encoding = AccessController
- .doPrivileged(new PriviAction<String>(
- "file.encoding", "ISO8859_1")); //$NON-NLS-1$ //$NON-NLS-2$
+ .doPrivileged(new PriviAction<String>("file.encoding", "ISO8859_1"));
// calling System.getProperty() may cause DefaultCharset to be
// initialized
try {
@@ -869,7 +861,7 @@
}
if (DefaultCharset == null) {
- DefaultCharset = Charset.forName("ISO-8859-1"); //$NON-NLS-1$
+ DefaultCharset = Charset.forName("ISO-8859-1");
}
}
return DefaultCharset;
@@ -1713,18 +1705,16 @@
}
/**
- * Converts this string to lowercase, using the rules of the specified locale.
- * <p>
- * Most case mappings are unaffected by the language of a {@code Locale}. Exceptions include
+ * Converts this string to lowercase, using the rules of {@code locale}.
+ *
+ * <p>Most case mappings are unaffected by the language of a {@code Locale}. Exceptions include
* dotted and dotless I in Azeri and Turkish locales, and dotted and dotless I and J in
- * Lithuanian locales. On the other hand, it isn't necessary to provide, a Greek locale to get
+ * Lithuanian locales. On the other hand, it isn't necessary to provide a Greek locale to get
* correct case mapping of Greek characters: any locale will do.
- * <p>
- * See <a href="http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt">http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt</a>
+ *
+ * <p>See <a href="http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt">http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt</a>
* for full details of context- and language-specific special cases.
- *
- * @param locale
- * the Locale to use.
+ *
* @return a new lowercase string, or {@code this} if it's already all-lowercase.
*/
public String toLowerCase(Locale locale) {
@@ -1742,145 +1732,29 @@
}
/**
- * Converts the characters in this string to uppercase, using the default
- * Locale.
+ * Converts this string to uppercase, using the rules of the default locale.
*
- * @return a new string containing the uppercase characters equivalent to
- * the characters in this string.
+ * @return a new uppercase string, or {@code this} if it's already all-uppercase.
*/
public String toUpperCase() {
- return toUpperCase(Locale.getDefault());
- }
-
- // BEGIN android-note
- // put this in a helper class so that it's only initialized on demand?
- // END android-note
- private static final char[] upperValues = "SS\u0000\u02bcN\u0000J\u030c\u0000\u0399\u0308\u0301\u03a5\u0308\u0301\u0535\u0552\u0000H\u0331\u0000T\u0308\u0000W\u030a\u0000Y\u030a\u0000A\u02be\u0000\u03a5\u0313\u0000\u03a5\u0313\u0300\u03a5\u0313\u0301\u03a5\u0313\u0342\u1f08\u0399\u0000\u1f09\u0399\u0000\u1f0a\u0399\u0000\u1f0b\u0399\u0000\u1f0c\u0399\u0000\u1f0d\u0399\u0000\u1f0e\u0399\u0000\u1f0f\u0399\u0000\u1f08\u0399\u0000\u1f09\u0399\u0000\u1f0a\u0399\u0000\u1f0b\u0399\u0000\u1f0c\u0399\u0000\u1f0d\u0399\u0000\u1f0e\u0399\u0000\u1f0f\u0399\u0000\u1f28\u0399\u0000\u1f29\u0399\u0000\u1f2a\u0399\u0000\u1f2b\u0399\u0000\u1f2c\u0399\u0000\u1f2d\u0399\u0000\u1f2e\u0399\u0000\u1f2f\u0399\u0000\u1f28\u0399\u0000\u1f29\u0399\u0000\u1f2a\u0399\u0000\u1f2b\u0399\u0000\u1f2c\u0399\u0000\u1f2d\u0399\u0000\u1f2e\u0399\u0000\u1f2f\u0399\u0000\u1f68\u0399\u0000\u1f69\u0399\u0000\u1f6a\u0399\u0000\u1f6b\u0399\u0000\u1f6c\u0399\u0000\u1f6d\u0399\u0000\u1f6e\u0399\u0000\u1f6f\u0399\u0000\u1f68\u0399\u0000\u1f69\u0399\u0000\u1f6a\u0399\u0000\u1f6b\u0399\u0000\u1f6c\u0399\u0000\u1f6d\u0399\u0000\u1f6e\u0399\u0000\u1f6f\u0399\u0000\u1fba\u0399\u0000\u0391\u0399\u0000\u0386\u0399\u0000\u0391\u0342\u0000\u0391\u0342\u0399\u0391\u0399\u0000\u1fca\u0399\u0000\u0397\u0399\u0000\u0389\u0399\u0000\u0397\u0342\u0000\u0397\u0342\u0399\u0397\u0399\u0000\u0399\u0308\u0300\u0399\u0308\u0301\u0399\u0342\u0000\u0399\u0308\u0342\u03a5\u0308\u0300\u03a5\u0308\u0301\u03a1\u0313\u0000\u03a5\u0342\u0000\u03a5\u0308\u0342\u1ffa\u0399\u0000\u03a9\u0399\u0000\u038f\u0399\u0000\u03a9\u0342\u0000\u03a9\u0342\u0399\u03a9\u0399\u0000FF\u0000FI\u0000FL\u0000FFIFFLST\u0000ST\u0000\u0544\u0546\u0000\u0544\u0535\u0000\u0544\u053b\u0000\u054e\u0546\u0000\u0544\u053d\u0000".value; //$NON-NLS-1$
-
- /**
- * Return the index of the specified character into the upperValues table.
- * The upperValues table contains three entries at each position. These
- * three characters are the upper case conversion. If only two characters
- * are used, the third character in the table is \u0000.
- *
- * @param ch
- * the char being converted to upper case
- *
- * @return the index into the upperValues table, or -1
- */
- private int upperIndex(int ch) {
- int index = -1;
- if (ch >= 0xdf) {
- if (ch <= 0x587) {
- if (ch == 0xdf) {
- index = 0;
- } else if (ch <= 0x149) {
- if (ch == 0x149) {
- index = 1;
- }
- } else if (ch <= 0x1f0) {
- if (ch == 0x1f0) {
- index = 2;
- }
- } else if (ch <= 0x390) {
- if (ch == 0x390) {
- index = 3;
- }
- } else if (ch <= 0x3b0) {
- if (ch == 0x3b0) {
- index = 4;
- }
- } else if (ch <= 0x587) {
- if (ch == 0x587) {
- index = 5;
- }
- }
- } else if (ch >= 0x1e96) {
- if (ch <= 0x1e9a) {
- index = 6 + ch - 0x1e96;
- } else if (ch >= 0x1f50 && ch <= 0x1ffc) {
- index = "\u000b\u0000\f\u0000\r\u0000\u000e\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>\u0000\u0000?@A\u0000BC\u0000\u0000\u0000\u0000D\u0000\u0000\u0000\u0000\u0000EFG\u0000HI\u0000\u0000\u0000\u0000J\u0000\u0000\u0000\u0000\u0000KL\u0000\u0000MN\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000OPQ\u0000RS\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000TUV\u0000WX\u0000\u0000\u0000\u0000Y".value[ch - 0x1f50]; //$NON-NLS-1$
- if (index == 0) {
- index = -1;
- }
- } else if (ch >= 0xfb00) {
- if (ch <= 0xfb06) {
- index = 90 + ch - 0xfb00;
- } else if (ch >= 0xfb13 && ch <= 0xfb17) {
- index = 97 + ch - 0xfb13;
- }
- }
- }
- }
- return index;
+ return CaseMapper.toUpperCase(Locale.getDefault(), this, value, offset, count);
}
/**
- * Converts the characters in this string to uppercase, using the specified
- * Locale.
- *
- * @param locale
- * the Locale to use.
- * @return a new string containing the uppercase characters equivalent to
- * the characters in this string.
+ * Converts this string to uppercase, using the rules of {@code locale}.
+ *
+ * <p>Most case mappings are unaffected by the language of a {@code Locale}. Exceptions include
+ * dotted and dotless I in Azeri and Turkish locales, and dotted and dotless I and J in
+ * Lithuanian locales. On the other hand, it isn't necessary to provide a Greek locale to get
+ * correct case mapping of Greek characters: any locale will do.
+ *
+ * <p>See <a href="http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt">http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt</a>
+ * for full details of context- and language-specific special cases.
+ *
+ * @return a new uppercase string, or {@code this} if it's already all-uppercase.
*/
public String toUpperCase(Locale locale) {
- // BEGIN android-changed: support Azeri.
- String languageCode = locale.getLanguage();
- boolean turkishOrAzeri = languageCode.equals("tr") || languageCode.equals("az");
-
- char[] output = null;
- int i = 0;
- for (int o = offset, end = offset + count; o < end; o++) {
- char ch = value[o];
- int index = upperIndex(ch);
- if (index == -1) {
- if (output != null && i >= output.length) {
- char[] newoutput = new char[output.length + (count / 6) + 2];
- System.arraycopy(output, 0, newoutput, 0, output.length);
- output = newoutput;
- }
- char upch = !turkishOrAzeri ? Character.toUpperCase(ch)
- : (ch != 0x69 ? Character.toUpperCase(ch)
- : (char) 0x130);
- if (ch != upch) {
- if (output == null) {
- output = new char[count];
- i = o - offset;
- System.arraycopy(value, offset, output, 0, i);
- }
- output[i++] = upch;
- } else if (output != null) {
- output[i++] = ch;
- }
- } else {
- int target = index * 3;
- char val3 = upperValues[target + 2];
- if (output == null) {
- output = new char[count + (count / 6) + 2];
- i = o - offset;
- System.arraycopy(value, offset, output, 0, i);
- } else if (i + (val3 == 0 ? 1 : 2) >= output.length) {
- char[] newoutput = new char[output.length + (count / 6) + 3];
- System.arraycopy(output, 0, newoutput, 0, output.length);
- output = newoutput;
- }
-
- char val = upperValues[target];
- output[i++] = val;
- val = upperValues[target + 1];
- output[i++] = val;
- if (val3 != 0) {
- output[i++] = val3;
- }
- }
- }
- if (output == null) {
- return this;
- }
- return output.length == i || output.length - i < 8 ? new String(0, i,
- output) : new String(output, 0, i);
- // END android-changed
+ return CaseMapper.toUpperCase(locale, this, value, offset, count);
}
/**
@@ -2014,7 +1888,7 @@
* @return the object converted to a string, or the string {@code "null"}.
*/
public static String valueOf(Object value) {
- return value != null ? value.toString() : "null"; //$NON-NLS-1$
+ return value != null ? value.toString() : "null";
}
/**
@@ -2027,7 +1901,7 @@
* @return the boolean converted to a string.
*/
public static String valueOf(boolean value) {
- return value ? "true" : "false"; //$NON-NLS-1$ //$NON-NLS-2$
+ return value ? "true" : "false";
}
/**